PyPI - edsl - Versions diffs - 0.1.37.dev4__py3-none-any.whl → 0.1.37.dev6__py3-none-any.whl - Mend

edsl 0.1.37.dev4__py3-none-any.whl → 0.1.37.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

edsl/__version__.py +1 -1
edsl/agents/Agent.py +86 -35
edsl/agents/AgentList.py +5 -0
edsl/agents/InvigilatorBase.py +2 -23
edsl/agents/PromptConstructor.py +147 -106
edsl/agents/descriptors.py +17 -4
edsl/config.py +1 -1
edsl/conjure/AgentConstructionMixin.py +11 -3
edsl/conversation/Conversation.py +66 -14
edsl/conversation/chips.py +95 -0
edsl/coop/coop.py +134 -3
edsl/data/Cache.py +1 -1
edsl/exceptions/BaseException.py +21 -0
edsl/exceptions/__init__.py +7 -3
edsl/exceptions/agents.py +17 -19
edsl/exceptions/results.py +11 -8
edsl/exceptions/scenarios.py +22 -0
edsl/exceptions/surveys.py +13 -10
edsl/inference_services/InferenceServicesCollection.py +32 -9
edsl/jobs/Jobs.py +265 -53
edsl/jobs/interviews/InterviewExceptionEntry.py +5 -1
edsl/jobs/tasks/TaskHistory.py +1 -0
edsl/language_models/KeyLookup.py +30 -0
edsl/language_models/LanguageModel.py +47 -59
edsl/language_models/__init__.py +1 -0
edsl/prompts/Prompt.py +8 -4
edsl/questions/QuestionBase.py +53 -13
edsl/questions/QuestionBasePromptsMixin.py +1 -33
edsl/questions/QuestionFunctional.py +2 -2
edsl/questions/descriptors.py +23 -28
edsl/results/DatasetExportMixin.py +25 -1
edsl/results/Result.py +16 -1
edsl/results/Results.py +31 -120
edsl/results/ResultsDBMixin.py +1 -1
edsl/results/Selector.py +18 -1
edsl/scenarios/Scenario.py +48 -12
edsl/scenarios/ScenarioHtmlMixin.py +7 -2
edsl/scenarios/ScenarioList.py +12 -1
edsl/surveys/Rule.py +10 -4
edsl/surveys/Survey.py +100 -77
edsl/utilities/utilities.py +18 -0
{edsl-0.1.37.dev4.dist-info → edsl-0.1.37.dev6.dist-info}/METADATA +1 -1
{edsl-0.1.37.dev4.dist-info → edsl-0.1.37.dev6.dist-info}/RECORD +45 -41
{edsl-0.1.37.dev4.dist-info → edsl-0.1.37.dev6.dist-info}/LICENSE +0 -0
{edsl-0.1.37.dev4.dist-info → edsl-0.1.37.dev6.dist-info}/WHEEL +0 -0

edsl/results/Results.py CHANGED Viewed

@@ -7,11 +7,17 @@ from __future__ import annotations
 import json
 import random
 from collections import UserList, defaultdict
-from typing import Optional, Callable, Any, Type, Union, List
+from typing import Optional, Callable, Any, Type, Union, List, TYPE_CHECKING
+if TYPE_CHECKING:
+    from edsl import Survey, Cache, AgentList, ModelList, ScenarioList
+    from edsl.results.Result import Result
+    from edsl.jobs.tasks.TaskHistory import TaskHistory
 from simpleeval import EvalWithCompoundTypes
 from edsl.exceptions.results import (
+    ResultsError,
     ResultsBadMutationstringError,
     ResultsColumnNotFoundError,
     ResultsInvalidNameError,
@@ -40,7 +46,7 @@ class Mixins(
     ResultsGGMixin,
     ResultsToolsMixin,
 ):
-    def print_long(self, max_rows=None) -> None:
+    def print_long(self, max_rows: int = None) -> None:
         """Print the results in long format.
         >>> from edsl.results import Results
@@ -84,13 +90,13 @@ class Results(UserList, Mixins, Base):
     def __init__(
         self,
-        survey: Optional["Survey"] = None,
-        data: Optional[list["Result"]] = None,
+        survey: Optional[Survey] = None,
+        data: Optional[list[Result]] = None,
         created_columns: Optional[list[str]] = None,
-        cache: Optional["Cache"] = None,
+        cache: Optional[Cache] = None,
         job_uuid: Optional[str] = None,
         total_results: Optional[int] = None,
-        task_history: Optional["TaskHistory"] = None,
+        task_history: Optional[TaskHistory] = None,
     ):
         """Instantiate a `Results` object with a survey and a list of `Result` objects.
@@ -235,11 +241,11 @@ class Results(UserList, Mixins, Base):
         >>> r3 = r + r2
         """
         if self.survey != other.survey:
-            raise Exception(
-                "The surveys are not the same so they cannot be added together."
+            raise ResultsError(
+                "The surveys are not the same so the the results cannot be added together."
             )
         if self.created_columns != other.created_columns:
-            raise Exception(
+            raise ResultsError(
                 "The created columns are not the same so they cannot be added together."
             )
@@ -258,17 +264,6 @@ class Results(UserList, Mixins, Base):
         from IPython.display import HTML
         json_str = json.dumps(self.to_dict()["data"], indent=4)
-        # from pygments import highlight
-        # from pygments.lexers import JsonLexer
-        # 3from pygments.formatters import HtmlFormatter
-        # formatted_json = highlight(
-        #    json_str,
-        #    JsonLexer(),
-        #    HtmlFormatter(style="default", full=True, noclasses=True),
-        # )
-        # return HTML(formatted_json).data
-        # print(json_str)
         return f"<pre>{json_str}</pre>"
     def _to_dict(self, sort=False):
@@ -328,7 +323,7 @@ class Results(UserList, Mixins, Base):
     def hashes(self) -> set:
         return set(hash(result) for result in self.data)
-    def sample(self, n: int) -> "Results":
+    def sample(self, n: int) -> Results:
         """Return a random sample of the results.
         :param n: The number of samples to return.
@@ -346,7 +341,7 @@ class Results(UserList, Mixins, Base):
                 indices = list(range(len(values)))
                 sampled_indices = random.sample(indices, n)
                 if n > len(indices):
-                    raise ValueError(
+                    raise ResultsError(
                         f"Cannot sample {n} items from a list of length {len(indices)}."
                     )
             entry[key] = [values[i] for i in sampled_indices]
@@ -399,11 +394,12 @@ class Results(UserList, Mixins, Base):
         - Uses the key_to_data_type property of the Result class.
         - Includes any columns that the user has created with `mutate`
         """
-        d = {}
+        d: dict = {}
         for result in self.data:
             d.update(result.key_to_data_type)
         for column in self.created_columns:
             d[column] = "answer"
         return d
     @property
@@ -453,7 +449,7 @@ class Results(UserList, Mixins, Base):
         from edsl.utilities.utilities import shorten_string
         if not self.survey:
-            raise Exception("Survey is not defined so no answer keys are available.")
+            raise ResultsError("Survey is not defined so no answer keys are available.")
         answer_keys = self._data_type_to_keys["answer"]
         answer_keys = {k for k in answer_keys if "_comment" not in k}
@@ -466,7 +462,7 @@ class Results(UserList, Mixins, Base):
         return sorted_dict
     @property
-    def agents(self) -> "AgentList":
+    def agents(self) -> AgentList:
         """Return a list of all of the agents in the Results.
         Example:
@@ -480,7 +476,7 @@ class Results(UserList, Mixins, Base):
         return AgentList([r.agent for r in self.data])
     @property
-    def models(self) -> list[Type["LanguageModel"]]:
+    def models(self) -> ModelList:
         """Return a list of all of the models in the Results.
         Example:
@@ -489,10 +485,12 @@ class Results(UserList, Mixins, Base):
         >>> r.models[0]
         Model(model_name = ...)
         """
-        return [r.model for r in self.data]
+        from edsl import ModelList
+        return ModelList([r.model for r in self.data])
     @property
-    def scenarios(self) -> "ScenarioList":
+    def scenarios(self) -> ScenarioList:
         """Return a list of all of the scenarios in the Results.
         Example:
@@ -569,7 +567,7 @@ class Results(UserList, Mixins, Base):
         )
         return sorted(list(all_keys))
-    def first(self) -> "Result":
+    def first(self) -> Result:
         """Return the first observation in the results.
         Example:
@@ -819,7 +817,7 @@ class Results(UserList, Mixins, Base):
         return Results(survey=self.survey, data=new_data, created_columns=None)
-    def select(self, *columns: Union[str, list[str]]) -> "Dataset":
+    def select(self, *columns: Union[str, list[str]]) -> Results:
         """
         Select data from the results and format it.
@@ -832,93 +830,12 @@ class Results(UserList, Mixins, Base):
         Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
         >>> results.select('how_feeling', 'model', 'how_feeling')
-        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'model.model': ['...', '...', '...', '...']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
+        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'model.model': ['...', '...', '...', '...']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
         >>> from edsl import Results; r = Results.example(); r.select('answer.how_feeling_y')
         Dataset([{'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
         """
-        # if len(self) == 0:
-        #    raise Exception("No data to select from---the Results object is empty.")
-        if not columns or columns == ("*",) or columns == (None,):
-            # is the users passes nothing, then we'll return all the columns
-            columns = ("*.*",)
-        if isinstance(columns[0], list):
-            columns = tuple(columns[0])
-        def get_data_types_to_return(parsed_data_type):
-            if parsed_data_type == "*":  # they want all of the columns
-                return self.known_data_types
-            else:
-                if parsed_data_type not in self.known_data_types:
-                    raise Exception(
-                        f"Data type {parsed_data_type} not found in data. Did you mean one of {self.known_data_types}"
-                    )
-                return [parsed_data_type]
-        # we're doing to populate this with the data we want to fetch
-        to_fetch = defaultdict(list)
-        new_data = []
-        items_in_order = []
-        # iterate through the passed columns
-        for column in columns:
-            # a user could pass 'result.how_feeling' or just 'how_feeling'
-            matches = self._matching_columns(column)
-            if len(matches) > 1:
-                raise Exception(
-                    f"Column '{column}' is ambiguous. Did you mean one of {matches}?"
-                )
-            if len(matches) == 0 and ".*" not in column:
-                raise Exception(f"Column '{column}' not found in data.")
-            if len(matches) == 1:
-                column = matches[0]
-            parsed_data_type, parsed_key = self._parse_column(column)
-            data_types = get_data_types_to_return(parsed_data_type)
-            found_once = False  # we need to track this to make sure we found the key at least once
-            for data_type in data_types:
-                # the keys for that data_type e.g.,# if data_type is 'answer', then the keys are 'how_feeling', 'how_feeling_comment', etc.
-                relevant_keys = self._data_type_to_keys[data_type]
-                for key in relevant_keys:
-                    if key == parsed_key or parsed_key == "*":
-                        found_once = True
-                        to_fetch[data_type].append(key)
-                        items_in_order.append(data_type + "." + key)
-            if not found_once:
-                raise Exception(f"Key {parsed_key} not found in data.")
-        for data_type in to_fetch:
-            for key in to_fetch[data_type]:
-                entries = self._fetch_list(data_type, key)
-                new_data.append({data_type + "." + key: entries})
-        def sort_by_key_order(dictionary):
-            # Extract the single key from the dictionary
-            single_key = next(iter(dictionary))
-            # Return the index of this key in the list_of_keys
-            return items_in_order.index(single_key)
-        # sorted(new_data, key=sort_by_key_order)
-        from edsl.results.Dataset import Dataset
-        sorted_new_data = []
-        # WORKS but slow
-        for key in items_in_order:
-            for d in new_data:
-                if key in d:
-                    sorted_new_data.append(d)
-                    break
-        return Dataset(sorted_new_data)
-    def select(self, *columns: Union[str, list[str]]) -> "Results":
         from edsl.results.Selector import Selector
         if len(self) == 0:
@@ -1028,6 +945,7 @@ class Results(UserList, Mixins, Base):
         Traceback (most recent call last):
         ...
         edsl.exceptions.results.ResultsFilterError: You must use '==' instead of '=' in the filter expression.
+        ...
         >>> r.filter("how_feeling == 'Great' or how_feeling == 'Terrible'").select('how_feeling').print()
         ┏━━━━━━━━━━━━━━┓
@@ -1105,6 +1023,7 @@ class Results(UserList, Mixins, Base):
             stop_on_exception=True,
             skip_retry=True,
             raise_validation_errors=True,
+            disable_remote_cache=True,
             disable_remote_inference=True,
         )
         return results
@@ -1112,14 +1031,6 @@ class Results(UserList, Mixins, Base):
     def rich_print(self):
         """Display an object as a table."""
         pass
-        # with io.StringIO() as buf:
-        #     console = Console(file=buf, record=True)
-        #     for index, result in enumerate(self):
-        #         console.print(f"Result {index}")
-        #         console.print(result.rich_print())
-        #     return console.export_text()
     def __str__(self):
         data = self.to_dict()["data"]

edsl/results/ResultsDBMixin.py CHANGED Viewed

@@ -93,7 +93,7 @@ class ResultsDBMixin:
             from sqlalchemy import create_engine
             engine = create_engine("sqlite:///:memory:")
-            df = self.to_pandas(remove_prefix=remove_prefix)
+            df = self.to_pandas(remove_prefix=remove_prefix, lists_as_strings=True)
             df.to_sql("self", engine, index=False, if_exists="replace")
             return engine.connect()
         else:

edsl/results/Selector.py CHANGED Viewed

@@ -12,6 +12,7 @@ class Selector:
         fetch_list_func,
         columns: List[str],
     ):
+        """Selects columns from a Results object"""
         self.known_data_types = known_data_types
         self._data_type_to_keys = data_type_to_keys
         self._key_to_data_type = key_to_data_type
@@ -21,10 +22,19 @@ class Selector:
     def select(self, *columns: Union[str, List[str]]) -> "Dataset":
         columns = self._normalize_columns(columns)
         to_fetch = self._get_columns_to_fetch(columns)
+        # breakpoint()
         new_data = self._fetch_data(to_fetch)
         return Dataset(new_data)
     def _normalize_columns(self, columns: Union[str, List[str]]) -> tuple:
+        """Normalize the columns to a tuple of strings
+        >>> s = Selector([], {}, {}, lambda x, y: x, [])
+        >>> s._normalize_columns([["a", "b"], ])
+        ('a', 'b')
+        >>> s._normalize_columns(None)
+        ('*.*',)
+        """
         if not columns or columns == ("*",) or columns == (None,):
             return ("*.*",)
         if isinstance(columns[0], list):
@@ -37,6 +47,7 @@ class Selector:
         for column in columns:
             matches = self._find_matching_columns(column)
+            # breakpoint()
             self._validate_matches(column, matches)
             if len(matches) == 1:
@@ -52,7 +63,7 @@ class Selector:
             search_in_list = self.columns
         else:
             search_in_list = [s.split(".")[1] for s in self.columns]
+        # breakpoint()
         matches = [s for s in search_in_list if s.startswith(partial_name)]
         return [partial_name] if partial_name in matches else matches
@@ -116,3 +127,9 @@ class Selector:
                 new_data.append({f"{data_type}.{key}": entries})
         return [d for key in self.items_in_order for d in new_data if key in d]
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()

edsl/scenarios/Scenario.py CHANGED Viewed

@@ -11,18 +11,26 @@ from uuid import uuid4
 from edsl.Base import Base
 from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
 from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
+from edsl.exceptions.scenarios import ScenarioError
 class Scenario(Base, UserDict, ScenarioHtmlMixin):
     """A Scenario is a dictionary of keys/values.
-    They can be used parameterize edsl questions."""
+    They can be used parameterize EDSL questions."""
+    __doc__ = "https://docs.expectedparrot.com/en/latest/scenarios.html"
     def __init__(self, data: Union[dict, None] = None, name: str = None):
         """Initialize a new Scenario.
-        :param data: A dictionary of keys/values for parameterizing questions.
-        """
+        # :param data: A dictionary of keys/values for parameterizing questions.
+        #"""
+        if not isinstance(data, dict) and data is not None:
+            raise EDSLScenarioError(
+                "You must pass in a dictionary to initialize a Scenario."
+            )
         self.data = data if data is not None else {}
         self.name = name
@@ -41,13 +49,6 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         return ScenarioList([copy.deepcopy(self) for _ in range(n)])
-    # @property
-    # def has_image(self) -> bool:
-    #     """Return whether the scenario has an image."""
-    #     if not hasattr(self, "_has_image"):
-    #         self._has_image = False
-    #     return self._has_image
     @property
     def has_jinja_braces(self) -> bool:
         """Return whether the scenario has jinja braces. This matters for rendering.
@@ -106,7 +107,9 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
             s = Scenario(data1 | data2)
             return s
-    def rename(self, replacement_dict: dict) -> "Scenario":
+    def rename(
+        self, old_name_or_replacement_dict: dict, new_name: Optional[str] = None
+    ) -> "Scenario":
         """Rename the keys of a scenario.
         :param replacement_dict: A dictionary of old keys to new keys.
@@ -116,7 +119,16 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         >>> s = Scenario({"food": "wood chips"})
         >>> s.rename({"food": "food_preference"})
         Scenario({'food_preference': 'wood chips'})
+        >>> s = Scenario({"food": "wood chips"})
+        >>> s.rename("food", "snack")
+        Scenario({'snack': 'wood chips'})
         """
+        if isinstance(old_name_or_replacement_dict, str) and new_name is not None:
+            replacement_dict = {old_name_or_replacement_dict: new_name}
+        else:
+            replacement_dict = old_name_or_replacement_dict
         new_scenario = Scenario()
         for key, value in self.items():
             if key in replacement_dict:
@@ -216,6 +228,20 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
                 new_scenario[key] = self[key]
         return new_scenario
+    def keep(self, list_of_keys: List[str]) -> "Scenario":
+        """Keep a subset of keys from a scenario.
+        :param list_of_keys: The keys to keep.
+        Example:
+        >>> s = Scenario({"food": "wood chips", "drink": "water"})
+        >>> s.keep(["food"])
+        Scenario({'food': 'wood chips'})
+        """
+        return self.select(list_of_keys)
     @classmethod
     def from_url(cls, url: str, field_name: Optional[str] = "text") -> "Scenario":
         """Creates a scenario from a URL.
@@ -231,7 +257,17 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
     @classmethod
     def from_file(cls, file_path: str, field_name: str) -> "Scenario":
-        """Creates a scenario from a file."""
+        """Creates a scenario from a file.
+        >>> import tempfile
+        >>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
+        ...     _ = f.write("This is a test.")
+        ...     _ = f.flush()
+        ...     s = Scenario.from_file(f.name, "file")
+        >>> s
+        Scenario({'file': FileStore(path='...')})
+        """
         from edsl.scenarios.FileStore import FileStore
         fs = FileStore(file_path)

edsl/scenarios/ScenarioHtmlMixin.py CHANGED Viewed

@@ -1,19 +1,24 @@
 import requests
+from typing import Optional
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
 class ScenarioHtmlMixin:
     @classmethod
-    def from_html(cls, url: str) -> "Scenario":
+    def from_html(cls, url: str, field_name: Optional[str] = None) -> "Scenario":
         """Create a scenario from HTML content.
         :param html: The HTML content.
+        :param field_name: The name of the field containing the HTML content.
         """
         html = cls.fetch_html(url)
         text = cls.extract_text(html)
-        return cls({"url": url, "html": html, "text": text})
+        if not field_name:
+            field_name = "text"
+        return cls({"url": url, "html": html, field_name: text})
     def fetch_html(url):
         # Define the user-agent to mimic a browser

edsl/scenarios/ScenarioList.py CHANGED Viewed

@@ -538,6 +538,17 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         """
         return ScenarioList([scenario.drop(fields) for scenario in self.data])
+    def keep(self, *fields) -> ScenarioList:
+        """Keep only the specified fields in the scenarios.
+        Example:
+        >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
+        >>> s.keep('a')
+        ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
+        """
+        return ScenarioList([scenario.keep(fields) for scenario in self.data])
     @classmethod
     def from_list(
         cls, name: str, values: list, func: Optional[Callable] = None
@@ -1050,7 +1061,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         elif isinstance(key, int):
             return super().__getitem__(key)
         else:
-            return self.to_dict()[key]
+            return self._to_dict()[key]
     def to_agent_list(self):
         """Convert the ScenarioList to an AgentList.

edsl/surveys/Rule.py CHANGED Viewed

@@ -25,6 +25,8 @@ from jinja2 import Template
 from rich import print
 from simpleeval import EvalWithCompoundTypes
+from edsl.exceptions.surveys import SurveyError
 from edsl.exceptions import (
     SurveyRuleCannotEvaluateError,
     SurveyRuleCollectionHasNoRulesAtNodeError,
@@ -47,11 +49,11 @@ class QuestionIndex:
     def __set__(self, obj, value):
         if not isinstance(value, (int, EndOfSurvey.__class__)):
-            raise ValueError(f"{self.name} must be an integer or EndOfSurvey")
+            raise SurveyError(f"{self.name} must be an integer or EndOfSurvey")
         if self.name == "_next_q" and isinstance(value, int):
             current_q = getattr(obj, "_current_q")
             if value <= current_q:
-                raise ValueError("next_q must be greater than current_q")
+                raise SurveyError("next_q must be greater than current_q")
         setattr(obj, self.name, value)
@@ -100,13 +102,17 @@ class Rule:
                 raise SurveyRuleSendsYouBackwardsError
         if not self.next_q == EndOfSurvey and self.current_q > self.next_q:
-            raise SurveyRuleSendsYouBackwardsError
+            raise SurveyRuleSendsYouBackwardsError(
+                f"current_q: {self.current_q}, next_q: {self.next_q}"
+            )
         # get the AST for the expression - used to extract the variables referenced in the expression
         try:
             self.ast_tree = ast.parse(self.expression)
         except SyntaxError:
-            raise SurveyRuleSkipLogicSyntaxError
+            raise SurveyRuleSkipLogicSyntaxError(
+                f"The expression {self.expression} is not valid Python syntax."
+            )
         # get the names of the variables in the expression
         # e.g., q1 == 'yes' -> ['q1']

edsl 0.1.37.dev4__py3-none-any.whl → 0.1.37.dev6__py3-none-any.whl

edsl 0.1.37.dev4__py3-none-any.whl → 0.1.37.dev6__py3-none-any.whl