PyPI - edsl - Versions diffs - 0.1.28__py3-none-any.whl → 0.1.29__py3-none-any.whl - Mend

edsl 0.1.28__py3-none-any.whl → 0.1.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

edsl/Base.py +18 -18
edsl/__init__.py +24 -24
edsl/__version__.py +1 -1
edsl/agents/Agent.py +77 -41
edsl/agents/AgentList.py +35 -6
edsl/agents/Invigilator.py +19 -1
edsl/agents/InvigilatorBase.py +15 -10
edsl/agents/PromptConstructionMixin.py +342 -100
edsl/agents/descriptors.py +2 -1
edsl/base/Base.py +289 -0
edsl/config.py +2 -1
edsl/conjure/InputData.py +39 -8
edsl/coop/coop.py +188 -151
edsl/coop/utils.py +43 -75
edsl/data/Cache.py +19 -5
edsl/data/SQLiteDict.py +11 -3
edsl/jobs/Answers.py +15 -1
edsl/jobs/Jobs.py +92 -47
edsl/jobs/buckets/ModelBuckets.py +4 -2
edsl/jobs/buckets/TokenBucket.py +1 -2
edsl/jobs/interviews/Interview.py +3 -9
edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
edsl/jobs/interviews/InterviewTaskBuildingMixin.py +15 -10
edsl/jobs/runners/JobsRunnerAsyncio.py +21 -25
edsl/jobs/tasks/TaskHistory.py +4 -3
edsl/language_models/LanguageModel.py +5 -11
edsl/language_models/ModelList.py +3 -3
edsl/language_models/repair.py +8 -7
edsl/notebooks/Notebook.py +40 -3
edsl/prompts/Prompt.py +31 -19
edsl/questions/QuestionBase.py +38 -13
edsl/questions/QuestionBudget.py +5 -6
edsl/questions/QuestionCheckBox.py +7 -3
edsl/questions/QuestionExtract.py +5 -3
edsl/questions/QuestionFreeText.py +3 -3
edsl/questions/QuestionFunctional.py +0 -3
edsl/questions/QuestionList.py +3 -4
edsl/questions/QuestionMultipleChoice.py +16 -8
edsl/questions/QuestionNumerical.py +4 -3
edsl/questions/QuestionRank.py +5 -3
edsl/questions/__init__.py +4 -3
edsl/questions/descriptors.py +4 -2
edsl/questions/question_registry.py +20 -31
edsl/questions/settings.py +1 -1
edsl/results/Dataset.py +31 -0
edsl/results/DatasetExportMixin.py +493 -0
edsl/results/Result.py +22 -74
edsl/results/Results.py +105 -67
edsl/results/ResultsDBMixin.py +7 -3
edsl/results/ResultsExportMixin.py +22 -537
edsl/results/ResultsGGMixin.py +3 -3
edsl/results/ResultsToolsMixin.py +5 -5
edsl/scenarios/FileStore.py +140 -0
edsl/scenarios/Scenario.py +5 -6
edsl/scenarios/ScenarioList.py +44 -15
edsl/scenarios/ScenarioListExportMixin.py +32 -0
edsl/scenarios/ScenarioListPdfMixin.py +2 -1
edsl/scenarios/__init__.py +1 -0
edsl/study/ObjectEntry.py +89 -13
edsl/study/ProofOfWork.py +5 -2
edsl/study/SnapShot.py +4 -8
edsl/study/Study.py +21 -14
edsl/study/__init__.py +2 -0
edsl/surveys/MemoryPlan.py +11 -4
edsl/surveys/Survey.py +46 -7
edsl/surveys/SurveyExportMixin.py +4 -2
edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
edsl/tools/plotting.py +4 -2
edsl/utilities/__init__.py +21 -21
edsl/utilities/interface.py +66 -45
edsl/utilities/utilities.py +11 -13
{edsl-0.1.28.dist-info → edsl-0.1.29.dist-info}/METADATA +11 -10
{edsl-0.1.28.dist-info → edsl-0.1.29.dist-info}/RECORD +75 -72
edsl-0.1.28.dist-info/entry_points.txt +0 -3
{edsl-0.1.28.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
{edsl-0.1.28.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0

edsl/results/Results.py CHANGED Viewed

@@ -5,16 +5,10 @@ It is not typically instantiated directly, but is returned by the run method of
 from __future__ import annotations
 import json
-import hashlib
 import random
 from collections import UserList, defaultdict
 from typing import Optional, Callable, Any, Type, Union, List
-from pygments import highlight
-from pygments.lexers import JsonLexer
-from pygments.formatters import HtmlFormatter
-from IPython.display import HTML
 from simpleeval import EvalWithCompoundTypes
 from edsl.exceptions.results import (
@@ -24,29 +18,17 @@ from edsl.exceptions.results import (
     ResultsMutateError,
     ResultsFilterError,
 )
-from edsl.agents import Agent, AgentList
-from edsl.language_models.LanguageModel import LanguageModel
-from edsl.results.Dataset import Dataset
-from edsl.results.Result import Result
 from edsl.results.ResultsExportMixin import ResultsExportMixin
-from edsl.scenarios import Scenario
-# from edsl.scenarios.ScenarioList import ScenarioList
-from edsl.surveys import Survey
-from edsl.data.Cache import Cache
-from edsl.utilities import (
-    is_valid_variable_name,
-    shorten_string,
-)
-from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
-from edsl.utilities.utilities import dict_hash
 from edsl.results.ResultsToolsMixin import ResultsToolsMixin
 from edsl.results.ResultsDBMixin import ResultsDBMixin
 from edsl.results.ResultsGGMixin import ResultsGGMixin
+from edsl.results.ResultsFetchMixin import ResultsFetchMixin
+from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
+from edsl.utilities.utilities import dict_hash
 from edsl.Base import Base
-from edsl.results.ResultsFetchMixin import ResultsFetchMixin
 class Mixins(
@@ -56,7 +38,22 @@ class Mixins(
     ResultsGGMixin,
     ResultsToolsMixin,
 ):
-    pass
+    def print_long(self, max_rows=None) -> None:
+        """Print the results in long format.
+        >>> from edsl.results import Results
+        >>> r = Results.example()
+        >>> r.select('how_feeling').print_long(max_rows = 2)
+        ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┓
+        ┃ Result index ┃ Key         ┃ Value ┃
+        ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━┩
+        │ 0            │ how_feeling │ OK    │
+        │ 1            │ how_feeling │ Great │
+        └──────────────┴─────────────┴───────┘
+        """
+        from edsl.utilities.interface import print_results_long
+        print_results_long(self, max_rows=max_rows)
 class Results(UserList, Mixins, Base):
@@ -84,10 +81,10 @@ class Results(UserList, Mixins, Base):
     def __init__(
         self,
-        survey: Optional[Survey] = None,
-        data: Optional[list[Result]] = None,
+        survey: Optional["Survey"] = None,
+        data: Optional[list["Result"]] = None,
         created_columns: Optional[list[str]] = None,
-        cache: Optional[Cache] = None,
+        cache: Optional["Cache"] = None,
         job_uuid: Optional[str] = None,
         total_results: Optional[int] = None,
     ):
@@ -100,6 +97,8 @@ class Results(UserList, Mixins, Base):
         :param total_results: An integer representing the total number of results.
         """
         super().__init__(data)
+        from edsl.data.Cache import Cache
         self.survey = survey
         self.created_columns = created_columns or []
         self._job_uuid = job_uuid
@@ -125,6 +124,10 @@ class Results(UserList, Mixins, Base):
         raise TypeError("Invalid argument type")
     def _update_results(self) -> None:
+        from edsl import Agent, Scenario
+        from edsl.language_models import LanguageModel
+        from edsl.results import Result
         if self._job_uuid and len(self.data) < self._total_results:
             results = [
                 Result(
@@ -165,16 +168,16 @@ class Results(UserList, Mixins, Base):
         )
     def __repr__(self) -> str:
-        # return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
-        return f"""Results object
-                Size: {len(self.data)}.
-                Survey questions: {[q.question_name for q in self.survey.questions]}.
-                Created columns: {self.created_columns}
-                Hash: {hash(self)}
-            """
+        return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
     def _repr_html_(self) -> str:
+        from IPython.display import HTML
         json_str = json.dumps(self.to_dict()["data"], indent=4)
+        from pygments import highlight
+        from pygments.lexers import JsonLexer
+        from pygments.formatters import HtmlFormatter
         formatted_json = highlight(
             json_str,
             JsonLexer(),
@@ -183,6 +186,8 @@ class Results(UserList, Mixins, Base):
         return HTML(formatted_json).data
     def _to_dict(self, sort=False):
+        from edsl.data.Cache import Cache
         if sort:
             data = sorted([result for result in self.data], key=lambda x: hash(x))
         else:
@@ -232,6 +237,31 @@ class Results(UserList, Mixins, Base):
     def hashes(self) -> set:
         return set(hash(result) for result in self.data)
+    def sample(self, n: int) -> "Results":
+        """Return a random sample of the results.
+        :param n: The number of samples to return.
+        >>> from edsl.results import Results
+        >>> r = Results.example()
+        >>> len(r.sample(2))
+        2
+        """
+        indices = None
+        for entry in self:
+            key, values = list(entry.items())[0]
+            if indices is None:  # gets the indices for the first time
+                indices = list(range(len(values)))
+                sampled_indices = random.sample(indices, n)
+                if n > len(indices):
+                    raise ValueError(
+                        f"Cannot sample {n} items from a list of length {len(indices)}."
+                    )
+            entry[key] = [values[i] for i in sampled_indices]
+        return self
     @classmethod
     @remove_edsl_version
     def from_dict(cls, data: dict[str, Any]) -> Results:
@@ -247,12 +277,20 @@ class Results(UserList, Mixins, Base):
         >>> r == r2
         True
         """
-        results = cls(
-            survey=Survey.from_dict(data["survey"]),
-            data=[Result.from_dict(r) for r in data["data"]],
-            created_columns=data.get("created_columns", None),
-            cache=Cache.from_dict(data.get("cache")) if "cache" in data else Cache(),
-        )
+        from edsl import Survey, Cache
+        from edsl.results.Result import Result
+        try:
+            results = cls(
+                survey=Survey.from_dict(data["survey"]),
+                data=[Result.from_dict(r) for r in data["data"]],
+                created_columns=data.get("created_columns", None),
+                cache=(
+                    Cache.from_dict(data.get("cache")) if "cache" in data else Cache()
+                ),
+            )
+        except Exception as e:
+            breakpoint()
         return results
     ######################
@@ -319,6 +357,8 @@ class Results(UserList, Mixins, Base):
         >>> r.answer_keys
         {'how_feeling': 'How are you this {{ period }}?', 'how_feeling_yesterday': 'How were you feeling yesterday {{ period }}?'}
         """
+        from edsl.utilities.utilities import shorten_string
         if not self.survey:
             raise Exception("Survey is not defined so no answer keys are available.")
@@ -333,7 +373,7 @@ class Results(UserList, Mixins, Base):
         return sorted_dict
     @property
-    def agents(self) -> AgentList:
+    def agents(self) -> "AgentList":
         """Return a list of all of the agents in the Results.
         Example:
@@ -342,10 +382,12 @@ class Results(UserList, Mixins, Base):
         >>> r.agents
         AgentList([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'}), Agent(traits = {'status': 'Sad'})])
         """
+        from edsl import AgentList
         return AgentList([r.agent for r in self.data])
     @property
-    def models(self) -> list[Type[LanguageModel]]:
+    def models(self) -> list[Type["LanguageModel"]]:
         """Return a list of all of the models in the Results.
         Example:
@@ -467,7 +509,7 @@ class Results(UserList, Mixins, Base):
                     )
         return data_type, key
-    def first(self) -> Result:
+    def first(self) -> "Result":
         """Return the first observation in the results.
         Example:
@@ -585,6 +627,8 @@ class Results(UserList, Mixins, Base):
             )
         raw_var_name, expression = new_var_string.split("=", 1)
         var_name = raw_var_name.strip()
+        from edsl.utilities.utilities import is_valid_variable_name
         if not is_valid_variable_name(var_name):
             raise ResultsInvalidNameError(f"{var_name} is not a valid variable name.")
@@ -596,7 +640,7 @@ class Results(UserList, Mixins, Base):
                 names=result.combined_dict, functions=functions_dict
             )
-        def new_result(old_result: Result, var_name: str) -> Result:
+        def new_result(old_result: "Result", var_name: str) -> "Result":
             evaluator = create_evaluator(old_result)
             value = evaluator.eval(expression)
             new_result = old_result.copy()
@@ -686,7 +730,7 @@ class Results(UserList, Mixins, Base):
         return Results(survey=self.survey, data=new_data, created_columns=None)
-    def select(self, *columns: Union[str, list[str]]) -> Dataset:
+    def select(self, *columns: Union[str, list[str]]) -> "Dataset":
         """
         Select data from the results and format it.
@@ -698,6 +742,7 @@ class Results(UserList, Mixins, Base):
         >>> results.select('how_feeling')
         Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
         """
         if len(self) == 0:
             raise Exception("No data to select from---the Results object is empty.")
@@ -754,13 +799,22 @@ class Results(UserList, Mixins, Base):
             return items_in_order.index(single_key)
         sorted(new_data, key=sort_by_key_order)
+        from edsl.results.Dataset import Dataset
         return Dataset(new_data)
-    def sort_by(self, columns, reverse: bool = False) -> Results:
+    def sort_by(self, *columns: str, reverse: bool = False) -> Results:
+        import warnings
+        warnings.warn(
+            "sort_by is deprecated. Use order_by instead.", DeprecationWarning
+        )
+        return self.order_by(*columns, reverse=reverse)
+    def order_by(self, *columns: str, reverse: bool = False) -> Results:
         """Sort the results by one or more columns.
-        :param columns: A string or a list of strings that are column names.
+        :param columns: One or more column names as strings.
         :param reverse: A boolean that determines whether to sort in reverse order.
         Each column name can be a single key, e.g. "how_feeling", or a dot-separated string, e.g. "answer.how_feeling".
@@ -768,7 +822,7 @@ class Results(UserList, Mixins, Base):
         Example:
         >>> r = Results.example()
-        >>> r.sort_by(['how_feeling'], reverse=False).select('how_feeling').print()
+        >>> r.sort_by('how_feeling', reverse=False).select('how_feeling').print()
         ┏━━━━━━━━━━━━━━┓
         ┃ answer       ┃
         ┃ .how_feeling ┃
@@ -781,7 +835,7 @@ class Results(UserList, Mixins, Base):
         ├──────────────┤
         │ Terrible     │
         └──────────────┘
-        >>> r.sort_by(['how_feeling'], reverse=True).select('how_feeling').print()
+        >>> r.sort_by('how_feeling', reverse=True).select('how_feeling').print()
         ┏━━━━━━━━━━━━━━┓
         ┃ answer       ┃
         ┃ .how_feeling ┃
@@ -795,8 +849,6 @@ class Results(UserList, Mixins, Base):
         │ Great        │
         └──────────────┘
         """
-        if isinstance(columns, str):
-            columns = [columns]
         def to_numeric_if_possible(v):
             try:
@@ -805,28 +857,14 @@ class Results(UserList, Mixins, Base):
                 return v
         def sort_key(item):
-            # Create an empty list to store the key components for sorting
             key_components = []
-            # Loop through each column specified in the sort
             for col in columns:
-                # Parse the column into its data type and key
                 data_type, key = self._parse_column(col)
-                # Retrieve the value from the item based on the parsed data type and key
                 value = item.get_value(data_type, key)
-                # Convert the value to numeric if possible, and append it to the key components
                 key_components.append(to_numeric_if_possible(value))
-            # Convert the list of key components into a tuple to serve as the sorting key
             return tuple(key_components)
-        new_data = sorted(
-            self.data,
-            key=sort_key,
-            reverse=reverse,
-        )
+        new_data = sorted(self.data, key=sort_key, reverse=reverse)
         return Results(survey=self.survey, data=new_data, created_columns=None)
     def filter(self, expression: str) -> Results:
@@ -920,7 +958,7 @@ class Results(UserList, Mixins, Base):
         :param debug: if False, uses actual API calls
         """
-        from edsl.jobs import Jobs
+        from edsl.jobs.Jobs import Jobs
         from edsl.data.Cache import Cache
         c = Cache()

edsl/results/ResultsDBMixin.py CHANGED Viewed

@@ -1,8 +1,6 @@
 """Mixin for working with SQLite respresentation of a 'Results' object."""
-import pandas as pd
 import sqlite3
-from sqlalchemy import create_engine
 from enum import Enum
 from typing import Literal, Union, Optional
@@ -92,6 +90,8 @@ class ResultsDBMixin:
             conn.commit()
             return conn
         elif shape == SQLDataShape.WIDE:
+            from sqlalchemy import create_engine
             engine = create_engine("sqlite:///:memory:")
             df = self.to_pandas(remove_prefix=remove_prefix)
             df.to_sql("self", engine, index=False, if_exists="replace")
@@ -121,7 +121,7 @@ class ResultsDBMixin:
         to_list=False,
         to_latex=False,
         filename: Optional[str] = None,
-    ) -> Union[pd.DataFrame, str]:
+    ) -> Union["pd.DataFrame", str]:
         """Execute a SQL query and return the results as a DataFrame.
         :param query: The SQL query to execute
@@ -151,6 +151,8 @@ class ResultsDBMixin:
         2    Terrible
         3          OK
         """
+        import pandas as pd
         shape_enum = self._get_shape_enum(shape)
         conn = self._db(shape=shape_enum, remove_prefix=remove_prefix)
@@ -205,6 +207,8 @@ class ResultsDBMixin:
         ...
         <BLANKLINE>
         """
+        import pandas as pd
         shape_enum = self._get_shape_enum(shape)
         conn = self._db(shape=shape_enum, remove_prefix=remove_prefix)

edsl 0.1.28__py3-none-any.whl → 0.1.29__py3-none-any.whl

edsl 0.1.28__py3-none-any.whl → 0.1.29__py3-none-any.whl