PyPI - edsl - Versions diffs - 0.1.30.dev5__py3-none-any.whl → 0.1.31.dev1__py3-none-any.whl - Mend

edsl 0.1.30.dev5__py3-none-any.whl → 0.1.31.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

edsl/__version__.py +1 -1
edsl/coop/utils.py +9 -1
edsl/jobs/buckets/TokenBucket.py +3 -3
edsl/jobs/interviews/Interview.py +10 -10
edsl/jobs/interviews/InterviewTaskBuildingMixin.py +9 -7
edsl/jobs/tasks/QuestionTaskCreator.py +2 -3
edsl/language_models/LanguageModel.py +6 -1
edsl/language_models/ModelList.py +8 -2
edsl/language_models/registry.py +12 -0
edsl/questions/QuestionFunctional.py +8 -7
edsl/questions/QuestionMultipleChoice.py +14 -12
edsl/questions/descriptors.py +6 -4
edsl/results/DatasetExportMixin.py +174 -76
edsl/results/Result.py +13 -11
edsl/results/Results.py +19 -16
edsl/results/ResultsToolsMixin.py +1 -1
edsl/scenarios/ScenarioList.py +44 -19
edsl/scenarios/ScenarioListExportMixin.py +1 -1
edsl/surveys/Survey.py +11 -8
{edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dev1.dist-info}/METADATA +2 -1
{edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dev1.dist-info}/RECORD +23 -23
{edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dev1.dist-info}/LICENSE +0 -0
{edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dev1.dist-info}/WHEEL +0 -0

edsl/results/DatasetExportMixin.py CHANGED Viewed

@@ -3,12 +3,13 @@
 import base64
 import csv
 import io
+import html
-from typing import Literal, Optional, Union
+from typing import Literal, Optional, Union, List
 class DatasetExportMixin:
-    """Mixin class"""
+    """Mixin class for exporting Dataset objects."""
     def relevant_columns(
         self, data_type: Optional[str] = None, remove_prefix=False
@@ -28,19 +29,64 @@ class DatasetExportMixin:
         >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
         ['answer.how_feeling', 'answer.how_feeling_yesterday']
+        >>> from edsl.results import Results
+        >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
+        ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
+        >>> Results.example().relevant_columns(data_type = "flimflam")
+        Traceback (most recent call last):
+        ...
+        ValueError: No columns found for data type: flimflam. Available data types are: ['agent', 'answer', 'comment', 'model', 'prompt', 'question_options', 'question_text', 'question_type', 'raw_model_response', 'scenario'].
         """
         columns = [list(x.keys())[0] for x in self]
         if remove_prefix:
             columns = [column.split(".")[-1] for column in columns]
+        def get_data_type(column):
+            if "." in column:
+                return column.split(".")[0]
+            else:
+                return None
         if data_type:
+            all_columns = columns[:]
             columns = [
-                column for column in columns if column.split(".")[0] == data_type
+                column for column in columns if get_data_type(column) == data_type
             ]
+            if len(columns) == 0:
+                all_data_types = sorted(
+                    list(set(get_data_type(column) for column in all_columns))
+                )
+                raise ValueError(
+                    f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
+                )
         return columns
-    def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
+    def num_observations(self):
+        """Return the number of observations in the dataset.
+        >>> from edsl.results import Results
+        >>> Results.example().num_observations()
+        4
+        """
+        _num_observations = None
+        for entry in self:
+            key, values = list(entry.items())[0]
+            if _num_observations is None:
+                _num_observations = len(values)
+            else:
+                if len(values) != _num_observations:
+                    raise ValueError(
+                        "The number of observations is not consistent across columns."
+                    )
+        return _num_observations
+    def _make_tabular(
+        self, remove_prefix: bool, pretty_labels: Optional[dict] = None
+    ) -> tuple[list, List[list]]:
         """Turn the results into a tabular format.
         :param remove_prefix: Whether to remove the prefix from the column names.
@@ -53,23 +99,29 @@ class DatasetExportMixin:
         >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
         (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
         """
-        d = {}
-        full_header = sorted(list(self.relevant_columns()))
-        for entry in self.data:
-            key, list_of_values = list(entry.items())[0]
-            d[key] = list_of_values
+        def create_dict_from_list_of_dicts(list_of_dicts):
+            for entry in list_of_dicts:
+                key, list_of_values = list(entry.items())[0]
+                yield key, list_of_values
+        tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
+        full_header = [list(x.keys())[0] for x in self]
+        rows = []
+        for i in range(self.num_observations()):
+            row = [tabular_repr[h][i] for h in full_header]
+            rows.append(row)
         if remove_prefix:
             header = [h.split(".")[-1] for h in full_header]
         else:
             header = full_header
-        num_observations = len(list(self[0].values())[0])
-        rows = []
-        # rows.append(header)
-        for i in range(num_observations):
-            row = [d[h][i] for h in full_header]
-            rows.append(row)
         if pretty_labels is not None:
             header = [pretty_labels.get(h, h) for h in header]
         return header, rows
     def print_long(self):
@@ -91,7 +143,7 @@ class DatasetExportMixin:
         self,
         pretty_labels: Optional[dict] = None,
         filename: Optional[str] = None,
-        format: Literal["rich", "html", "markdown", "latex"] = None,
+        format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
         interactive: bool = False,
         split_at_dot: bool = True,
         max_rows=None,
@@ -108,6 +160,12 @@ class DatasetExportMixin:
         :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
         :param interactive: Whether to print the results interactively in a Jupyter notebook.
         :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
+        :param max_rows: The maximum number of rows to print.
+        :param tee: Whether to return the dataset.
+        :param iframe: Whether to display the table in an iframe.
+        :param iframe_height: The height of the iframe.
+        :param iframe_width: The width of the iframe.
+        :param web: Whether to display the table in a web browser.
         Example: Print in rich format at the terminal
@@ -188,91 +246,95 @@ class DatasetExportMixin:
         | Terrible |
         | OK |
         ...
+        >>> r.select('how_feeling').print(format='latex')
+        \\begin{tabular}{l}
+        \\toprule
+        ...
         """
         from IPython.display import HTML, display
         from edsl.utilities.utilities import is_notebook
-        if format is None:
-            if is_notebook():
-                format = "html"
-            else:
-                format = "rich"
+        def _determine_format(format):
+            if format is None:
+                if is_notebook():
+                    format = "html"
+                else:
+                    format = "rich"
+            if format not in ["rich", "html", "markdown", "latex"]:
+                raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
+            return format
+        format = _determine_format(format)
         if pretty_labels is None:
             pretty_labels = {}
-        else:
-            # if the user passes in pretty_labels, we don't want to split at the dot
+        if pretty_labels != {}:  # only split at dot if there are no pretty labels
             split_at_dot = False
-        if format not in ["rich", "html", "markdown", "latex"]:
-            raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
+        def _create_data():
+            for index, entry in enumerate(self):
+                key, list_of_values = list(entry.items())[0]
+                yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
+        new_data = list(_create_data())
-        new_data = []
-        for index, entry in enumerate(self):
-            key, list_of_values = list(entry.items())[0]
-            new_data.append({pretty_labels.get(key, key): list_of_values})
-        if max_rows is not None:
-            for entry in new_data:
-                for key in entry:
-                    actual_rows = len(entry[key])
-                    entry[key] = entry[key][:max_rows]
         if format == "rich":
             from edsl.utilities.interface import print_dataset_with_rich
             print_dataset_with_rich(
                 new_data, filename=filename, split_at_dot=split_at_dot
             )
-        elif format == "html":
-            notebook = is_notebook()
+            return self if tee else None
+        if format == "markdown":
+            from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
+            print_list_of_dicts_as_markdown_table(new_data, filename=filename)
+            return self if tee else None
+        if format == "latex":
+            df = self.to_pandas()
+            df.columns = [col.replace("_", " ") for col in df.columns]
+            latex_string = df.to_latex(index=False)
+            if filename is not None:
+                with open(filename, "w") as f:
+                    f.write(latex_string)
+            else:
+                print(latex_string)
+            return self if tee else None
+        if format == "html":
             from edsl.utilities.interface import print_list_of_dicts_as_html_table
             html_source = print_list_of_dicts_as_html_table(
                 new_data, interactive=interactive
             )
-            if iframe:
-                import html
-                height = iframe_height
-                width = iframe_width
-                escaped_output = html.escape(html_source)
-                # escaped_output = html_source
+            # if download_link:
+            #     from IPython.display import HTML, display
+            #     csv_file = output.getvalue()
+            #     b64 = base64.b64encode(csv_file.encode()).decode()
+            #     download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
+            #     #display(HTML(download_link))
+            if iframe:
                 iframe = f""""
-                <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
+                <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
                 """
                 display(HTML(iframe))
-            elif notebook:
+            elif is_notebook():
                 display(HTML(html_source))
             else:
                 from edsl.utilities.interface import view_html
                 view_html(html_source)
-        elif format == "markdown":
-            from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
-            print_list_of_dicts_as_markdown_table(new_data, filename=filename)
-        elif format == "latex":
-            df = self.to_pandas()
-            df.columns = [col.replace("_", " ") for col in df.columns]
-            latex_string = df.to_latex()
-            if filename is not None:
-                with open(filename, "w") as f:
-                    f.write(latex_string)
-            else:
-                return latex_string
-            # raise NotImplementedError("Latex format not yet implemented.")
-            # latex_string = create_latex_table_from_data(new_data, filename=filename)
-            # if filename is None:
-            #     return latex_string
-            # Not working quite
-        else:
-            raise ValueError("format not recognized.")
-        if tee:
-            return self
+            return self if tee else None
     def to_csv(
         self,
@@ -293,10 +355,25 @@ class DatasetExportMixin:
         >>> r = Results.example()
         >>> r.select('how_feeling').to_csv()
         'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
         >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
         'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
+        >>> import tempfile
+        >>> filename = tempfile.NamedTemporaryFile(delete=False).name
+        >>> r.select('how_feeling').to_csv(filename = filename)
+        >>> import os
+        >>> import csv
+        >>> with open(filename, newline='') as f:
+        ...     reader = csv.reader(f)
+        ...     for row in reader:
+        ...         print(row)
+        ['answer.how_feeling']
+        ['OK']
+        ['Great']
+        ['Terrible']
+        ['OK']
         """
         if pretty_labels is None:
             pretty_labels = {}
@@ -316,6 +393,8 @@ class DatasetExportMixin:
             writer.writerows(rows)
             if download_link:
+                from IPython.display import HTML, display
                 csv_file = output.getvalue()
                 b64 = base64.b64encode(csv_file.encode()).decode()
                 download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
@@ -323,6 +402,22 @@ class DatasetExportMixin:
             else:
                 return output.getvalue()
+    def download_link(self, pretty_labels: Optional[dict] = None) -> str:
+        """Return a download link for the results.
+        :param pretty_labels: A dictionary of pretty labels for the columns.
+        >>> from edsl.results import Results
+        >>> r = Results.example()
+        >>> r.select('how_feeling').download_link()
+        '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
+        """
+        import base64
+        csv_string = self.to_csv(pretty_labels=pretty_labels)
+        b64 = base64.b64encode(csv_string.encode()).decode()
+        return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
     def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
         """Convert the results to a pandas DataFrame.
@@ -342,8 +437,8 @@ class DatasetExportMixin:
         csv_string = self.to_csv(remove_prefix=remove_prefix)
         csv_buffer = io.StringIO(csv_string)
         df = pd.read_csv(csv_buffer)
-        df_sorted = df.sort_index(axis=1)  # Sort columns alphabetically
-        return df_sorted
+        # df_sorted = df.sort_index(axis=1)  # Sort columns alphabetically
+        return df
     def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
         """Convert the results to a list of dictionaries, one per scenario.
@@ -362,7 +457,7 @@ class DatasetExportMixin:
     def to_agent_list(self, remove_prefix: bool = True):
         """Convert the results to a list of dictionaries, one per agent.
         :param remove_prefix: Whether to remove the prefix from the column names.
         >>> from edsl.results import Results
@@ -461,7 +556,10 @@ class DatasetExportMixin:
         return list_to_return
     def html(
-        self, filename: Optional[str] = None, cta: str = "Open in browser", return_link:bool=False
+        self,
+        filename: Optional[str] = None,
+        cta: str = "Open in browser",
+        return_link: bool = False,
     ):
         import os
         import tempfile
@@ -495,7 +593,7 @@ class DatasetExportMixin:
             return filename
     def tally(
-        self, *fields: Optional[str], top_n:Optional[int]=None, output="dict"
+        self, *fields: Optional[str], top_n: Optional[int] = None, output="dict"
     ) -> Union[dict, "Dataset"]:
         """Tally the values of a field or perform a cross-tab of multiple fields.

edsl/results/Result.py CHANGED Viewed

@@ -167,28 +167,30 @@ class Result(Base, UserDict):
             "answer": self.answer,
             "prompt": self.prompt,
             "raw_model_response": self.raw_model_response,
-#            "iteration": {"iteration": self.iteration},
+            #            "iteration": {"iteration": self.iteration},
             "question_text": question_text_dict,
             "question_options": question_options_dict,
             "question_type": question_type_dict,
             "comment": comments_dict,
         }
     def check_expression(self, expression) -> None:
         for key in self.problem_keys:
             if key in expression and not key + "." in expression:
-                raise ValueError(f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead.")
+                raise ValueError(
+                    f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead."
+                )
         return None
     def code(self):
         """Return a string of code that can be used to recreate the Result object."""
         raise NotImplementedError
     @property
     def problem_keys(self):
         """Return a list of keys that are problematic."""
         return self._problem_keys
     def _compute_combined_dict_and_problem_keys(self) -> None:
         combined = {}
         problem_keys = []
@@ -198,9 +200,9 @@ class Result(Base, UserDict):
             if key in combined:
                 # The key is already in the combined dict
                 problem_keys = problem_keys + [key]
             combined.update({key: sub_dict})
-            # I *think* this allows us to do do things like "answer.how_feelling" i.e., that the evaluator can use
+            # I *think* this allows us to do do things like "answer.how_feelling" i.e., that the evaluator can use
             # dot notation to access the subdicts.
         self._combined_dict = combined
         self._problem_keys = problem_keys
@@ -208,7 +210,7 @@ class Result(Base, UserDict):
     @property
     def combined_dict(self) -> dict[str, Any]:
         """Return a dictionary that includes all sub_dicts, but also puts the key-value pairs in each sub_dict as a key_value pair in the combined dictionary.
         >>> r = Result.example()
         >>> r.combined_dict['how_feeling']
         'OK'
@@ -216,7 +218,7 @@ class Result(Base, UserDict):
         if self._combined_dict is None or self._problem_keys is None:
             self._compute_combined_dict_and_problem_keys()
         return self._combined_dict
     @property
     def problem_keys(self):
         """Return a list of keys that are problematic."""
@@ -267,11 +269,11 @@ class Result(Base, UserDict):
     def __eq__(self, other) -> bool:
         """Return True if the Result object is equal to another Result object.
         >>> r = Result.example()
         >>> r == r
         True
         """
         return self.to_dict() == other.to_dict()

edsl/results/Results.py CHANGED Viewed

@@ -603,24 +603,26 @@ class Results(UserList, Mixins, Base):
             values = [d[key] for d in columns]
             self = self.add_column(key, values)
         return self
     @staticmethod
-    def _create_evaluator(result: Result, functions_dict: Optional[dict] = None) -> EvalWithCompoundTypes:
+    def _create_evaluator(
+        result: Result, functions_dict: Optional[dict] = None
+    ) -> EvalWithCompoundTypes:
         """Create an evaluator for the expression.
         >>> from unittest.mock import Mock
         >>> result = Mock()
-        >>> result.combined_dict = {'how_feeling': 'OK'}
+        >>> result.combined_dict = {'how_feeling': 'OK'}
         >>> evaluator = Results._create_evaluator(result = result, functions_dict = {})
         >>> evaluator.eval("how_feeling == 'OK'")
         True
         >>> result.combined_dict = {'answer': {'how_feeling': 'OK'}}
         >>> evaluator = Results._create_evaluator(result = result, functions_dict = {})
         >>> evaluator.eval("answer.how_feeling== 'OK'")
         True
         Note that you need to refer to the answer dictionary in the expression.
         >>> evaluator.eval("how_feeling== 'OK'")
@@ -827,8 +829,9 @@ class Results(UserList, Mixins, Base):
             # Return the index of this key in the list_of_keys
             return items_in_order.index(single_key)
-        #sorted(new_data, key=sort_by_key_order)
+        # sorted(new_data, key=sort_by_key_order)
         from edsl.results.Dataset import Dataset
         sorted_new_data = []
         # WORKS but slow
@@ -958,10 +961,10 @@ class Results(UserList, Mixins, Base):
             new_data = []
             for result in self.data:
                 evaluator = self._create_evaluator(result)
-                result.check_expression(expression) # check expression
+                result.check_expression(expression)  # check expression
                 if evaluator.eval(expression):
                     new_data.append(result)
         except ValueError as e:
             raise ResultsFilterError(
                 f"Error in filter. Exception:{e}",
@@ -970,14 +973,14 @@ class Results(UserList, Mixins, Base):
             )
         except Exception as e:
             raise ResultsFilterError(
-            f"""Error in filter. Exception:{e}.""",
-            f"""The expression you provided was: {expression}.""",
-            """Please make sure that the expression is a valid Python expression that evaluates to a boolean.""",
-            """For example, 'how_feeling == "Great"' is a valid expression, as is 'how_feeling in ["Great", "Terrible"]'., """,
-            """However, 'how_feeling = "Great"' is not a valid expression.""",
-            """See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details."""
+                f"""Error in filter. Exception:{e}.""",
+                f"""The expression you provided was: {expression}.""",
+                """Please make sure that the expression is a valid Python expression that evaluates to a boolean.""",
+                """For example, 'how_feeling == "Great"' is a valid expression, as is 'how_feeling in ["Great", "Terrible"]'., """,
+                """However, 'how_feeling = "Great"' is not a valid expression.""",
+                """See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details.""",
             )
         if len(new_data) == 0:
             import warnings

edsl/results/ResultsToolsMixin.py CHANGED Viewed

@@ -37,12 +37,12 @@ class ResultsToolsMixin:
         print_exceptions=False,
     ) -> dict:
         from edsl import ScenarioList
+        from edsl import QuestionCheckBox
         values = self.select(field).to_list()
         scenarios = ScenarioList.from_list("field", values).add_value(
             "context", context
         )
         q = QuestionCheckBox(
             question_text="""
             {{ context }}

edsl 0.1.30.dev5__py3-none-any.whl → 0.1.31.dev1__py3-none-any.whl

edsl 0.1.30.dev5__py3-none-any.whl → 0.1.31.dev1__py3-none-any.whl