PyPI - edsl - Versions diffs - 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev4__py3-none-any.whl - Mend

edsl 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

edsl/Base.py +60 -31
edsl/__version__.py +1 -1
edsl/agents/Agent.py +18 -9
edsl/agents/AgentList.py +59 -8
edsl/agents/Invigilator.py +18 -7
edsl/agents/InvigilatorBase.py +0 -19
edsl/agents/PromptConstructor.py +5 -4
edsl/config.py +8 -0
edsl/coop/coop.py +74 -7
edsl/data/Cache.py +27 -2
edsl/data/CacheEntry.py +8 -3
edsl/data/RemoteCacheSync.py +0 -19
edsl/enums.py +2 -0
edsl/inference_services/GoogleService.py +7 -15
edsl/inference_services/PerplexityService.py +163 -0
edsl/inference_services/registry.py +2 -0
edsl/jobs/Jobs.py +88 -548
edsl/jobs/JobsChecks.py +147 -0
edsl/jobs/JobsPrompts.py +268 -0
edsl/jobs/JobsRemoteInferenceHandler.py +239 -0
edsl/jobs/interviews/Interview.py +11 -11
edsl/jobs/runners/JobsRunnerAsyncio.py +140 -35
edsl/jobs/runners/JobsRunnerStatus.py +0 -2
edsl/jobs/tasks/TaskHistory.py +15 -16
edsl/language_models/LanguageModel.py +44 -84
edsl/language_models/ModelList.py +47 -1
edsl/language_models/registry.py +57 -4
edsl/prompts/Prompt.py +8 -3
edsl/questions/QuestionBase.py +20 -16
edsl/questions/QuestionExtract.py +3 -4
edsl/questions/question_registry.py +36 -6
edsl/results/CSSParameterizer.py +108 -0
edsl/results/Dataset.py +146 -15
edsl/results/DatasetExportMixin.py +231 -217
edsl/results/DatasetTree.py +134 -4
edsl/results/Result.py +18 -9
edsl/results/Results.py +145 -51
edsl/results/TableDisplay.py +198 -0
edsl/results/table_display.css +78 -0
edsl/scenarios/FileStore.py +187 -13
edsl/scenarios/Scenario.py +61 -4
edsl/scenarios/ScenarioJoin.py +127 -0
edsl/scenarios/ScenarioList.py +237 -62
edsl/surveys/Survey.py +16 -2
edsl/surveys/SurveyFlowVisualizationMixin.py +67 -9
edsl/surveys/instructions/Instruction.py +12 -0
edsl/templates/error_reporting/interview_details.html +3 -3
edsl/templates/error_reporting/interviews.html +18 -9
edsl/utilities/utilities.py +15 -0
{edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev4.dist-info}/METADATA +2 -1
{edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev4.dist-info}/RECORD +53 -45
{edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev4.dist-info}/LICENSE +0 -0
{edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev4.dist-info}/WHEEL +0 -0

edsl/results/DatasetTree.py CHANGED Viewed

@@ -1,10 +1,18 @@
-from typing import Dict, List, Any, Optional
+from typing import Dict, List, Any, Optional, List
 from docx import Document
 from docx.shared import Inches, Pt
 from docx.enum.text import WD_ALIGN_PARAGRAPH
 from docx.enum.style import WD_STYLE_TYPE
+def is_hashable(v):
+    try:
+        hash(v)
+        return True
+    except TypeError:
+        return False
 class TreeNode:
     def __init__(self, key=None, value=None):
         self.key = key
@@ -13,16 +21,21 @@ class TreeNode:
 class Tree:
-    def __init__(self, data: "Dataset"):
+    def __init__(self, data: "Dataset", node_order: Optional[List[str]] = None):
         d = {}
         for entry in data:
             d.update(entry)
         self.data = d
         self.root = None
+        self.node_order = node_order
+        self.construct_tree(node_order)
     def unique_values_by_keys(self) -> dict:
         unique_values = {}
-        for key, values in self.data.items():
+        for key, raw_values in self.data.items():
+            values = [v if is_hashable(v) else str(v) for v in raw_values]
             unique_values[key] = list(set(values))
         return unique_values
@@ -45,15 +58,25 @@ class Tree:
             current = self.root
             for level in node_order[:-1]:
                 value = self.data[level][i]
+                if not is_hashable(value):
+                    value = str(value)
                 if value not in current.children:
                     current.children[value] = TreeNode(key=level, value=value)
                 current = current.children[value]
             leaf_key = node_order[-1]
             leaf_value = self.data[leaf_key][i]
+            if not is_hashable(leaf_value):
+                leaf_value = str(leaf_value)
             if leaf_value not in current.children:
                 current.children[leaf_value] = TreeNode(key=leaf_key, value=leaf_value)
+    def __repr__(self):
+        if self.node_order is not None:
+            return f"Tree(Dataset({self.data}), node_order={self.node_order})"
+        else:
+            return f"Tree(Dataset({self.data}))"
     def print_tree(
         self, node: Optional[TreeNode] = None, level: int = 0, print_keys: bool = False
     ):
@@ -71,7 +94,10 @@ class Tree:
         for child in node.children.values():
             self.print_tree(child, level + 1, print_keys)
-    def to_docx(self, filename: str):
+    def to_docx(self, filename: Optional[str] = None):
+        if filename is None:
+            filename = "tree_structure.docx"
         doc = Document()
         # Create styles for headings
@@ -90,6 +116,110 @@ class Tree:
         self._add_to_docx(doc, self.root, 0)
         doc.save(filename)
+        from edsl.utilities.utilities import file_notice
+        file_notice(filename)
+    def _repr_html_(self):
+        """Returns an interactive HTML representation of the tree with collapsible sections."""
+        # Generate a unique ID for this tree instance
+        import uuid
+        tree_id = f"tree_{uuid.uuid4().hex[:8]}"
+        styles = f"""
+        <div class="{tree_id}">
+            <style>
+                .{tree_id} details {{
+                    margin-left: 20px;
+                }}
+                .{tree_id} summary {{
+                    cursor: pointer;
+                    margin: 2px 0;
+                }}
+                .{tree_id} .value {{
+                    font-family: monospace;
+                    background: #f5f5f5;
+                    padding: 2px 6px;
+                    border-radius: 3px;
+                    margin: 1px 0;
+                }}
+                .{tree_id} .key {{
+                    color: #666;
+                    font-style: italic;
+                }}
+            </style>
+        """
+        def node_to_html(node, level=0, print_keys=True):
+            if node is None:
+                return "Tree has not been constructed yet."
+            html = []
+            if node.value is not None:
+                # Create the node content
+                content = []
+                if print_keys and node.key is not None:
+                    content.append(f'<span class="key">{node.key}: </span>')
+                content.append(f'<span class="value">{node.value}</span>')
+                content_html = "".join(content)
+                if node.children:
+                    # Node with children
+                    html.append(f'<details {"open" if level < 1 else ""}>')
+                    html.append(f"<summary>{content_html}</summary>")
+                    for child in node.children.values():
+                        html.append(node_to_html(child, level + 1, print_keys))
+                    html.append("</details>")
+                else:
+                    # Leaf node
+                    html.append(f"<div>{content_html}</div>")
+            else:
+                # Root node with no value
+                if node.children:
+                    for child in node.children.values():
+                        html.append(node_to_html(child, level, print_keys))
+            return "\n".join(html)
+        tree_html = node_to_html(self.root)
+        return f"{styles}{tree_html}</div>"
+    # def _repr_html_(self):
+    #     """Returns an HTML representation of the tree, following the same logic as print_tree."""
+    #     styles = """
+    #     <style>
+    #         .tree-container {
+    #             font-family: monospace;
+    #             white-space: pre;
+    #             margin: 10px;
+    #         }
+    #     </style>
+    #     """
+    #     def node_to_html(node, level=0, print_keys=False):
+    #         if node is None:
+    #             node = self.root
+    #             if node is None:
+    #                 return "Tree has not been constructed yet."
+    #         html = []
+    #         if node.value is not None:
+    #             indent = "&nbsp;" * 2 * level  # Using &nbsp; for HTML spaces
+    #             if print_keys and node.key is not None:
+    #                 html.append(f"{indent}{node.key}: {node.value}<br>")
+    #             else:
+    #                 html.append(f"{indent}{node.value}<br>")
+    #         for child in node.children.values():
+    #             html.append(node_to_html(child, level + 1, print_keys))
+    #         return "".join(html)
+    #     tree_html = node_to_html(self.root)
+    #     return f'<div class="tree-container">{tree_html}</div>{styles}'
     def _add_to_docx(self, doc, node: TreeNode, level: int):
         if node.value is not None:

edsl/results/Result.py CHANGED Viewed

@@ -137,6 +137,15 @@ class Result(Base, UserDict):
         self._combined_dict = None
         self._problem_keys = None
+    def _repr_html_(self):
+        # d = self.to_dict(add_edsl_version=False)
+        d = self.to_dict(add_edsl_version=False)
+        data = [[k, v] for k, v in d.items()]
+        from tabulate import tabulate
+        table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
+        return f"<pre>{table}</pre>"
     ###############
     # Used in Results
     ###############
@@ -156,15 +165,15 @@ class Result(Base, UserDict):
             if key in self.question_to_attributes:
                 # You might be tempted to just use the naked key
                 # but this is a bad idea because it pollutes the namespace
-                question_text_dict[key + "_question_text"] = (
-                    self.question_to_attributes[key]["question_text"]
-                )
-                question_options_dict[key + "_question_options"] = (
-                    self.question_to_attributes[key]["question_options"]
-                )
-                question_type_dict[key + "_question_type"] = (
-                    self.question_to_attributes[key]["question_type"]
-                )
+                question_text_dict[
+                    key + "_question_text"
+                ] = self.question_to_attributes[key]["question_text"]
+                question_options_dict[
+                    key + "_question_options"
+                ] = self.question_to_attributes[key]["question_options"]
+                question_type_dict[
+                    key + "_question_type"
+                ] = self.question_to_attributes[key]["question_type"]
         return {
             "agent": self.agent.traits

edsl/results/Results.py CHANGED Viewed

@@ -32,7 +32,7 @@ from edsl.results.ResultsDBMixin import ResultsDBMixin
 from edsl.results.ResultsGGMixin import ResultsGGMixin
 from edsl.results.ResultsFetchMixin import ResultsFetchMixin
-from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
+from edsl.utilities.decorators import remove_edsl_version
 from edsl.utilities.utilities import dict_hash
@@ -46,6 +46,9 @@ class Mixins(
     ResultsGGMixin,
     ResultsToolsMixin,
 ):
+    def long(self):
+        return self.table().long()
     def print_long(self, max_rows: int = None) -> None:
         """Print the results in long format.
@@ -73,6 +76,8 @@ class Results(UserList, Mixins, Base):
     It also has a list of created_columns, which are columns that have been created with `mutate` and are not part of the original data.
     """
+    __documentation__ = "https://docs.expectedparrot.com/en/latest/results.html"
     known_data_types = [
         "answer",
         "scenario",
@@ -121,13 +126,55 @@ class Results(UserList, Mixins, Base):
         if hasattr(self, "_add_output_functions"):
             self._add_output_functions()
+    def _summary(self) -> dict:
+        import reprlib
+        # import yaml
+        d = {
+            "EDSL Class": "Results",
+            # "docs_url": self.__documentation__,
+            "# of agents": len(set(self.agents)),
+            "# of distinct models": len(set(self.models)),
+            "# of observations": len(self),
+            "# Scenarios": len(set(self.scenarios)),
+            "Survey Length (# questions)": len(self.survey),
+            "Survey question names": reprlib.repr(self.survey.question_names),
+            "Object hash": hash(self),
+        }
+        return d
+    def compute_job_cost(self, include_cached_responses_in_cost=False) -> float:
+        """
+        Computes the cost of a completed job in USD.
+        """
+        total_cost = 0
+        for result in self:
+            for key in result.raw_model_response:
+                if key.endswith("_cost"):
+                    result_cost = result.raw_model_response[key]
+                    question_name = key.removesuffix("_cost")
+                    cache_used = result.cache_used_dict[question_name]
+                    if isinstance(result_cost, (int, float)):
+                        if include_cached_responses_in_cost:
+                            total_cost += result_cost
+                        elif not include_cached_responses_in_cost and not cache_used:
+                            total_cost += result_cost
+        return total_cost
     def leaves(self):
         leaves = []
         for result in self:
             leaves.extend(result.leaves())
         return leaves
-    def tree(
+    def tree(self, node_list: Optional[List[str]] = None):
+        return self.to_scenario_list().tree(node_list)
+    def interactive_tree(
         self,
         fold_attributes: Optional[List[str]] = None,
         drop: Optional[List[str]] = None,
@@ -260,13 +307,67 @@ class Results(UserList, Mixins, Base):
         return f"Results(data = {reprlib.repr(self.data)}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
+    def table(
+        self,
+        # selector_string: Optional[str] = "*.*",
+        *fields,
+        tablefmt: Optional[str] = None,
+        pretty_labels: Optional[dict] = None,
+        print_parameters: Optional[dict] = None,
+    ):
+        new_fields = []
+        for field in fields:
+            if "." in field:
+                data_type, key = field.split(".")
+                if data_type not in self.known_data_types:
+                    raise ResultsInvalidNameError(
+                        f"{data_type} is not a valid data type. Must be in {self.known_data_types}"
+                    )
+                if key == "*":
+                    for k in self._data_type_to_keys[data_type]:
+                        new_fields.append(k)
+                else:
+                    if key not in self._key_to_data_type:
+                        raise ResultsColumnNotFoundError(
+                            f"{key} is not a valid key. Must be in {self._key_to_data_type}"
+                        )
+                    new_fields.append(key)
+            else:
+                new_fields.append(field)
+        return (
+            self.to_scenario_list()
+            .to_dataset()
+            .table(
+                *new_fields,
+                tablefmt=tablefmt,
+                pretty_labels=pretty_labels,
+                print_parameters=print_parameters,
+            )
+        )
+        # return (
+        #     self.select(f"{selector_string}")
+        #     .to_scenario_list()
+        #     .table(*fields, tablefmt=tablefmt)
+        # )
     def _repr_html_(self) -> str:
-        # from IPython.display import HTML
+        d = self._summary()
+        from edsl import Scenario
-        json_str = json.dumps(self.to_dict(add_edsl_version=False)["data"], indent=4)
-        return f"<pre>{json_str}</pre>"
+        footer = f"<a href={self.__documentation__}>(docs)</a>"
-    def to_dict(self, sort=False, add_edsl_version=False) -> dict[str, Any]:
+        s = Scenario(d)
+        td = s.to_dataset().table(tablefmt="html")
+        return td._repr_html_() + footer
+    def to_dict(
+        self,
+        sort=False,
+        add_edsl_version=False,
+        include_cache=False,
+        include_task_history=False,
+    ) -> dict[str, Any]:
         from edsl.data.Cache import Cache
         if sort:
@@ -280,13 +381,21 @@ class Results(UserList, Mixins, Base):
             ],
             "survey": self.survey.to_dict(add_edsl_version=add_edsl_version),
             "created_columns": self.created_columns,
-            "cache": (
-                Cache()
-                if not hasattr(self, "cache")
-                else self.cache.to_dict(add_edsl_version=add_edsl_version)
-            ),
-            "task_history": self.task_history.to_dict(),
         }
+        if include_cache:
+            d.update(
+                {
+                    "cache": (
+                        Cache()
+                        if not hasattr(self, "cache")
+                        else self.cache.to_dict(add_edsl_version=add_edsl_version)
+                    )
+                }
+            )
+        if self.task_history.has_unfixed_exceptions or include_task_history:
+            d.update({"task_history": self.task_history.to_dict()})
         if add_edsl_version:
             from edsl import __version__
@@ -375,7 +484,11 @@ class Results(UserList, Mixins, Base):
                 cache=(
                     Cache.from_dict(data.get("cache")) if "cache" in data else Cache()
                 ),
-                task_history=TaskHistory.from_dict(data.get("task_history")),
+                task_history=(
+                    TaskHistory.from_dict(data.get("task_history"))
+                    if "task_history" in data
+                    else TaskHistory(interviews=[])
+                ),
             )
         except Exception as e:
             raise ResultsDeserializationError(f"Error in Results.from_dict: {e}")
@@ -875,31 +988,19 @@ class Results(UserList, Mixins, Base):
         >>> r = Results.example()
         >>> r.sort_by('how_feeling', reverse=False).select('how_feeling').print()
-        ┏━━━━━━━━━━━━━━┓
-        ┃ answer       ┃
-        ┃ .how_feeling ┃
-        ┡━━━━━━━━━━━━━━┩
-        │ Great        │
-        ├──────────────┤
-        │ OK           │
-        ├──────────────┤
-        │ OK           │
-        ├──────────────┤
-        │ Terrible     │
-        └──────────────┘
+        answer.how_feeling
+        --------------------
+        Great
+        OK
+        OK
+        Terrible
         >>> r.sort_by('how_feeling', reverse=True).select('how_feeling').print()
-        ┏━━━━━━━━━━━━━━┓
-        ┃ answer       ┃
-        ┃ .how_feeling ┃
-        ┡━━━━━━━━━━━━━━┩
-        │ Terrible     │
-        ├──────────────┤
-        │ OK           │
-        ├──────────────┤
-        │ OK           │
-        ├──────────────┤
-        │ Great        │
-        └──────────────┘
+        answer.how_feeling
+        --------------------
+        Terrible
+        OK
+        OK
+        Great
         """
         def to_numeric_if_possible(v):
@@ -932,12 +1033,9 @@ class Results(UserList, Mixins, Base):
         >>> r = Results.example()
         >>> r.filter("how_feeling == 'Great'").select('how_feeling').print()
-        ┏━━━━━━━━━━━━━━┓
-        ┃ answer       ┃
-        ┃ .how_feeling ┃
-        ┡━━━━━━━━━━━━━━┩
-        │ Great        │
-        └──────────────┘
+        answer.how_feeling
+        --------------------
+        Great
         Example usage: Using an OR operator in the filter expression.
@@ -948,14 +1046,10 @@ class Results(UserList, Mixins, Base):
         ...
         >>> r.filter("how_feeling == 'Great' or how_feeling == 'Terrible'").select('how_feeling').print()
-        ┏━━━━━━━━━━━━━━┓
-        ┃ answer       ┃
-        ┃ .how_feeling ┃
-        ┡━━━━━━━━━━━━━━┩
-        │ Great        │
-        ├──────────────┤
-        │ Terrible     │
-        └──────────────┘
+        answer.how_feeling
+        --------------------
+        Great
+        Terrible
         """
         def has_single_equals(string):

edsl 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev4__py3-none-any.whl

edsl 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev4__py3-none-any.whl