PyPI - edsl - Versions diffs - 0.1.58__py3-none-any.whl → 0.1.59__py3-none-any.whl - Mend

edsl 0.1.58__py3-none-any.whl → 0.1.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

edsl/__version__.py +1 -1
edsl/agents/agent.py +23 -4
edsl/agents/agent_list.py +36 -6
edsl/coop/coop.py +103 -1
edsl/dataset/dataset.py +74 -0
edsl/dataset/dataset_operations_mixin.py +67 -62
edsl/inference_services/services/test_service.py +1 -1
edsl/interviews/exception_tracking.py +66 -20
edsl/invigilators/invigilators.py +5 -1
edsl/invigilators/prompt_constructor.py +299 -136
edsl/jobs/html_table_job_logger.py +18 -1
edsl/jobs/jobs_pricing_estimation.py +6 -2
edsl/jobs/jobs_remote_inference_logger.py +2 -0
edsl/jobs/remote_inference.py +34 -7
edsl/language_models/language_model.py +39 -2
edsl/prompts/prompt.py +1 -0
edsl/questions/question_list.py +76 -20
edsl/results/results.py +8 -1
edsl/scenarios/file_store.py +8 -12
edsl/scenarios/scenario.py +50 -2
edsl/scenarios/scenario_list.py +34 -12
edsl/surveys/survey.py +4 -0
edsl/tasks/task_history.py +180 -6
edsl/utilities/wikipedia.py +194 -0
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/METADATA +4 -3
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/RECORD +29 -28
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/LICENSE +0 -0
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/WHEEL +0 -0
{edsl-0.1.58.dist-info → edsl-0.1.59.dist-info}/entry_points.txt +0 -0

edsl/__version__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.1.58"
1	+ __version__ = "0.1.59"

edsl/agents/agent.py CHANGED Viewed

@@ -426,6 +426,25 @@ class Agent(Base):
             self.traits_presentation_template = "Your traits: {{traits}}"
             self.set_traits_presentation_template = False
+    def drop(self, field_name: str) -> Agent:
+        """Drop a field from the agent.
+        Args:
+            field_name: The name of the field to drop.
+        """
+        d = self.to_dict()
+        if field_name in d['traits']:
+            d['traits'].pop(field_name)
+        elif field_name in d:
+            d.pop(field_name)
+        else:
+            raise AgentErrors((f"Field '{field_name}' not found in agent"
+                               f"Available fields: {d.keys()}"
+                               f"Available traits: {d['traits'].keys()}"
+                              ))
+        return Agent.from_dict(d)
     def duplicate(self) -> Agent:
         """Create a deep copy of this agent with all its traits and capabilities.
@@ -1213,7 +1232,7 @@ class Agent(Base):
         """
         return dict_hash(self.to_dict(add_edsl_version=False))
-    def to_dict(self, add_edsl_version=True) -> dict[str, Union[dict, bool]]:
+    def to_dict(self, add_edsl_version=True, full_dict=False) -> dict[str, Union[dict, bool]]:
         """Serialize to a dictionary with EDSL info.
         Example usage:
@@ -1230,11 +1249,11 @@ class Agent(Base):
         d["traits"] = copy.deepcopy(dict(self._traits))
         if self.name:
             d["name"] = self.name
-        if self.set_instructions:
+        if self.set_instructions or full_dict:
             d["instruction"] = self.instruction
-        if self.set_traits_presentation_template:
+        if self.set_traits_presentation_template or full_dict:
             d["traits_presentation_template"] = self.traits_presentation_template
-        if self.codebook:
+        if self.codebook or full_dict:
             d["codebook"] = self.codebook
         if add_edsl_version:
             from edsl import __version__

edsl/agents/agent_list.py CHANGED Viewed

@@ -47,13 +47,13 @@ class AgentList(UserList, Base, AgentListOperationsMixin):
     with methods for filtering, transforming, and analyzing collections of agents.
-    >>> AgentList.example().to_scenario_list()
-    ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
+    >>> AgentList.example().to_scenario_list().drop('age')
+    ScenarioList([Scenario({'hair': 'brown', 'height': 5.5}), Scenario({'hair': 'brown', 'height': 5.5})])
     >>> AgentList.example().to_dataset()
     Dataset([{'age': [22, 22]}, {'hair': ['brown', 'brown']}, {'height': [5.5, 5.5]}])
-    >>> AgentList.example().to_pandas()
+    >>> AgentList.example().select('age', 'hair', 'height').to_pandas()
        age   hair  height
     0   22  brown     5.5
     1   22  brown     5.5
@@ -91,6 +91,28 @@ class AgentList(UserList, Base, AgentListOperationsMixin):
         if codebook is not None:
             self.set_codebook(codebook)
+    def set_instruction(self, instruction: str) -> None:
+        """Set the instruction for all agents in the list.
+        Args:
+            instruction: The instruction to set.
+        """
+        for agent in self.data:
+            agent.instruction = instruction
+        return None
+    def set_traits_presentation_template(self, traits_presentation_template: str) -> None:
+        """Set the traits presentation template for all agents in the list.
+        Args:
+            traits_presentation_template: The traits presentation template to set.
+        """
+        for agent in self.data:
+            agent.traits_presentation_template = traits_presentation_template
+        return None
     def shuffle(self, seed: Optional[str] = None) -> AgentList:
         """Randomly shuffle the agents in place.
@@ -119,6 +141,14 @@ class AgentList(UserList, Base, AgentListOperationsMixin):
         if seed:
             random.seed(seed)
         return AgentList(random.sample(self.data, n))
+    def drop(self, field_name: str) -> AgentList:
+        """Drop a field from the AgentList.
+        Args:
+            field_name: The name of the field to drop.
+        """
+        return AgentList([a.drop(field_name) for a in self.data])
     def duplicate(self) -> AgentList:
         """Create a deep copy of the AgentList.
@@ -478,7 +508,7 @@ class AgentList(UserList, Base, AgentListOperationsMixin):
             >>> al.to_dataset()
             Dataset([{'age': [22, 22]}, {'hair': ['brown', 'brown']}, {'height': [5.5, 5.5]}])
             >>> al.to_dataset(traits_only=False)  # doctest: +NORMALIZE_WHITESPACE
-            Dataset([{'age': [22, 22]}, {'hair': ['brown', 'brown']}, {'height': [5.5, 5.5]}, {'agent_parameters': [{'instruction': 'You are answering questions as if you were a human. Do not break character.', 'agent_name': None}, {'instruction': 'You are answering questions as if you were a human. Do not break character.', 'agent_name': None}]}])
+            Dataset([{'age': [22, 22]}, {'hair': ['brown', 'brown']}, {'height': [5.5, 5.5]}, {'agent_parameters': [{'instruction': 'You are answering questions as if you were a human. Do not break character.', 'agent_name': None, 'traits_presentation_template': 'Your traits: {{traits}}'}, {'instruction': 'You are answering questions as if you were a human. Do not break character.', 'agent_name': None, 'traits_presentation_template': 'Your traits: {{traits}}'}]}])
         """
         from ..dataset import Dataset
@@ -495,7 +525,7 @@ class AgentList(UserList, Base, AgentListOperationsMixin):
                 data[trait_key].append(agent.traits.get(trait_key, None))
             if not traits_only:
                 data["agent_parameters"].append(
-                    {"instruction": agent.instruction, "agent_name": agent.name}
+                    {"instruction": agent.instruction, "agent_name": agent.name, "traits_presentation_template": agent.traits_presentation_template}
                 )
         return Dataset([{key: entry} for key, entry in data.items()])

edsl/coop/coop.py CHANGED Viewed

@@ -2,6 +2,7 @@ import aiohttp
 import base64
 import json
 import requests
+import time
 from typing import Any, Optional, Union, Literal, List, TypedDict, TYPE_CHECKING
 from uuid import UUID
@@ -13,7 +14,9 @@ from ..caching import CacheEntry
 if TYPE_CHECKING:
     from ..jobs import Jobs
+    from ..scenarios import ScenarioList
     from ..surveys import Survey
+    from ..results import Results
 from .exceptions import (
     CoopInvalidURLError,
@@ -567,6 +570,7 @@ class Coop(CoopFunctionsMixin):
                     json.dumps(
                         object_dict,
                         default=self._json_handle_none,
+                        allow_nan=False,
                     )
                     if object_type != "scenario"
                     else ""
@@ -585,6 +589,7 @@ class Coop(CoopFunctionsMixin):
             json_data = json.dumps(
                 object_dict,
                 default=self._json_handle_none,
+                allow_nan=False,
             )
             headers = {"Content-Type": "application/json"}
             if response_json.get("upload_signed_url"):
@@ -928,6 +933,7 @@ class Coop(CoopFunctionsMixin):
                     json.dumps(
                         value.to_dict(),
                         default=self._json_handle_none,
+                        allow_nan=False,
                     )
                     if value
                     else None
@@ -1385,12 +1391,108 @@ class Coop(CoopFunctionsMixin):
         self._resolve_server_response(response)
         response_json = response.json()
         return {
-            "name": response_json.get("project_name"),
+            "project_name": response_json.get("project_name"),
             "uuid": response_json.get("uuid"),
             "admin_url": f"{self.url}/home/projects/{response_json.get('uuid')}",
             "respondent_url": f"{self.url}/respond/{response_json.get('uuid')}",
         }
+    def get_project(
+        self,
+        project_uuid: str,
+    ) -> dict:
+        """
+        Get a project from Coop.
+        """
+        response = self._send_server_request(
+            uri=f"api/v0/projects/{project_uuid}",
+            method="GET",
+        )
+        self._resolve_server_response(response)
+        response_json = response.json()
+        return {
+            "project_name": response_json.get("project_name"),
+            "project_job_uuids": response_json.get("job_uuids"),
+        }
+    def get_project_human_responses(
+        self,
+        project_uuid: str,
+    ) -> Union["Results", "ScenarioList"]:
+        """
+        Return a Results object with the human responses for a project.
+        If generating the Results object fails, a ScenarioList will be returned instead.
+        """
+        from ..agents import Agent, AgentList
+        from ..caching import Cache
+        from ..language_models import Model
+        from ..scenarios import Scenario, ScenarioList
+        from ..surveys import Survey
+        response = self._send_server_request(
+            uri=f"api/v0/projects/{project_uuid}/human-responses",
+            method="GET",
+        )
+        self._resolve_server_response(response)
+        response_json = response.json()
+        human_responses = response_json.get("human_responses", [])
+        try:
+            agent_list = AgentList()
+            for response in human_responses:
+                response_uuid = response.get("response_uuid")
+                if response_uuid is None:
+                    raise RuntimeError(
+                        "One of your responses is missing a unique identifier."
+                    )
+                response_dict = json.loads(response.get("response_json_string"))
+                a = Agent(name=response_uuid, instruction="")
+                def create_answer_function(response_data):
+                    def f(self, question, scenario):
+                        return response_data.get(question.question_name, None)
+                    return f
+                a.add_direct_question_answering_method(
+                    create_answer_function(response_dict)
+                )
+                agent_list.append(a)
+            survey_json_string = response_json.get("survey_json_string")
+            survey = Survey.from_dict(json.loads(survey_json_string))
+            model = Model("test")
+            results = (
+                survey.by(agent_list)
+                .by(model)
+                .run(
+                    cache=Cache(),
+                    disable_remote_cache=True,
+                    disable_remote_inference=True,
+                    print_exceptions=False,
+                )
+            )
+            return results
+        except Exception:
+            human_response_scenarios = []
+            for response in human_responses:
+                response_uuid = response.get("response_uuid")
+                if response_uuid is None:
+                    raise RuntimeError(
+                        "One of your responses is missing a unique identifier."
+                    )
+                response_dict = json.loads(response.get("response_json_string"))
+                response_dict["agent_name"] = response_uuid
+                scenario = Scenario(response_dict)
+                human_response_scenarios.append(scenario)
+            return ScenarioList(human_response_scenarios)
     def __repr__(self):
         """Return a string representation of the client."""
         return f"Client(api_key='{self.api_key}', url='{self.url}')"

edsl/dataset/dataset.py CHANGED Viewed

@@ -93,6 +93,38 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         """
         _, values = list(self.data[0].items())[0]
         return len(values)
+    def drop(self, field_name):
+        """
+        Returns a new Dataset with the specified field removed.
+        Args:
+            field_name (str): The name of the field to remove.
+        Returns:
+            Dataset: A new Dataset instance without the specified field.
+        Raises:
+            KeyError: If the field_name doesn't exist in the dataset.
+        Examples:
+            >>> from .dataset import Dataset
+            >>> d = Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
+            >>> d.drop('a')
+            Dataset([{'b': [4, 5, 6]}])
+            >>> # Testing drop with nonexistent field raises DatasetKeyError - tested in unit tests
+        """
+        from .dataset import Dataset
+        # Check if field exists in the dataset
+        if field_name not in self.relevant_columns():
+            raise DatasetKeyError(f"Field '{field_name}' not found in dataset")
+        # Create a new dataset without the specified field
+        new_data = [entry for entry in self.data if field_name not in entry]
+        return Dataset(new_data)
     def tail(self, n: int = 5) -> Dataset:
         """Return the last n observations in the dataset.
@@ -1054,6 +1086,48 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         return Dataset(new_data)
+    def unique(self) -> "Dataset":
+        """Return a new dataset with only unique observations.
+        Examples:
+            >>> d = Dataset([{'a': [1, 2, 2, 3]}, {'b': [4, 5, 5, 6]}])
+            >>> d.unique().data
+            [{'a': [1, 2, 3]}, {'b': [4, 5, 6]}]
+            >>> d = Dataset([{'x': ['a', 'a', 'b']}, {'y': [1, 1, 2]}])
+            >>> d.unique().data
+            [{'x': ['a', 'b']}, {'y': [1, 2]}]
+        """
+        # Get all column names and values
+        headers, data = self._tabular()
+        # Create a list of unique rows
+        unique_rows = []
+        seen = set()
+        for row in data:
+            # Convert the row to a hashable representation for comparison
+            # We need to handle potential unhashable types
+            try:
+                row_key = tuple(map(lambda x: str(x) if isinstance(x, (list, dict)) else x, row))
+                if row_key not in seen:
+                    seen.add(row_key)
+                    unique_rows.append(row)
+            except:
+                # Fallback for complex objects: compare based on string representation
+                row_str = str(row)
+                if row_str not in seen:
+                    seen.add(row_str)
+                    unique_rows.append(row)
+        # Create a new dataset with unique combinations
+        new_data = []
+        for i, header in enumerate(headers):
+            values = [row[i] for row in unique_rows]
+            new_data.append({header: values})
+        return Dataset(new_data)
 if __name__ == "__main__":
     import doctest

edsl/dataset/dataset_operations_mixin.py CHANGED Viewed

@@ -1070,7 +1070,6 @@ class DataOperationsBase:
             - All dictionaries in the field must have compatible structures
             - If a dictionary is missing a key, the corresponding value will be None
             - Non-dictionary values in the field will cause a warning
         Examples:
             >>> from edsl.dataset import Dataset
@@ -1086,48 +1085,85 @@ class DataOperationsBase:
             >>> d = Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5]}])
             >>> d.flatten('a', keep_original=True)
             Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5]}, {'a.a': [1]}, {'a.b': [2]}])
+            # Can also use unambiguous unprefixed field name
+            >>> result = Dataset([{'answer.pros_cons': [{'pros': ['Safety'], 'cons': ['Cost']}]}]).flatten('pros_cons')
+            >>> sorted(result.keys()) == ['answer.pros_cons.cons', 'answer.pros_cons.pros']
+            True
+            >>> sorted(result.to_dicts()[0].items()) == sorted({'cons': ['Cost'], 'pros': ['Safety']}.items())
+            True
         """
         from ..dataset import Dataset
         # Ensure the dataset isn't empty
         if not self.data:
             return self.copy()
-        # Find all columns that contain the field
-        matching_entries = []
-        for entry in self.data:
-            col_name = next(iter(entry.keys()))
-            if field == col_name or (
-                "." in col_name
-                and (col_name.endswith("." + field) or col_name.startswith(field + "."))
-            ):
-                matching_entries.append(entry)
-        # Check if the field is ambiguous
-        if len(matching_entries) > 1:
-            matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
-            from .exceptions import DatasetValueError
-            raise DatasetValueError(
-                f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
-                f"Please specify the full column name to flatten."
-            )
-        # Get the number of observations
-        num_observations = self.num_observations()
-        # Find the column to flatten
+        # First try direct match with the exact field name
         field_entry = None
         for entry in self.data:
-            if field in entry:
+            col_name = next(iter(entry.keys()))
+            if field == col_name:
                 field_entry = entry
                 break
+        # If not found, try to match by unprefixed name
+        if field_entry is None:
+            # Find any columns that have field as their unprefixed name
+            candidates = []
+            for entry in self.data:
+                col_name = next(iter(entry.keys()))
+                if '.' in col_name:
+                    prefix, col_field = col_name.split('.', 1)
+                    if col_field == field:
+                        candidates.append(entry)
+            # If we found exactly one match by unprefixed name, use it
+            if len(candidates) == 1:
+                field_entry = candidates[0]
+            # If we found multiple matches, it's ambiguous
+            elif len(candidates) > 1:
+                matching_cols = [next(iter(entry.keys())) for entry in candidates]
+                from .exceptions import DatasetValueError
+                raise DatasetValueError(
+                    f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
+                    f"Please specify the full column name to flatten."
+                )
+            # If no candidates by unprefixed name, check partial matches
+            else:
+                partial_matches = []
+                for entry in self.data:
+                    col_name = next(iter(entry.keys()))
+                    if '.' in col_name and (
+                        col_name.endswith('.' + field) or
+                        col_name.startswith(field + '.')
+                    ):
+                        partial_matches.append(entry)
+                # If we found exactly one partial match, use it
+                if len(partial_matches) == 1:
+                    field_entry = partial_matches[0]
+                # If we found multiple partial matches, it's ambiguous
+                elif len(partial_matches) > 1:
+                    matching_cols = [next(iter(entry.keys())) for entry in partial_matches]
+                    from .exceptions import DatasetValueError
+                    raise DatasetValueError(
+                        f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
+                        f"Please specify the full column name to flatten."
+                    )
+        # Get the number of observations
+        num_observations = self.num_observations()
+        # If we still haven't found the field, it's not in the dataset
         if field_entry is None:
             warnings.warn(
                 f"Field '{field}' not found in dataset, returning original dataset"
             )
             return self.copy()
+        # Get the actual field name as it appears in the data
+        actual_field = next(iter(field_entry.keys()))
         # Create new dictionary for flattened data
         flattened_data = []
@@ -1135,14 +1171,14 @@ class DataOperationsBase:
         # Copy all existing columns except the one we're flattening (if keep_original is False)
         for entry in self.data:
             col_name = next(iter(entry.keys()))
-            if col_name != field or keep_original:
+            if col_name != actual_field or keep_original:
                 flattened_data.append(entry.copy())
         # Get field data and make sure it's valid
-        field_values = field_entry[field]
+        field_values = field_entry[actual_field]
         if not all(isinstance(item, dict) for item in field_values if item is not None):
             warnings.warn(
-                f"Field '{field}' contains non-dictionary values that cannot be flattened"
+                f"Field '{actual_field}' contains non-dictionary values that cannot be flattened"
             )
             return self.copy()
@@ -1162,7 +1198,7 @@ class DataOperationsBase:
                 new_values.append(value)
             # Add this as a new column
-            flattened_data.append({f"{field}.{key}": new_values})
+            flattened_data.append({f"{actual_field}.{key}": new_values})
         # Return a new Dataset with the flattened data
         return Dataset(flattened_data)
@@ -1244,37 +1280,6 @@ class DataOperationsBase:
         return result
-    def drop(self, field_name):
-        """
-        Returns a new Dataset with the specified field removed.
-        Args:
-            field_name (str): The name of the field to remove.
-        Returns:
-            Dataset: A new Dataset instance without the specified field.
-        Raises:
-            KeyError: If the field_name doesn't exist in the dataset.
-        Examples:
-            >>> from .dataset import Dataset
-            >>> d = Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
-            >>> d.drop('a')
-            Dataset([{'b': [4, 5, 6]}])
-            >>> # Testing drop with nonexistent field raises DatasetKeyError - tested in unit tests
-        """
-        from .dataset import Dataset
-        # Check if field exists in the dataset
-        if field_name not in self.relevant_columns():
-            raise DatasetKeyError(f"Field '{field_name}' not found in dataset")
-        # Create a new dataset without the specified field
-        new_data = [entry for entry in self.data if field_name not in entry]
-        return Dataset(new_data)
     def remove_prefix(self):
         """Returns a new Dataset with the prefix removed from all column names.

edsl/inference_services/services/test_service.py CHANGED Viewed

@@ -54,7 +54,7 @@ class TestService(InferenceServiceABC):
             input_token_name = cls.input_token_name
             output_token_name = cls.output_token_name
             _rpm = 1000
-            _tpm = 100000
+            _tpm = 8000000
             @property
             def _canned_response(self):

edsl 0.1.58__py3-none-any.whl → 0.1.59__py3-none-any.whl

edsl 0.1.58__py3-none-any.whl → 0.1.59__py3-none-any.whl