PyPI - edsl - Versions diffs - 0.1.60__py3-none-any.whl → 0.1.62__py3-none-any.whl - Mend

edsl 0.1.60__py3-none-any.whl → 0.1.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

edsl/__version__.py +1 -1
edsl/agents/agent.py +65 -17
edsl/agents/agent_list.py +117 -33
edsl/base/base_class.py +88 -11
edsl/config/config_class.py +7 -2
edsl/coop/coop.py +1552 -95
edsl/coop/coop_jobs_objects.py +2 -2
edsl/coop/coop_prolific_filters.py +171 -0
edsl/coop/coop_regular_objects.py +3 -1
edsl/dataset/display/table_display.py +40 -7
edsl/db_list/sqlite_list.py +102 -3
edsl/jobs/data_structures.py +46 -31
edsl/jobs/jobs.py +73 -2
edsl/jobs/remote_inference.py +47 -13
edsl/prompts/prompt.py +7 -2
edsl/questions/loop_processor.py +289 -10
edsl/questions/question_registry.py +4 -1
edsl/questions/templates/dict/answering_instructions.jinja +0 -1
edsl/scenarios/file_store.py +69 -0
edsl/scenarios/scenario.py +233 -0
edsl/scenarios/scenario_list.py +31 -1
edsl/scenarios/scenario_source.py +605 -498
edsl/surveys/survey.py +198 -163
{edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/METADATA +3 -3
{edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/RECORD +28 -27
{edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/LICENSE +0 -0
{edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/WHEEL +0 -0
{edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/entry_points.txt +0 -0

edsl/coop/coop_jobs_objects.py CHANGED Viewed

@@ -26,7 +26,7 @@ class CoopJobsObjects(CoopObjects):
         c = Coop()
         job_details = [
-            c.remote_inference_get(obj["uuid"], include_json_string=True)
+            c.new_remote_inference_get(obj["uuid"], include_json_string=True)
             for obj in self
         ]
@@ -53,7 +53,7 @@ class CoopJobsObjects(CoopObjects):
         for obj in self:
             if obj.get("results_uuid"):
-                result = c.get(obj["results_uuid"])
+                result = c.pull(obj["results_uuid"], expected_object_type="results")
                 results.append(result)
         return results

edsl/coop/coop_prolific_filters.py ADDED Viewed

@@ -0,0 +1,171 @@
+import reprlib
+from typing import Optional
+from .exceptions import CoopValueError
+from ..scenarios import Scenario, ScenarioList
+class CoopProlificFilters(ScenarioList):
+    """ScenarioList of the Prolific filters supported on Coop.
+    Each item is a Scenario describing one filter. The methods below read its
+    "filter_id", "type", "range_filter_min", "range_filter_max" and
+    "select_filter_options" entries.
+    """
+    def __init__(
+        self, data: Optional[list] = None, codebook: Optional[dict[str, str]] = None
+    ):
+        """Forward ``data`` and ``codebook`` unchanged to ScenarioList."""
+        super().__init__(data, codebook)
+    def find(self, filter_id: str) -> Optional[Scenario]:
+        """
+        Find a filter by its ID.
+        Underscores and dashes in ``filter_id`` are treated as interchangeable.
+        Raises a CoopValueError if the filter is not found (None is never returned).
+        >>> filters = coop.list_prolific_filters()
+        >>> filters.find("age")
+        Scenario(
+            {
+                "filter_id": "age",
+                "type": "range",
+                "range_filter_min": 18,
+                "range_filter_max": 100,
+                ...
+            }
+        )
+        """
+        # Prolific has inconsistent naming conventions for filters -
+        # some use underscores and some use dashes, so we check the ID as given
+        # plus its all-dashes and all-underscores spellings.
+        candidate_ids = (
+            filter_id,
+            filter_id.replace("_", "-"),
+            filter_id.replace("-", "_"),
+        )
+        for scenario in self:
+            if scenario["filter_id"] in candidate_ids:
+                return scenario
+        raise CoopValueError(f"Filter with ID {filter_id} not found.")
+    def create_study_filter(
+        self,
+        filter_id: str,
+        min: Optional[int] = None,
+        max: Optional[int] = None,
+        values: Optional[list[str]] = None,
+    ) -> dict:
+        """
+        Create a valid filter dict that is compatible with Coop.create_prolific_study().
+        This function will raise a CoopValueError if:
+        - The filter ID is not found
+        - A range filter is provided without both a min and a max value, or with a value that is outside of the allowed range
+        - A select filter is provided with no values, or a value that is not in the allowed options
+        - The filter's type is neither "range" nor "select"
+        For a select filter, you should pass a list of values:
+        >>> filters = coop.list_prolific_filters()
+        >>> filters.create_study_filter("current_country_of_residence", values=["United States", "Canada"])
+        {
+            "filter_id": "current_country_of_residence",
+            "selected_values": ["1", "45"],
+        }
+        For a range filter, you should pass a min and max value:
+        >>> filters.create_study_filter("age", min=20, max=40)
+        {
+            "filter_id": "age",
+            "selected_range": {
+                "lower": 20,
+                "upper": 40,
+            },
+        }
+        """
+        prolific_filter = self.find(filter_id)
+        # .find() has logic to handle inconsistent naming conventions for filter IDs,
+        # so we need to get the correct filter ID from the filter dict
+        correct_filter_id = prolific_filter.get("filter_id")
+        filter_type = prolific_filter.get("type")
+        if filter_type == "range":
+            filter_min = prolific_filter.get("range_filter_min")
+            filter_max = prolific_filter.get("range_filter_max")
+            # Both bounds are mandatory: with only one supplied, the comparisons
+            # below would fail with a TypeError instead of a CoopValueError.
+            if min is None or max is None:
+                raise CoopValueError("Range filters require both a min and max value.")
+            if min < filter_min:
+                raise CoopValueError(
+                    f"Min value {min} is less than the minimum allowed value {filter_min}."
+                )
+            if max > filter_max:
+                raise CoopValueError(
+                    f"Max value {max} is greater than the maximum allowed value {filter_max}."
+                )
+            if min > max:
+                raise CoopValueError("Min value cannot be greater than max value.")
+            return {
+                "filter_id": correct_filter_id,
+                "selected_range": {
+                    "lower": min,
+                    "upper": max,
+                },
+            }
+        elif filter_type == "select":
+            if values is None:
+                raise CoopValueError("Select filters require a list of values.")
+            # The allowlist filter takes its values verbatim; they are not
+            # translated through select_filter_options.
+            if correct_filter_id == "custom_allowlist":
+                return {
+                    "filter_id": correct_filter_id,
+                    "selected_values": values,
+                }
+            # select_filter_options maps option ID -> label; callers pass labels,
+            # so invert it. The entry may be present but None (table() guards for
+            # that as well), hence the `or {}`.
+            allowed_option_labels = prolific_filter.get("select_filter_options") or {}
+            option_labels_to_ids = {v: k for k, v in allowed_option_labels.items()}
+            try:
+                selected_option_ids = [option_labels_to_ids[value] for value in values]
+            except KeyError as err:
+                raise CoopValueError(
+                    f"Invalid value(s) provided for filter {filter_id}: {values}. "
+                    "Call find() with the filter ID to examine the allowed values for this filter."
+                ) from err
+            return {
+                "filter_id": correct_filter_id,
+                "selected_values": selected_option_ids,
+            }
+        else:
+            raise CoopValueError(f"Unsupported filter type: {filter_type}.")
+    def table(
+        self,
+        *fields,
+        tablefmt: Optional[str] = None,
+        pretty_labels: Optional[dict[str, str]] = None,
+    ) -> str:
+        """Return the CoopProlificFilters as a table with truncated options display for select filters.
+        Works on a copy: each non-None "select_filter_options" mapping is replaced
+        by a reprlib-abbreviated repr of its values (strings capped at 50
+        characters) before delegating to ScenarioList.table(); the filters stored
+        in this list are left untouched.
+        """
+        # One formatter is enough; its settings do not change between rows.
+        formatter = reprlib.Repr()
+        formatter.maxstring = 50
+        truncated_scenarios = []
+        for scenario in self:
+            scenario_dict = dict(scenario)
+            options = scenario_dict.get("select_filter_options")
+            if options is not None:
+                # Show only the option labels (the mapping's values), abbreviated.
+                scenario_dict["select_filter_options"] = formatter.repr(
+                    list(dict(options).values())
+                )
+            truncated_scenarios.append(scenario_dict)
+        temp_scenario_list = ScenarioList([Scenario(s) for s in truncated_scenarios])
+        # Display the table with the truncated data
+        return temp_scenario_list.table(
+            *fields, tablefmt=tablefmt, pretty_labels=pretty_labels
+        )

edsl/coop/coop_regular_objects.py CHANGED Viewed

@@ -23,4 +23,6 @@ class CoopRegularObjects(CoopObjects):
         from ..coop import Coop
         c = Coop()
-        return [c.get(obj["uuid"]) for obj in self]
+        return [
+            c.pull(obj["uuid"], expected_object_type=obj["object_type"]) for obj in self
+        ]

edsl/dataset/display/table_display.py CHANGED Viewed

@@ -55,13 +55,46 @@ class TableDisplay:
             self.printing_parameters = {}
     def _repr_html_(self) -> str:
-        table_data = TableData(
-            headers=self.headers,
-            data=self.data,
-            parameters=self.printing_parameters,
-            raw_data_set=self.raw_data_set,
-        )
-        return self.renderer_class(table_data).render_html()
+        """
+        HTML representation for Jupyter/Colab notebooks.
+        The primary path uses the configured `renderer_class` to build an HTML
+        string.  Unfortunately, in shared or long-running notebook runtimes it
+        is not uncommon for binary dependencies (NumPy, Pandas, etc.) to get
+        into an incompatible state, raising import-time errors that would
+        otherwise bubble up to the notebook and obscure the actual table
+        output.  To make the developer experience smoother we catch *any*
+        exception, annotate the output with it, and fall back to a plain-text
+        rendering via `tabulate`, wrapped in a <pre> block so at least a
+        readable table is shown.  Both the table text and the error message
+        are HTML-escaped before being placed inside the <pre> block.
+        """
+        try:
+            table_data = TableData(
+                headers=self.headers,
+                data=self.data,
+                parameters=self.printing_parameters,
+                raw_data_set=self.raw_data_set,
+            )
+            return self.renderer_class(table_data).render_html()
+        except Exception as exc:  # pragma: no cover
+            # --- graceful degradation -------------------------------------------------
+            try:
+                from tabulate import tabulate
+                plain = tabulate(
+                    self.data,
+                    headers=self.headers,
+                    tablefmt=self.tablefmt or "simple",
+                )
+            except Exception:
+                # Even `tabulate` failed – resort to the default __repr__.
+                # (Every object has __repr__, so no hasattr() guard is needed.)
+                plain = super().__repr__()
+            # Escape HTML-sensitive chars so the browser renders plain text.
+            # The error message is escaped too: exception text can contain
+            # "<", ">" or "&" and must not be interpreted as markup.
+            import html
+            safe_plain = html.escape(plain)
+            safe_error = html.escape(str(exc))
+            return f"<pre>{safe_plain}\n\n[TableDisplay fallback – original error: {safe_error}]</pre>"
     def __repr__(self):
         # If rich format is requested, use RichRenderer

edsl/db_list/sqlite_list.py CHANGED Viewed

@@ -4,7 +4,7 @@ import os
 import json
 from typing import Any, Callable, Iterable, Iterator, List, Optional
 from abc import ABC, abstractmethod
-from collections.abc import MutableSequence
+from collections.abc import MutableSequence, MutableMapping
 class SQLiteList(MutableSequence, ABC):
@@ -97,7 +97,20 @@ class SQLiteList(MutableSequence, ABC):
         row = cursor.fetchone()
         if row is None:
             raise IndexError("list index out of range")
-        return self.deserialize(row[0])
+        obj = self.deserialize(row[0])
+        # If the stored object is a Scenario (or subclass), return a specialised proxy
+        try:
+            from edsl.scenarios.scenario import Scenario
+            if isinstance(obj, Scenario):
+                return self._make_scenario_proxy(self, index, obj)
+        except ImportError:
+            # Scenario not available – fall back to generic proxy
+            pass
+        # Generic proxy for other types
+        return self._RowProxy(self, index, obj)
     def __setitem__(self, index, value):
         if index < 0:
@@ -346,4 +359,90 @@ class SQLiteList(MutableSequence, ABC):
             self.conn.close()
             os.unlink(self.db_path)
         except:
-            pass
+            pass
+    class _RowProxy(MutableMapping):
+        """A write-through proxy returned by SQLiteList.__getitem__.
+        Item assignment and deletion on the proxy (proxy[key] = value,
+        del proxy[key]) are applied to the wrapped object, which is then
+        immediately handed back to the parent list via
+        parent.__setitem__(idx, obj) so the stored row stays in sync with
+        in-memory edits.
+        Mutations performed through delegated attributes/methods of the wrapped
+        object (see __getattr__) are NOT written back.
+        """
+        def __init__(self, parent: "SQLiteList", idx: int, obj: Any):
+            self._parent = parent  # list that owns the row
+            self._idx = idx  # index the row was read from
+            self._obj = obj  # The real deserialised object (e.g. Scenario)
+        # ---- MutableMapping interface ----
+        def __getitem__(self, key):
+            return self._obj[key]
+        def __setitem__(self, key, value):
+            self._obj[key] = value
+            # Propagate change back to SQLite via parent list
+            self._parent.__setitem__(self._idx, self._obj)
+        def __delitem__(self, key):
+            del self._obj[key]
+            # Same write-through as __setitem__
+            self._parent.__setitem__(self._idx, self._obj)
+        def __iter__(self):
+            return iter(self._obj)
+        def __len__(self):
+            return len(self._obj)
+        # ---- Convenience helpers ----
+        def __getattr__(self, name):  # Delegate attribute access
+            # __getattr__ only runs after normal lookup failed. If one of the
+            # proxy's own fields is what is missing (instance created without
+            # __init__, e.g. by copy/pickle), delegating through self._obj
+            # would recurse without bound, so fail cleanly instead.
+            if name in ("_parent", "_idx", "_obj"):
+                raise AttributeError(name)
+            return getattr(self._obj, name)
+        def __repr__(self):
+            return repr(self._obj)
+    # Specialised proxy for Scenario objects so isinstance(obj, Scenario) remains True.
+    # Defined lazily to avoid importing Scenario at module load time for performance.
+    @staticmethod
+    def _make_scenario_proxy(parent: "SQLiteList", idx: int, scenario_obj: Any):
+        """Return a write-through proxy *instance* wrapping ``scenario_obj``.
+        A fresh ``_ScenarioRowProxy`` class inheriting from Scenario is built on
+        every call and immediately removed from the global subclass registry so
+        serialization coverage tests ignore it. The returned instance is a copy
+        of ``scenario_obj`` (built from ``dict(scenario_obj)``) whose item
+        assignment/deletion also stores a plain ``Scenario(dict(self))`` back
+        into ``parent`` at ``idx``.
+        Parameters:
+            parent: the list the row came from; receives the write-backs
+            idx: index of the row inside ``parent``
+            scenario_obj: the deserialised Scenario to wrap
+        """
+        from edsl.scenarios.scenario import Scenario  # local import
+        from edsl.base import RegisterSubclassesMeta
+        # Dynamically build class dict with required methods
+        def _proxy_setitem(self, key, value):
+            Scenario.__setitem__(self, key, value)  # super call avoids MRO confusion
+            from edsl.scenarios.scenario import Scenario as S
+            # Write back a plain Scenario, never the proxy type itself
+            self._parent.__setitem__(self._idx, S(dict(self)))
+        def _proxy_delitem(self, key):
+            Scenario.__delitem__(self, key)
+            from edsl.scenarios.scenario import Scenario as S
+            self._parent.__setitem__(self._idx, S(dict(self)))
+        def _proxy_reduce(self):
+            # Pickling/copying the proxy reconstructs a plain Scenario from its items
+            from edsl.scenarios.scenario import Scenario as S
+            return (S, (dict(self),))
+        proxy_cls = type(
+            "_ScenarioRowProxy",
+            (Scenario,),
+            {
+                "__setitem__": _proxy_setitem,
+                "__delitem__": _proxy_delitem,
+                "__reduce__": _proxy_reduce,
+                "__module__": Scenario.__module__,
+            },
+        )
+        # Remove this helper class from global registry so tests ignore it
+        RegisterSubclassesMeta._registry.pop(proxy_cls.__name__, None)
+        # Instantiate
+        # NOTE(review): _parent/_idx are only attached after construction; this
+        # presumes Scenario's constructor does not route through __setitem__ - confirm.
+        instance = proxy_cls(dict(scenario_obj))
+        # attach parent tracking attributes
+        instance._parent = parent
+        instance._idx = idx
+        return instance

edsl/jobs/data_structures.py CHANGED Viewed

@@ -5,6 +5,7 @@ from ..data_transfer_models import EDSLResultObjectInput
 # from edsl.data_transfer_models import VisibilityType
 from ..caching import Cache
 # Import BucketCollection lazily to avoid circular imports
 from ..key_management import KeyLookup
 from ..base import Base
@@ -18,23 +19,27 @@ if TYPE_CHECKING:
 VisibilityType = Literal["private", "public", "unlisted"]
 @dataclass
 class RunEnvironment:
     """
     Contains environment-related resources for job execution.
-    This dataclass holds references to shared resources and infrastructure components
-    needed for job execution. These components are typically long-lived and may be
+    This dataclass holds references to shared resources and infrastructure components
+    needed for job execution. These components are typically long-lived and may be
     shared across multiple job runs.
     Attributes:
         cache (Cache, optional): Cache for storing and retrieving interview results
         bucket_collection (BucketCollection, optional): Collection of token rate limit buckets
         key_lookup (KeyLookup, optional): Manager for API keys across models
         jobs_runner_status (JobsRunnerStatus, optional): Tracker for job execution progress
     """
     cache: Optional[Cache] = None
-    bucket_collection: Optional[Any] = None  # Using Any to avoid circular import of BucketCollection
+    bucket_collection: Optional[
+        Any
+    ] = None  # Using Any to avoid circular import of BucketCollection
     key_lookup: Optional[KeyLookup] = None
     jobs_runner_status: Optional["JobsRunnerStatus"] = None
@@ -43,11 +48,11 @@ class RunEnvironment:
 class RunParameters(Base):
     """
     Contains execution-specific parameters for job runs.
     This dataclass holds parameters that control the behavior of a specific job run,
     such as iteration count, error handling preferences, and remote execution options.
     Unlike RunEnvironment, these parameters are specific to a single job execution.
     Attributes:
         n (int): Number of iterations to run each interview, default is 1
         progress_bar (bool): Whether to show a progress bar, default is False
@@ -66,7 +71,9 @@ class RunParameters(Base):
         disable_remote_inference (bool): Whether to disable remote inference, default is False
         job_uuid (str, optional): UUID for the job, used for tracking
         fresh (bool): If True, ignore cache and generate new results, default is False
+        new_format (bool): If True, uses remote_inference_create method, if False uses old_remote_inference_create method, default is True
     """
     n: int = 1
     progress_bar: bool = False
     stop_on_exception: bool = False
@@ -82,8 +89,13 @@ class RunParameters(Base):
     disable_remote_cache: bool = False
     disable_remote_inference: bool = False
     job_uuid: Optional[str] = None
-    fresh: bool = False  # if True, will not use cache and will save new results to cache
-    memory_threshold: Optional[int] = None  # Threshold in bytes for Results SQLList memory management
+    fresh: bool = (
+        False  # if True, will not use cache and will save new results to cache
+    )
+    memory_threshold: Optional[
+        int
+    ] = None  # Threshold in bytes for Results SQLList memory management
+    new_format: bool = True  # if True, uses remote_inference_create, if False uses old_remote_inference_create
     def to_dict(self, add_edsl_version=False) -> dict:
         d = asdict(self)
@@ -110,24 +122,25 @@ class RunParameters(Base):
 class RunConfig:
     """
     Combines environment resources and execution parameters for a job run.
     This class brings together the two aspects of job configuration:
     1. Environment resources (caches, API keys, etc.) via RunEnvironment
     2. Execution parameters (iterations, error handling, etc.) via RunParameters
     It provides helper methods for modifying environment components after construction.
     Attributes:
         environment (RunEnvironment): The environment resources for the job
         parameters (RunParameters): The execution parameters for the job
     """
     environment: RunEnvironment
     parameters: RunParameters
     def add_environment(self, environment: RunEnvironment) -> None:
         """
         Replace the entire environment configuration.
         Parameters:
             environment (RunEnvironment): The new environment configuration
         """
@@ -136,7 +149,7 @@ class RunConfig:
     def add_bucket_collection(self, bucket_collection: "BucketCollection") -> None:
         """
         Set or replace the bucket collection in the environment.
         Parameters:
             bucket_collection (BucketCollection): The bucket collection to use
         """
@@ -145,7 +158,7 @@ class RunConfig:
     def add_cache(self, cache: Cache) -> None:
         """
         Set or replace the cache in the environment.
         Parameters:
             cache (Cache): The cache to use
         """
@@ -154,7 +167,7 @@ class RunConfig:
     def add_key_lookup(self, key_lookup: KeyLookup) -> None:
         """
         Set or replace the key lookup in the environment.
         Parameters:
             key_lookup (KeyLookup): The key lookup to use
         """
@@ -169,10 +182,10 @@ Additional data structures for working with job results and answers.
 class Answers(UserDict):
     """
     A specialized dictionary for holding interview response data.
     This class extends UserDict to provide a flexible container for survey answers,
     with special handling for response metadata like comments and token usage.
     Key features:
     - Stores answers by question name
     - Associates comments with their respective questions
@@ -185,14 +198,14 @@ class Answers(UserDict):
     ) -> None:
         """
         Add a response to the answers dictionary.
         This method processes a response and stores it in the dictionary with appropriate
         naming conventions for the answer itself, comments, and token usage tracking.
         Parameters:
             response (EDSLResultObjectInput): The response object containing answer data
             question (QuestionBase): The question that was answered
         Notes:
             - The main answer is stored with the question's name as the key
             - Comments are stored with "_comment" appended to the question name
@@ -201,31 +214,33 @@ class Answers(UserDict):
         answer = response.answer
         comment = response.comment
         generated_tokens = response.generated_tokens
         # Record token usage if available
         if generated_tokens:
             self[question.question_name + "_generated_tokens"] = generated_tokens
         # Record the primary answer
         self[question.question_name] = answer
         # Record comment if present
         if comment:
             self[question.question_name + "_comment"] = comment
         if getattr(response, "reasoning_summary", None):
-            self[question.question_name + "_reasoning_summary"] = response.reasoning_summary
+            self[
+                question.question_name + "_reasoning_summary"
+            ] = response.reasoning_summary
     def replace_missing_answers_with_none(self, survey: "Survey") -> None:
         """
         Replace missing answers with None for all questions in the survey.
         This method ensures that all questions in the survey have an entry in the
         answers dictionary, even if they were skipped during the interview.
         Parameters:
             survey (Survey): The survey containing the questions to check
         Notes:
             - Answers can be missing if the agent skips a question due to skip logic
             - This ensures consistent data structure even with partial responses
@@ -237,7 +252,7 @@ class Answers(UserDict):
     def to_dict(self) -> dict:
         """
         Convert the answers to a standard dictionary.
         Returns:
             dict: A plain dictionary containing all the answers data
         """
@@ -247,10 +262,10 @@ class Answers(UserDict):
     def from_dict(cls, d: dict) -> "Answers":
         """
         Create an Answers object from a dictionary.
         Parameters:
             d (dict): The dictionary containing answer data
         Returns:
             Answers: A new Answers instance with the provided data
         """

edsl 0.1.60__py3-none-any.whl → 0.1.62__py3-none-any.whl

edsl 0.1.60__py3-none-any.whl → 0.1.62__py3-none-any.whl