edsl 0.1.59__py3-none-any.whl → 0.1.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ from ..data_transfer_models import EDSLResultObjectInput
5
5
 
6
6
  # from edsl.data_transfer_models import VisibilityType
7
7
  from ..caching import Cache
8
+
8
9
  # Import BucketCollection lazily to avoid circular imports
9
10
  from ..key_management import KeyLookup
10
11
  from ..base import Base
@@ -18,23 +19,27 @@ if TYPE_CHECKING:
18
19
 
19
20
  VisibilityType = Literal["private", "public", "unlisted"]
20
21
 
22
+
21
23
@dataclass
class RunEnvironment:
    """
    Container for environment-level resources shared across job runs.

    Holds references to infrastructure components (cache, rate-limit buckets,
    API-key lookup, status tracker) that are typically long-lived and may be
    reused by multiple job executions, as opposed to per-run parameters.

    Attributes:
        cache (Cache, optional): Cache for storing and retrieving interview results
        bucket_collection (BucketCollection, optional): Collection of token rate limit buckets
        key_lookup (KeyLookup, optional): Manager for API keys across models
        jobs_runner_status (JobsRunnerStatus, optional): Tracker for job execution progress
    """

    cache: Optional[Cache] = None
    # Typed as Any to avoid a circular import of BucketCollection
    bucket_collection: Optional[Any] = None
    key_lookup: Optional[KeyLookup] = None
    jobs_runner_status: Optional["JobsRunnerStatus"] = None
40
45
 
@@ -43,11 +48,11 @@ class RunEnvironment:
43
48
  class RunParameters(Base):
44
49
  """
45
50
  Contains execution-specific parameters for job runs.
46
-
51
+
47
52
  This dataclass holds parameters that control the behavior of a specific job run,
48
53
  such as iteration count, error handling preferences, and remote execution options.
49
54
  Unlike RunEnvironment, these parameters are specific to a single job execution.
50
-
55
+
51
56
  Attributes:
52
57
  n (int): Number of iterations to run each interview, default is 1
53
58
  progress_bar (bool): Whether to show a progress bar, default is False
@@ -66,7 +71,9 @@ class RunParameters(Base):
66
71
  disable_remote_inference (bool): Whether to disable remote inference, default is False
67
72
  job_uuid (str, optional): UUID for the job, used for tracking
68
73
  fresh (bool): If True, ignore cache and generate new results, default is False
74
+ new_format (bool): If True, uses remote_inference_create method, if False uses old_remote_inference_create method, default is True
69
75
  """
76
+
70
77
  n: int = 1
71
78
  progress_bar: bool = False
72
79
  stop_on_exception: bool = False
@@ -82,8 +89,13 @@ class RunParameters(Base):
82
89
  disable_remote_cache: bool = False
83
90
  disable_remote_inference: bool = False
84
91
  job_uuid: Optional[str] = None
85
- fresh: bool = False # if True, will not use cache and will save new results to cache
86
- memory_threshold: Optional[int] = None # Threshold in bytes for Results SQLList memory management
92
+ fresh: bool = (
93
+ False # if True, will not use cache and will save new results to cache
94
+ )
95
+ memory_threshold: Optional[
96
+ int
97
+ ] = None # Threshold in bytes for Results SQLList memory management
98
+ new_format: bool = True # if True, uses remote_inference_create, if False uses old_remote_inference_create
87
99
 
88
100
  def to_dict(self, add_edsl_version=False) -> dict:
89
101
  d = asdict(self)
@@ -110,24 +122,25 @@ class RunParameters(Base):
110
122
  class RunConfig:
111
123
  """
112
124
  Combines environment resources and execution parameters for a job run.
113
-
125
+
114
126
  This class brings together the two aspects of job configuration:
115
127
  1. Environment resources (caches, API keys, etc.) via RunEnvironment
116
128
  2. Execution parameters (iterations, error handling, etc.) via RunParameters
117
-
129
+
118
130
  It provides helper methods for modifying environment components after construction.
119
-
131
+
120
132
  Attributes:
121
133
  environment (RunEnvironment): The environment resources for the job
122
134
  parameters (RunParameters): The execution parameters for the job
123
135
  """
136
+
124
137
  environment: RunEnvironment
125
138
  parameters: RunParameters
126
139
 
127
140
  def add_environment(self, environment: RunEnvironment) -> None:
128
141
  """
129
142
  Replace the entire environment configuration.
130
-
143
+
131
144
  Parameters:
132
145
  environment (RunEnvironment): The new environment configuration
133
146
  """
@@ -136,7 +149,7 @@ class RunConfig:
136
149
  def add_bucket_collection(self, bucket_collection: "BucketCollection") -> None:
137
150
  """
138
151
  Set or replace the bucket collection in the environment.
139
-
152
+
140
153
  Parameters:
141
154
  bucket_collection (BucketCollection): The bucket collection to use
142
155
  """
@@ -145,7 +158,7 @@ class RunConfig:
145
158
  def add_cache(self, cache: Cache) -> None:
146
159
  """
147
160
  Set or replace the cache in the environment.
148
-
161
+
149
162
  Parameters:
150
163
  cache (Cache): The cache to use
151
164
  """
@@ -154,7 +167,7 @@ class RunConfig:
154
167
  def add_key_lookup(self, key_lookup: KeyLookup) -> None:
155
168
  """
156
169
  Set or replace the key lookup in the environment.
157
-
170
+
158
171
  Parameters:
159
172
  key_lookup (KeyLookup): The key lookup to use
160
173
  """
@@ -169,10 +182,10 @@ Additional data structures for working with job results and answers.
169
182
  class Answers(UserDict):
170
183
  """
171
184
  A specialized dictionary for holding interview response data.
172
-
185
+
173
186
  This class extends UserDict to provide a flexible container for survey answers,
174
187
  with special handling for response metadata like comments and token usage.
175
-
188
+
176
189
  Key features:
177
190
  - Stores answers by question name
178
191
  - Associates comments with their respective questions
@@ -185,14 +198,14 @@ class Answers(UserDict):
185
198
  ) -> None:
186
199
  """
187
200
  Add a response to the answers dictionary.
188
-
201
+
189
202
  This method processes a response and stores it in the dictionary with appropriate
190
203
  naming conventions for the answer itself, comments, and token usage tracking.
191
-
204
+
192
205
  Parameters:
193
206
  response (EDSLResultObjectInput): The response object containing answer data
194
207
  question (QuestionBase): The question that was answered
195
-
208
+
196
209
  Notes:
197
210
  - The main answer is stored with the question's name as the key
198
211
  - Comments are stored with "_comment" appended to the question name
@@ -201,28 +214,33 @@ class Answers(UserDict):
201
214
  answer = response.answer
202
215
  comment = response.comment
203
216
  generated_tokens = response.generated_tokens
204
-
217
+
205
218
  # Record token usage if available
206
219
  if generated_tokens:
207
220
  self[question.question_name + "_generated_tokens"] = generated_tokens
208
-
221
+
209
222
  # Record the primary answer
210
223
  self[question.question_name] = answer
211
-
224
+
212
225
  # Record comment if present
213
226
  if comment:
214
227
  self[question.question_name + "_comment"] = comment
215
228
 
229
+ if getattr(response, "reasoning_summary", None):
230
+ self[
231
+ question.question_name + "_reasoning_summary"
232
+ ] = response.reasoning_summary
233
+
216
234
  def replace_missing_answers_with_none(self, survey: "Survey") -> None:
217
235
  """
218
236
  Replace missing answers with None for all questions in the survey.
219
-
237
+
220
238
  This method ensures that all questions in the survey have an entry in the
221
239
  answers dictionary, even if they were skipped during the interview.
222
-
240
+
223
241
  Parameters:
224
242
  survey (Survey): The survey containing the questions to check
225
-
243
+
226
244
  Notes:
227
245
  - Answers can be missing if the agent skips a question due to skip logic
228
246
  - This ensures consistent data structure even with partial responses
@@ -234,7 +252,7 @@ class Answers(UserDict):
234
252
  def to_dict(self) -> dict:
235
253
  """
236
254
  Convert the answers to a standard dictionary.
237
-
255
+
238
256
  Returns:
239
257
  dict: A plain dictionary containing all the answers data
240
258
  """
@@ -244,10 +262,10 @@ class Answers(UserDict):
244
262
  def from_dict(cls, d: dict) -> "Answers":
245
263
  """
246
264
  Create an Answers object from a dictionary.
247
-
265
+
248
266
  Parameters:
249
267
  d (dict): The dictionary containing answer data
250
-
268
+
251
269
  Returns:
252
270
  Answers: A new Answers instance with the provided data
253
271
  """
edsl/jobs/jobs.py CHANGED
@@ -1,8 +1,8 @@
1
1
  """
2
2
  The Jobs module is the core orchestration component of the EDSL framework.
3
3
 
4
- It provides functionality to define, configure, and execute computational jobs that
5
- involve multiple agents, scenarios, models, and a survey. Jobs are the primary way
4
+ It provides functionality to define, configure, and execute computational jobs that
5
+ involve multiple agents, scenarios, models, and a survey. Jobs are the primary way
6
6
  that users run large-scale experiments or simulations in EDSL.
7
7
 
8
8
  The Jobs class handles:
@@ -15,6 +15,7 @@ The Jobs class handles:
15
15
  This module is designed to be used by both application developers and researchers
16
16
  who need to run complex simulations with language models.
17
17
  """
18
+
18
19
  from __future__ import annotations
19
20
  import asyncio
20
21
  from typing import Optional, Union, TypeVar, Callable, cast
@@ -564,6 +565,7 @@ class Jobs(Base):
564
565
  remote_inference_description=self.run_config.parameters.remote_inference_description,
565
566
  remote_inference_results_visibility=self.run_config.parameters.remote_inference_results_visibility,
566
567
  fresh=self.run_config.parameters.fresh,
568
+ new_format=self.run_config.parameters.new_format,
567
569
  )
568
570
  return job_info
569
571
 
@@ -829,6 +831,7 @@ class Jobs(Base):
829
831
  key_lookup (KeyLookup, optional): Object to manage API keys
830
832
  memory_threshold (int, optional): Memory threshold in bytes for the Results object's SQLList,
831
833
  controlling when data is offloaded to SQLite storage
834
+ new_format (bool): If True, uses remote_inference_create method, if False uses old_remote_inference_create method (default: True)
832
835
 
833
836
  Returns:
834
837
  Results: A Results object containing all responses and metadata
@@ -889,6 +892,7 @@ class Jobs(Base):
889
892
  key_lookup (KeyLookup, optional): Object to manage API keys
890
893
  memory_threshold (int, optional): Memory threshold in bytes for the Results object's SQLList,
891
894
  controlling when data is offloaded to SQLite storage
895
+ new_format (bool): If True, uses remote_inference_create method, if False uses old_remote_inference_create method (default: True)
892
896
 
893
897
  Returns:
894
898
  Results: A Results object containing all responses and metadata
@@ -1084,6 +1088,73 @@ class Jobs(Base):
1084
1088
  """Return the code to create this instance."""
1085
1089
  raise JobsImplementationError("Code generation not implemented yet")
1086
1090
 
1091
+ def humanize(
1092
+ self,
1093
+ project_name: str = "Project",
1094
+ scenario_list_method: Optional[
1095
+ Literal["randomize", "loop", "single_scenario"]
1096
+ ] = None,
1097
+ survey_description: Optional[str] = None,
1098
+ survey_alias: Optional[str] = None,
1099
+ survey_visibility: Optional["VisibilityType"] = "unlisted",
1100
+ scenario_list_description: Optional[str] = None,
1101
+ scenario_list_alias: Optional[str] = None,
1102
+ scenario_list_visibility: Optional["VisibilityType"] = "unlisted",
1103
+ ):
1104
+ """
1105
+ Send the survey and scenario list to Coop.
1106
+
1107
+ Then, create a project on Coop so you can share the survey with human respondents.
1108
+ """
1109
+ from edsl.coop import Coop
1110
+ from edsl.coop.exceptions import CoopValueError
1111
+
1112
+ if len(self.agents) > 0 or len(self.models) > 0:
1113
+ raise CoopValueError("We don't support humanize with agents or models yet.")
1114
+
1115
+ if len(self.scenarios) > 0 and scenario_list_method is None:
1116
+ raise CoopValueError(
1117
+ "You must specify both a scenario list and a scenario list method to use scenarios with your survey."
1118
+ )
1119
+ elif len(self.scenarios) == 0 and scenario_list_method is not None:
1120
+ raise CoopValueError(
1121
+ "You must specify both a scenario list and a scenario list method to use scenarios with your survey."
1122
+ )
1123
+ elif scenario_list_method is "loop":
1124
+ questions, long_scenario_list = self.survey.to_long_format(self.scenarios)
1125
+
1126
+ # Replace the questions with new ones from the loop method
1127
+ self.survey = Survey(questions)
1128
+ self.scenarios = long_scenario_list
1129
+
1130
+ if len(self.scenarios) != 1:
1131
+ raise CoopValueError("Something went wrong with the loop method.")
1132
+ elif len(self.scenarios) != 1 and scenario_list_method == "single_scenario":
1133
+ raise CoopValueError(
1134
+ f"The single_scenario method requires exactly one scenario. "
1135
+ f"If you have a scenario list with multiple scenarios, try using the randomize or loop methods."
1136
+ )
1137
+
1138
+ if len(self.scenarios) == 0:
1139
+ scenario_list = None
1140
+ else:
1141
+ scenario_list = self.scenarios
1142
+
1143
+ c = Coop()
1144
+ project_details = c.create_project(
1145
+ self.survey,
1146
+ scenario_list,
1147
+ scenario_list_method,
1148
+ project_name,
1149
+ survey_description,
1150
+ survey_alias,
1151
+ survey_visibility,
1152
+ scenario_list_description,
1153
+ scenario_list_alias,
1154
+ scenario_list_visibility,
1155
+ )
1156
+ return project_details
1157
+
1087
1158
 
1088
1159
  def main():
1089
1160
  """Run the module's doctests."""
@@ -31,6 +31,7 @@ class RemoteJobInfo:
31
31
  creation_data: RemoteInferenceCreationInfo
32
32
  job_uuid: JobUUID
33
33
  logger: JobLogger
34
+ new_format: bool = True
34
35
 
35
36
 
36
37
  class JobsRemoteInferenceHandler:
@@ -85,7 +86,21 @@ class JobsRemoteInferenceHandler:
85
86
  remote_inference_description: Optional[str] = None,
86
87
  remote_inference_results_visibility: Optional["VisibilityType"] = "unlisted",
87
88
  fresh: Optional[bool] = False,
89
+ new_format: Optional[bool] = True,
88
90
  ) -> RemoteJobInfo:
91
+ """
92
+ Create a remote inference job and return job information.
93
+
94
+ Args:
95
+ iterations: Number of times to run each interview
96
+ remote_inference_description: Optional description for the remote job
97
+ remote_inference_results_visibility: Visibility setting for results
98
+ fresh: If True, ignore existing cache entries and generate new results
99
+ new_format: If True, use pull method for result retrieval; if False, use legacy get method
100
+
101
+ Returns:
102
+ RemoteJobInfo: Information about the created job including UUID and logger
103
+ """
89
104
  from ..coop import Coop
90
105
 
91
106
  logger = self._create_logger()
@@ -101,14 +116,24 @@ class JobsRemoteInferenceHandler:
101
116
  logger.add_info(
102
117
  "remote_cache_url", f"{self.expected_parrot_url}/home/remote-cache"
103
118
  )
104
- remote_job_creation_data = coop.remote_inference_create(
105
- self.jobs,
106
- description=remote_inference_description,
107
- status="queued",
108
- iterations=iterations,
109
- initial_results_visibility=remote_inference_results_visibility,
110
- fresh=fresh,
111
- )
119
+ if new_format:
120
+ remote_job_creation_data = coop.remote_inference_create(
121
+ self.jobs,
122
+ description=remote_inference_description,
123
+ status="queued",
124
+ iterations=iterations,
125
+ initial_results_visibility=remote_inference_results_visibility,
126
+ fresh=fresh,
127
+ )
128
+ else:
129
+ remote_job_creation_data = coop.old_remote_inference_create(
130
+ self.jobs,
131
+ description=remote_inference_description,
132
+ status="queued",
133
+ iterations=iterations,
134
+ initial_results_visibility=remote_inference_results_visibility,
135
+ fresh=fresh,
136
+ )
112
137
  logger.update(
113
138
  "Your survey is running at the Expected Parrot server...",
114
139
  status=JobsStatus.RUNNING,
@@ -141,6 +166,7 @@ class JobsRemoteInferenceHandler:
141
166
  creation_data=remote_job_creation_data,
142
167
  job_uuid=job_uuid,
143
168
  logger=logger,
169
+ new_format=new_format,
144
170
  )
145
171
 
146
172
  @staticmethod
@@ -164,7 +190,7 @@ class JobsRemoteInferenceHandler:
164
190
  return coop.remote_inference_get
165
191
 
166
192
  def _construct_object_fetcher(
167
- self, testing_simulated_response: Optional[Any] = None
193
+ self, new_format: bool = True, testing_simulated_response: Optional[Any] = None
168
194
  ) -> Callable:
169
195
  "Constructs a function to fetch the results object from Coop."
170
196
  if testing_simulated_response is not None:
@@ -173,7 +199,10 @@ class JobsRemoteInferenceHandler:
173
199
  from ..coop import Coop
174
200
 
175
201
  coop = Coop()
176
- return coop.get
202
+ if new_format:
203
+ return coop.pull
204
+ else:
205
+ return coop.get
177
206
 
178
207
  def _handle_cancelled_job(self, job_info: RemoteJobInfo) -> None:
179
208
  "Handles a cancelled job by logging the cancellation and updating the job status."
@@ -395,7 +424,6 @@ class JobsRemoteInferenceHandler:
395
424
 
396
425
  converter = CostConverter()
397
426
  for model_key, model_cost_dict in expenses_by_model.items():
398
-
399
427
  # Handle full cost (without cache)
400
428
  input_cost = model_cost_dict["input_cost_usd"]
401
429
  output_cost = model_cost_dict["output_cost_usd"]
@@ -417,9 +445,9 @@ class JobsRemoteInferenceHandler:
417
445
  model_cost_dict["input_cost_credits_with_cache"] = converter.usd_to_credits(
418
446
  input_cost_with_cache
419
447
  )
420
- model_cost_dict["output_cost_credits_with_cache"] = (
421
- converter.usd_to_credits(output_cost_with_cache)
422
- )
448
+ model_cost_dict[
449
+ "output_cost_credits_with_cache"
450
+ ] = converter.usd_to_credits(output_cost_with_cache)
423
451
  return list(expenses_by_model.values())
424
452
 
425
453
  def _fetch_results_and_log(
@@ -525,7 +553,10 @@ class JobsRemoteInferenceHandler:
525
553
  remote_job_data_fetcher = self._construct_remote_job_fetcher(
526
554
  testing_simulated_response
527
555
  )
528
- object_fetcher = self._construct_object_fetcher(testing_simulated_response)
556
+ object_fetcher = self._construct_object_fetcher(
557
+ new_format=job_info.new_format,
558
+ testing_simulated_response=testing_simulated_response,
559
+ )
529
560
 
530
561
  job_in_queue = True
531
562
  while job_in_queue:
@@ -540,6 +571,7 @@ class JobsRemoteInferenceHandler:
540
571
  iterations: int = 1,
541
572
  remote_inference_description: Optional[str] = None,
542
573
  remote_inference_results_visibility: Optional[VisibilityType] = "unlisted",
574
+ new_format: Optional[bool] = True,
543
575
  ) -> Union["Results", None]:
544
576
  """
545
577
  Creates and polls a remote inference job asynchronously.
@@ -548,6 +580,7 @@ class JobsRemoteInferenceHandler:
548
580
  :param iterations: Number of times to run each interview
549
581
  :param remote_inference_description: Optional description for the remote job
550
582
  :param remote_inference_results_visibility: Visibility setting for results
583
+ :param new_format: If True, use pull method for result retrieval; if False, use legacy get method
551
584
  :return: Results object if successful, None if job fails or is cancelled
552
585
  """
553
586
  import asyncio
@@ -562,6 +595,7 @@ class JobsRemoteInferenceHandler:
562
595
  iterations=iterations,
563
596
  remote_inference_description=remote_inference_description,
564
597
  remote_inference_results_visibility=remote_inference_results_visibility,
598
+ new_format=new_format,
565
599
  ),
566
600
  )
567
601
  if job_info is None:
@@ -363,13 +363,35 @@ class KeyLookupBuilder:
363
363
  >>> builder._add_api_key("OPENAI_API_KEY", "sk-1234", "env")
364
364
  >>> 'sk-1234' == builder.key_data["openai"][-1].value
365
365
  True
366
+ >>> 'sk-1234' == builder.key_data["openai_v2"][-1].value
367
+ True
366
368
  """
367
369
  service = api_keyname_to_service[key]
368
370
  new_entry = APIKeyEntry(service=service, name=key, value=value, source=source)
369
- if service not in self.key_data:
370
- self.key_data[service] = [new_entry]
371
+
372
+ # Special case for OPENAI_API_KEY - add to both openai and openai_v2
373
+ if key == "OPENAI_API_KEY":
374
+ # Add to openai service
375
+ openai_service = "openai"
376
+ openai_entry = APIKeyEntry(service=openai_service, name=key, value=value, source=source)
377
+ if openai_service not in self.key_data:
378
+ self.key_data[openai_service] = [openai_entry]
379
+ else:
380
+ self.key_data[openai_service].append(openai_entry)
381
+
382
+ # Add to openai_v2 service
383
+ openai_v2_service = "openai_v2"
384
+ openai_v2_entry = APIKeyEntry(service=openai_v2_service, name=key, value=value, source=source)
385
+ if openai_v2_service not in self.key_data:
386
+ self.key_data[openai_v2_service] = [openai_v2_entry]
387
+ else:
388
+ self.key_data[openai_v2_service].append(openai_v2_entry)
371
389
  else:
372
- self.key_data[service].append(new_entry)
390
+ # Normal case for all other API keys
391
+ if service not in self.key_data:
392
+ self.key_data[service] = [new_entry]
393
+ else:
394
+ self.key_data[service].append(new_entry)
373
395
 
374
396
  def update_from_dict(self, d: dict) -> None:
375
397
  """
@@ -174,7 +174,8 @@ class LanguageModel(
174
174
  """
175
175
  key_sequence = cls.key_sequence
176
176
  usage_sequence = cls.usage_sequence if hasattr(cls, "usage_sequence") else None
177
- return RawResponseHandler(key_sequence, usage_sequence)
177
+ reasoning_sequence = cls.reasoning_sequence if hasattr(cls, "reasoning_sequence") else None
178
+ return RawResponseHandler(key_sequence, usage_sequence, reasoning_sequence)
178
179
 
179
180
  def __init__(
180
181
  self,