PyPI - edsl - Versions diffs - 0.1.60__py3-none-any.whl → 0.1.61__py3-none-any.whl - Mend

edsl 0.1.60__py3-none-any.whl → 0.1.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

edsl/__version__.py +1 -1
edsl/agents/agent.py +65 -17
edsl/agents/agent_list.py +117 -33
edsl/base/base_class.py +80 -11
edsl/config/config_class.py +7 -2
edsl/coop/coop.py +1295 -85
edsl/coop/coop_prolific_filters.py +171 -0
edsl/dataset/display/table_display.py +40 -7
edsl/db_list/sqlite_list.py +102 -3
edsl/jobs/data_structures.py +46 -31
edsl/jobs/jobs.py +73 -2
edsl/jobs/remote_inference.py +49 -15
edsl/questions/loop_processor.py +289 -10
edsl/questions/templates/dict/answering_instructions.jinja +0 -1
edsl/scenarios/scenario_list.py +31 -1
edsl/scenarios/scenario_source.py +606 -498
edsl/surveys/survey.py +198 -163
{edsl-0.1.60.dist-info → edsl-0.1.61.dist-info}/METADATA +3 -3
{edsl-0.1.60.dist-info → edsl-0.1.61.dist-info}/RECORD +22 -21
{edsl-0.1.60.dist-info → edsl-0.1.61.dist-info}/LICENSE +0 -0
{edsl-0.1.60.dist-info → edsl-0.1.61.dist-info}/WHEEL +0 -0
{edsl-0.1.60.dist-info → edsl-0.1.61.dist-info}/entry_points.txt +0 -0

edsl/jobs/jobs.py CHANGED Viewed

@@ -1,8 +1,8 @@
 """
 The Jobs module is the core orchestration component of the EDSL framework.
-It provides functionality to define, configure, and execute computational jobs that
-involve multiple agents, scenarios, models, and a survey. Jobs are the primary way
+It provides functionality to define, configure, and execute computational jobs that
+involve multiple agents, scenarios, models, and a survey. Jobs are the primary way
 that users run large-scale experiments or simulations in EDSL.
 The Jobs class handles:
@@ -15,6 +15,7 @@ The Jobs class handles:
 This module is designed to be used by both application developers and researchers
 who need to run complex simulations with language models.
 """
 from __future__ import annotations
 import asyncio
 from typing import Optional, Union, TypeVar, Callable, cast
@@ -564,6 +565,7 @@ class Jobs(Base):
             remote_inference_description=self.run_config.parameters.remote_inference_description,
             remote_inference_results_visibility=self.run_config.parameters.remote_inference_results_visibility,
             fresh=self.run_config.parameters.fresh,
+            new_format=self.run_config.parameters.new_format,
         )
         return job_info
@@ -829,6 +831,7 @@ class Jobs(Base):
             key_lookup (KeyLookup, optional): Object to manage API keys
             memory_threshold (int, optional): Memory threshold in bytes for the Results object's SQLList,
                 controlling when data is offloaded to SQLite storage
+            new_format (bool): If True, uses remote_inference_create method, if False uses old_remote_inference_create method (default: True)
         Returns:
             Results: A Results object containing all responses and metadata
@@ -889,6 +892,7 @@ class Jobs(Base):
             key_lookup (KeyLookup, optional): Object to manage API keys
             memory_threshold (int, optional): Memory threshold in bytes for the Results object's SQLList,
                 controlling when data is offloaded to SQLite storage
+            new_format (bool): If True, uses remote_inference_create method, if False uses old_remote_inference_create method (default: True)
         Returns:
             Results: A Results object containing all responses and metadata
@@ -1084,6 +1088,73 @@ class Jobs(Base):
         """Return the code to create this instance."""
         raise JobsImplementationError("Code generation not implemented yet")
+    def humanize(
+        self,
+        project_name: str = "Project",
+        scenario_list_method: Optional[
+            Literal["randomize", "loop", "single_scenario"]
+        ] = None,
+        survey_description: Optional[str] = None,
+        survey_alias: Optional[str] = None,
+        survey_visibility: Optional["VisibilityType"] = "unlisted",
+        scenario_list_description: Optional[str] = None,
+        scenario_list_alias: Optional[str] = None,
+        scenario_list_visibility: Optional["VisibilityType"] = "unlisted",
+    ):
+        """
+        Send the survey and scenario list to Coop.
+        Then, create a project on Coop so you can share the survey with human respondents.
+        Args:
+            project_name: Name forwarded to ``Coop.create_project``.
+            scenario_list_method: How the scenarios are used with the survey.
+                Must be given exactly when the job has scenarios. With "loop",
+                ``self.survey`` and ``self.scenarios`` are replaced in place by
+                the output of ``self.survey.to_long_format(self.scenarios)``.
+            survey_description: Forwarded unchanged to ``Coop.create_project``.
+            survey_alias: Forwarded unchanged to ``Coop.create_project``.
+            survey_visibility: Forwarded unchanged to ``Coop.create_project``.
+            scenario_list_description: Forwarded unchanged to ``Coop.create_project``.
+            scenario_list_alias: Forwarded unchanged to ``Coop.create_project``.
+            scenario_list_visibility: Forwarded unchanged to ``Coop.create_project``.
+        Returns:
+            Whatever ``Coop.create_project`` returns for the new project.
+        Raises:
+            CoopValueError: If the job has agents or models, if scenarios and
+                ``scenario_list_method`` are not supplied together, if the
+                "loop" conversion does not yield exactly one scenario, or if
+                "single_scenario" is requested with a scenario count other than one.
+        """
+        from edsl.coop import Coop
+        from edsl.coop.exceptions import CoopValueError
+        if len(self.agents) > 0 or len(self.models) > 0:
+            raise CoopValueError("We don't support humanize with agents or models yet.")
+        # Scenarios and a scenario list method must be supplied together or not at all.
+        has_scenarios = len(self.scenarios) > 0
+        if has_scenarios != (scenario_list_method is not None):
+            raise CoopValueError(
+                "You must specify both a scenario list and a scenario list method to use scenarios with your survey."
+            )
+        # Compare by value: `is "loop"` tests object identity and only works
+        # when the caller's string happens to be interned.
+        if scenario_list_method == "loop":
+            # NOTE(review): imported locally, like the Coop imports above, because
+            # the module-level imports visible here do not show Survey being
+            # bound at runtime - confirm and drop if redundant.
+            from edsl.surveys import Survey
+            questions, long_scenario_list = self.survey.to_long_format(self.scenarios)
+            # Replace the questions with new ones from the loop method
+            self.survey = Survey(questions)
+            self.scenarios = long_scenario_list
+            if len(self.scenarios) != 1:
+                raise CoopValueError("Something went wrong with the loop method.")
+        elif scenario_list_method == "single_scenario" and len(self.scenarios) != 1:
+            raise CoopValueError(
+                "The single_scenario method requires exactly one scenario. "
+                "If you have a scenario list with multiple scenarios, try using the randomize or loop methods."
+            )
+        # Coop receives None rather than an empty scenario list.
+        scenario_list = None if len(self.scenarios) == 0 else self.scenarios
+        c = Coop()
+        project_details = c.create_project(
+            self.survey,
+            scenario_list,
+            scenario_list_method,
+            project_name,
+            survey_description,
+            survey_alias,
+            survey_visibility,
+            scenario_list_description,
+            scenario_list_alias,
+            scenario_list_visibility,
+        )
+        return project_details
 def main():
     """Run the module's doctests."""

edsl/jobs/remote_inference.py CHANGED Viewed

@@ -31,6 +31,7 @@ class RemoteJobInfo:
     creation_data: RemoteInferenceCreationInfo
     job_uuid: JobUUID
     logger: JobLogger
+    new_format: bool = True
 class JobsRemoteInferenceHandler:
@@ -85,7 +86,21 @@ class JobsRemoteInferenceHandler:
         remote_inference_description: Optional[str] = None,
         remote_inference_results_visibility: Optional["VisibilityType"] = "unlisted",
         fresh: Optional[bool] = False,
+        new_format: Optional[bool] = True,
     ) -> RemoteJobInfo:
+        """
+        Create a remote inference job and return job information.
+        Args:
+            iterations: Number of times to run each interview
+            remote_inference_description: Optional description for the remote job
+            remote_inference_results_visibility: Visibility setting for results
+            fresh: If True, ignore existing cache entries and generate new results
+            new_format: If True, use pull method for result retrieval; if False, use legacy get method
+        Returns:
+            RemoteJobInfo: Information about the created job including UUID and logger
+        """
         from ..coop import Coop
         logger = self._create_logger()
@@ -101,14 +116,24 @@ class JobsRemoteInferenceHandler:
         logger.add_info(
             "remote_cache_url", f"{self.expected_parrot_url}/home/remote-cache"
         )
-        remote_job_creation_data = coop.remote_inference_create(
-            self.jobs,
-            description=remote_inference_description,
-            status="queued",
-            iterations=iterations,
-            initial_results_visibility=remote_inference_results_visibility,
-            fresh=fresh,
-        )
+        if new_format:
+            remote_job_creation_data = coop.remote_inference_create(
+                self.jobs,
+                description=remote_inference_description,
+                status="queued",
+                iterations=iterations,
+                initial_results_visibility=remote_inference_results_visibility,
+                fresh=fresh,
+            )
+        else:
+            remote_job_creation_data = coop.old_remote_inference_create(
+                self.jobs,
+                description=remote_inference_description,
+                status="queued",
+                iterations=iterations,
+                initial_results_visibility=remote_inference_results_visibility,
+                fresh=fresh,
+            )
         logger.update(
             "Your survey is running at the Expected Parrot server...",
             status=JobsStatus.RUNNING,
@@ -141,6 +166,7 @@ class JobsRemoteInferenceHandler:
             creation_data=remote_job_creation_data,
             job_uuid=job_uuid,
             logger=logger,
+            new_format=new_format,
         )
     @staticmethod
@@ -164,7 +190,7 @@ class JobsRemoteInferenceHandler:
             return coop.remote_inference_get
     def _construct_object_fetcher(
-        self, testing_simulated_response: Optional[Any] = None
+        self, new_format: bool = True, testing_simulated_response: Optional[Any] = None
     ) -> Callable:
         "Constructs a function to fetch the results object from Coop."
         if testing_simulated_response is not None:
@@ -173,7 +199,10 @@ class JobsRemoteInferenceHandler:
             from ..coop import Coop
             coop = Coop()
-            return coop.get
+            if new_format:
+                return coop.pull
+            else:
+                return coop.get
     def _handle_cancelled_job(self, job_info: RemoteJobInfo) -> None:
         "Handles a cancelled job by logging the cancellation and updating the job status."
@@ -395,7 +424,6 @@ class JobsRemoteInferenceHandler:
         converter = CostConverter()
         for model_key, model_cost_dict in expenses_by_model.items():
             # Handle full cost (without cache)
             input_cost = model_cost_dict["input_cost_usd"]
             output_cost = model_cost_dict["output_cost_usd"]
@@ -417,9 +445,9 @@ class JobsRemoteInferenceHandler:
             model_cost_dict["input_cost_credits_with_cache"] = converter.usd_to_credits(
                 input_cost_with_cache
             )
-            model_cost_dict["output_cost_credits_with_cache"] = (
-                converter.usd_to_credits(output_cost_with_cache)
-            )
+            model_cost_dict[
+                "output_cost_credits_with_cache"
+            ] = converter.usd_to_credits(output_cost_with_cache)
         return list(expenses_by_model.values())
     def _fetch_results_and_log(
@@ -525,7 +553,10 @@ class JobsRemoteInferenceHandler:
         remote_job_data_fetcher = self._construct_remote_job_fetcher(
             testing_simulated_response
         )
-        object_fetcher = self._construct_object_fetcher(testing_simulated_response)
+        object_fetcher = self._construct_object_fetcher(
+            new_format=job_info.new_format,
+            testing_simulated_response=testing_simulated_response,
+        )
         job_in_queue = True
         while job_in_queue:
@@ -540,6 +571,7 @@ class JobsRemoteInferenceHandler:
         iterations: int = 1,
         remote_inference_description: Optional[str] = None,
         remote_inference_results_visibility: Optional[VisibilityType] = "unlisted",
+        new_format: Optional[bool] = True,
     ) -> Union["Results", None]:
         """
         Creates and polls a remote inference job asynchronously.
@@ -548,6 +580,7 @@ class JobsRemoteInferenceHandler:
         :param iterations: Number of times to run each interview
         :param remote_inference_description: Optional description for the remote job
         :param remote_inference_results_visibility: Visibility setting for results
+        :param new_format: If True, use pull method for result retrieval; if False, use legacy get method
         :return: Results object if successful, None if job fails or is cancelled
         """
         import asyncio
@@ -562,6 +595,7 @@ class JobsRemoteInferenceHandler:
                 iterations=iterations,
                 remote_inference_description=remote_inference_description,
                 remote_inference_results_visibility=remote_inference_results_visibility,
+                new_format=new_format,
             ),
         )
         if job_info is None:

edsl/questions/loop_processor.py CHANGED Viewed

@@ -1,7 +1,9 @@
-from typing import List, Any, Dict
+from typing import List, Any, Dict, Tuple
 from jinja2 import Environment, Undefined
 from .question_base import QuestionBase
-from ..scenarios import ScenarioList
+from ..scenarios import Scenario, ScenarioList
+from ..surveys import Survey
 class LoopProcessor:
     def __init__(self, question: QuestionBase):
@@ -88,7 +90,10 @@ class LoopProcessor:
             return value
         from .exceptions import QuestionValueError
-        raise QuestionValueError(f"Unexpected value type: {type(value)} for key '{key}'")
+        raise QuestionValueError(
+            f"Unexpected value type: {type(value)} for key '{key}'"
+        )
     def _render_template(self, template: str, scenario: Dict[str, Any]) -> str:
         """Render a single template string.
@@ -124,21 +129,21 @@ class LoopProcessor:
             '{{ item.missing }}'
         """
         import re
         # Regular expression to find Jinja2 variables in the template
-        pattern = r'(?P<open>\{\{\s*)(?P<var>[a-zA-Z0-9_.]+)(?P<close>\s*\}\})'
+        pattern = r"(?P<open>\{\{\s*)(?P<var>[a-zA-Z0-9_.]+)(?P<close>\s*\}\})"
         def replace_var(match):
-            var_name = match.group('var')
+            var_name = match.group("var")
             # We're keeping the original formatting with braces
             # but not using these variables directly
             # open_brace = match.group('open')
             # close_brace = match.group('close')
             # Try to evaluate the variable in the context
             try:
                 # Handle nested attributes (like item.price)
-                parts = var_name.split('.')
+                parts = var_name.split(".")
                 value = scenario
                 for part in parts:
                     if part in value:
@@ -151,7 +156,7 @@ class LoopProcessor:
             except (KeyError, TypeError):
                 # Return the original variable name with the expected spacing
                 return f"{{ {var_name} }}".replace("{", "{{").replace("}", "}}")
         # Replace all variables in the template
         result = re.sub(pattern, replace_var, template)
         return result
@@ -191,6 +196,280 @@ class LoopProcessor:
         }
+class LongSurveyLoopProcessor:
+    """
+    A modified LoopProcessor that creates a long survey where each question is rendered for each scenario.
+    Returns a tuple of (long_questions, long_scenario_list).
+    The long scenario list is essentially a flattened scenario list with one scenario that has many fields.
+    Every ``{{ scenario.<field> }}`` reference that resolves against a scenario is
+    rewritten to ``{{ scenario.<field>_<index> }}`` and the value is stored under
+    ``<field>_<index>`` in the single flattened scenario.
+    Usage:
+    >>> from edsl.questions import QuestionMultipleChoice
+    >>> from edsl.surveys import Survey
+    >>> from edsl.scenarios import Scenario, ScenarioList
+    >>> q = QuestionMultipleChoice(question_name = "enjoy", question_text = "How much do you enjoy {{ scenario.activity }}?", question_options = ["Not at all", "Somewhat", "Very much"])
+    >>> scenarios = ScenarioList([Scenario({"activity": activity}) for activity in ["tennis", "racecar driving", "cycling"]])
+    >>> survey = Survey([q])
+    >>> loop_processor = LongSurveyLoopProcessor(survey, scenarios)
+    >>> long_questions_list, long_scenario_list = loop_processor.process_templates_for_all_questions()
+    """
+    def __init__(self, survey: Survey, scenario_list: ScenarioList):
+        """Store the survey and the scenarios it will be expanded against.
+        Args:
+            survey: Survey whose questions are processed
+            scenario_list: Scenarios each templated question is rendered for
+        """
+        self.survey = survey
+        self.scenario_list = scenario_list
+        self.env = Environment(undefined=Undefined)
+        # Filled by _render_template: "<field>_<index>" -> scenario value.
+        self.long_scenario_dict = {}
+    def process_templates_for_all_questions(
+        self,
+    ) -> Tuple[List[QuestionBase], ScenarioList]:
+        """Expand every survey question and build the flattened scenario list.
+        Returns:
+            Tuple of (expanded questions, ScenarioList holding one Scenario
+            built from the accumulated suffixed fields)
+        """
+        long_questions_list = []
+        # Start from a clean slate so repeated calls do not mix results.
+        self.long_scenario_dict = {}
+        for question in self.survey.questions:
+            long_questions_list.extend(
+                self.process_templates(question, self.scenario_list)
+            )
+        long_scenario_list = ScenarioList([Scenario(data=self.long_scenario_dict)])
+        return long_questions_list, long_scenario_list
+    def process_templates(
+        self, question: QuestionBase, scenario_list: ScenarioList
+    ) -> List[QuestionBase]:
+        """Process templates for each scenario and return list of modified questions.
+        Args:
+            question: Question to expand
+            scenario_list: List of scenarios to process templates against
+        Returns:
+            List of QuestionBase objects with rendered templates; a question
+            whose text has no ``{{ scenario.* }}`` variable is returned as-is
+        """
+        import re
+        starting_name = question.question_name
+        # Only the question text decides whether the question is expanded.
+        if re.search(self._jinja_variable_pattern(), question.question_text) is None:
+            return [question]
+        questions = []
+        for index, scenario in enumerate(scenario_list):
+            question_data = question.to_dict().copy()
+            processed_data = self._process_data(question_data, scenario, index)
+            # Keep names unique when rendering left the question name unchanged.
+            if processed_data["question_name"] == starting_name:
+                processed_data["question_name"] += f"_{index}"
+            questions.append(QuestionBase.from_dict(processed_data))
+        return questions
+    def _process_data(
+        self, data: Dict[str, Any], scenario: Dict[str, Any], scenario_index: int
+    ) -> Dict[str, Any]:
+        """Process all data fields according to their type.
+        Args:
+            data: Dictionary of question data
+            scenario: Current scenario to render templates against
+            scenario_index: Position of the scenario, used as the field suffix
+        Returns:
+            Processed dictionary with rendered templates (None values are dropped)
+        """
+        extended_scenario = scenario.copy()
+        # Also expose the scenario under the "scenario" key of the context.
+        extended_scenario.update({"scenario": scenario})
+        return {
+            key: self._process_value(key, value, extended_scenario, scenario_index)
+            for key, value in data.items()
+            if value is not None
+        }
+    def _process_value(
+        self, key: str, value: Any, scenario: Dict[str, Any], scenario_index: int
+    ) -> Any:
+        """Process a single value according to its type.
+        Args:
+            key: Dictionary key
+            value: Value to process
+            scenario: Current scenario
+            scenario_index: Position of the scenario, used as the field suffix
+        Returns:
+            Processed value
+        Raises:
+            QuestionValueError: If the value is not a str, list, dict, int or float
+        """
+        if key == "question_options" and isinstance(value, str):
+            return value
+        if key == "option_labels":
+            if not isinstance(value, str):
+                return value
+            # NOTE(review): eval() executes the rendered text as Python code.
+            # Kept to preserve behavior; if option_labels can come from
+            # untrusted input, consider ast.literal_eval instead.
+            return eval(self._render_template(value, scenario, scenario_index))
+        if isinstance(value, str):
+            return self._render_template(value, scenario, scenario_index)
+        if isinstance(value, list):
+            return self._process_list(value, scenario, scenario_index)
+        if isinstance(value, dict):
+            return self._process_dict(value, scenario, scenario_index)
+        if isinstance(value, (int, float)):
+            return value
+        from edsl.questions.exceptions import QuestionValueError
+        raise QuestionValueError(
+            f"Unexpected value type: {type(value)} for key '{key}'"
+        )
+    def _jinja_variable_pattern(self) -> str:
+        """Return the regex matching ``{{ scenario.<var> }}`` template variables."""
+        return (
+            r"(?P<open>\{\{\s*)scenario\.(?P<var>[a-zA-Z0-9_.]+)(?P<close>\s*\}\})"
+        )
+    def _render_template(
+        self, template: str, scenario: Dict[str, Any], scenario_index: int
+    ) -> str:
+        """Render a single template string.
+        Args:
+            template: Template string to render
+            scenario: Current scenario
+            scenario_index: Suffix appended to each resolved variable's base name
+        Returns:
+            Rendered template string, preserving any unmatched template variables
+        Side effects:
+            Stores ``scenario[<base>]`` under ``<base>_<scenario_index>`` in
+            ``self.long_scenario_dict`` for every variable whose base name resolves.
+        Examples:
+            >>> from edsl.questions import QuestionBase
+            >>> from edsl.scenarios import Scenario, ScenarioList
+            >>> q = QuestionBase()
+            >>> q.question_text = "test"
+            >>> sl = ScenarioList([Scenario({"name": "World"}), Scenario({"name": "everyone"})])
+            >>> p = LongSurveyLoopProcessor(q, sl)
+            >>> p._render_template("Hello {{scenario.name}}!", {"name": "everyone"}, scenario_index=1)
+            'Hello {{ scenario.name_1 }}!'
+            >>> p._render_template("{{scenario.a}} and {{scenario.b}}", {"b": 6}, scenario_index=1)
+            '{{ a }} and {{ scenario.b_1 }}'
+            >>> p._render_template("{{scenario.x}} + {{scenario.y}} = {{scenario.z}}", {"x": 2, "y": 3}, scenario_index=5)
+            '{{ scenario.x_5 }} + {{ scenario.y_5 }} = {{ z }}'
+            >>> p._render_template("No variables here", {}, scenario_index=0)
+            'No variables here'
+            >>> p._render_template("{{scenario.item.price}}", {"item": {"price": 9.99}}, scenario_index=3)
+            '{{ scenario.item_3.price }}'
+            >>> p._render_template("{{scenario.item.missing}}", {"item": {"price": 9.99}}, scenario_index=3)
+            '{{ scenario.item_3.missing }}'
+        """
+        import re
+        def replace_var(match):
+            var_name = match.group("var")
+            # Handle nested attributes (like item.price): only the base name
+            # is looked up and suffixed, the remainder is carried over as-is.
+            base_var, _, non_name_parts = var_name.partition(".")
+            suffixed = f"{base_var}_{scenario_index}"
+            try:
+                self.long_scenario_dict[suffixed] = scenario[base_var]
+            except (KeyError, TypeError):
+                # Return the original variable name with the expected spacing
+                return "{{ " + var_name + " }}"
+            if "." in var_name:
+                suffixed = f"{suffixed}.{non_name_parts}"
+            return "{{ scenario." + suffixed + " }}"
+        # Replace all variables in the template
+        return re.sub(self._jinja_variable_pattern(), replace_var, template)
+    def _render_if_str(self, value: Any, scenario: Dict[str, Any], scenario_index: int) -> Any:
+        """Render ``value`` when it is a string; return any other value untouched."""
+        if isinstance(value, str):
+            return self._render_template(value, scenario, scenario_index)
+        return value
+    def _process_list(
+        self, items: List[Any], scenario: Dict[str, Any], scenario_index: int
+    ) -> List[Any]:
+        """Process all items in a list.
+        Args:
+            items: List of items to process
+            scenario: Current scenario
+            scenario_index: Position of the scenario, used as the field suffix
+        Returns:
+            List of processed items (non-string items are passed through)
+        """
+        return [self._render_if_str(item, scenario, scenario_index) for item in items]
+    def _process_dict(
+        self, data: Dict[str, Any], scenario: Dict[str, Any], scenario_index: int
+    ) -> Dict[str, Any]:
+        """Process all keys and values in a dictionary.
+        Args:
+            data: Dictionary to process
+            scenario: Current scenario
+            scenario_index: Position of the scenario, used as the field suffix
+        Returns:
+            Dictionary with processed keys and values (non-string ones are passed through)
+        """
+        return {
+            self._render_if_str(k, scenario, scenario_index): self._render_if_str(
+                v, scenario, scenario_index
+            )
+            for k, v in data.items()
+        }
 if __name__ == "__main__":
     import doctest

edsl/questions/templates/dict/answering_instructions.jinja CHANGED Viewed

@@ -1,5 +1,4 @@
 Please respond with a dictionary using the following keys: {{ answer_keys | join(', ') }}.
-Do not include "python" for create a code block. Just return the dictionary.
 {% if value_descriptions %}
 Here are descriptions of the values to provide:

edsl/scenarios/scenario_list.py CHANGED Viewed

@@ -159,7 +159,15 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
     # Required MutableSequence abstract methods
     def __getitem__(self, index):
-        """Get item at index."""
+        """Get item at index.
+        Example:
+            >>> from edsl.scenarios import Scenario, ScenarioList
+            >>> sl = ScenarioList([Scenario({'a': 12})])
+            >>> sl[0]['b'] = 100  # modify in-place
+            >>> sl[0]['b']
+            100
+        """
         if isinstance(index, slice):
             return self.__class__(list(self.data[index]), self.codebook.copy())
         return self.data[index]
@@ -356,7 +364,29 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
                 new_scenarios.append(Scenario(new_scenario))
         return new_scenarios
+    @classmethod
+    def from_prompt(cls, description: str, name: Optional[str] = "item", target_number: int = 10, verbose: bool = False):
+        """Create a scenario list by asking a QuestionList to generate examples.
+        Args:
+            description: Prompt text describing the items to generate.
+            name: Used as the question name and as the field that is selected
+                from the results and expanded.
+            target_number: Number of examples requested in the prompt text; the
+                request is phrased as "please try", nothing here enforces the count.
+            verbose: Passed through to the question's ``run`` call.
+        Returns:
+            The result of ``results.select(name).to_scenario_list().expand(name)``.
+        """
+        from ..questions.question_list import QuestionList
+        question = QuestionList(
+            question_name=name,
+            question_text=description + f"\n Please try to return {target_number} examples.",
+        )
+        results = question.run(verbose=verbose)
+        return results.select(name).to_scenario_list().expand(name)
+    def __add__(self, other):
+        """Return a duplicate of this list extended with ``other``.
+        ``other`` may be a single Scenario (appended once) or a ScenarioList
+        (each of its items appended in order). This list is not modified.
+        Raises:
+            ScenarioError: If ``other`` is neither a Scenario nor a ScenarioList.
+        """
+        if isinstance(other, Scenario):
+            extras = [other]
+        elif isinstance(other, ScenarioList):
+            extras = other
+        else:
+            raise ScenarioError("Don't know how to combine!")
+        combined = self.duplicate()
+        for scenario in extras:
+            combined.append(scenario)
+        return combined
     @classmethod
     def from_search_terms(cls, search_terms: List[str]) -> ScenarioList:
         """Create a ScenarioList from a list of search terms, using Wikipedia.

edsl 0.1.60__py3-none-any.whl → 0.1.61__py3-none-any.whl

edsl 0.1.60__py3-none-any.whl → 0.1.61__py3-none-any.whl