PyPI - edsl - Versions diffs - 0.1.45__py3-none-any.whl → 0.1.46__py3-none-any.whl - Mend

edsl 0.1.45__py3-none-any.whl → 0.1.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

edsl/Base.py +7 -3
edsl/__version__.py +1 -1
edsl/agents/PromptConstructor.py +26 -79
edsl/agents/QuestionInstructionPromptBuilder.py +70 -32
edsl/agents/QuestionTemplateReplacementsBuilder.py +12 -2
edsl/coop/coop.py +155 -94
edsl/data/RemoteCacheSync.py +10 -9
edsl/inference_services/AvailableModelFetcher.py +1 -1
edsl/jobs/AnswerQuestionFunctionConstructor.py +12 -1
edsl/jobs/Jobs.py +15 -17
edsl/jobs/JobsPrompts.py +49 -26
edsl/jobs/JobsRemoteInferenceHandler.py +4 -5
edsl/jobs/data_structures.py +3 -0
edsl/jobs/interviews/Interview.py +6 -3
edsl/language_models/LanguageModel.py +6 -0
edsl/questions/question_base_gen_mixin.py +2 -0
edsl/results/DatasetExportMixin.py +25 -4
edsl/scenarios/ScenarioList.py +153 -21
{edsl-0.1.45.dist-info → edsl-0.1.46.dist-info}/METADATA +2 -2
{edsl-0.1.45.dist-info → edsl-0.1.46.dist-info}/RECORD +22 -22
{edsl-0.1.45.dist-info → edsl-0.1.46.dist-info}/LICENSE +0 -0
{edsl-0.1.45.dist-info → edsl-0.1.46.dist-info}/WHEEL +0 -0

edsl/jobs/JobsPrompts.py CHANGED Viewed

@@ -18,6 +18,7 @@ from edsl.data.CacheEntry import CacheEntry
 logger = logging.getLogger(__name__)
 class JobsPrompts:
     def __init__(self, jobs: "Jobs"):
         self.interviews = jobs.interviews()
@@ -26,7 +27,9 @@ class JobsPrompts:
         self.survey = jobs.survey
         self._price_lookup = None
         self._agent_lookup = {agent: idx for idx, agent in enumerate(self.agents)}
-        self._scenario_lookup = {scenario: idx for idx, scenario in enumerate(self.scenarios)}
+        self._scenario_lookup = {
+            scenario: idx for idx, scenario in enumerate(self.scenarios)
+        }
     @property
     def price_lookup(self):
@@ -37,7 +40,7 @@ class JobsPrompts:
             self._price_lookup = c.fetch_prices()
         return self._price_lookup
-    def prompts(self) -> "Dataset":
+    def prompts(self, iterations=1) -> "Dataset":
         """Return a Dataset of prompts that will be used.
         >>> from edsl.jobs import Jobs
@@ -54,11 +57,11 @@ class JobsPrompts:
         models = []
         costs = []
         cache_keys = []
         for interview_index, interview in enumerate(interviews):
             logger.info(f"Processing interview {interview_index} of {len(interviews)}")
             interview_start = time.time()
             # Fetch invigilators timing
             invig_start = time.time()
             invigilators = [
@@ -66,8 +69,10 @@ class JobsPrompts:
                 for question in interview.survey.questions
             ]
             invig_end = time.time()
-            logger.debug(f"Time taken to fetch invigilators: {invig_end - invig_start:.4f}s")
+            logger.debug(
+                f"Time taken to fetch invigilators: {invig_end - invig_start:.4f}s"
+            )
             # Process prompts timing
             prompts_start = time.time()
             for _, invigilator in enumerate(invigilators):
@@ -75,13 +80,15 @@ class JobsPrompts:
                 get_prompts_start = time.time()
                 prompts = invigilator.get_prompts()
                 get_prompts_end = time.time()
-                logger.debug(f"Time taken to get prompts: {get_prompts_end - get_prompts_start:.4f}s")
+                logger.debug(
+                    f"Time taken to get prompts: {get_prompts_end - get_prompts_start:.4f}s"
+                )
                 user_prompt = prompts["user_prompt"]
                 system_prompt = prompts["system_prompt"]
                 user_prompts.append(user_prompt)
                 system_prompts.append(system_prompt)
                 # Index lookups timing
                 index_start = time.time()
                 agent_index = self._agent_lookup[invigilator.agent]
@@ -90,14 +97,18 @@ class JobsPrompts:
                 scenario_index = self._scenario_lookup[invigilator.scenario]
                 scenario_indices.append(scenario_index)
                 index_end = time.time()
-                logger.debug(f"Time taken for index lookups: {index_end - index_start:.4f}s")
+                logger.debug(
+                    f"Time taken for index lookups: {index_end - index_start:.4f}s"
+                )
                 # Model and question name assignment timing
                 assign_start = time.time()
                 models.append(invigilator.model.model)
                 question_names.append(invigilator.question.question_name)
                 assign_end = time.time()
-                logger.debug(f"Time taken for assignments: {assign_end - assign_start:.4f}s")
+                logger.debug(
+                    f"Time taken for assignments: {assign_end - assign_start:.4f}s"
+                )
                 # Cost estimation timing
                 cost_start = time.time()
@@ -109,32 +120,44 @@ class JobsPrompts:
                     model=invigilator.model.model,
                 )
                 cost_end = time.time()
-                logger.debug(f"Time taken to estimate prompt cost: {cost_end - cost_start:.4f}s")
+                logger.debug(
+                    f"Time taken to estimate prompt cost: {cost_end - cost_start:.4f}s"
+                )
                 costs.append(prompt_cost["cost_usd"])
                 # Cache key generation timing
                 cache_key_gen_start = time.time()
-                cache_key = CacheEntry.gen_key(
-                    model=invigilator.model.model,
-                    parameters=invigilator.model.parameters,
-                    system_prompt=system_prompt,
-                    user_prompt=user_prompt,
-                    iteration=0,
-                )
+                for iteration in range(iterations):
+                    cache_key = CacheEntry.gen_key(
+                        model=invigilator.model.model,
+                        parameters=invigilator.model.parameters,
+                        system_prompt=system_prompt,
+                        user_prompt=user_prompt,
+                        iteration=iteration,
+                    )
+                    cache_keys.append(cache_key)
                 cache_key_gen_end = time.time()
-                cache_keys.append(cache_key)
-                logger.debug(f"Time taken to generate cache key: {cache_key_gen_end - cache_key_gen_start:.4f}s")
+                logger.debug(
+                    f"Time taken to generate cache key: {cache_key_gen_end - cache_key_gen_start:.4f}s"
+                )
                 logger.debug("-" * 50)  # Separator between iterations
             prompts_end = time.time()
-            logger.info(f"Time taken to process prompts: {prompts_end - prompts_start:.4f}s")
+            logger.info(
+                f"Time taken to process prompts: {prompts_end - prompts_start:.4f}s"
+            )
             interview_end = time.time()
-            logger.info(f"Overall time taken for interview: {interview_end - interview_start:.4f}s")
+            logger.info(
+                f"Overall time taken for interview: {interview_end - interview_start:.4f}s"
+            )
             logger.info("Time breakdown:")
             logger.info(f"  Invigilators: {invig_end - invig_start:.4f}s")
             logger.info(f"  Prompts processing: {prompts_end - prompts_start:.4f}s")
-            logger.info(f"  Other overhead: {(interview_end - interview_start) - ((invig_end - invig_start) + (prompts_end - prompts_start)):.4f}s")
+            logger.info(
+                f"  Other overhead: {(interview_end - interview_start) - ((invig_end - invig_start) + (prompts_end - prompts_start)):.4f}s"
+            )
         d = Dataset(
             [

edsl/jobs/JobsRemoteInferenceHandler.py CHANGED Viewed

@@ -24,7 +24,7 @@ from edsl.jobs.JobsRemoteInferenceLogger import JobLogger
 class RemoteJobConstants:
     """Constants for remote job handling."""
-    REMOTE_JOB_POLL_INTERVAL = 1
+    REMOTE_JOB_POLL_INTERVAL = 4
     REMOTE_JOB_VERBOSE = False
     DISCORD_URL = "https://discord.com/invite/mxAYkjfy9m"
@@ -88,8 +88,8 @@ class JobsRemoteInferenceHandler:
         iterations: int = 1,
         remote_inference_description: Optional[str] = None,
         remote_inference_results_visibility: Optional[VisibilityType] = "unlisted",
+        fresh: Optional[bool] = False,
     ) -> RemoteJobInfo:
         from edsl.config import CONFIG
         from edsl.coop.coop import Coop
@@ -106,6 +106,7 @@ class JobsRemoteInferenceHandler:
             status="queued",
             iterations=iterations,
             initial_results_visibility=remote_inference_results_visibility,
+            fresh=fresh,
         )
         logger.update(
             "Your survey is running at the Expected Parrot server...",
@@ -277,9 +278,7 @@ class JobsRemoteInferenceHandler:
         job_in_queue = True
         while job_in_queue:
             result = self._attempt_fetch_job(
-                job_info,
-                remote_job_data_fetcher,
-                object_fetcher
+                job_info, remote_job_data_fetcher, object_fetcher
             )
             if result != "continue":
                 return result

edsl/jobs/data_structures.py CHANGED Viewed

@@ -36,6 +36,9 @@ class RunParameters(Base):
     disable_remote_cache: bool = False
     disable_remote_inference: bool = False
     job_uuid: Optional[str] = None
+    fresh: Optional[
+        bool
+    ] = False  # if True, will not use cache and will save new results to cache
     def to_dict(self, add_edsl_version=False) -> dict:
         d = asdict(self)

edsl/jobs/interviews/Interview.py CHANGED Viewed

@@ -238,9 +238,6 @@ class Interview:
         >>> run_config = RunConfig(parameters = RunParameters(), environment = RunEnvironment())
         >>> run_config.parameters.stop_on_exception = True
         >>> result, _ = asyncio.run(i.async_conduct_interview(run_config))
-        Traceback (most recent call last):
-        ...
-        asyncio.exceptions.CancelledError
         """
         from edsl.jobs.Jobs import RunConfig, RunParameters, RunEnvironment
@@ -262,6 +259,8 @@ class Interview:
         if model_buckets is None or hasattr(self.agent, "answer_question_directly"):
             model_buckets = ModelBuckets.infinity_bucket()
+        self.skip_flags = {q.question_name: False for q in self.survey.questions}
         # was "self.tasks" - is that necessary?
         self.tasks = self.task_manager.build_question_tasks(
             answer_func=AnswerQuestionFunctionConstructor(
@@ -310,6 +309,10 @@ class Interview:
         def handle_task(task, invigilator):
             try:
                 result: Answers = task.result()
+                if result == "skipped":
+                    result = invigilator.get_failed_task_result(
+                        failure_reason="Task was skipped."
+                    )
             except asyncio.CancelledError as e:  # task was cancelled
                 result = invigilator.get_failed_task_result(
                     failure_reason="Task was cancelled."

edsl/language_models/LanguageModel.py CHANGED Viewed

@@ -379,8 +379,10 @@ class LanguageModel(
         cached_response, cache_key = cache.fetch(**cache_call_params)
         if cache_used := cached_response is not None:
+ #           print("cache used")
             response = json.loads(cached_response)
         else:
+#            print("cache not used")
             f = (
                 self.remote_async_execute_model_call
                 if hasattr(self, "remote") and self.remote
@@ -400,7 +402,10 @@ class LanguageModel(
             )  # store the response in the cache
             assert new_cache_key == cache_key  # should be the same
+        #breakpoint()
         cost = self.cost(response)
+        #breakpoint()
         return ModelResponse(
             response=response,
             cache_used=cache_used,
@@ -465,6 +470,7 @@ class LanguageModel(
             model_outputs=model_outputs,
             edsl_dict=edsl_dict,
         )
+        #breakpoint()
         return agent_response_dict
     get_response = sync_wrapper(async_get_response)

edsl/questions/question_base_gen_mixin.py CHANGED Viewed

@@ -140,6 +140,8 @@ class QuestionBaseGenMixin:
             k: v for k, v in replacement_dict.items() if not isinstance(v, Scenario)
         }
+        strings_only_replacement_dict['scenario'] = strings_only_replacement_dict
         def _has_unrendered_variables(template_str: str, env: Environment) -> bool:
             """Check if the template string has any unrendered variables."""
             if not isinstance(template_str, str):

edsl/results/DatasetExportMixin.py CHANGED Viewed

@@ -735,11 +735,14 @@ class DatasetExportMixin:
         """
         Flatten a field containing a list of dictionaries into separate fields.
-        For example, if a dataset contains:
-        [{'data': [{'a': 1}, {'b': 2}], 'other': ['x', 'y']}]
+        >>> from edsl.results.Dataset import Dataset
+        >>> Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5] }]).flatten('a')
+        Dataset([{'c': [5]}, {'a.a': [1]}, {'a.b': [2]}])
+        >>> Dataset([{'answer.example': [{'a': 1, 'b': 2}]}, {'c': [5] }]).flatten('answer.example')
+        Dataset([{'c': [5]}, {'answer.example.a': [1]}, {'answer.example.b': [2]}])
-        After d.flatten('data'), it should become:
-        [{'other': ['x', 'y'], 'data.a': [1, None], 'data.b': [None, 2]}]
         Args:
             field: The field to flatten
@@ -753,6 +756,24 @@ class DatasetExportMixin:
         # Ensure the dataset isn't empty
         if not self.data:
             return self.copy()
+        # Find all columns that contain the field
+        matching_entries = []
+        for entry in self.data:
+            col_name = next(iter(entry.keys()))
+            if field == col_name or (
+                '.' in col_name and
+                (col_name.endswith('.' + field) or col_name.startswith(field + '.'))
+            ):
+                matching_entries.append(entry)
+        # Check if the field is ambiguous
+        if len(matching_entries) > 1:
+            matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
+            raise ValueError(
+                f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
+                f"Please specify the full column name to flatten."
+            )
         # Get the number of observations
         num_observations = self.num_observations()

edsl/scenarios/ScenarioList.py CHANGED Viewed

@@ -436,35 +436,98 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
                 new_scenarios.append(new_scenario)
         return ScenarioList(new_scenarios)
-    def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
-        """Concatenate specified fields into a single field.
+    def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";") -> ScenarioList:
+        """Private method to handle concatenation logic for different output types.
         :param fields: The fields to concatenate.
-        :param separator: The separator to use.
+        :param output_type: The type of output ("string", "list", or "set").
+        :param separator: The separator to use for string concatenation.
         Returns:
             ScenarioList: A new ScenarioList with concatenated fields.
-        Example:
-            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
-            >>> s.concatenate(['a', 'b', 'c'])
-            ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
         """
+        # Check if fields is a string and raise an exception
+        if isinstance(fields, str):
+            raise ScenarioError(
+                f"The 'fields' parameter must be a list of field names, not a string. Got '{fields}'."
+            )
         new_scenarios = []
         for scenario in self:
             new_scenario = scenario.copy()
-            concat_values = []
+            values = []
             for field in fields:
                 if field in new_scenario:
-                    concat_values.append(str(new_scenario[field]))
+                    values.append(new_scenario[field])
                     del new_scenario[field]
             new_field_name = f"concat_{'_'.join(fields)}"
-            new_scenario[new_field_name] = separator.join(concat_values)
+            if output_type == "string":
+                # Convert all values to strings and join with separator
+                new_scenario[new_field_name] = separator.join(str(v) for v in values)
+            elif output_type == "list":
+                # Keep as a list
+                new_scenario[new_field_name] = values
+            elif output_type == "set":
+                # Convert to a set (removes duplicates)
+                new_scenario[new_field_name] = set(values)
+            else:
+                raise ValueError(f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'.")
             new_scenarios.append(new_scenario)
         return ScenarioList(new_scenarios)
+    def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
+        """Concatenate specified fields into a single string field.
+        :param fields: The fields to concatenate.
+        :param separator: The separator to use.
+        Returns:
+            ScenarioList: A new ScenarioList with concatenated fields.
+        Example:
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
+            >>> s.concatenate(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
+        """
+        return self._concatenate(fields, output_type="string", separator=separator)
+    def concatenate_to_list(self, fields: List[str]) -> ScenarioList:
+        """Concatenate specified fields into a single list field.
+        :param fields: The fields to concatenate.
+        Returns:
+            ScenarioList: A new ScenarioList with fields concatenated into a list.
+        Example:
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
+            >>> s.concatenate_to_list(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
+        """
+        return self._concatenate(fields, output_type="list")
+    def concatenate_to_set(self, fields: List[str]) -> ScenarioList:
+        """Concatenate specified fields into a single set field.
+        :param fields: The fields to concatenate.
+        Returns:
+            ScenarioList: A new ScenarioList with fields concatenated into a set.
+        Example:
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
+            >>> s.concatenate_to_set(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': {1, 2, 3}}), Scenario({'concat_a_b_c': {4, 5, 6}})])
+            >>> s = ScenarioList([Scenario({'a': 1, 'b': 1, 'c': 3})])
+            >>> s.concatenate_to_set(['a', 'b', 'c'])
+            ScenarioList([Scenario({'concat_a_b_c': {1, 3}})])
+        """
+        return self._concatenate(fields, output_type="set")
     def unpack_dict(
         self, field: str, prefix: Optional[str] = None, drop_field: bool = False
     ) -> ScenarioList:
@@ -937,16 +1000,42 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
     #     return new_list
     @classmethod
-    def from_sqlite(cls, filepath: str, table: str):
-        """Create a ScenarioList from a SQLite database."""
+    def from_sqlite(cls, filepath: str, table: Optional[str] = None, sql_query: Optional[str] = None):
+        """Create a ScenarioList from a SQLite database.
+        Args:
+            filepath (str): Path to the SQLite database file
+            table (Optional[str]): Name of table to query. If None, sql_query must be provided.
+            sql_query (Optional[str]): SQL query to execute. Used if table is None.
+        Returns:
+            ScenarioList: List of scenarios created from database rows
+        Raises:
+            ValueError: If both table and sql_query are None
+            sqlite3.Error: If there is an error executing the database query
+        """
         import sqlite3
-        with sqlite3.connect(filepath) as conn:
-            cursor = conn.cursor()
-            cursor.execute(f"SELECT * FROM {table}")
-            columns = [description[0] for description in cursor.description]
-            data = cursor.fetchall()
-        return cls([Scenario(dict(zip(columns, row))) for row in data])
+        if table is None and sql_query is None:
+            raise ValueError("Either table or sql_query must be provided")
+        try:
+            with sqlite3.connect(filepath) as conn:
+                cursor = conn.cursor()
+                if table is not None:
+                    cursor.execute(f"SELECT * FROM {table}")
+                else:
+                    cursor.execute(sql_query)
+                columns = [description[0] for description in cursor.description]
+                data = cursor.fetchall()
+            return cls([Scenario(dict(zip(columns, row))) for row in data])
+        except sqlite3.Error as e:
+            raise sqlite3.Error(f"Database error occurred: {str(e)}")
     @classmethod
     def from_latex(cls, tex_file_path: str):
@@ -1540,6 +1629,49 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             new_scenarios.extend(replacement_scenarios)
         return ScenarioList(new_scenarios)
+    def collapse(self, field: str) -> ScenarioList:
+        """Collapse a ScenarioList by grouping on all fields except the specified one,
+        collecting the values of the specified field into a list.
+        Args:
+            field: The field to collapse (whose values will be collected into lists)
+        Returns:
+            ScenarioList: A new ScenarioList with the specified field collapsed into lists
+        Example:
+        >>> s = ScenarioList([
+        ...     Scenario({'category': 'fruit', 'color': 'red', 'item': 'apple'}),
+        ...     Scenario({'category': 'fruit', 'color': 'yellow', 'item': 'banana'}),
+        ...     Scenario({'category': 'fruit', 'color': 'red', 'item': 'cherry'}),
+        ...     Scenario({'category': 'vegetable', 'color': 'green', 'item': 'spinach'})
+        ... ])
+        >>> s.collapse('item')
+        ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry']}), Scenario({'category': 'fruit', 'color': 'yellow', 'item': ['banana']}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach']})])
+        """
+        if not self:
+            return ScenarioList([])
+        # Determine all fields except the one to collapse
+        id_vars = [key for key in self[0].keys() if key != field]
+        # Group the scenarios
+        grouped = defaultdict(list)
+        for scenario in self:
+            # Create a tuple of the values of all fields except the one to collapse
+            key = tuple(scenario[id_var] for id_var in id_vars)
+            # Add the value of the field to collapse to the list for this key
+            grouped[key].append(scenario[field])
+        # Create a new ScenarioList with the collapsed field
+        result = []
+        for key, values in grouped.items():
+            new_scenario = dict(zip(id_vars, key))
+            new_scenario[field] = values
+            result.append(Scenario(new_scenario))
+        return ScenarioList(result)
 if __name__ == "__main__":
     import doctest

{edsl-0.1.45.dist-info → edsl-0.1.46.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: edsl
-Version: 0.1.45
+Version: 0.1.46
 Summary: Create and analyze LLM-based surveys
 Home-page: https://www.expectedparrot.com/
 License: MIT
@@ -242,5 +242,5 @@ An integrated platform for running experiments, sharing workflows and launching
 - <a href="https://blog.expectedparrot.com" target="_blank" rel="noopener noreferrer">Blog</a>
 ## Contact
-- <a href="mailto:info@expectedparrot.com" target="_blank" rel="noopener noreferrer">Email</a>.
+- <a href="mailto:info@expectedparrot.com" target="_blank" rel="noopener noreferrer">Email</a>

edsl 0.1.45__py3-none-any.whl → 0.1.46__py3-none-any.whl

edsl 0.1.45__py3-none-any.whl → 0.1.46__py3-none-any.whl