PyPI - edsl - Versions diffs - 0.1.62__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

edsl 0.1.62py3-none-any.whl → 1.0.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

edsl/__init__.py +66 -0
edsl/__version__.py +1 -1
edsl/base/base_class.py +45 -0
edsl/cli.py +93 -27
edsl/config/config_class.py +4 -0
edsl/coop/coop.py +141 -13
edsl/dataset/dataset.py +47 -41
edsl/dataset/dataset_operations_mixin.py +138 -15
edsl/dataset/report_from_template.py +509 -0
edsl/inference_services/services/azure_ai.py +8 -2
edsl/inference_services/services/open_ai_service.py +7 -5
edsl/jobs/jobs.py +4 -3
edsl/jobs/jobs_checks.py +11 -6
edsl/jobs/remote_inference.py +13 -6
edsl/results/result.py +93 -38
edsl/results/results.py +24 -15
edsl/scenarios/scenario_list.py +294 -130
edsl/scenarios/scenario_source.py +1 -1
{edsl-0.1.62.dist-info → edsl-1.0.0.dist-info}/METADATA +1 -1
{edsl-0.1.62.dist-info → edsl-1.0.0.dist-info}/RECORD +23 -22
{edsl-0.1.62.dist-info → edsl-1.0.0.dist-info}/LICENSE +0 -0
{edsl-0.1.62.dist-info → edsl-1.0.0.dist-info}/WHEEL +0 -0
{edsl-0.1.62.dist-info → edsl-1.0.0.dist-info}/entry_points.txt +0 -0

edsl/jobs/remote_inference.py CHANGED Viewed

@@ -219,15 +219,22 @@ class JobsRemoteInferenceHandler:
         self, job_info: RemoteJobInfo, remote_job_data: RemoteInferenceResponse
     ) -> None:
         "Handles a failed job by logging the error and updating the job status."
-        error_report_url = remote_job_data.get("latest_job_run_details", {}).get(
-            "error_report_url"
-        )
+        latest_job_run_details = remote_job_data.get("latest_job_run_details", {})
+        error_report_url = latest_job_run_details.get("error_report_url")
-        reason = remote_job_data.get("reason")
+        failure_reason = latest_job_run_details.get("failure_reason")
-        if reason == "insufficient funds":
+        if failure_reason == "insufficient funds":
+            failure_description = latest_job_run_details.get(
+                "failure_description",
+                "You don't have enough credits to start this job",
+            )
+            job_info.logger.update(
+                f"Insufficient funds: {failure_description}.",
+                status=JobsStatus.FAILED,
+            )
             job_info.logger.update(
-                f"Error: Insufficient balance to start the job. Add funds to your account at the [Credits page]({self.expected_parrot_url}/home/credits)",
+                f"Add funds to your account at the [Credits page]({self.expected_parrot_url}/home/credits).",
                 status=JobsStatus.FAILED,
             )

edsl/results/result.py CHANGED Viewed

@@ -99,6 +99,7 @@ class Result(Base, UserDict):
         cache_used_dict: Optional[dict[QuestionName, bool]] = None,
         indices: Optional[dict] = None,
         cache_keys: Optional[dict[QuestionName, str]] = None,
+        validated_dict: Optional[dict[QuestionName, bool]] = None,
     ):
         """Initialize a Result object.
@@ -135,6 +136,7 @@ class Result(Base, UserDict):
             "reasoning_summaries_dict": reasoning_summaries_dict or {},
             "cache_used_dict": cache_used_dict or {},
             "cache_keys": cache_keys or {},
+            "validated_dict": validated_dict or {},
         }
         super().__init__(**data)
         self.indices = indices
@@ -247,6 +249,7 @@ class Result(Base, UserDict):
             "question_type": sub_dicts_needing_new_keys["question_type"],
             "cache_used": new_cache_dict,
             "cache_keys": cache_keys,
+            "validated": self.data["validated_dict"],
         }
         if hasattr(self, "indices") and self.indices is not None:
             d["agent"].update({"agent_index": self.indices["agent"]})
@@ -460,7 +463,7 @@ class Result(Base, UserDict):
         if hasattr(self, "interview_hash"):
             d["interview_hash"] = self.interview_hash
         # Preserve the order attribute if it exists
         if hasattr(self, "order"):
             d["order"] = self.order
@@ -505,14 +508,15 @@ class Result(Base, UserDict):
             cache_used_dict=json_dict.get("cache_used_dict", {}),
             cache_keys=json_dict.get("cache_keys", {}),
             indices=json_dict.get("indices", None),
+            validated_dict=json_dict.get("validated_dict", {}),
         )
         if "interview_hash" in json_dict:
             result.interview_hash = json_dict["interview_hash"]
         # Restore the order attribute if it exists in the dictionary
         if "order" in json_dict:
             result.order = json_dict["order"]
         return result
     def __repr__(self):
@@ -604,9 +608,13 @@ class Result(Base, UserDict):
     def from_interview(cls, interview) -> Result:
         """Return a Result object from an interview dictionary, ensuring no reference to the original interview is maintained."""
         # Copy the valid results to avoid maintaining references
-        model_response_objects = list(interview.valid_results) if hasattr(interview, 'valid_results') else []
+        model_response_objects = (
+            list(interview.valid_results) if hasattr(interview, "valid_results") else []
+        )
         # Create a copy of the answers
-        extracted_answers = dict(interview.answers) if hasattr(interview, 'answers') else {}
+        extracted_answers = (
+            dict(interview.answers) if hasattr(interview, "answers") else {}
+        )
         def get_question_results(
             model_response_objects,
@@ -640,29 +648,47 @@ class Result(Base, UserDict):
             reasoning_summaries_dict = {}
             for k in answer_key_names:
                 reasoning_summary = question_results[k].reasoning_summary
                 # If reasoning summary is None but we have a raw model response, try to extract it
-                if reasoning_summary is None and hasattr(question_results[k], 'raw_model_response'):
+                if reasoning_summary is None and hasattr(
+                    question_results[k], "raw_model_response"
+                ):
                     try:
                         # Get the model class to access the reasoning_sequence
-                        model_class = interview.model.__class__ if hasattr(interview, 'model') else None
-                        if model_class and hasattr(model_class, 'reasoning_sequence'):
-                            from ..language_models.raw_response_handler import RawResponseHandler
+                        model_class = (
+                            interview.model.__class__
+                            if hasattr(interview, "model")
+                            else None
+                        )
+                        if model_class and hasattr(model_class, "reasoning_sequence"):
+                            from ..language_models.raw_response_handler import (
+                                RawResponseHandler,
+                            )
                             # Create a handler with the model's reasoning sequence
                             handler = RawResponseHandler(
-                                key_sequence=model_class.key_sequence if hasattr(model_class, 'key_sequence') else None,
-                                usage_sequence=model_class.usage_sequence if hasattr(model_class, 'usage_sequence') else None,
-                                reasoning_sequence=model_class.reasoning_sequence
+                                key_sequence=(
+                                    model_class.key_sequence
+                                    if hasattr(model_class, "key_sequence")
+                                    else None
+                                ),
+                                usage_sequence=(
+                                    model_class.usage_sequence
+                                    if hasattr(model_class, "usage_sequence")
+                                    else None
+                                ),
+                                reasoning_sequence=model_class.reasoning_sequence,
                             )
                             # Try to extract the reasoning summary
-                            reasoning_summary = handler.get_reasoning_summary(question_results[k].raw_model_response)
+                            reasoning_summary = handler.get_reasoning_summary(
+                                question_results[k].raw_model_response
+                            )
                     except Exception:
                         # If extraction fails, keep it as None
                         pass
                 reasoning_summaries_dict[k + "_reasoning_summary"] = reasoning_summary
             return reasoning_summaries_dict
@@ -726,39 +752,67 @@ class Result(Base, UserDict):
             return raw_model_results_dictionary, cache_used_dictionary
+        def get_validated_dictionary(model_response_objects):
+            validated_dict = {}
+            for result in model_response_objects:
+                validated_dict[f"{result.question_name}_validated"] = result.validated
+            return validated_dict
         # Save essential information from the interview before clearing references
-        agent_copy = interview.agent.copy() if hasattr(interview, 'agent') else None
-        scenario_copy = interview.scenario.copy() if hasattr(interview, 'scenario') else None
-        model_copy = interview.model.copy() if hasattr(interview, 'model') else None
-        iteration = interview.iteration if hasattr(interview, 'iteration') else 0
-        survey_copy = interview.survey.copy() if hasattr(interview, 'survey') and interview.survey else None
-        indices_copy = dict(interview.indices) if hasattr(interview, 'indices') and interview.indices else None
-        initial_hash = interview.initial_hash if hasattr(interview, 'initial_hash') else hash(interview)
+        agent_copy = interview.agent.copy() if hasattr(interview, "agent") else None
+        scenario_copy = (
+            interview.scenario.copy() if hasattr(interview, "scenario") else None
+        )
+        model_copy = interview.model.copy() if hasattr(interview, "model") else None
+        iteration = interview.iteration if hasattr(interview, "iteration") else 0
+        survey_copy = (
+            interview.survey.copy()
+            if hasattr(interview, "survey") and interview.survey
+            else None
+        )
+        indices_copy = (
+            dict(interview.indices)
+            if hasattr(interview, "indices") and interview.indices
+            else None
+        )
+        initial_hash = (
+            interview.initial_hash
+            if hasattr(interview, "initial_hash")
+            else hash(interview)
+        )
         # Process data to create dictionaries needed for Result
         question_results = get_question_results(model_response_objects)
         answer_key_names = list(question_results.keys())
-        generated_tokens_dict = get_generated_tokens_dict(answer_key_names) if answer_key_names else {}
+        generated_tokens_dict = (
+            get_generated_tokens_dict(answer_key_names) if answer_key_names else {}
+        )
         comments_dict = get_comments_dict(answer_key_names) if answer_key_names else {}
-        reasoning_summaries_dict = get_reasoning_summaries_dict(answer_key_names) if answer_key_names else {}
+        reasoning_summaries_dict = (
+            get_reasoning_summaries_dict(answer_key_names) if answer_key_names else {}
+        )
         # Get answers that are in the question results
         answer_dict = {}
         for k in answer_key_names:
             if k in extracted_answers:
                 answer_dict[k] = extracted_answers[k]
         cache_keys = get_cache_keys(model_response_objects)
         question_name_to_prompts = get_question_name_to_prompts(model_response_objects)
-        prompt_dictionary = get_prompt_dictionary(
-            answer_key_names, question_name_to_prompts
-        ) if answer_key_names else {}
+        prompt_dictionary = (
+            get_prompt_dictionary(answer_key_names, question_name_to_prompts)
+            if answer_key_names
+            else {}
+        )
         raw_model_results_dictionary, cache_used_dictionary = (
             get_raw_model_results_and_cache_used_dictionary(model_response_objects)
         )
+        validated_dictionary = get_validated_dictionary(model_response_objects)
         # Create the Result object with all copied data
         result = cls(
             agent=agent_copy,
@@ -775,22 +829,23 @@ class Result(Base, UserDict):
             cache_used_dict=cache_used_dictionary,
             indices=indices_copy,
             cache_keys=cache_keys,
+            validated_dict=validated_dictionary,
         )
         # Store only the hash, not the interview
         result.interview_hash = initial_hash
         # Clear references to help garbage collection of the interview
-        if hasattr(interview, 'clear_references'):
+        if hasattr(interview, "clear_references"):
             interview.clear_references()
         # Clear local references to help with garbage collection
         del model_response_objects
         del extracted_answers
         del question_results
         del answer_key_names
         del question_name_to_prompts
         return result

edsl/results/results.py CHANGED Viewed

@@ -274,6 +274,7 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
         "cache_used",
         "cache_keys",
         "reasoning_summary",
+        "validated",
     ]
     @classmethod
@@ -2205,14 +2206,16 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
                     "survey": self.survey.to_dict() if self.survey else None,
                     "created_columns": self.created_columns,
                     "cache": self.cache.to_dict() if hasattr(self, "cache") else None,
-                    "task_history": self.task_history.to_dict()
-                    if hasattr(self, "task_history")
-                    else None,
+                    "task_history": (
+                        self.task_history.to_dict()
+                        if hasattr(self, "task_history")
+                        else None
+                    ),
                     "completed": self.completed,
                     "job_uuid": self._job_uuid if hasattr(self, "_job_uuid") else None,
-                    "total_results": self._total_results
-                    if hasattr(self, "_total_results")
-                    else None,
+                    "total_results": (
+                        self._total_results if hasattr(self, "_total_results") else None
+                    ),
                 }
                 metadata_path = temp_path / "metadata.json"
@@ -2270,16 +2273,22 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
                 # 2. Create a new Results instance
                 results = cls(
-                    survey=Survey.from_dict(metadata["survey"])
-                    if metadata["survey"]
-                    else None,
+                    survey=(
+                        Survey.from_dict(metadata["survey"])
+                        if metadata["survey"]
+                        else None
+                    ),
                     created_columns=metadata["created_columns"],
-                    cache=Cache.from_dict(metadata["cache"])
-                    if metadata["cache"]
-                    else None,
-                    task_history=TaskHistory.from_dict(metadata["task_history"])
-                    if metadata["task_history"]
-                    else None,
+                    cache=(
+                        Cache.from_dict(metadata["cache"])
+                        if metadata["cache"]
+                        else None
+                    ),
+                    task_history=(
+                        TaskHistory.from_dict(metadata["task_history"])
+                        if metadata["task_history"]
+                        else None
+                    ),
                     job_uuid=metadata["job_uuid"],
                     total_results=metadata["total_results"],
                 )

edsl 0.1.62__py3-none-any.whl → 1.0.0__py3-none-any.whl

edsl 0.1.62py3-none-any.whl → 1.0.0py3-none-any.whl