edsl 0.1.61__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- edsl/__init__.py +66 -0
- edsl/__version__.py +1 -1
- edsl/base/base_class.py +53 -0
- edsl/cli.py +93 -27
- edsl/config/config_class.py +4 -0
- edsl/coop/coop.py +403 -28
- edsl/coop/coop_jobs_objects.py +2 -2
- edsl/coop/coop_regular_objects.py +3 -1
- edsl/dataset/dataset.py +47 -41
- edsl/dataset/dataset_operations_mixin.py +138 -15
- edsl/dataset/report_from_template.py +509 -0
- edsl/inference_services/services/azure_ai.py +8 -2
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/jobs/jobs.py +5 -4
- edsl/jobs/jobs_checks.py +11 -6
- edsl/jobs/remote_inference.py +17 -10
- edsl/prompts/prompt.py +7 -2
- edsl/questions/question_registry.py +4 -1
- edsl/results/result.py +93 -38
- edsl/results/results.py +24 -15
- edsl/scenarios/file_store.py +69 -0
- edsl/scenarios/scenario.py +233 -0
- edsl/scenarios/scenario_list.py +294 -130
- edsl/scenarios/scenario_source.py +1 -2
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/METADATA +1 -1
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/RECORD +29 -28
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/LICENSE +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/WHEEL +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/entry_points.txt +0 -0
edsl/jobs/remote_inference.py
CHANGED
@@ -176,7 +176,7 @@ class JobsRemoteInferenceHandler:
         from ..coop import Coop

         coop = Coop()
-        return coop.
+        return coop.new_remote_inference_get(job_uuid)

     def _construct_remote_job_fetcher(
         self, testing_simulated_response: Optional[Any] = None
@@ -219,15 +219,22 @@
         self, job_info: RemoteJobInfo, remote_job_data: RemoteInferenceResponse
     ) -> None:
         "Handles a failed job by logging the error and updating the job status."
-
-
-        )
+        latest_job_run_details = remote_job_data.get("latest_job_run_details", {})
+        error_report_url = latest_job_run_details.get("error_report_url")

-
+        failure_reason = latest_job_run_details.get("failure_reason")

-        if
+        if failure_reason == "insufficient funds":
+            failure_description = latest_job_run_details.get(
+                "failure_description",
+                "You don't have enough credits to start this job",
+            )
             job_info.logger.update(
-                f"
+                f"Insufficient funds: {failure_description}.",
+                status=JobsStatus.FAILED,
+            )
+            job_info.logger.update(
+                f"Add funds to your account at the [Credits page]({self.expected_parrot_url}/home/credits).",
                 status=JobsStatus.FAILED,
             )

@@ -445,9 +452,9 @@
         model_cost_dict["input_cost_credits_with_cache"] = converter.usd_to_credits(
             input_cost_with_cache
         )
-        model_cost_dict[
-
-
+        model_cost_dict["output_cost_credits_with_cache"] = (
+            converter.usd_to_credits(output_cost_with_cache)
+        )
         return list(expenses_by_model.values())

     def _fetch_results_and_log(
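The failure handler now inspects `latest_job_run_details` from the remote job payload and gives `insufficient funds` failures a dedicated two-part message. A minimal sketch of that branch in isolation; `describe_failure` is a hypothetical helper, and the payload shape follows the diff above:

```python
# Hypothetical helper mirroring the insufficient-funds branch above.
def describe_failure(remote_job_data: dict) -> list:
    latest_job_run_details = remote_job_data.get("latest_job_run_details", {})
    messages = []
    if latest_job_run_details.get("failure_reason") == "insufficient funds":
        description = latest_job_run_details.get(
            "failure_description",
            "You don't have enough credits to start this job",
        )
        messages.append(f"Insufficient funds: {description}.")
        messages.append("Add funds to your account at the Credits page.")
    return messages

payload = {"latest_job_run_details": {"failure_reason": "insufficient funds"}}
print(describe_failure(payload))
# ['Insufficient funds: You don't have enough credits to start this job.',
#  'Add funds to your account at the Credits page.']
```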
edsl/prompts/prompt.py
CHANGED
@@ -305,8 +305,13 @@ class Prompt(PersistenceMixin, RepresentationMixin):
         Returns (rendered_text, captured_variables).
         """
         # Combine replacements.
-
-
+        from ..scenarios import Scenario
+        # This fixed Issue 2027 - the scenario prefix was not being recoginized in the template
+        if isinstance(primary_replacement, Scenario):
+            additional = {'scenario': primary_replacement.to_dict()}
+        else:
+            additional = {}
+        all_replacements = {**primary_replacement, **additional_replacements, **additional}
         # If no replacements and no Jinja variables, just return the text.
         if not all_replacements and not _find_template_variables(text):
             return text, template_vars.get_all()
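The fix merges the Scenario's dictionary form under a `scenario` key before rendering, so a template can reference a field either directly or through the `scenario.` prefix. A minimal sketch of the merge with plain Jinja2, using a dict as a stand-in for a real Scenario:

```python
from jinja2 import Template

# Stand-in for a Scenario's to_dict() output.
primary_replacement = {"topic": "climate"}
additional_replacements = {}

# Mirror of the merge in the diff: expose the fields under a
# "scenario" prefix as well as at the top level.
additional = {"scenario": primary_replacement}
all_replacements = {**primary_replacement, **additional_replacements, **additional}

# Both spellings now render; the prefixed form is the one Issue 2027 fixed.
print(Template("{{ topic }}").render(**all_replacements))           # climate
print(Template("{{ scenario.topic }}").render(**all_replacements))  # climate
```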
edsl/questions/question_registry.py
CHANGED
@@ -43,6 +43,7 @@ class Question(metaclass=Meta):
         subclass = get_question_classes.get(question_type, None)
         if subclass is None:
             from .exceptions import QuestionValueError
+
             raise QuestionValueError(
                 f"No question registered with question_type {question_type}"
             )
@@ -65,7 +66,7 @@
         from ..coop import Coop

         coop = Coop()
-        return coop.
+        return coop.pull(url_or_uuid, "question")

     @classmethod
     def delete(cls, url_or_uuid: Union[str, UUID]):
@@ -146,6 +147,7 @@ def get_question_class(question_type):
     q2c = RegisterQuestionsMeta.question_types_to_classes()
     if question_type not in q2c:
         from .exceptions import QuestionValueError
+
         raise QuestionValueError(
             f"The question type, {question_type}, is not recognized. Recognied types are: {q2c.keys()}"
         )
@@ -171,4 +173,5 @@ question_purpose = {

 if __name__ == "__main__":
     import doctest
+
     doctest.testmod()
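`Question.pull` now routes through the generic `coop.pull` with an explicit `"question"` object type rather than a dedicated call. A hedged usage sketch; the UUID is a placeholder, not a real object:

```python
# Hypothetical usage of the updated classmethod; the UUID is a placeholder.
from edsl.questions.question_registry import Question

question = Question.pull("123e4567-e89b-12d3-a456-426614174000")
# Internally this now delegates to coop.pull(url_or_uuid, "question").
```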
edsl/results/result.py
CHANGED
@@ -99,6 +99,7 @@ class Result(Base, UserDict):
         cache_used_dict: Optional[dict[QuestionName, bool]] = None,
         indices: Optional[dict] = None,
         cache_keys: Optional[dict[QuestionName, str]] = None,
+        validated_dict: Optional[dict[QuestionName, bool]] = None,
     ):
         """Initialize a Result object.

@@ -135,6 +136,7 @@
             "reasoning_summaries_dict": reasoning_summaries_dict or {},
             "cache_used_dict": cache_used_dict or {},
             "cache_keys": cache_keys or {},
+            "validated_dict": validated_dict or {},
         }
         super().__init__(**data)
         self.indices = indices
@@ -247,6 +249,7 @@
             "question_type": sub_dicts_needing_new_keys["question_type"],
             "cache_used": new_cache_dict,
             "cache_keys": cache_keys,
+            "validated": self.data["validated_dict"],
         }
         if hasattr(self, "indices") and self.indices is not None:
             d["agent"].update({"agent_index": self.indices["agent"]})
@@ -460,7 +463,7 @@

         if hasattr(self, "interview_hash"):
             d["interview_hash"] = self.interview_hash
-
+
         # Preserve the order attribute if it exists
         if hasattr(self, "order"):
             d["order"] = self.order
@@ -505,14 +508,15 @@
             cache_used_dict=json_dict.get("cache_used_dict", {}),
             cache_keys=json_dict.get("cache_keys", {}),
             indices=json_dict.get("indices", None),
+            validated_dict=json_dict.get("validated_dict", {}),
         )
         if "interview_hash" in json_dict:
             result.interview_hash = json_dict["interview_hash"]
-
+
         # Restore the order attribute if it exists in the dictionary
         if "order" in json_dict:
             result.order = json_dict["order"]
-
+
         return result

     def __repr__(self):
@@ -604,9 +608,13 @@
     def from_interview(cls, interview) -> Result:
         """Return a Result object from an interview dictionary, ensuring no reference to the original interview is maintained."""
         # Copy the valid results to avoid maintaining references
-        model_response_objects =
+        model_response_objects = (
+            list(interview.valid_results) if hasattr(interview, "valid_results") else []
+        )
         # Create a copy of the answers
-        extracted_answers =
+        extracted_answers = (
+            dict(interview.answers) if hasattr(interview, "answers") else {}
+        )

         def get_question_results(
             model_response_objects,
@@ -640,29 +648,47 @@
             reasoning_summaries_dict = {}
             for k in answer_key_names:
                 reasoning_summary = question_results[k].reasoning_summary
-
+
                 # If reasoning summary is None but we have a raw model response, try to extract it
-                if reasoning_summary is None and hasattr(
+                if reasoning_summary is None and hasattr(
+                    question_results[k], "raw_model_response"
+                ):
                     try:
                         # Get the model class to access the reasoning_sequence
-                        model_class =
-
-
-
-
+                        model_class = (
+                            interview.model.__class__
+                            if hasattr(interview, "model")
+                            else None
+                        )
+
+                        if model_class and hasattr(model_class, "reasoning_sequence"):
+                            from ..language_models.raw_response_handler import (
+                                RawResponseHandler,
+                            )
+
                             # Create a handler with the model's reasoning sequence
                             handler = RawResponseHandler(
-                                key_sequence=
-
-
+                                key_sequence=(
+                                    model_class.key_sequence
+                                    if hasattr(model_class, "key_sequence")
+                                    else None
+                                ),
+                                usage_sequence=(
+                                    model_class.usage_sequence
+                                    if hasattr(model_class, "usage_sequence")
+                                    else None
+                                ),
+                                reasoning_sequence=model_class.reasoning_sequence,
                             )
-
+
                             # Try to extract the reasoning summary
-                            reasoning_summary = handler.get_reasoning_summary(
+                            reasoning_summary = handler.get_reasoning_summary(
+                                question_results[k].raw_model_response
+                            )
                     except Exception:
                         # If extraction fails, keep it as None
                         pass
-
+
                 reasoning_summaries_dict[k + "_reasoning_summary"] = reasoning_summary
             return reasoning_summaries_dict

@@ -726,39 +752,67 @@

             return raw_model_results_dictionary, cache_used_dictionary

+        def get_validated_dictionary(model_response_objects):
+            validated_dict = {}
+            for result in model_response_objects:
+                validated_dict[f"{result.question_name}_validated"] = result.validated
+            return validated_dict
+
         # Save essential information from the interview before clearing references
-        agent_copy = interview.agent.copy() if hasattr(interview,
-        scenario_copy =
-
-
-
-
+        agent_copy = interview.agent.copy() if hasattr(interview, "agent") else None
+        scenario_copy = (
+            interview.scenario.copy() if hasattr(interview, "scenario") else None
+        )
+        model_copy = interview.model.copy() if hasattr(interview, "model") else None
+        iteration = interview.iteration if hasattr(interview, "iteration") else 0
+        survey_copy = (
+            interview.survey.copy()
+            if hasattr(interview, "survey") and interview.survey
+            else None
+        )
+        indices_copy = (
+            dict(interview.indices)
+            if hasattr(interview, "indices") and interview.indices
+            else None
+        )
+        initial_hash = (
+            interview.initial_hash
+            if hasattr(interview, "initial_hash")
+            else hash(interview)
+        )

         # Process data to create dictionaries needed for Result
         question_results = get_question_results(model_response_objects)
         answer_key_names = list(question_results.keys())
-        generated_tokens_dict =
+        generated_tokens_dict = (
+            get_generated_tokens_dict(answer_key_names) if answer_key_names else {}
+        )
         comments_dict = get_comments_dict(answer_key_names) if answer_key_names else {}
-        reasoning_summaries_dict =
-
+        reasoning_summaries_dict = (
+            get_reasoning_summaries_dict(answer_key_names) if answer_key_names else {}
+        )
+
         # Get answers that are in the question results
         answer_dict = {}
         for k in answer_key_names:
             if k in extracted_answers:
                 answer_dict[k] = extracted_answers[k]
-
+
         cache_keys = get_cache_keys(model_response_objects)

         question_name_to_prompts = get_question_name_to_prompts(model_response_objects)
-        prompt_dictionary =
-            answer_key_names, question_name_to_prompts
-
-
+        prompt_dictionary = (
+            get_prompt_dictionary(answer_key_names, question_name_to_prompts)
+            if answer_key_names
+            else {}
+        )
+
         raw_model_results_dictionary, cache_used_dictionary = (
             get_raw_model_results_and_cache_used_dictionary(model_response_objects)
        )

+        validated_dictionary = get_validated_dictionary(model_response_objects)
+
         # Create the Result object with all copied data
         result = cls(
             agent=agent_copy,
@@ -775,22 +829,23 @@
             cache_used_dict=cache_used_dictionary,
             indices=indices_copy,
             cache_keys=cache_keys,
+            validated_dict=validated_dictionary,
         )
-
+
         # Store only the hash, not the interview
         result.interview_hash = initial_hash
-
+
         # Clear references to help garbage collection of the interview
-        if hasattr(interview,
+        if hasattr(interview, "clear_references"):
             interview.clear_references()
-
+
         # Clear local references to help with garbage collection
         del model_response_objects
         del extracted_answers
         del question_results
         del answer_key_names
         del question_name_to_prompts
-
+
         return result

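The `validated_dict` plumbing runs end to end: a new constructor argument, a `validated` entry in the Result's sub-dictionaries, round-tripping through `from_dict`, and population in `from_interview` via `get_validated_dictionary`. A small sketch of the key-naming convention that helper uses; the stub response class is illustrative, standing in for the interview's model response objects:

```python
from dataclasses import dataclass

# Illustrative stand-in for a model response object.
@dataclass
class StubResponse:
    question_name: str
    validated: bool

# Mirrors get_validated_dictionary from the diff above.
def get_validated_dictionary(model_response_objects):
    validated_dict = {}
    for result in model_response_objects:
        validated_dict[f"{result.question_name}_validated"] = result.validated
    return validated_dict

responses = [StubResponse("favorite_color", True), StubResponse("age", False)]
print(get_validated_dictionary(responses))
# {'favorite_color_validated': True, 'age_validated': False}
```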
edsl/results/results.py
CHANGED
@@ -274,6 +274,7 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
         "cache_used",
         "cache_keys",
         "reasoning_summary",
+        "validated",
     ]

     @classmethod
@@ -2205,14 +2206,16 @@
             "survey": self.survey.to_dict() if self.survey else None,
             "created_columns": self.created_columns,
             "cache": self.cache.to_dict() if hasattr(self, "cache") else None,
-            "task_history":
-
-
+            "task_history": (
+                self.task_history.to_dict()
+                if hasattr(self, "task_history")
+                else None
+            ),
             "completed": self.completed,
             "job_uuid": self._job_uuid if hasattr(self, "_job_uuid") else None,
-            "total_results":
-
-
+            "total_results": (
+                self._total_results if hasattr(self, "_total_results") else None
+            ),
         }

         metadata_path = temp_path / "metadata.json"
@@ -2270,16 +2273,22 @@

         # 2. Create a new Results instance
         results = cls(
-            survey=
-
-
+            survey=(
+                Survey.from_dict(metadata["survey"])
+                if metadata["survey"]
+                else None
+            ),
             created_columns=metadata["created_columns"],
-            cache=
-
-
-
-
-
+            cache=(
+                Cache.from_dict(metadata["cache"])
+                if metadata["cache"]
+                else None
+            ),
+            task_history=(
+                TaskHistory.from_dict(metadata["task_history"])
+                if metadata["task_history"]
+                else None
+            ),
             job_uuid=metadata["job_uuid"],
             total_results=metadata["total_results"],
         )
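The serialization changes guard the optional `task_history` and `_total_results` attributes with `hasattr` before writing metadata, and reconstruct `task_history` when loading. A minimal sketch of the guard pattern on a stand-in object; the `Holder` class is hypothetical:

```python
# Hypothetical stand-in showing the hasattr-guarded metadata pattern.
class Holder:
    pass

h = Holder()
h.completed = True  # always set in this sketch

metadata = {
    "completed": h.completed,
    # Optional attributes serialize to None when absent.
    "task_history": (
        h.task_history.to_dict() if hasattr(h, "task_history") else None
    ),
    "total_results": h._total_results if hasattr(h, "_total_results") else None,
}
print(metadata)  # {'completed': True, 'task_history': None, 'total_results': None}
```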
edsl/scenarios/file_store.py
CHANGED
@@ -512,6 +512,75 @@ class FileStore(Scenario):
         )
         return info

+    def offload(self, inplace=False) -> "FileStore":
+        """
+        Offloads base64-encoded content from the FileStore by replacing 'base64_string'
+        with 'offloaded'. This reduces memory usage.
+
+        Args:
+            inplace (bool): If True, modify the current FileStore. If False, return a new one.
+
+        Returns:
+            FileStore: The modified FileStore (either self or a new instance).
+        """
+        if inplace:
+            if hasattr(self, "base64_string"):
+                self.base64_string = "offloaded"
+            return self
+        else:
+            # Create a copy and offload it
+            file_store_dict = self.to_dict()
+            if "base64_string" in file_store_dict:
+                file_store_dict["base64_string"] = "offloaded"
+            return self.__class__.from_dict(file_store_dict)
+
+    def save_to_gcs_bucket(self, signed_url: str) -> dict:
+        """
+        Saves the FileStore's file content to a Google Cloud Storage bucket using a signed URL.
+
+        Args:
+            signed_url (str): The signed URL for uploading to GCS bucket
+
+        Returns:
+            dict: Response from the GCS upload operation
+
+        Raises:
+            ValueError: If base64_string is offloaded or missing
+            requests.RequestException: If the upload fails
+        """
+        import requests
+        import base64
+
+        # Check if content is available
+        if not hasattr(self, "base64_string") or self.base64_string == "offloaded":
+            raise ValueError(
+                "File content is not available (offloaded or missing). Cannot upload to GCS."
+            )
+
+        # Decode base64 content to bytes
+        try:
+            file_content = base64.b64decode(self.base64_string)
+        except Exception as e:
+            raise ValueError(f"Failed to decode base64 content: {e}")
+
+        # Prepare headers with proper content type
+        headers = {
+            "Content-Type": self.mime_type or "application/octet-stream",
+            "Content-Length": str(len(file_content)),
+        }
+
+        # Upload to GCS using the signed URL
+        response = requests.put(signed_url, data=file_content, headers=headers)
+        response.raise_for_status()
+
+        return {
+            "status": "success",
+            "status_code": response.status_code,
+            "file_size": len(file_content),
+            "mime_type": self.mime_type,
+            "file_extension": self.suffix,
+        }
+
     @classmethod
     def pull(cls, url_or_uuid: Union[str, UUID]) -> "FileStore":
         """