edsl 0.1.62__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -219,15 +219,22 @@ class JobsRemoteInferenceHandler:
219
219
  self, job_info: RemoteJobInfo, remote_job_data: RemoteInferenceResponse
220
220
  ) -> None:
221
221
  "Handles a failed job by logging the error and updating the job status."
222
- error_report_url = remote_job_data.get("latest_job_run_details", {}).get(
223
- "error_report_url"
224
- )
222
+ latest_job_run_details = remote_job_data.get("latest_job_run_details", {})
223
+ error_report_url = latest_job_run_details.get("error_report_url")
225
224
 
226
- reason = remote_job_data.get("reason")
225
+ failure_reason = latest_job_run_details.get("failure_reason")
227
226
 
228
- if reason == "insufficient funds":
227
+ if failure_reason == "insufficient funds":
228
+ failure_description = latest_job_run_details.get(
229
+ "failure_description",
230
+ "You don't have enough credits to start this job",
231
+ )
232
+ job_info.logger.update(
233
+ f"Insufficient funds: {failure_description}.",
234
+ status=JobsStatus.FAILED,
235
+ )
229
236
  job_info.logger.update(
230
- f"Error: Insufficient balance to start the job. Add funds to your account at the [Credits page]({self.expected_parrot_url}/home/credits)",
237
+ f"Add funds to your account at the [Credits page]({self.expected_parrot_url}/home/credits).",
231
238
  status=JobsStatus.FAILED,
232
239
  )
233
240
 
edsl/results/result.py CHANGED
@@ -99,6 +99,7 @@ class Result(Base, UserDict):
99
99
  cache_used_dict: Optional[dict[QuestionName, bool]] = None,
100
100
  indices: Optional[dict] = None,
101
101
  cache_keys: Optional[dict[QuestionName, str]] = None,
102
+ validated_dict: Optional[dict[QuestionName, bool]] = None,
102
103
  ):
103
104
  """Initialize a Result object.
104
105
 
@@ -135,6 +136,7 @@ class Result(Base, UserDict):
135
136
  "reasoning_summaries_dict": reasoning_summaries_dict or {},
136
137
  "cache_used_dict": cache_used_dict or {},
137
138
  "cache_keys": cache_keys or {},
139
+ "validated_dict": validated_dict or {},
138
140
  }
139
141
  super().__init__(**data)
140
142
  self.indices = indices
@@ -247,6 +249,7 @@ class Result(Base, UserDict):
247
249
  "question_type": sub_dicts_needing_new_keys["question_type"],
248
250
  "cache_used": new_cache_dict,
249
251
  "cache_keys": cache_keys,
252
+ "validated": self.data["validated_dict"],
250
253
  }
251
254
  if hasattr(self, "indices") and self.indices is not None:
252
255
  d["agent"].update({"agent_index": self.indices["agent"]})
@@ -460,7 +463,7 @@ class Result(Base, UserDict):
460
463
 
461
464
  if hasattr(self, "interview_hash"):
462
465
  d["interview_hash"] = self.interview_hash
463
-
466
+
464
467
  # Preserve the order attribute if it exists
465
468
  if hasattr(self, "order"):
466
469
  d["order"] = self.order
@@ -505,14 +508,15 @@ class Result(Base, UserDict):
505
508
  cache_used_dict=json_dict.get("cache_used_dict", {}),
506
509
  cache_keys=json_dict.get("cache_keys", {}),
507
510
  indices=json_dict.get("indices", None),
511
+ validated_dict=json_dict.get("validated_dict", {}),
508
512
  )
509
513
  if "interview_hash" in json_dict:
510
514
  result.interview_hash = json_dict["interview_hash"]
511
-
515
+
512
516
  # Restore the order attribute if it exists in the dictionary
513
517
  if "order" in json_dict:
514
518
  result.order = json_dict["order"]
515
-
519
+
516
520
  return result
517
521
 
518
522
  def __repr__(self):
@@ -604,9 +608,13 @@ class Result(Base, UserDict):
604
608
  def from_interview(cls, interview) -> Result:
605
609
  """Return a Result object from an interview dictionary, ensuring no reference to the original interview is maintained."""
606
610
  # Copy the valid results to avoid maintaining references
607
- model_response_objects = list(interview.valid_results) if hasattr(interview, 'valid_results') else []
611
+ model_response_objects = (
612
+ list(interview.valid_results) if hasattr(interview, "valid_results") else []
613
+ )
608
614
  # Create a copy of the answers
609
- extracted_answers = dict(interview.answers) if hasattr(interview, 'answers') else {}
615
+ extracted_answers = (
616
+ dict(interview.answers) if hasattr(interview, "answers") else {}
617
+ )
610
618
 
611
619
  def get_question_results(
612
620
  model_response_objects,
@@ -640,29 +648,47 @@ class Result(Base, UserDict):
640
648
  reasoning_summaries_dict = {}
641
649
  for k in answer_key_names:
642
650
  reasoning_summary = question_results[k].reasoning_summary
643
-
651
+
644
652
  # If reasoning summary is None but we have a raw model response, try to extract it
645
- if reasoning_summary is None and hasattr(question_results[k], 'raw_model_response'):
653
+ if reasoning_summary is None and hasattr(
654
+ question_results[k], "raw_model_response"
655
+ ):
646
656
  try:
647
657
  # Get the model class to access the reasoning_sequence
648
- model_class = interview.model.__class__ if hasattr(interview, 'model') else None
649
-
650
- if model_class and hasattr(model_class, 'reasoning_sequence'):
651
- from ..language_models.raw_response_handler import RawResponseHandler
652
-
658
+ model_class = (
659
+ interview.model.__class__
660
+ if hasattr(interview, "model")
661
+ else None
662
+ )
663
+
664
+ if model_class and hasattr(model_class, "reasoning_sequence"):
665
+ from ..language_models.raw_response_handler import (
666
+ RawResponseHandler,
667
+ )
668
+
653
669
  # Create a handler with the model's reasoning sequence
654
670
  handler = RawResponseHandler(
655
- key_sequence=model_class.key_sequence if hasattr(model_class, 'key_sequence') else None,
656
- usage_sequence=model_class.usage_sequence if hasattr(model_class, 'usage_sequence') else None,
657
- reasoning_sequence=model_class.reasoning_sequence
671
+ key_sequence=(
672
+ model_class.key_sequence
673
+ if hasattr(model_class, "key_sequence")
674
+ else None
675
+ ),
676
+ usage_sequence=(
677
+ model_class.usage_sequence
678
+ if hasattr(model_class, "usage_sequence")
679
+ else None
680
+ ),
681
+ reasoning_sequence=model_class.reasoning_sequence,
658
682
  )
659
-
683
+
660
684
  # Try to extract the reasoning summary
661
- reasoning_summary = handler.get_reasoning_summary(question_results[k].raw_model_response)
685
+ reasoning_summary = handler.get_reasoning_summary(
686
+ question_results[k].raw_model_response
687
+ )
662
688
  except Exception:
663
689
  # If extraction fails, keep it as None
664
690
  pass
665
-
691
+
666
692
  reasoning_summaries_dict[k + "_reasoning_summary"] = reasoning_summary
667
693
  return reasoning_summaries_dict
668
694
 
@@ -726,39 +752,67 @@ class Result(Base, UserDict):
726
752
 
727
753
  return raw_model_results_dictionary, cache_used_dictionary
728
754
 
755
+ def get_validated_dictionary(model_response_objects):
756
+ validated_dict = {}
757
+ for result in model_response_objects:
758
+ validated_dict[f"{result.question_name}_validated"] = result.validated
759
+ return validated_dict
760
+
729
761
  # Save essential information from the interview before clearing references
730
- agent_copy = interview.agent.copy() if hasattr(interview, 'agent') else None
731
- scenario_copy = interview.scenario.copy() if hasattr(interview, 'scenario') else None
732
- model_copy = interview.model.copy() if hasattr(interview, 'model') else None
733
- iteration = interview.iteration if hasattr(interview, 'iteration') else 0
734
- survey_copy = interview.survey.copy() if hasattr(interview, 'survey') and interview.survey else None
735
- indices_copy = dict(interview.indices) if hasattr(interview, 'indices') and interview.indices else None
736
- initial_hash = interview.initial_hash if hasattr(interview, 'initial_hash') else hash(interview)
762
+ agent_copy = interview.agent.copy() if hasattr(interview, "agent") else None
763
+ scenario_copy = (
764
+ interview.scenario.copy() if hasattr(interview, "scenario") else None
765
+ )
766
+ model_copy = interview.model.copy() if hasattr(interview, "model") else None
767
+ iteration = interview.iteration if hasattr(interview, "iteration") else 0
768
+ survey_copy = (
769
+ interview.survey.copy()
770
+ if hasattr(interview, "survey") and interview.survey
771
+ else None
772
+ )
773
+ indices_copy = (
774
+ dict(interview.indices)
775
+ if hasattr(interview, "indices") and interview.indices
776
+ else None
777
+ )
778
+ initial_hash = (
779
+ interview.initial_hash
780
+ if hasattr(interview, "initial_hash")
781
+ else hash(interview)
782
+ )
737
783
 
738
784
  # Process data to create dictionaries needed for Result
739
785
  question_results = get_question_results(model_response_objects)
740
786
  answer_key_names = list(question_results.keys())
741
- generated_tokens_dict = get_generated_tokens_dict(answer_key_names) if answer_key_names else {}
787
+ generated_tokens_dict = (
788
+ get_generated_tokens_dict(answer_key_names) if answer_key_names else {}
789
+ )
742
790
  comments_dict = get_comments_dict(answer_key_names) if answer_key_names else {}
743
- reasoning_summaries_dict = get_reasoning_summaries_dict(answer_key_names) if answer_key_names else {}
744
-
791
+ reasoning_summaries_dict = (
792
+ get_reasoning_summaries_dict(answer_key_names) if answer_key_names else {}
793
+ )
794
+
745
795
  # Get answers that are in the question results
746
796
  answer_dict = {}
747
797
  for k in answer_key_names:
748
798
  if k in extracted_answers:
749
799
  answer_dict[k] = extracted_answers[k]
750
-
800
+
751
801
  cache_keys = get_cache_keys(model_response_objects)
752
802
 
753
803
  question_name_to_prompts = get_question_name_to_prompts(model_response_objects)
754
- prompt_dictionary = get_prompt_dictionary(
755
- answer_key_names, question_name_to_prompts
756
- ) if answer_key_names else {}
757
-
804
+ prompt_dictionary = (
805
+ get_prompt_dictionary(answer_key_names, question_name_to_prompts)
806
+ if answer_key_names
807
+ else {}
808
+ )
809
+
758
810
  raw_model_results_dictionary, cache_used_dictionary = (
759
811
  get_raw_model_results_and_cache_used_dictionary(model_response_objects)
760
812
  )
761
813
 
814
+ validated_dictionary = get_validated_dictionary(model_response_objects)
815
+
762
816
  # Create the Result object with all copied data
763
817
  result = cls(
764
818
  agent=agent_copy,
@@ -775,22 +829,23 @@ class Result(Base, UserDict):
775
829
  cache_used_dict=cache_used_dictionary,
776
830
  indices=indices_copy,
777
831
  cache_keys=cache_keys,
832
+ validated_dict=validated_dictionary,
778
833
  )
779
-
834
+
780
835
  # Store only the hash, not the interview
781
836
  result.interview_hash = initial_hash
782
-
837
+
783
838
  # Clear references to help garbage collection of the interview
784
- if hasattr(interview, 'clear_references'):
839
+ if hasattr(interview, "clear_references"):
785
840
  interview.clear_references()
786
-
841
+
787
842
  # Clear local references to help with garbage collection
788
843
  del model_response_objects
789
844
  del extracted_answers
790
845
  del question_results
791
846
  del answer_key_names
792
847
  del question_name_to_prompts
793
-
848
+
794
849
  return result
795
850
 
796
851
 
edsl/results/results.py CHANGED
@@ -274,6 +274,7 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
274
274
  "cache_used",
275
275
  "cache_keys",
276
276
  "reasoning_summary",
277
+ "validated",
277
278
  ]
278
279
 
279
280
  @classmethod
@@ -2205,14 +2206,16 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
2205
2206
  "survey": self.survey.to_dict() if self.survey else None,
2206
2207
  "created_columns": self.created_columns,
2207
2208
  "cache": self.cache.to_dict() if hasattr(self, "cache") else None,
2208
- "task_history": self.task_history.to_dict()
2209
- if hasattr(self, "task_history")
2210
- else None,
2209
+ "task_history": (
2210
+ self.task_history.to_dict()
2211
+ if hasattr(self, "task_history")
2212
+ else None
2213
+ ),
2211
2214
  "completed": self.completed,
2212
2215
  "job_uuid": self._job_uuid if hasattr(self, "_job_uuid") else None,
2213
- "total_results": self._total_results
2214
- if hasattr(self, "_total_results")
2215
- else None,
2216
+ "total_results": (
2217
+ self._total_results if hasattr(self, "_total_results") else None
2218
+ ),
2216
2219
  }
2217
2220
 
2218
2221
  metadata_path = temp_path / "metadata.json"
@@ -2270,16 +2273,22 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
2270
2273
 
2271
2274
  # 2. Create a new Results instance
2272
2275
  results = cls(
2273
- survey=Survey.from_dict(metadata["survey"])
2274
- if metadata["survey"]
2275
- else None,
2276
+ survey=(
2277
+ Survey.from_dict(metadata["survey"])
2278
+ if metadata["survey"]
2279
+ else None
2280
+ ),
2276
2281
  created_columns=metadata["created_columns"],
2277
- cache=Cache.from_dict(metadata["cache"])
2278
- if metadata["cache"]
2279
- else None,
2280
- task_history=TaskHistory.from_dict(metadata["task_history"])
2281
- if metadata["task_history"]
2282
- else None,
2282
+ cache=(
2283
+ Cache.from_dict(metadata["cache"])
2284
+ if metadata["cache"]
2285
+ else None
2286
+ ),
2287
+ task_history=(
2288
+ TaskHistory.from_dict(metadata["task_history"])
2289
+ if metadata["task_history"]
2290
+ else None
2291
+ ),
2283
2292
  job_uuid=metadata["job_uuid"],
2284
2293
  total_results=metadata["total_results"],
2285
2294
  )