edsl 0.1.61__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- edsl/__init__.py +66 -0
- edsl/__version__.py +1 -1
- edsl/base/base_class.py +53 -0
- edsl/cli.py +93 -27
- edsl/config/config_class.py +4 -0
- edsl/coop/coop.py +403 -28
- edsl/coop/coop_jobs_objects.py +2 -2
- edsl/coop/coop_regular_objects.py +3 -1
- edsl/dataset/dataset.py +47 -41
- edsl/dataset/dataset_operations_mixin.py +138 -15
- edsl/dataset/report_from_template.py +509 -0
- edsl/inference_services/services/azure_ai.py +8 -2
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/jobs/jobs.py +5 -4
- edsl/jobs/jobs_checks.py +11 -6
- edsl/jobs/remote_inference.py +17 -10
- edsl/prompts/prompt.py +7 -2
- edsl/questions/question_registry.py +4 -1
- edsl/results/result.py +93 -38
- edsl/results/results.py +24 -15
- edsl/scenarios/file_store.py +69 -0
- edsl/scenarios/scenario.py +233 -0
- edsl/scenarios/scenario_list.py +294 -130
- edsl/scenarios/scenario_source.py +1 -2
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/METADATA +1 -1
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/RECORD +29 -28
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/LICENSE +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/WHEEL +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/entry_points.txt +0 -0
edsl/jobs/remote_inference.py
CHANGED
@@ -176,7 +176,7 @@ class JobsRemoteInferenceHandler:
         from ..coop import Coop

         coop = Coop()
-        return coop.
+        return coop.new_remote_inference_get(job_uuid)

     def _construct_remote_job_fetcher(
         self, testing_simulated_response: Optional[Any] = None
@@ -219,15 +219,22 @@
         self, job_info: RemoteJobInfo, remote_job_data: RemoteInferenceResponse
     ) -> None:
         "Handles a failed job by logging the error and updating the job status."
-
-
-        )
+        latest_job_run_details = remote_job_data.get("latest_job_run_details", {})
+        error_report_url = latest_job_run_details.get("error_report_url")

-
+        failure_reason = latest_job_run_details.get("failure_reason")

-        if
+        if failure_reason == "insufficient funds":
+            failure_description = latest_job_run_details.get(
+                "failure_description",
+                "You don't have enough credits to start this job",
+            )
             job_info.logger.update(
-                f"
+                f"Insufficient funds: {failure_description}.",
+                status=JobsStatus.FAILED,
+            )
+            job_info.logger.update(
+                f"Add funds to your account at the [Credits page]({self.expected_parrot_url}/home/credits).",
                 status=JobsStatus.FAILED,
             )

@@ -445,9 +452,9 @@
         model_cost_dict["input_cost_credits_with_cache"] = converter.usd_to_credits(
             input_cost_with_cache
         )
-        model_cost_dict[
-
-
+        model_cost_dict["output_cost_credits_with_cache"] = (
+            converter.usd_to_credits(output_cost_with_cache)
+        )
         return list(expenses_by_model.values())

     def _fetch_results_and_log(
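The failure handler now inspects `latest_job_run_details` from the remote job payload and gives `insufficient funds` failures a dedicated two-part message. A minimal sketch of that branch in isolation; `describe_failure` is a hypothetical helper, and the payload shape follows the diff above:

```python
# Hypothetical helper mirroring the insufficient-funds branch above.
def describe_failure(remote_job_data: dict) -> list:
    latest_job_run_details = remote_job_data.get("latest_job_run_details", {})
    messages = []
    if latest_job_run_details.get("failure_reason") == "insufficient funds":
        description = latest_job_run_details.get(
            "failure_description",
            "You don't have enough credits to start this job",
        )
        messages.append(f"Insufficient funds: {description}.")
        messages.append("Add funds to your account at the Credits page.")
    return messages

payload = {"latest_job_run_details": {"failure_reason": "insufficient funds"}}
print(describe_failure(payload))
# ['Insufficient funds: You don't have enough credits to start this job.',
#  'Add funds to your account at the Credits page.']
```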
edsl/prompts/prompt.py
CHANGED
@@ -305,8 +305,13 @@ class Prompt(PersistenceMixin, RepresentationMixin):
         Returns (rendered_text, captured_variables).
         """
         # Combine replacements.
-
-
+        from ..scenarios import Scenario
+        # This fixed Issue 2027 - the scenario prefix was not being recoginized in the template
+        if isinstance(primary_replacement, Scenario):
+            additional = {'scenario': primary_replacement.to_dict()}
+        else:
+            additional = {}
+        all_replacements = {**primary_replacement, **additional_replacements, **additional}
         # If no replacements and no Jinja variables, just return the text.
         if not all_replacements and not _find_template_variables(text):
             return text, template_vars.get_all()
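The fix merges the Scenario's dictionary form under a `scenario` key before rendering, so a template can reference a field either directly or through the `scenario.` prefix. A minimal sketch of the merge with plain Jinja2, using a dict as a stand-in for a real Scenario:

```python
from jinja2 import Template

# Stand-in for a Scenario's to_dict() output.
primary_replacement = {"topic": "climate"}
additional_replacements = {}

# Mirror of the merge in the diff: expose the fields under a
# "scenario" prefix as well as at the top level.
additional = {"scenario": primary_replacement}
all_replacements = {**primary_replacement, **additional_replacements, **additional}

# Both spellings now render; the prefixed form is the one Issue 2027 fixed.
print(Template("{{ topic }}").render(**all_replacements))           # climate
print(Template("{{ scenario.topic }}").render(**all_replacements))  # climate
```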
edsl/questions/question_registry.py
CHANGED
@@ -43,6 +43,7 @@ class Question(metaclass=Meta):
         subclass = get_question_classes.get(question_type, None)
         if subclass is None:
             from .exceptions import QuestionValueError
+
             raise QuestionValueError(
                 f"No question registered with question_type {question_type}"
             )
@@ -65,7 +66,7 @@
         from ..coop import Coop

         coop = Coop()
-        return coop.
+        return coop.pull(url_or_uuid, "question")

     @classmethod
     def delete(cls, url_or_uuid: Union[str, UUID]):
@@ -146,6 +147,7 @@ def get_question_class(question_type):
     q2c = RegisterQuestionsMeta.question_types_to_classes()
     if question_type not in q2c:
         from .exceptions import QuestionValueError
+
         raise QuestionValueError(
             f"The question type, {question_type}, is not recognized. Recognied types are: {q2c.keys()}"
         )
@@ -171,4 +173,5 @@ question_purpose = {

 if __name__ == "__main__":
     import doctest
+
     doctest.testmod()
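`Question.pull` now routes through the generic `coop.pull` with an explicit `"question"` object type rather than a dedicated call. A hedged usage sketch; the UUID is a placeholder, not a real object:

```python
# Hypothetical usage of the updated classmethod; the UUID is a placeholder.
from edsl.questions.question_registry import Question

question = Question.pull("123e4567-e89b-12d3-a456-426614174000")
# Internally this now delegates to coop.pull(url_or_uuid, "question").
```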
edsl/results/result.py
CHANGED
@@ -99,6 +99,7 @@ class Result(Base, UserDict):
         cache_used_dict: Optional[dict[QuestionName, bool]] = None,
         indices: Optional[dict] = None,
         cache_keys: Optional[dict[QuestionName, str]] = None,
+        validated_dict: Optional[dict[QuestionName, bool]] = None,
     ):
         """Initialize a Result object.

@@ -135,6 +136,7 @@
             "reasoning_summaries_dict": reasoning_summaries_dict or {},
             "cache_used_dict": cache_used_dict or {},
             "cache_keys": cache_keys or {},
+            "validated_dict": validated_dict or {},
         }
         super().__init__(**data)
         self.indices = indices
@@ -247,6 +249,7 @@
             "question_type": sub_dicts_needing_new_keys["question_type"],
             "cache_used": new_cache_dict,
             "cache_keys": cache_keys,
+            "validated": self.data["validated_dict"],
         }
         if hasattr(self, "indices") and self.indices is not None:
             d["agent"].update({"agent_index": self.indices["agent"]})
@@ -460,7 +463,7 @@

         if hasattr(self, "interview_hash"):
             d["interview_hash"] = self.interview_hash
-
+
         # Preserve the order attribute if it exists
         if hasattr(self, "order"):
             d["order"] = self.order
@@ -505,14 +508,15 @@
             cache_used_dict=json_dict.get("cache_used_dict", {}),
             cache_keys=json_dict.get("cache_keys", {}),
             indices=json_dict.get("indices", None),
+            validated_dict=json_dict.get("validated_dict", {}),
         )
         if "interview_hash" in json_dict:
             result.interview_hash = json_dict["interview_hash"]
-
+
         # Restore the order attribute if it exists in the dictionary
         if "order" in json_dict:
             result.order = json_dict["order"]
-
+
         return result

     def __repr__(self):
@@ -604,9 +608,13 @@
     def from_interview(cls, interview) -> Result:
         """Return a Result object from an interview dictionary, ensuring no reference to the original interview is maintained."""
         # Copy the valid results to avoid maintaining references
-        model_response_objects =
+        model_response_objects = (
+            list(interview.valid_results) if hasattr(interview, "valid_results") else []
+        )
         # Create a copy of the answers
-        extracted_answers =
+        extracted_answers = (
+            dict(interview.answers) if hasattr(interview, "answers") else {}
+        )

         def get_question_results(
             model_response_objects,
@@ -640,29 +648,47 @@
             reasoning_summaries_dict = {}
             for k in answer_key_names:
                 reasoning_summary = question_results[k].reasoning_summary
-
+
                 # If reasoning summary is None but we have a raw model response, try to extract it
-                if reasoning_summary is None and hasattr(
+                if reasoning_summary is None and hasattr(
+                    question_results[k], "raw_model_response"
+                ):
                     try:
                         # Get the model class to access the reasoning_sequence
-                        model_class =
-
-
-
-
+                        model_class = (
+                            interview.model.__class__
+                            if hasattr(interview, "model")
+                            else None
+                        )
+
+                        if model_class and hasattr(model_class, "reasoning_sequence"):
+                            from ..language_models.raw_response_handler import (
+                                RawResponseHandler,
+                            )
+
                             # Create a handler with the model's reasoning sequence
                             handler = RawResponseHandler(
-                                key_sequence=
-
-
+                                key_sequence=(
+                                    model_class.key_sequence
+                                    if hasattr(model_class, "key_sequence")
+                                    else None
+                                ),
+                                usage_sequence=(
+                                    model_class.usage_sequence
+                                    if hasattr(model_class, "usage_sequence")
+                                    else None
+                                ),
+                                reasoning_sequence=model_class.reasoning_sequence,
                             )
-
+
                             # Try to extract the reasoning summary
-                            reasoning_summary = handler.get_reasoning_summary(
+                            reasoning_summary = handler.get_reasoning_summary(
+                                question_results[k].raw_model_response
+                            )
                     except Exception:
                         # If extraction fails, keep it as None
                         pass
-
+
                 reasoning_summaries_dict[k + "_reasoning_summary"] = reasoning_summary
             return reasoning_summaries_dict

@@ -726,39 +752,67 @@

             return raw_model_results_dictionary, cache_used_dictionary

+        def get_validated_dictionary(model_response_objects):
+            validated_dict = {}
+            for result in model_response_objects:
+                validated_dict[f"{result.question_name}_validated"] = result.validated
+            return validated_dict
+
         # Save essential information from the interview before clearing references
-        agent_copy = interview.agent.copy() if hasattr(interview,
-        scenario_copy =
-
-
-
-
+        agent_copy = interview.agent.copy() if hasattr(interview, "agent") else None
+        scenario_copy = (
+            interview.scenario.copy() if hasattr(interview, "scenario") else None
+        )
+        model_copy = interview.model.copy() if hasattr(interview, "model") else None
+        iteration = interview.iteration if hasattr(interview, "iteration") else 0
+        survey_copy = (
+            interview.survey.copy()
+            if hasattr(interview, "survey") and interview.survey
+            else None
+        )
+        indices_copy = (
+            dict(interview.indices)
+            if hasattr(interview, "indices") and interview.indices
+            else None
+        )
+        initial_hash = (
+            interview.initial_hash
+            if hasattr(interview, "initial_hash")
+            else hash(interview)
+        )

         # Process data to create dictionaries needed for Result
         question_results = get_question_results(model_response_objects)
         answer_key_names = list(question_results.keys())
-        generated_tokens_dict =
+        generated_tokens_dict = (
+            get_generated_tokens_dict(answer_key_names) if answer_key_names else {}
+        )
         comments_dict = get_comments_dict(answer_key_names) if answer_key_names else {}
-        reasoning_summaries_dict =
-
+        reasoning_summaries_dict = (
+            get_reasoning_summaries_dict(answer_key_names) if answer_key_names else {}
+        )
+
         # Get answers that are in the question results
         answer_dict = {}
         for k in answer_key_names:
             if k in extracted_answers:
                 answer_dict[k] = extracted_answers[k]
-
+
         cache_keys = get_cache_keys(model_response_objects)

         question_name_to_prompts = get_question_name_to_prompts(model_response_objects)
-        prompt_dictionary =
-            answer_key_names, question_name_to_prompts
-
-
+        prompt_dictionary = (
+            get_prompt_dictionary(answer_key_names, question_name_to_prompts)
+            if answer_key_names
+            else {}
+        )
+
         raw_model_results_dictionary, cache_used_dictionary = (
             get_raw_model_results_and_cache_used_dictionary(model_response_objects)
        )

+        validated_dictionary = get_validated_dictionary(model_response_objects)
+
         # Create the Result object with all copied data
         result = cls(
             agent=agent_copy,
@@ -775,22 +829,23 @@
             cache_used_dict=cache_used_dictionary,
             indices=indices_copy,
             cache_keys=cache_keys,
+            validated_dict=validated_dictionary,
         )
-
+
         # Store only the hash, not the interview
         result.interview_hash = initial_hash
-
+
         # Clear references to help garbage collection of the interview
-        if hasattr(interview,
+        if hasattr(interview, "clear_references"):
             interview.clear_references()
-
+
         # Clear local references to help with garbage collection
         del model_response_objects
         del extracted_answers
         del question_results
         del answer_key_names
         del question_name_to_prompts
-
+
         return result

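The `validated_dict` plumbing runs end to end: a new constructor argument, a `validated` entry in the Result's sub-dictionaries, round-tripping through `from_dict`, and population in `from_interview` via `get_validated_dictionary`. A small sketch of the key-naming convention that helper uses; the stub response class is illustrative, standing in for the interview's model response objects:

```python
from dataclasses import dataclass

# Illustrative stand-in for a model response object.
@dataclass
class StubResponse:
    question_name: str
    validated: bool

# Mirrors get_validated_dictionary from the diff above.
def get_validated_dictionary(model_response_objects):
    validated_dict = {}
    for result in model_response_objects:
        validated_dict[f"{result.question_name}_validated"] = result.validated
    return validated_dict

responses = [StubResponse("favorite_color", True), StubResponse("age", False)]
print(get_validated_dictionary(responses))
# {'favorite_color_validated': True, 'age_validated': False}
```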
edsl/results/results.py
CHANGED
@@ -274,6 +274,7 @@ class Results(MutableSequence, ResultsOperationsMixin, Base):
         "cache_used",
         "cache_keys",
         "reasoning_summary",
+        "validated",
     ]

     @classmethod
@@ -2205,14 +2206,16 @@
             "survey": self.survey.to_dict() if self.survey else None,
             "created_columns": self.created_columns,
             "cache": self.cache.to_dict() if hasattr(self, "cache") else None,
-            "task_history":
-
-
+            "task_history": (
+                self.task_history.to_dict()
+                if hasattr(self, "task_history")
+                else None
+            ),
             "completed": self.completed,
             "job_uuid": self._job_uuid if hasattr(self, "_job_uuid") else None,
-            "total_results":
-
-
+            "total_results": (
+                self._total_results if hasattr(self, "_total_results") else None
+            ),
         }

         metadata_path = temp_path / "metadata.json"
@@ -2270,16 +2273,22 @@

         # 2. Create a new Results instance
         results = cls(
-            survey=
-
-
+            survey=(
+                Survey.from_dict(metadata["survey"])
+                if metadata["survey"]
+                else None
+            ),
             created_columns=metadata["created_columns"],
-            cache=
-
-
-
-
-
+            cache=(
+                Cache.from_dict(metadata["cache"])
+                if metadata["cache"]
+                else None
+            ),
+            task_history=(
+                TaskHistory.from_dict(metadata["task_history"])
+                if metadata["task_history"]
+                else None
+            ),
             job_uuid=metadata["job_uuid"],
             total_results=metadata["total_results"],
         )
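The serialization changes guard the optional `task_history` and `_total_results` attributes with `hasattr` before writing metadata, and reconstruct `task_history` when loading. A minimal sketch of the guard pattern on a stand-in object; the `Holder` class is hypothetical:

```python
# Hypothetical stand-in showing the hasattr-guarded metadata pattern.
class Holder:
    pass

h = Holder()
h.completed = True  # always set in this sketch

metadata = {
    "completed": h.completed,
    # Optional attributes serialize to None when absent.
    "task_history": (
        h.task_history.to_dict() if hasattr(h, "task_history") else None
    ),
    "total_results": h._total_results if hasattr(h, "_total_results") else None,
}
print(metadata)  # {'completed': True, 'task_history': None, 'total_results': None}
```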
edsl/scenarios/file_store.py
CHANGED
@@ -512,6 +512,75 @@ class FileStore(Scenario):
         )
         return info

+    def offload(self, inplace=False) -> "FileStore":
+        """
+        Offloads base64-encoded content from the FileStore by replacing 'base64_string'
+        with 'offloaded'. This reduces memory usage.
+
+        Args:
+            inplace (bool): If True, modify the current FileStore. If False, return a new one.
+
+        Returns:
+            FileStore: The modified FileStore (either self or a new instance).
+        """
+        if inplace:
+            if hasattr(self, "base64_string"):
+                self.base64_string = "offloaded"
+            return self
+        else:
+            # Create a copy and offload it
+            file_store_dict = self.to_dict()
+            if "base64_string" in file_store_dict:
+                file_store_dict["base64_string"] = "offloaded"
+            return self.__class__.from_dict(file_store_dict)
+
+    def save_to_gcs_bucket(self, signed_url: str) -> dict:
+        """
+        Saves the FileStore's file content to a Google Cloud Storage bucket using a signed URL.
+
+        Args:
+            signed_url (str): The signed URL for uploading to GCS bucket
+
+        Returns:
+            dict: Response from the GCS upload operation
+
+        Raises:
+            ValueError: If base64_string is offloaded or missing
+            requests.RequestException: If the upload fails
+        """
+        import requests
+        import base64
+
+        # Check if content is available
+        if not hasattr(self, "base64_string") or self.base64_string == "offloaded":
+            raise ValueError(
+                "File content is not available (offloaded or missing). Cannot upload to GCS."
+            )
+
+        # Decode base64 content to bytes
+        try:
+            file_content = base64.b64decode(self.base64_string)
+        except Exception as e:
+            raise ValueError(f"Failed to decode base64 content: {e}")
+
+        # Prepare headers with proper content type
+        headers = {
+            "Content-Type": self.mime_type or "application/octet-stream",
+            "Content-Length": str(len(file_content)),
+        }
+
+        # Upload to GCS using the signed URL
+        response = requests.put(signed_url, data=file_content, headers=headers)
+        response.raise_for_status()
+
+        return {
+            "status": "success",
+            "status_code": response.status_code,
+            "file_size": len(file_content),
+            "mime_type": self.mime_type,
+            "file_extension": self.suffix,
+        }
+
     @classmethod
     def pull(cls, url_or_uuid: Union[str, UUID]) -> "FileStore":
         """