edsl 0.1.33__py3-none-any.whl → 0.1.33.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +3 -9
- edsl/__init__.py +3 -8
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +8 -40
- edsl/agents/AgentList.py +0 -43
- edsl/agents/Invigilator.py +219 -135
- edsl/agents/InvigilatorBase.py +59 -148
- edsl/agents/{PromptConstructor.py → PromptConstructionMixin.py} +89 -138
- edsl/agents/__init__.py +0 -1
- edsl/config.py +56 -47
- edsl/coop/coop.py +7 -50
- edsl/data/Cache.py +1 -35
- edsl/data_transfer_models.py +38 -73
- edsl/enums.py +0 -4
- edsl/exceptions/language_models.py +1 -25
- edsl/exceptions/questions.py +5 -62
- edsl/exceptions/results.py +0 -4
- edsl/inference_services/AnthropicService.py +11 -13
- edsl/inference_services/AwsBedrock.py +17 -19
- edsl/inference_services/AzureAI.py +20 -37
- edsl/inference_services/GoogleService.py +12 -16
- edsl/inference_services/GroqService.py +0 -2
- edsl/inference_services/InferenceServiceABC.py +3 -58
- edsl/inference_services/OpenAIService.py +54 -48
- edsl/inference_services/models_available_cache.py +6 -0
- edsl/inference_services/registry.py +0 -6
- edsl/jobs/Answers.py +12 -10
- edsl/jobs/Jobs.py +21 -36
- edsl/jobs/buckets/BucketCollection.py +15 -24
- edsl/jobs/buckets/TokenBucket.py +14 -93
- edsl/jobs/interviews/Interview.py +78 -366
- edsl/jobs/interviews/InterviewExceptionEntry.py +19 -85
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +286 -0
- edsl/jobs/interviews/{InterviewExceptionCollection.py → interview_exception_tracking.py} +68 -14
- edsl/jobs/interviews/retry_management.py +37 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +175 -146
- edsl/jobs/runners/JobsRunnerStatusMixin.py +333 -0
- edsl/jobs/tasks/QuestionTaskCreator.py +23 -30
- edsl/jobs/tasks/TaskHistory.py +213 -148
- edsl/language_models/LanguageModel.py +156 -261
- edsl/language_models/ModelList.py +2 -2
- edsl/language_models/RegisterLanguageModelsMeta.py +29 -14
- edsl/language_models/registry.py +6 -23
- edsl/language_models/repair.py +19 -0
- edsl/prompts/Prompt.py +2 -52
- edsl/questions/AnswerValidatorMixin.py +26 -23
- edsl/questions/QuestionBase.py +249 -329
- edsl/questions/QuestionBudget.py +41 -99
- edsl/questions/QuestionCheckBox.py +35 -227
- edsl/questions/QuestionExtract.py +27 -98
- edsl/questions/QuestionFreeText.py +29 -52
- edsl/questions/QuestionFunctional.py +0 -7
- edsl/questions/QuestionList.py +22 -141
- edsl/questions/QuestionMultipleChoice.py +65 -159
- edsl/questions/QuestionNumerical.py +46 -88
- edsl/questions/QuestionRank.py +24 -182
- edsl/questions/RegisterQuestionsMeta.py +12 -31
- edsl/questions/__init__.py +4 -3
- edsl/questions/derived/QuestionLikertFive.py +5 -10
- edsl/questions/derived/QuestionLinearScale.py +2 -15
- edsl/questions/derived/QuestionTopK.py +1 -10
- edsl/questions/derived/QuestionYesNo.py +3 -24
- edsl/questions/descriptors.py +7 -43
- edsl/questions/question_registry.py +2 -6
- edsl/results/Dataset.py +0 -20
- edsl/results/DatasetExportMixin.py +48 -46
- edsl/results/Result.py +5 -32
- edsl/results/Results.py +46 -135
- edsl/results/ResultsDBMixin.py +3 -3
- edsl/scenarios/FileStore.py +10 -71
- edsl/scenarios/Scenario.py +25 -96
- edsl/scenarios/ScenarioImageMixin.py +2 -2
- edsl/scenarios/ScenarioList.py +39 -361
- edsl/scenarios/ScenarioListExportMixin.py +0 -9
- edsl/scenarios/ScenarioListPdfMixin.py +4 -150
- edsl/study/SnapShot.py +1 -8
- edsl/study/Study.py +0 -32
- edsl/surveys/Rule.py +1 -10
- edsl/surveys/RuleCollection.py +5 -21
- edsl/surveys/Survey.py +310 -636
- edsl/surveys/SurveyExportMixin.py +9 -71
- edsl/surveys/SurveyFlowVisualizationMixin.py +1 -2
- edsl/surveys/SurveyQualtricsImport.py +4 -75
- edsl/utilities/gcp_bucket/simple_example.py +9 -0
- edsl/utilities/utilities.py +1 -9
- {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/METADATA +2 -5
- edsl-0.1.33.dev1.dist-info/RECORD +209 -0
- edsl/TemplateLoader.py +0 -24
- edsl/auto/AutoStudy.py +0 -117
- edsl/auto/StageBase.py +0 -230
- edsl/auto/StageGenerateSurvey.py +0 -178
- edsl/auto/StageLabelQuestions.py +0 -125
- edsl/auto/StagePersona.py +0 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
- edsl/auto/StagePersonaDimensionValues.py +0 -74
- edsl/auto/StagePersonaDimensions.py +0 -69
- edsl/auto/StageQuestions.py +0 -73
- edsl/auto/SurveyCreatorPipeline.py +0 -21
- edsl/auto/utilities.py +0 -224
- edsl/coop/PriceFetcher.py +0 -58
- edsl/inference_services/MistralAIService.py +0 -120
- edsl/inference_services/TestService.py +0 -80
- edsl/inference_services/TogetherAIService.py +0 -170
- edsl/jobs/FailedQuestion.py +0 -78
- edsl/jobs/runners/JobsRunnerStatus.py +0 -331
- edsl/language_models/fake_openai_call.py +0 -15
- edsl/language_models/fake_openai_service.py +0 -61
- edsl/language_models/utilities.py +0 -61
- edsl/questions/QuestionBaseGenMixin.py +0 -133
- edsl/questions/QuestionBasePromptsMixin.py +0 -266
- edsl/questions/Quick.py +0 -41
- edsl/questions/ResponseValidatorABC.py +0 -170
- edsl/questions/decorators.py +0 -21
- edsl/questions/prompt_templates/question_budget.jinja +0 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +0 -32
- edsl/questions/prompt_templates/question_extract.jinja +0 -11
- edsl/questions/prompt_templates/question_free_text.jinja +0 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +0 -11
- edsl/questions/prompt_templates/question_list.jinja +0 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +0 -33
- edsl/questions/prompt_templates/question_numerical.jinja +0 -37
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/budget/__init__.py +0 -0
- edsl/questions/templates/budget/answering_instructions.jinja +0 -7
- edsl/questions/templates/budget/question_presentation.jinja +0 -7
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +0 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +0 -22
- edsl/questions/templates/extract/__init__.py +0 -0
- edsl/questions/templates/extract/answering_instructions.jinja +0 -7
- edsl/questions/templates/extract/question_presentation.jinja +0 -1
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +0 -1
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +0 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +0 -12
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +0 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +0 -5
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +0 -4
- edsl/questions/templates/list/question_presentation.jinja +0 -5
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +0 -9
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +0 -12
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +0 -8
- edsl/questions/templates/numerical/question_presentation.jinja +0 -7
- edsl/questions/templates/rank/__init__.py +0 -0
- edsl/questions/templates/rank/answering_instructions.jinja +0 -11
- edsl/questions/templates/rank/question_presentation.jinja +0 -15
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +0 -8
- edsl/questions/templates/top_k/question_presentation.jinja +0 -22
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +0 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +0 -12
- edsl/results/DatasetTree.py +0 -145
- edsl/results/Selector.py +0 -118
- edsl/results/tree_explore.py +0 -115
- edsl/surveys/instructions/ChangeInstruction.py +0 -47
- edsl/surveys/instructions/Instruction.py +0 -34
- edsl/surveys/instructions/InstructionCollection.py +0 -77
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +0 -24
- edsl/templates/error_reporting/exceptions_by_model.html +0 -35
- edsl/templates/error_reporting/exceptions_by_question_name.html +0 -17
- edsl/templates/error_reporting/exceptions_by_type.html +0 -17
- edsl/templates/error_reporting/interview_details.html +0 -116
- edsl/templates/error_reporting/interviews.html +0 -10
- edsl/templates/error_reporting/overview.html +0 -5
- edsl/templates/error_reporting/performance_plot.html +0 -2
- edsl/templates/error_reporting/report.css +0 -74
- edsl/templates/error_reporting/report.html +0 -118
- edsl/templates/error_reporting/report.js +0 -25
- edsl-0.1.33.dist-info/RECORD +0 -295
- {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/LICENSE +0 -0
- {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/WHEEL +0 -0
edsl/results/Results.py
CHANGED
@@ -17,7 +17,6 @@ from edsl.exceptions.results import (
|
|
17
17
|
ResultsInvalidNameError,
|
18
18
|
ResultsMutateError,
|
19
19
|
ResultsFilterError,
|
20
|
-
ResultsDeserializationError,
|
21
20
|
)
|
22
21
|
|
23
22
|
from edsl.results.ResultsExportMixin import ResultsExportMixin
|
@@ -78,7 +77,6 @@ class Results(UserList, Mixins, Base):
|
|
78
77
|
"question_options",
|
79
78
|
"question_type",
|
80
79
|
"comment",
|
81
|
-
"generated_tokens",
|
82
80
|
]
|
83
81
|
|
84
82
|
def __init__(
|
@@ -110,81 +108,6 @@ class Results(UserList, Mixins, Base):
|
|
110
108
|
if hasattr(self, "_add_output_functions"):
|
111
109
|
self._add_output_functions()
|
112
110
|
|
113
|
-
def leaves(self):
|
114
|
-
leaves = []
|
115
|
-
for result in self:
|
116
|
-
leaves.extend(result.leaves())
|
117
|
-
return leaves
|
118
|
-
|
119
|
-
def tree(
|
120
|
-
self,
|
121
|
-
fold_attributes: Optional[List[str]] = None,
|
122
|
-
drop: Optional[List[str]] = None,
|
123
|
-
open_file=True,
|
124
|
-
) -> dict:
|
125
|
-
"""Return the results as a tree."""
|
126
|
-
from edsl.results.tree_explore import FoldableHTMLTableGenerator
|
127
|
-
|
128
|
-
if drop is None:
|
129
|
-
drop = []
|
130
|
-
|
131
|
-
valid_attributes = [
|
132
|
-
"model",
|
133
|
-
"scenario",
|
134
|
-
"agent",
|
135
|
-
"answer",
|
136
|
-
"question",
|
137
|
-
"iteration",
|
138
|
-
]
|
139
|
-
if fold_attributes is None:
|
140
|
-
fold_attributes = []
|
141
|
-
|
142
|
-
for attribute in fold_attributes:
|
143
|
-
if attribute not in valid_attributes:
|
144
|
-
raise ValueError(
|
145
|
-
f"Invalid fold attribute: {attribute}; must be in {valid_attributes}"
|
146
|
-
)
|
147
|
-
data = self.leaves()
|
148
|
-
generator = FoldableHTMLTableGenerator(data)
|
149
|
-
tree = generator.tree(fold_attributes=fold_attributes, drop=drop)
|
150
|
-
html_content = generator.generate_html(tree, fold_attributes)
|
151
|
-
import tempfile
|
152
|
-
from edsl.utilities.utilities import is_notebook
|
153
|
-
|
154
|
-
from IPython.display import display, HTML
|
155
|
-
|
156
|
-
if is_notebook():
|
157
|
-
import html
|
158
|
-
from IPython.display import display, HTML
|
159
|
-
|
160
|
-
height = 1000
|
161
|
-
width = 1000
|
162
|
-
escaped_output = html.escape(html_content)
|
163
|
-
# escaped_output = rendered_html
|
164
|
-
iframe = f""""
|
165
|
-
<iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
|
166
|
-
"""
|
167
|
-
display(HTML(iframe))
|
168
|
-
return None
|
169
|
-
|
170
|
-
with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
|
171
|
-
f.write(html_content.encode())
|
172
|
-
print(f"HTML file has been generated: {f.name}")
|
173
|
-
|
174
|
-
if open_file:
|
175
|
-
import webbrowser
|
176
|
-
import time
|
177
|
-
|
178
|
-
time.sleep(1) # Wait for 1 second
|
179
|
-
# webbrowser.open(f.name)
|
180
|
-
import os
|
181
|
-
|
182
|
-
filename = f.name
|
183
|
-
webbrowser.open(f"file://{os.path.abspath(filename)}")
|
184
|
-
|
185
|
-
else:
|
186
|
-
return html_content
|
187
|
-
|
188
111
|
def code(self):
|
189
112
|
raise NotImplementedError
|
190
113
|
|
@@ -245,9 +168,7 @@ class Results(UserList, Mixins, Base):
|
|
245
168
|
)
|
246
169
|
|
247
170
|
def __repr__(self) -> str:
|
248
|
-
|
249
|
-
|
250
|
-
return f"Results(data = {reprlib.repr(self.data)}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
|
171
|
+
return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
|
251
172
|
|
252
173
|
def _repr_html_(self) -> str:
|
253
174
|
from IPython.display import HTML
|
@@ -369,7 +290,8 @@ class Results(UserList, Mixins, Base):
|
|
369
290
|
),
|
370
291
|
)
|
371
292
|
except Exception as e:
|
372
|
-
|
293
|
+
print(e)
|
294
|
+
# breakpoint()
|
373
295
|
return results
|
374
296
|
|
375
297
|
######################
|
@@ -473,7 +395,7 @@ class Results(UserList, Mixins, Base):
|
|
473
395
|
|
474
396
|
>>> r = Results.example()
|
475
397
|
>>> r.models[0]
|
476
|
-
Model(model_name =
|
398
|
+
Model(model_name = 'gpt-4-1106-preview', temperature = 0.5, max_tokens = 1000, top_p = 1, frequency_penalty = 0, presence_penalty = 0, logprobs = False, top_logprobs = 3)
|
477
399
|
"""
|
478
400
|
return [r.model for r in self.data]
|
479
401
|
|
@@ -555,6 +477,39 @@ class Results(UserList, Mixins, Base):
|
|
555
477
|
)
|
556
478
|
return sorted(list(all_keys))
|
557
479
|
|
480
|
+
def _parse_column(self, column: str) -> tuple[str, str]:
|
481
|
+
"""
|
482
|
+
Parses a column name into a tuple containing a data type and a key.
|
483
|
+
|
484
|
+
>>> r = Results.example()
|
485
|
+
>>> r._parse_column("answer.how_feeling")
|
486
|
+
('answer', 'how_feeling')
|
487
|
+
|
488
|
+
The standard way a column is specified is with a dot-separated string, e.g. _parse_column("agent.status")
|
489
|
+
But you can also specify a single key, e.g. "status", in which case it will look up the data type.
|
490
|
+
"""
|
491
|
+
if "." in column:
|
492
|
+
data_type, key = column.split(".")
|
493
|
+
else:
|
494
|
+
try:
|
495
|
+
data_type, key = self._key_to_data_type[column], column
|
496
|
+
except KeyError:
|
497
|
+
import difflib
|
498
|
+
|
499
|
+
close_matches = difflib.get_close_matches(
|
500
|
+
column, self._key_to_data_type.keys()
|
501
|
+
)
|
502
|
+
if close_matches:
|
503
|
+
suggestions = ", ".join(close_matches)
|
504
|
+
raise ResultsColumnNotFoundError(
|
505
|
+
f"Column '{column}' not found in data. Did you mean: {suggestions}?"
|
506
|
+
)
|
507
|
+
else:
|
508
|
+
raise ResultsColumnNotFoundError(
|
509
|
+
f"Column {column} not found in data"
|
510
|
+
)
|
511
|
+
return data_type, key
|
512
|
+
|
558
513
|
def first(self) -> "Result":
|
559
514
|
"""Return the first observation in the results.
|
560
515
|
|
@@ -677,11 +632,9 @@ class Results(UserList, Mixins, Base):
|
|
677
632
|
"""
|
678
633
|
if functions_dict is None:
|
679
634
|
functions_dict = {}
|
680
|
-
|
635
|
+
return EvalWithCompoundTypes(
|
681
636
|
names=result.combined_dict, functions=functions_dict
|
682
637
|
)
|
683
|
-
evaluator.functions.update(int=int, float=float)
|
684
|
-
return evaluator
|
685
638
|
|
686
639
|
def mutate(
|
687
640
|
self, new_var_string: str, functions_dict: Optional[dict] = None
|
@@ -768,8 +721,8 @@ class Results(UserList, Mixins, Base):
|
|
768
721
|
|
769
722
|
def sample(
|
770
723
|
self,
|
771
|
-
n:
|
772
|
-
frac:
|
724
|
+
n: int = None,
|
725
|
+
frac: float = None,
|
773
726
|
with_replacement: bool = True,
|
774
727
|
seed: Optional[str] = "edsl",
|
775
728
|
) -> Results:
|
@@ -818,17 +771,13 @@ class Results(UserList, Mixins, Base):
|
|
818
771
|
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
|
819
772
|
|
820
773
|
>>> results.select('how_feeling', 'model', 'how_feeling')
|
821
|
-
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'model.model': ['
|
822
|
-
|
823
|
-
>>> from edsl import Results; r = Results.example(); r.select('answer.how_feeling_y')
|
824
|
-
Dataset([{'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
|
774
|
+
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'model.model': ['gpt-4-1106-preview', 'gpt-4-1106-preview', 'gpt-4-1106-preview', 'gpt-4-1106-preview']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
|
825
775
|
"""
|
826
776
|
|
827
|
-
|
828
|
-
|
777
|
+
if len(self) == 0:
|
778
|
+
raise Exception("No data to select from---the Results object is empty.")
|
829
779
|
|
830
780
|
if not columns or columns == ("*",) or columns == (None,):
|
831
|
-
# is the users passes nothing, then we'll return all the columns
|
832
781
|
columns = ("*.*",)
|
833
782
|
|
834
783
|
if isinstance(columns[0], list):
|
@@ -852,16 +801,6 @@ class Results(UserList, Mixins, Base):
|
|
852
801
|
# iterate through the passed columns
|
853
802
|
for column in columns:
|
854
803
|
# a user could pass 'result.how_feeling' or just 'how_feeling'
|
855
|
-
matches = self._matching_columns(column)
|
856
|
-
if len(matches) > 1:
|
857
|
-
raise Exception(
|
858
|
-
f"Column '{column}' is ambiguous. Did you mean one of {matches}?"
|
859
|
-
)
|
860
|
-
if len(matches) == 0 and ".*" not in column:
|
861
|
-
raise Exception(f"Column '{column}' not found in data.")
|
862
|
-
if len(matches) == 1:
|
863
|
-
column = matches[0]
|
864
|
-
|
865
804
|
parsed_data_type, parsed_key = self._parse_column(column)
|
866
805
|
data_types = get_data_types_to_return(parsed_data_type)
|
867
806
|
found_once = False # we need to track this to make sure we found the key at least once
|
@@ -904,21 +843,6 @@ class Results(UserList, Mixins, Base):
|
|
904
843
|
|
905
844
|
return Dataset(sorted_new_data)
|
906
845
|
|
907
|
-
def select(self, *columns: Union[str, list[str]]) -> "Results":
|
908
|
-
from edsl.results.Selector import Selector
|
909
|
-
|
910
|
-
if len(self) == 0:
|
911
|
-
raise Exception("No data to select from---the Results object is empty.")
|
912
|
-
|
913
|
-
selector = Selector(
|
914
|
-
known_data_types=self.known_data_types,
|
915
|
-
data_type_to_keys=self._data_type_to_keys,
|
916
|
-
key_to_data_type=self._key_to_data_type,
|
917
|
-
fetch_list_func=self._fetch_list,
|
918
|
-
columns=self.columns,
|
919
|
-
)
|
920
|
-
return selector.select(*columns)
|
921
|
-
|
922
846
|
def sort_by(self, *columns: str, reverse: bool = False) -> Results:
|
923
847
|
import warnings
|
924
848
|
|
@@ -927,11 +851,6 @@ class Results(UserList, Mixins, Base):
|
|
927
851
|
)
|
928
852
|
return self.order_by(*columns, reverse=reverse)
|
929
853
|
|
930
|
-
def _parse_column(self, column: str) -> tuple[str, str]:
|
931
|
-
if "." in column:
|
932
|
-
return column.split(".")
|
933
|
-
return self._key_to_data_type[column], column
|
934
|
-
|
935
854
|
def order_by(self, *columns: str, reverse: bool = False) -> Results:
|
936
855
|
"""Sort the results by one or more columns.
|
937
856
|
|
@@ -1029,9 +948,7 @@ class Results(UserList, Mixins, Base):
|
|
1029
948
|
def has_single_equals(string):
|
1030
949
|
if "!=" in string:
|
1031
950
|
return False
|
1032
|
-
if "=" in string and not
|
1033
|
-
"==" in string or "<=" in string or ">=" in string
|
1034
|
-
):
|
951
|
+
if "=" in string and not "==" in string:
|
1035
952
|
return True
|
1036
953
|
|
1037
954
|
if has_single_equals(expression):
|
@@ -1072,7 +989,7 @@ class Results(UserList, Mixins, Base):
|
|
1072
989
|
return Results(survey=self.survey, data=new_data, created_columns=None)
|
1073
990
|
|
1074
991
|
@classmethod
|
1075
|
-
def example(cls, randomize: bool = False) -> Results:
|
992
|
+
def example(cls, debug: bool = False, randomize: bool = False) -> Results:
|
1076
993
|
"""Return an example `Results` object.
|
1077
994
|
|
1078
995
|
Example usage:
|
@@ -1086,13 +1003,7 @@ class Results(UserList, Mixins, Base):
|
|
1086
1003
|
|
1087
1004
|
c = Cache()
|
1088
1005
|
job = Jobs.example(randomize=randomize)
|
1089
|
-
results = job.run(
|
1090
|
-
cache=c,
|
1091
|
-
stop_on_exception=True,
|
1092
|
-
skip_retry=True,
|
1093
|
-
raise_validation_errors=True,
|
1094
|
-
disable_remote_inference=True,
|
1095
|
-
)
|
1006
|
+
results = job.run(cache=c, debug=debug)
|
1096
1007
|
return results
|
1097
1008
|
|
1098
1009
|
def rich_print(self):
|
edsl/results/ResultsDBMixin.py
CHANGED
@@ -136,9 +136,9 @@ class ResultsDBMixin:
|
|
136
136
|
|
137
137
|
>>> from edsl.results import Results
|
138
138
|
>>> r = Results.example()
|
139
|
-
>>> d = r.sql("select data_type, key, value from self where data_type = 'answer'
|
140
|
-
>>>
|
141
|
-
['
|
139
|
+
>>> d = r.sql("select data_type, key, value from self where data_type = 'answer' limit 3", shape="long")
|
140
|
+
>>> list(d['value'])
|
141
|
+
['OK', 'This is a real survey response from a human.', 'Great']
|
142
142
|
|
143
143
|
We can also return the data in wide format.
|
144
144
|
Note the use of single quotes to escape the column names, as required by sql.
|
edsl/scenarios/FileStore.py
CHANGED
@@ -120,22 +120,14 @@ class FileStore(Scenario):
|
|
120
120
|
return info
|
121
121
|
|
122
122
|
@classmethod
|
123
|
-
def pull(cls, uuid
|
124
|
-
scenario_version = Scenario.pull(uuid
|
123
|
+
def pull(cls, uuid):
|
124
|
+
scenario_version = Scenario.pull(uuid)
|
125
125
|
return cls.from_dict(scenario_version.to_dict())
|
126
126
|
|
127
127
|
|
128
128
|
class CSVFileStore(FileStore):
|
129
|
-
def __init__(
|
130
|
-
|
131
|
-
filename,
|
132
|
-
binary: Optional[bool] = None,
|
133
|
-
suffix: Optional[str] = None,
|
134
|
-
base64_string: Optional[str] = None,
|
135
|
-
):
|
136
|
-
super().__init__(
|
137
|
-
filename, binary=binary, base64_string=base64_string, suffix=".csv"
|
138
|
-
)
|
129
|
+
def __init__(self, filename):
|
130
|
+
super().__init__(filename, suffix=".csv")
|
139
131
|
|
140
132
|
@classmethod
|
141
133
|
def example(cls):
|
@@ -155,16 +147,8 @@ class CSVFileStore(FileStore):
|
|
155
147
|
|
156
148
|
|
157
149
|
class PDFFileStore(FileStore):
|
158
|
-
def __init__(
|
159
|
-
|
160
|
-
filename,
|
161
|
-
binary: Optional[bool] = None,
|
162
|
-
suffix: Optional[str] = None,
|
163
|
-
base64_string: Optional[str] = None,
|
164
|
-
):
|
165
|
-
super().__init__(
|
166
|
-
filename, binary=binary, base64_string=base64_string, suffix=".pdf"
|
167
|
-
)
|
150
|
+
def __init__(self, filename):
|
151
|
+
super().__init__(filename, suffix=".pdf")
|
168
152
|
|
169
153
|
def view(self):
|
170
154
|
pdf_path = self.to_tempfile()
|
@@ -241,16 +225,8 @@ class PDFFileStore(FileStore):
|
|
241
225
|
|
242
226
|
|
243
227
|
class PNGFileStore(FileStore):
|
244
|
-
def __init__(
|
245
|
-
|
246
|
-
filename,
|
247
|
-
binary: Optional[bool] = None,
|
248
|
-
suffix: Optional[str] = None,
|
249
|
-
base64_string: Optional[str] = None,
|
250
|
-
):
|
251
|
-
super().__init__(
|
252
|
-
filename, binary=binary, base64_string=base64_string, suffix=".png"
|
253
|
-
)
|
228
|
+
def __init__(self, filename):
|
229
|
+
super().__init__(filename, suffix=".png")
|
254
230
|
|
255
231
|
@classmethod
|
256
232
|
def example(cls):
|
@@ -275,16 +251,8 @@ class PNGFileStore(FileStore):
|
|
275
251
|
|
276
252
|
|
277
253
|
class SQLiteFileStore(FileStore):
|
278
|
-
def __init__(
|
279
|
-
|
280
|
-
filename,
|
281
|
-
binary: Optional[bool] = None,
|
282
|
-
suffix: Optional[str] = None,
|
283
|
-
base64_string: Optional[str] = None,
|
284
|
-
):
|
285
|
-
super().__init__(
|
286
|
-
filename, binary=binary, base64_string=base64_string, suffix=".sqlite"
|
287
|
-
)
|
254
|
+
def __init__(self, filename):
|
255
|
+
super().__init__(filename, suffix=".sqlite")
|
288
256
|
|
289
257
|
@classmethod
|
290
258
|
def example(cls):
|
@@ -297,8 +265,6 @@ class SQLiteFileStore(FileStore):
|
|
297
265
|
c.execute("""CREATE TABLE stocks (date text)""")
|
298
266
|
conn.commit()
|
299
267
|
|
300
|
-
return cls(f.name)
|
301
|
-
|
302
268
|
def view(self):
|
303
269
|
import subprocess
|
304
270
|
import os
|
@@ -307,33 +273,6 @@ class SQLiteFileStore(FileStore):
|
|
307
273
|
os.system(f"sqlite3 {sqlite_path}")
|
308
274
|
|
309
275
|
|
310
|
-
class HTMLFileStore(FileStore):
|
311
|
-
def __init__(
|
312
|
-
self,
|
313
|
-
filename,
|
314
|
-
binary: Optional[bool] = None,
|
315
|
-
suffix: Optional[str] = None,
|
316
|
-
base64_string: Optional[str] = None,
|
317
|
-
):
|
318
|
-
super().__init__(
|
319
|
-
filename, binary=binary, base64_string=base64_string, suffix=".html"
|
320
|
-
)
|
321
|
-
|
322
|
-
@classmethod
|
323
|
-
def example(cls):
|
324
|
-
import tempfile
|
325
|
-
|
326
|
-
with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
|
327
|
-
f.write("<html><body><h1>Test</h1></body></html>".encode())
|
328
|
-
return cls(f.name)
|
329
|
-
|
330
|
-
def view(self):
|
331
|
-
import webbrowser
|
332
|
-
|
333
|
-
html_path = self.to_tempfile()
|
334
|
-
webbrowser.open("file://" + html_path)
|
335
|
-
|
336
|
-
|
337
276
|
if __name__ == "__main__":
|
338
277
|
# file_path = "../conjure/examples/Ex11-2.sav"
|
339
278
|
# fs = FileStore(file_path)
|
edsl/scenarios/Scenario.py
CHANGED
@@ -5,10 +5,6 @@ import copy
|
|
5
5
|
import base64
|
6
6
|
import hashlib
|
7
7
|
import os
|
8
|
-
import reprlib
|
9
|
-
import imghdr
|
10
|
-
|
11
|
-
|
12
8
|
from collections import UserDict
|
13
9
|
from typing import Union, List, Optional, Generator
|
14
10
|
from uuid import uuid4
|
@@ -17,8 +13,6 @@ from edsl.scenarios.ScenarioImageMixin import ScenarioImageMixin
|
|
17
13
|
from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
|
18
14
|
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
19
15
|
|
20
|
-
from edsl.data_transfer_models import ImageInfo
|
21
|
-
|
22
16
|
|
23
17
|
class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
24
18
|
"""A Scenario is a dictionary of keys/values.
|
@@ -55,39 +49,6 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
55
49
|
self._has_image = False
|
56
50
|
return self._has_image
|
57
51
|
|
58
|
-
@property
|
59
|
-
def has_jinja_braces(self) -> bool:
|
60
|
-
"""Return whether the scenario has jinja braces. This matters for rendering.
|
61
|
-
|
62
|
-
>>> s = Scenario({"food": "I love {{wood chips}}"})
|
63
|
-
>>> s.has_jinja_braces
|
64
|
-
True
|
65
|
-
"""
|
66
|
-
for key, value in self.items():
|
67
|
-
if "{{" in str(value) and "}}" in value:
|
68
|
-
return True
|
69
|
-
return False
|
70
|
-
|
71
|
-
def convert_jinja_braces(
|
72
|
-
self, replacement_left="<<", replacement_right=">>"
|
73
|
-
) -> Scenario:
|
74
|
-
"""Convert Jinja braces to some other character.
|
75
|
-
|
76
|
-
>>> s = Scenario({"food": "I love {{wood chips}}"})
|
77
|
-
>>> s.convert_jinja_braces()
|
78
|
-
Scenario({'food': 'I love <<wood chips>>'})
|
79
|
-
|
80
|
-
"""
|
81
|
-
new_scenario = Scenario()
|
82
|
-
for key, value in self.items():
|
83
|
-
if isinstance(value, str):
|
84
|
-
new_scenario[key] = value.replace("{{", replacement_left).replace(
|
85
|
-
"}}", replacement_right
|
86
|
-
)
|
87
|
-
else:
|
88
|
-
new_scenario[key] = value
|
89
|
-
return new_scenario
|
90
|
-
|
91
52
|
@has_image.setter
|
92
53
|
def has_image(self, value):
|
93
54
|
self._has_image = value
|
@@ -181,7 +142,6 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
181
142
|
print_json(json.dumps(self.to_dict()))
|
182
143
|
|
183
144
|
def __repr__(self):
|
184
|
-
# return "Scenario(" + reprlib.repr(self.data) + ")"
|
185
145
|
return "Scenario(" + repr(self.data) + ")"
|
186
146
|
|
187
147
|
def _repr_html_(self):
|
@@ -236,54 +196,33 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
236
196
|
return cls({"url": url, field_name: text})
|
237
197
|
|
238
198
|
@classmethod
|
239
|
-
def from_image(
|
240
|
-
|
241
|
-
) -> "Scenario":
|
242
|
-
"""
|
243
|
-
Creates a scenario with a base64 encoding of an image.
|
244
|
-
|
245
|
-
Args:
|
246
|
-
image_path (str): Path to the image file.
|
247
|
-
|
248
|
-
Returns:
|
249
|
-
Scenario: A new Scenario instance with image information.
|
199
|
+
def from_image(cls, image_path: str) -> str:
|
200
|
+
"""Creates a scenario with a base64 encoding of an image.
|
250
201
|
|
251
202
|
Example:
|
203
|
+
|
252
204
|
>>> s = Scenario.from_image(Scenario.example_image())
|
253
205
|
>>> s
|
254
|
-
Scenario({'
|
206
|
+
Scenario({'file_path': '...', 'encoded_image': '...'})
|
255
207
|
"""
|
256
|
-
if not os.path.exists(image_path):
|
257
|
-
raise FileNotFoundError(f"Image file not found: {image_path}")
|
258
|
-
|
259
208
|
with open(image_path, "rb") as image_file:
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
file_path=image_path,
|
271
|
-
file_name=file_name,
|
272
|
-
image_format=image_format,
|
273
|
-
file_size=file_size,
|
274
|
-
encoded_image=base64.b64encode(file_content).decode("utf-8"),
|
275
|
-
)
|
276
|
-
|
277
|
-
scenario_data = {image_name: image_info}
|
278
|
-
s = cls(scenario_data)
|
279
|
-
s.has_image = True
|
280
|
-
return s
|
209
|
+
s = cls(
|
210
|
+
{
|
211
|
+
"file_path": image_path,
|
212
|
+
"encoded_image": base64.b64encode(image_file.read()).decode(
|
213
|
+
"utf-8"
|
214
|
+
),
|
215
|
+
}
|
216
|
+
)
|
217
|
+
s.has_image = True
|
218
|
+
return s
|
281
219
|
|
282
220
|
@classmethod
|
283
221
|
def from_pdf(cls, pdf_path):
|
284
|
-
#
|
285
|
-
import
|
222
|
+
import fitz # PyMuPDF
|
223
|
+
from edsl import Scenario
|
286
224
|
|
225
|
+
# Ensure the file exists
|
287
226
|
if not os.path.exists(pdf_path):
|
288
227
|
raise FileNotFoundError(f"The file {pdf_path} does not exist.")
|
289
228
|
|
@@ -297,14 +236,7 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
297
236
|
text = ""
|
298
237
|
for page_num in range(len(document)):
|
299
238
|
page = document.load_page(page_num)
|
300
|
-
|
301
|
-
|
302
|
-
# Sort blocks by their vertical position (y0) to maintain reading order
|
303
|
-
blocks.sort(key=lambda b: (b[1], b[0])) # Sort by y0 first, then x0
|
304
|
-
|
305
|
-
# Combine the text blocks in order
|
306
|
-
for block in blocks:
|
307
|
-
text += block[4] + "\n"
|
239
|
+
text = text + page.get_text()
|
308
240
|
|
309
241
|
# Create a dictionary for the combined text
|
310
242
|
page_info = {"filename": filename, "text": text}
|
@@ -491,21 +423,18 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
491
423
|
return table
|
492
424
|
|
493
425
|
@classmethod
|
494
|
-
def example(cls, randomize: bool = False
|
426
|
+
def example(cls, randomize: bool = False) -> Scenario:
|
495
427
|
"""
|
496
428
|
Returns an example Scenario instance.
|
497
429
|
|
498
430
|
:param randomize: If True, adds a random string to the value of the example key.
|
499
431
|
"""
|
500
|
-
if not
|
501
|
-
|
502
|
-
|
503
|
-
{
|
504
|
-
|
505
|
-
|
506
|
-
)
|
507
|
-
else:
|
508
|
-
return cls.from_image(cls.example_image())
|
432
|
+
addition = "" if not randomize else str(uuid4())
|
433
|
+
return cls(
|
434
|
+
{
|
435
|
+
"persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
|
436
|
+
}
|
437
|
+
)
|
509
438
|
|
510
439
|
def code(self) -> List[str]:
|
511
440
|
"""Return the code for the scenario."""
|
@@ -13,7 +13,7 @@ class ScenarioImageMixin:
|
|
13
13
|
>>> from edsl.scenarios.Scenario import Scenario
|
14
14
|
>>> s = Scenario({"food": "wood chips"})
|
15
15
|
>>> s.add_image(Scenario.example_image())
|
16
|
-
Scenario({'food': 'wood chips', '
|
16
|
+
Scenario({'food': 'wood chips', 'file_path': '...', 'encoded_image': '...'})
|
17
17
|
"""
|
18
18
|
new_scenario = self.from_image(image_path)
|
19
19
|
return self + new_scenario
|
@@ -33,7 +33,7 @@ class ScenarioImageMixin:
|
|
33
33
|
>>> from edsl.scenarios.Scenario import Scenario
|
34
34
|
>>> s = Scenario.from_image(Scenario.example_image())
|
35
35
|
>>> s
|
36
|
-
Scenario({'
|
36
|
+
Scenario({'file_path': '...', 'encoded_image': '...'})
|
37
37
|
"""
|
38
38
|
|
39
39
|
if image_path.startswith("http://") or image_path.startswith("https://"):
|