edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +116 -197
- edsl/__init__.py +7 -15
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +147 -351
- edsl/agents/AgentList.py +73 -211
- edsl/agents/Invigilator.py +50 -101
- edsl/agents/InvigilatorBase.py +70 -62
- edsl/agents/PromptConstructor.py +225 -143
- edsl/agents/__init__.py +1 -0
- edsl/agents/prompt_helpers.py +3 -3
- edsl/auto/AutoStudy.py +5 -18
- edsl/auto/StageBase.py +40 -53
- edsl/auto/StageQuestions.py +1 -2
- edsl/auto/utilities.py +6 -0
- edsl/config.py +2 -22
- edsl/conversation/car_buying.py +1 -2
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +47 -125
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +27 -45
- edsl/data/CacheEntry.py +15 -12
- edsl/data/CacheHandler.py +12 -31
- edsl/data/RemoteCacheSync.py +46 -154
- edsl/data/__init__.py +3 -4
- edsl/data_transfer_models.py +1 -2
- edsl/enums.py +0 -27
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +0 -12
- edsl/exceptions/questions.py +6 -24
- edsl/exceptions/scenarios.py +0 -7
- edsl/inference_services/AnthropicService.py +19 -38
- edsl/inference_services/AwsBedrock.py +2 -0
- edsl/inference_services/AzureAI.py +2 -0
- edsl/inference_services/GoogleService.py +12 -7
- edsl/inference_services/InferenceServiceABC.py +85 -18
- edsl/inference_services/InferenceServicesCollection.py +79 -120
- edsl/inference_services/MistralAIService.py +3 -0
- edsl/inference_services/OpenAIService.py +35 -47
- edsl/inference_services/PerplexityService.py +3 -0
- edsl/inference_services/TestService.py +10 -11
- edsl/inference_services/TogetherAIService.py +3 -5
- edsl/jobs/Answers.py +14 -1
- edsl/jobs/Jobs.py +431 -356
- edsl/jobs/JobsChecks.py +10 -35
- edsl/jobs/JobsPrompts.py +4 -6
- edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
- edsl/jobs/buckets/BucketCollection.py +3 -44
- edsl/jobs/buckets/TokenBucket.py +21 -53
- edsl/jobs/interviews/Interview.py +408 -143
- edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
- edsl/jobs/runners/JobsRunnerStatus.py +165 -133
- edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
- edsl/jobs/tasks/TaskHistory.py +18 -38
- edsl/jobs/tasks/task_status_enum.py +2 -0
- edsl/language_models/KeyLookup.py +30 -0
- edsl/language_models/LanguageModel.py +236 -194
- edsl/language_models/ModelList.py +19 -28
- edsl/language_models/__init__.py +2 -1
- edsl/language_models/registry.py +190 -0
- edsl/language_models/repair.py +2 -2
- edsl/language_models/unused/ReplicateBase.py +83 -0
- edsl/language_models/utilities.py +4 -5
- edsl/notebooks/Notebook.py +14 -19
- edsl/prompts/Prompt.py +39 -29
- edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
- edsl/questions/QuestionBase.py +214 -68
- edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
- edsl/questions/QuestionBasePromptsMixin.py +3 -7
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +3 -3
- edsl/questions/QuestionExtract.py +7 -5
- edsl/questions/QuestionFreeText.py +3 -2
- edsl/questions/QuestionList.py +18 -10
- edsl/questions/QuestionMultipleChoice.py +23 -67
- edsl/questions/QuestionNumerical.py +4 -2
- edsl/questions/QuestionRank.py +17 -7
- edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
- edsl/questions/SimpleAskMixin.py +3 -4
- edsl/questions/__init__.py +1 -2
- edsl/questions/derived/QuestionLinearScale.py +3 -6
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +3 -17
- edsl/questions/question_registry.py +1 -1
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +7 -170
- edsl/results/DatasetExportMixin.py +305 -168
- edsl/results/DatasetTree.py +8 -28
- edsl/results/Result.py +206 -298
- edsl/results/Results.py +131 -149
- edsl/results/ResultsDBMixin.py +238 -0
- edsl/results/ResultsExportMixin.py +0 -2
- edsl/results/{results_selector.py → Selector.py} +13 -23
- edsl/results/TableDisplay.py +171 -98
- edsl/results/__init__.py +1 -1
- edsl/scenarios/FileStore.py +239 -150
- edsl/scenarios/Scenario.py +193 -90
- edsl/scenarios/ScenarioHtmlMixin.py +3 -4
- edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
- edsl/scenarios/ScenarioList.py +244 -415
- edsl/scenarios/ScenarioListExportMixin.py +7 -0
- edsl/scenarios/ScenarioListPdfMixin.py +37 -15
- edsl/scenarios/__init__.py +2 -1
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +12 -5
- edsl/surveys/Rule.py +4 -5
- edsl/surveys/RuleCollection.py +27 -25
- edsl/surveys/Survey.py +791 -270
- edsl/surveys/SurveyCSS.py +8 -20
- edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
- edsl/surveys/__init__.py +2 -4
- edsl/surveys/descriptors.py +2 -6
- edsl/surveys/instructions/ChangeInstruction.py +2 -1
- edsl/surveys/instructions/Instruction.py +13 -4
- edsl/surveys/instructions/InstructionCollection.py +6 -11
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/utilities.py +23 -35
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
- edsl-0.1.39.dev1.dist-info/RECORD +277 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
- edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
- edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
- edsl/agents/question_option_processor.py +0 -172
- edsl/coop/CoopFunctionsMixin.py +0 -15
- edsl/coop/ExpectedParrotKeyHandler.py +0 -125
- edsl/exceptions/inference_services.py +0 -5
- edsl/inference_services/AvailableModelCacheHandler.py +0 -184
- edsl/inference_services/AvailableModelFetcher.py +0 -215
- edsl/inference_services/ServiceAvailability.py +0 -135
- edsl/inference_services/data_structures.py +0 -134
- edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
- edsl/jobs/FetchInvigilator.py +0 -47
- edsl/jobs/InterviewTaskManager.py +0 -98
- edsl/jobs/InterviewsConstructor.py +0 -50
- edsl/jobs/JobsComponentConstructor.py +0 -189
- edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
- edsl/jobs/RequestTokenEstimator.py +0 -30
- edsl/jobs/async_interview_runner.py +0 -138
- edsl/jobs/buckets/TokenBucketAPI.py +0 -211
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/check_survey_scenario_compatibility.py +0 -85
- edsl/jobs/data_structures.py +0 -120
- edsl/jobs/decorators.py +0 -35
- edsl/jobs/jobs_status_enums.py +0 -9
- edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
- edsl/jobs/results_exceptions_handler.py +0 -98
- edsl/language_models/ComputeCost.py +0 -63
- edsl/language_models/PriceManager.py +0 -127
- edsl/language_models/RawResponseHandler.py +0 -106
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +0 -131
- edsl/language_models/model.py +0 -256
- edsl/notebooks/NotebookToLaTeX.py +0 -142
- edsl/questions/ExceptionExplainer.py +0 -77
- edsl/questions/HTMLQuestion.py +0 -103
- edsl/questions/QuestionMatrix.py +0 -265
- edsl/questions/data_structures.py +0 -20
- edsl/questions/loop_processor.py +0 -149
- edsl/questions/response_validator_factory.py +0 -34
- edsl/questions/templates/matrix/__init__.py +0 -1
- edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
- edsl/questions/templates/matrix/question_presentation.jinja +0 -20
- edsl/results/MarkdownToDocx.py +0 -122
- edsl/results/MarkdownToPDF.py +0 -111
- edsl/results/TextEditor.py +0 -50
- edsl/results/file_exports.py +0 -252
- edsl/results/smart_objects.py +0 -96
- edsl/results/table_data_class.py +0 -12
- edsl/results/table_renderers.py +0 -118
- edsl/scenarios/ConstructDownloadLink.py +0 -109
- edsl/scenarios/DocumentChunker.py +0 -102
- edsl/scenarios/DocxScenario.py +0 -16
- edsl/scenarios/PdfExtractor.py +0 -40
- edsl/scenarios/directory_scanner.py +0 -96
- edsl/scenarios/file_methods.py +0 -85
- edsl/scenarios/handlers/__init__.py +0 -13
- edsl/scenarios/handlers/csv.py +0 -49
- edsl/scenarios/handlers/docx.py +0 -76
- edsl/scenarios/handlers/html.py +0 -37
- edsl/scenarios/handlers/json.py +0 -111
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/scenarios/handlers/md.py +0 -51
- edsl/scenarios/handlers/pdf.py +0 -68
- edsl/scenarios/handlers/png.py +0 -39
- edsl/scenarios/handlers/pptx.py +0 -105
- edsl/scenarios/handlers/py.py +0 -294
- edsl/scenarios/handlers/sql.py +0 -313
- edsl/scenarios/handlers/sqlite.py +0 -149
- edsl/scenarios/handlers/txt.py +0 -33
- edsl/scenarios/scenario_selector.py +0 -156
- edsl/surveys/ConstructDAG.py +0 -92
- edsl/surveys/EditSurvey.py +0 -221
- edsl/surveys/InstructionHandler.py +0 -100
- edsl/surveys/MemoryManagement.py +0 -72
- edsl/surveys/RuleManager.py +0 -172
- edsl/surveys/Simulator.py +0 -75
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/utilities/PrettyList.py +0 -56
- edsl/utilities/is_notebook.py +0 -18
- edsl/utilities/is_valid_variable_name.py +0 -11
- edsl/utilities/remove_edsl_version.py +0 -24
- edsl-0.1.39.dist-info/RECORD +0 -358
- /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
- /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
- /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
edsl/scenarios/Scenario.py
CHANGED
@@ -2,65 +2,54 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
import copy
|
5
|
+
import hashlib
|
5
6
|
import os
|
6
7
|
import json
|
7
8
|
from collections import UserDict
|
8
|
-
from typing import Union, List, Optional,
|
9
|
+
from typing import Union, List, Optional, Generator
|
9
10
|
from uuid import uuid4
|
10
11
|
|
11
12
|
from edsl.Base import Base
|
12
13
|
from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
|
13
|
-
from edsl.utilities.
|
14
|
+
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
14
15
|
from edsl.exceptions.scenarios import ScenarioError
|
15
16
|
|
16
|
-
if TYPE_CHECKING:
|
17
|
-
from edsl.scenarios.ScenarioList import ScenarioList
|
18
|
-
from edsl.results.Dataset import Dataset
|
19
|
-
|
20
17
|
|
21
18
|
class DisplayJSON:
|
22
|
-
|
23
|
-
|
24
|
-
def __init__(self, input_dict: dict):
|
25
|
-
self.text = json.dumps(input_dict, indent=4)
|
19
|
+
def __init__(self, dict):
|
20
|
+
self.text = json.dumps(dict, indent=4)
|
26
21
|
|
27
22
|
def __repr__(self):
|
28
23
|
return self.text
|
29
24
|
|
30
25
|
|
31
26
|
class DisplayYAML:
|
32
|
-
|
33
|
-
|
34
|
-
def __init__(self, input_dict: dict):
|
27
|
+
def __init__(self, dict):
|
35
28
|
import yaml
|
36
29
|
|
37
|
-
self.text = yaml.dump(
|
30
|
+
self.text = yaml.dump(dict)
|
38
31
|
|
39
32
|
def __repr__(self):
|
40
33
|
return self.text
|
41
34
|
|
42
35
|
|
43
36
|
class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
44
|
-
"""A Scenario is a dictionary of keys/values
|
37
|
+
"""A Scenario is a dictionary of keys/values.
|
38
|
+
|
39
|
+
They can be used parameterize EDSL questions."""
|
45
40
|
|
46
41
|
__documentation__ = "https://docs.expectedparrot.com/en/latest/scenarios.html"
|
47
42
|
|
48
|
-
def __init__(self, data:
|
43
|
+
def __init__(self, data: Union[dict, None] = None, name: str = None):
|
49
44
|
"""Initialize a new Scenario.
|
50
45
|
|
51
|
-
:param data: A dictionary of keys/values for parameterizing questions.
|
52
|
-
|
53
|
-
"""
|
46
|
+
# :param data: A dictionary of keys/values for parameterizing questions.
|
47
|
+
#"""
|
54
48
|
if not isinstance(data, dict) and data is not None:
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
raise ScenarioError(
|
59
|
-
f"You must pass in a dictionary to initialize a Scenario. You passed in {data}",
|
60
|
-
"Exception message:" + str(e),
|
61
|
-
)
|
49
|
+
raise EDSLScenarioError(
|
50
|
+
"You must pass in a dictionary to initialize a Scenario."
|
51
|
+
)
|
62
52
|
|
63
|
-
super().__init__()
|
64
53
|
self.data = data if data is not None else {}
|
65
54
|
self.name = name
|
66
55
|
|
@@ -70,6 +59,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
70
59
|
:param n: The number of times to replicate the scenario.
|
71
60
|
|
72
61
|
Example:
|
62
|
+
|
73
63
|
>>> s = Scenario({"food": "wood chips"})
|
74
64
|
>>> s.replicate(2)
|
75
65
|
ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood chips'})])
|
@@ -92,13 +82,13 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
92
82
|
return True
|
93
83
|
return False
|
94
84
|
|
95
|
-
def
|
96
|
-
self, replacement_left
|
85
|
+
def convert_jinja_braces(
|
86
|
+
self, replacement_left="<<", replacement_right=">>"
|
97
87
|
) -> Scenario:
|
98
88
|
"""Convert Jinja braces to some other character.
|
99
89
|
|
100
90
|
>>> s = Scenario({"food": "I love {{wood chips}}"})
|
101
|
-
>>> s.
|
91
|
+
>>> s.convert_jinja_braces()
|
102
92
|
Scenario({'food': 'I love <<wood chips>>'})
|
103
93
|
|
104
94
|
"""
|
@@ -112,7 +102,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
112
102
|
new_scenario[key] = value
|
113
103
|
return new_scenario
|
114
104
|
|
115
|
-
def __add__(self, other_scenario: Scenario) -> Scenario:
|
105
|
+
def __add__(self, other_scenario: "Scenario") -> "Scenario":
|
116
106
|
"""Combine two scenarios by taking the union of their keys
|
117
107
|
|
118
108
|
If the other scenario is None, then just return self.
|
@@ -137,14 +127,11 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
137
127
|
return s
|
138
128
|
|
139
129
|
def rename(
|
140
|
-
self,
|
141
|
-
|
142
|
-
new_name: Optional[str] = None,
|
143
|
-
) -> Scenario:
|
130
|
+
self, old_name_or_replacement_dict: dict, new_name: Optional[str] = None
|
131
|
+
) -> "Scenario":
|
144
132
|
"""Rename the keys of a scenario.
|
145
133
|
|
146
|
-
:param
|
147
|
-
:param new_name: The new name of the key.
|
134
|
+
:param replacement_dict: A dictionary of old keys to new keys.
|
148
135
|
|
149
136
|
Example:
|
150
137
|
|
@@ -169,26 +156,13 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
169
156
|
new_scenario[key] = value
|
170
157
|
return new_scenario
|
171
158
|
|
172
|
-
def new_column_names(self, new_names: List[str]) -> Scenario:
|
173
|
-
"""Rename the keys of a scenario.
|
174
|
-
|
175
|
-
>>> s = Scenario({"food": "wood chips"})
|
176
|
-
>>> s.new_column_names(["food_preference"])
|
177
|
-
Scenario({'food_preference': 'wood chips'})
|
178
|
-
"""
|
179
|
-
try:
|
180
|
-
assert len(new_names) == len(self.keys())
|
181
|
-
except AssertionError:
|
182
|
-
print("The number of new names must match the number of keys.")
|
183
|
-
|
184
|
-
new_scenario = Scenario()
|
185
|
-
for new_names, value in zip(new_names, self.values()):
|
186
|
-
new_scenario[new_names] = value
|
187
|
-
return new_scenario
|
188
|
-
|
189
159
|
def table(self, tablefmt: str = "grid") -> str:
|
190
|
-
|
191
|
-
|
160
|
+
from edsl.results.Dataset import Dataset
|
161
|
+
|
162
|
+
keys = [key for key, value in self.items()]
|
163
|
+
values = [value for key, value in self.items()]
|
164
|
+
d = Dataset([{"key": keys}, {"value": values}])
|
165
|
+
return d.table(tablefmt=tablefmt)
|
192
166
|
|
193
167
|
def json(self):
|
194
168
|
return DisplayJSON(self.to_dict(add_edsl_version=False))
|
@@ -198,7 +172,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
198
172
|
|
199
173
|
return DisplayYAML(self.to_dict(add_edsl_version=False))
|
200
174
|
|
201
|
-
def to_dict(self, add_edsl_version
|
175
|
+
def to_dict(self, add_edsl_version=True) -> dict:
|
202
176
|
"""Convert a scenario to a dictionary.
|
203
177
|
|
204
178
|
Example:
|
@@ -226,7 +200,8 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
226
200
|
return d
|
227
201
|
|
228
202
|
def __hash__(self) -> int:
|
229
|
-
"""
|
203
|
+
"""
|
204
|
+
Return a hash of the scenario.
|
230
205
|
|
231
206
|
Example:
|
232
207
|
|
@@ -238,23 +213,44 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
238
213
|
|
239
214
|
return dict_hash(self.to_dict(add_edsl_version=False))
|
240
215
|
|
216
|
+
def print(self):
|
217
|
+
from rich import print_json
|
218
|
+
import json
|
219
|
+
|
220
|
+
print_json(json.dumps(self.to_dict()))
|
221
|
+
|
241
222
|
def __repr__(self):
|
242
223
|
return "Scenario(" + repr(self.data) + ")"
|
243
224
|
|
244
225
|
def to_dataset(self) -> "Dataset":
|
245
|
-
|
246
|
-
|
247
|
-
>>> s = Scenario({"food": "wood chips"})
|
248
|
-
>>> s.to_dataset()
|
249
|
-
Dataset([{'key': ['food']}, {'value': ['wood chips']}])
|
250
|
-
"""
|
226
|
+
# d = Dataset([{'a.b':[1,2,3,4]}])
|
251
227
|
from edsl.results.Dataset import Dataset
|
252
228
|
|
253
|
-
keys =
|
254
|
-
values =
|
229
|
+
keys = [key for key, value in self.items()]
|
230
|
+
values = [value for key, value in self.items()]
|
255
231
|
return Dataset([{"key": keys}, {"value": values}])
|
256
232
|
|
257
|
-
def
|
233
|
+
def _repr_html_(self):
|
234
|
+
from tabulate import tabulate
|
235
|
+
import reprlib
|
236
|
+
|
237
|
+
d = self.to_dict(add_edsl_version=False)
|
238
|
+
# return self.to_dataset()
|
239
|
+
r = reprlib.Repr()
|
240
|
+
r.maxstring = 70
|
241
|
+
|
242
|
+
data = [[k, r.repr(v)] for k, v in d.items()]
|
243
|
+
from tabulate import tabulate
|
244
|
+
|
245
|
+
if hasattr(self, "__documentation__"):
|
246
|
+
footer = f"<a href='{self.__documentation__}'>(docs)</a></p>"
|
247
|
+
else:
|
248
|
+
footer = ""
|
249
|
+
|
250
|
+
table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
|
251
|
+
return f"<pre>{table}</pre>" + footer
|
252
|
+
|
253
|
+
def select(self, list_of_keys: List[str]) -> "Scenario":
|
258
254
|
"""Select a subset of keys from a scenario.
|
259
255
|
|
260
256
|
:param list_of_keys: The keys to select.
|
@@ -270,7 +266,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
270
266
|
new_scenario[key] = self[key]
|
271
267
|
return new_scenario
|
272
268
|
|
273
|
-
def drop(self, list_of_keys:
|
269
|
+
def drop(self, list_of_keys: List[str]) -> "Scenario":
|
274
270
|
"""Drop a subset of keys from a scenario.
|
275
271
|
|
276
272
|
:param list_of_keys: The keys to drop.
|
@@ -324,7 +320,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
324
320
|
... _ = f.flush()
|
325
321
|
... s = Scenario.from_file(f.name, "file")
|
326
322
|
>>> s
|
327
|
-
Scenario({'file': FileStore(path='...'
|
323
|
+
Scenario({'file': FileStore(path='...')})
|
328
324
|
|
329
325
|
"""
|
330
326
|
from edsl.scenarios.FileStore import FileStore
|
@@ -355,10 +351,35 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
355
351
|
return cls.from_file(image_path, image_name)
|
356
352
|
|
357
353
|
@classmethod
|
358
|
-
def from_pdf(cls, pdf_path
|
359
|
-
|
354
|
+
def from_pdf(cls, pdf_path):
|
355
|
+
# Ensure the file exists
|
356
|
+
import fitz
|
357
|
+
|
358
|
+
if not os.path.exists(pdf_path):
|
359
|
+
raise FileNotFoundError(f"The file {pdf_path} does not exist.")
|
360
|
+
|
361
|
+
# Open the PDF file
|
362
|
+
document = fitz.open(pdf_path)
|
363
|
+
|
364
|
+
# Get the filename from the path
|
365
|
+
filename = os.path.basename(pdf_path)
|
360
366
|
|
361
|
-
|
367
|
+
# Iterate through each page and extract text
|
368
|
+
text = ""
|
369
|
+
for page_num in range(len(document)):
|
370
|
+
page = document.load_page(page_num)
|
371
|
+
blocks = page.get_text("blocks") # Extract text blocks
|
372
|
+
|
373
|
+
# Sort blocks by their vertical position (y0) to maintain reading order
|
374
|
+
blocks.sort(key=lambda b: (b[1], b[0])) # Sort by y0 first, then x0
|
375
|
+
|
376
|
+
# Combine the text blocks in order
|
377
|
+
for block in blocks:
|
378
|
+
text += block[4] + "\n"
|
379
|
+
|
380
|
+
# Create a dictionary for the combined text
|
381
|
+
page_info = {"filename": filename, "text": text}
|
382
|
+
return Scenario(page_info)
|
362
383
|
|
363
384
|
@classmethod
|
364
385
|
def from_docx(cls, docx_path: str) -> "Scenario":
|
@@ -378,9 +399,52 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
378
399
|
Scenario({'file_path': 'test.docx', 'text': 'EDSL Survey\\nThis is a test.'})
|
379
400
|
>>> import os; os.remove("test.docx")
|
380
401
|
"""
|
381
|
-
from
|
402
|
+
from docx import Document
|
403
|
+
|
404
|
+
doc = Document(docx_path)
|
382
405
|
|
383
|
-
|
406
|
+
# Extract all text
|
407
|
+
full_text = []
|
408
|
+
for para in doc.paragraphs:
|
409
|
+
full_text.append(para.text)
|
410
|
+
|
411
|
+
# Join the text from all paragraphs
|
412
|
+
text = "\n".join(full_text)
|
413
|
+
return Scenario({"file_path": docx_path, "text": text})
|
414
|
+
|
415
|
+
@staticmethod
|
416
|
+
def _line_chunks(text, num_lines: int) -> Generator[str, None, None]:
|
417
|
+
"""Split a text into chunks of a given size.
|
418
|
+
|
419
|
+
:param text: The text to split.
|
420
|
+
:param num_lines: The number of lines in each chunk.
|
421
|
+
|
422
|
+
Example:
|
423
|
+
|
424
|
+
>>> list(Scenario._line_chunks("This is a test.\\nThis is a test. This is a test.", 1))
|
425
|
+
['This is a test.', 'This is a test. This is a test.']
|
426
|
+
"""
|
427
|
+
lines = text.split("\n")
|
428
|
+
for i in range(0, len(lines), num_lines):
|
429
|
+
chunk = "\n".join(lines[i : i + num_lines])
|
430
|
+
yield chunk
|
431
|
+
|
432
|
+
@staticmethod
|
433
|
+
def _word_chunks(text, num_words: int) -> Generator[str, None, None]:
|
434
|
+
"""Split a text into chunks of a given size.
|
435
|
+
|
436
|
+
:param text: The text to split.
|
437
|
+
:param num_words: The number of words in each chunk.
|
438
|
+
|
439
|
+
Example:
|
440
|
+
|
441
|
+
>>> list(Scenario._word_chunks("This is a test.", 2))
|
442
|
+
['This is', 'a test.']
|
443
|
+
"""
|
444
|
+
words = text.split()
|
445
|
+
for i in range(0, len(words), num_words):
|
446
|
+
chunk = " ".join(words[i : i + num_words])
|
447
|
+
yield chunk
|
384
448
|
|
385
449
|
def chunk(
|
386
450
|
self,
|
@@ -431,11 +495,36 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
431
495
|
...
|
432
496
|
ValueError: You must specify either num_words or num_lines, but not both.
|
433
497
|
"""
|
434
|
-
from edsl.scenarios.
|
498
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
435
499
|
|
436
|
-
|
437
|
-
field, num_words
|
438
|
-
|
500
|
+
if num_words is not None:
|
501
|
+
chunks = list(self._word_chunks(self[field], num_words))
|
502
|
+
|
503
|
+
if num_lines is not None:
|
504
|
+
chunks = list(self._line_chunks(self[field], num_lines))
|
505
|
+
|
506
|
+
if num_words is None and num_lines is None:
|
507
|
+
raise ValueError("You must specify either num_words or num_lines.")
|
508
|
+
|
509
|
+
if num_words is not None and num_lines is not None:
|
510
|
+
raise ValueError(
|
511
|
+
"You must specify either num_words or num_lines, but not both."
|
512
|
+
)
|
513
|
+
|
514
|
+
scenarios = []
|
515
|
+
for i, chunk in enumerate(chunks):
|
516
|
+
new_scenario = copy.deepcopy(self)
|
517
|
+
new_scenario[field] = chunk
|
518
|
+
new_scenario[field + "_chunk"] = i
|
519
|
+
if include_original:
|
520
|
+
if hash_original:
|
521
|
+
new_scenario[field + "_original"] = hashlib.md5(
|
522
|
+
self[field].encode()
|
523
|
+
).hexdigest()
|
524
|
+
else:
|
525
|
+
new_scenario[field + "_original"] = self[field]
|
526
|
+
scenarios.append(new_scenario)
|
527
|
+
return ScenarioList(scenarios)
|
439
528
|
|
440
529
|
@classmethod
|
441
530
|
@remove_edsl_version
|
@@ -458,30 +547,44 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
458
547
|
return cls(d)
|
459
548
|
|
460
549
|
def _table(self) -> tuple[dict, list]:
|
461
|
-
"""Prepare generic table data.
|
462
|
-
>>> s = Scenario({"food": "wood chips"})
|
463
|
-
>>> s._table()
|
464
|
-
([{'Attribute': 'data', 'Value': "{'food': 'wood chips'}"}, {'Attribute': 'name', 'Value': 'None'}], ['Attribute', 'Value'])
|
465
|
-
"""
|
550
|
+
"""Prepare generic table data."""
|
466
551
|
table_data = []
|
467
552
|
for attr_name, attr_value in self.__dict__.items():
|
468
553
|
table_data.append({"Attribute": attr_name, "Value": repr(attr_value)})
|
469
554
|
column_names = ["Attribute", "Value"]
|
470
555
|
return table_data, column_names
|
471
556
|
|
557
|
+
def rich_print(self) -> "Table":
|
558
|
+
"""Display an object as a rich table."""
|
559
|
+
from rich.table import Table
|
560
|
+
|
561
|
+
table_data, column_names = self._table()
|
562
|
+
table = Table(title=f"{self.__class__.__name__} Attributes")
|
563
|
+
for column in column_names:
|
564
|
+
table.add_column(column, style="bold")
|
565
|
+
|
566
|
+
for row in table_data:
|
567
|
+
row_data = [row[column] for column in column_names]
|
568
|
+
table.add_row(*row_data)
|
569
|
+
|
570
|
+
return table
|
571
|
+
|
472
572
|
@classmethod
|
473
|
-
def example(cls, randomize: bool = False) -> Scenario:
|
573
|
+
def example(cls, randomize: bool = False, has_image=False) -> Scenario:
|
474
574
|
"""
|
475
575
|
Returns an example Scenario instance.
|
476
576
|
|
477
577
|
:param randomize: If True, adds a random string to the value of the example key.
|
478
578
|
"""
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
579
|
+
if not has_image:
|
580
|
+
addition = "" if not randomize else str(uuid4())
|
581
|
+
return cls(
|
582
|
+
{
|
583
|
+
"persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
|
584
|
+
}
|
585
|
+
)
|
586
|
+
else:
|
587
|
+
return cls.from_image(cls.example_image())
|
485
588
|
|
486
589
|
def code(self) -> List[str]:
|
487
590
|
"""Return the code for the scenario."""
|
@@ -1,4 +1,7 @@
|
|
1
|
+
import requests
|
1
2
|
from typing import Optional
|
3
|
+
from requests.adapters import HTTPAdapter
|
4
|
+
from requests.packages.urllib3.util.retry import Retry
|
2
5
|
|
3
6
|
|
4
7
|
class ScenarioHtmlMixin:
|
@@ -19,10 +22,6 @@ class ScenarioHtmlMixin:
|
|
19
22
|
|
20
23
|
def fetch_html(url):
|
21
24
|
# Define the user-agent to mimic a browser
|
22
|
-
import requests
|
23
|
-
from requests.adapters import HTTPAdapter
|
24
|
-
from requests.packages.urllib3.util.retry import Retry
|
25
|
-
|
26
25
|
headers = {
|
27
26
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
28
27
|
}
|
@@ -1,9 +1,9 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
from typing import Union, TYPE_CHECKING
|
3
3
|
|
4
|
-
if TYPE_CHECKING:
|
5
|
-
|
6
|
-
|
4
|
+
# if TYPE_CHECKING:
|
5
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
6
|
+
from edsl.scenarios.Scenario import Scenario
|
7
7
|
|
8
8
|
|
9
9
|
class ScenarioJoin:
|
@@ -23,7 +23,7 @@ class ScenarioJoin:
|
|
23
23
|
self.left = left
|
24
24
|
self.right = right
|
25
25
|
|
26
|
-
def left_join(self, by: Union[str, list[str]]) ->
|
26
|
+
def left_join(self, by: Union[str, list[str]]) -> ScenarioList:
|
27
27
|
"""Perform a left join between the two ScenarioLists.
|
28
28
|
|
29
29
|
Args:
|
@@ -35,8 +35,6 @@ class ScenarioJoin:
|
|
35
35
|
Raises:
|
36
36
|
ValueError: If by is empty or if any join keys don't exist in both ScenarioLists
|
37
37
|
"""
|
38
|
-
from edsl.scenarios.ScenarioList import ScenarioList
|
39
|
-
|
40
38
|
self._validate_join_keys(by)
|
41
39
|
by_keys = [by] if isinstance(by, str) else by
|
42
40
|
|
@@ -88,8 +86,6 @@ class ScenarioJoin:
|
|
88
86
|
self, by_keys: list[str], other_dict: dict, all_keys: set
|
89
87
|
) -> list[Scenario]:
|
90
88
|
"""Create the joined scenarios."""
|
91
|
-
from edsl.scenarios.Scenario import Scenario
|
92
|
-
|
93
89
|
new_scenarios = []
|
94
90
|
|
95
91
|
for scenario in self.left:
|
@@ -109,8 +105,8 @@ class ScenarioJoin:
|
|
109
105
|
def _handle_matching_scenario(
|
110
106
|
self,
|
111
107
|
new_scenario: dict,
|
112
|
-
left_scenario:
|
113
|
-
right_scenario:
|
108
|
+
left_scenario: Scenario,
|
109
|
+
right_scenario: Scenario,
|
114
110
|
by_keys: list[str],
|
115
111
|
) -> None:
|
116
112
|
"""Handle merging of matching scenarios and conflict warnings."""
|