edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. edsl/Base.py +197 -116
  2. edsl/__init__.py +15 -7
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +351 -147
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +101 -50
  7. edsl/agents/InvigilatorBase.py +62 -70
  8. edsl/agents/PromptConstructor.py +143 -225
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  11. edsl/agents/__init__.py +0 -1
  12. edsl/agents/prompt_helpers.py +3 -3
  13. edsl/agents/question_option_processor.py +172 -0
  14. edsl/auto/AutoStudy.py +18 -5
  15. edsl/auto/StageBase.py +53 -40
  16. edsl/auto/StageQuestions.py +2 -1
  17. edsl/auto/utilities.py +0 -6
  18. edsl/config.py +22 -2
  19. edsl/conversation/car_buying.py +2 -1
  20. edsl/coop/CoopFunctionsMixin.py +15 -0
  21. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  22. edsl/coop/PriceFetcher.py +1 -1
  23. edsl/coop/coop.py +125 -47
  24. edsl/coop/utils.py +14 -14
  25. edsl/data/Cache.py +45 -27
  26. edsl/data/CacheEntry.py +12 -15
  27. edsl/data/CacheHandler.py +31 -12
  28. edsl/data/RemoteCacheSync.py +154 -46
  29. edsl/data/__init__.py +4 -3
  30. edsl/data_transfer_models.py +2 -1
  31. edsl/enums.py +27 -0
  32. edsl/exceptions/__init__.py +50 -50
  33. edsl/exceptions/agents.py +12 -0
  34. edsl/exceptions/inference_services.py +5 -0
  35. edsl/exceptions/questions.py +24 -6
  36. edsl/exceptions/scenarios.py +7 -0
  37. edsl/inference_services/AnthropicService.py +38 -19
  38. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  39. edsl/inference_services/AvailableModelFetcher.py +215 -0
  40. edsl/inference_services/AwsBedrock.py +0 -2
  41. edsl/inference_services/AzureAI.py +0 -2
  42. edsl/inference_services/GoogleService.py +7 -12
  43. edsl/inference_services/InferenceServiceABC.py +18 -85
  44. edsl/inference_services/InferenceServicesCollection.py +120 -79
  45. edsl/inference_services/MistralAIService.py +0 -3
  46. edsl/inference_services/OpenAIService.py +47 -35
  47. edsl/inference_services/PerplexityService.py +0 -3
  48. edsl/inference_services/ServiceAvailability.py +135 -0
  49. edsl/inference_services/TestService.py +11 -10
  50. edsl/inference_services/TogetherAIService.py +5 -3
  51. edsl/inference_services/data_structures.py +134 -0
  52. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  53. edsl/jobs/Answers.py +1 -14
  54. edsl/jobs/FetchInvigilator.py +47 -0
  55. edsl/jobs/InterviewTaskManager.py +98 -0
  56. edsl/jobs/InterviewsConstructor.py +50 -0
  57. edsl/jobs/Jobs.py +356 -431
  58. edsl/jobs/JobsChecks.py +35 -10
  59. edsl/jobs/JobsComponentConstructor.py +189 -0
  60. edsl/jobs/JobsPrompts.py +6 -4
  61. edsl/jobs/JobsRemoteInferenceHandler.py +205 -133
  62. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  63. edsl/jobs/RequestTokenEstimator.py +30 -0
  64. edsl/jobs/async_interview_runner.py +138 -0
  65. edsl/jobs/buckets/BucketCollection.py +44 -3
  66. edsl/jobs/buckets/TokenBucket.py +53 -21
  67. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  68. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  69. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  70. edsl/jobs/data_structures.py +120 -0
  71. edsl/jobs/decorators.py +35 -0
  72. edsl/jobs/interviews/Interview.py +143 -408
  73. edsl/jobs/jobs_status_enums.py +9 -0
  74. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  75. edsl/jobs/results_exceptions_handler.py +98 -0
  76. edsl/jobs/runners/JobsRunnerAsyncio.py +88 -403
  77. edsl/jobs/runners/JobsRunnerStatus.py +133 -165
  78. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  79. edsl/jobs/tasks/TaskHistory.py +38 -18
  80. edsl/jobs/tasks/task_status_enum.py +0 -2
  81. edsl/language_models/ComputeCost.py +63 -0
  82. edsl/language_models/LanguageModel.py +194 -236
  83. edsl/language_models/ModelList.py +28 -19
  84. edsl/language_models/PriceManager.py +127 -0
  85. edsl/language_models/RawResponseHandler.py +106 -0
  86. edsl/language_models/ServiceDataSources.py +0 -0
  87. edsl/language_models/__init__.py +1 -2
  88. edsl/language_models/key_management/KeyLookup.py +63 -0
  89. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  90. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  91. edsl/language_models/key_management/__init__.py +0 -0
  92. edsl/language_models/key_management/models.py +131 -0
  93. edsl/language_models/model.py +256 -0
  94. edsl/language_models/repair.py +2 -2
  95. edsl/language_models/utilities.py +5 -4
  96. edsl/notebooks/Notebook.py +19 -14
  97. edsl/notebooks/NotebookToLaTeX.py +142 -0
  98. edsl/prompts/Prompt.py +29 -39
  99. edsl/questions/ExceptionExplainer.py +77 -0
  100. edsl/questions/HTMLQuestion.py +103 -0
  101. edsl/questions/QuestionBase.py +68 -214
  102. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  103. edsl/questions/QuestionBudget.py +1 -1
  104. edsl/questions/QuestionCheckBox.py +3 -3
  105. edsl/questions/QuestionExtract.py +5 -7
  106. edsl/questions/QuestionFreeText.py +2 -3
  107. edsl/questions/QuestionList.py +10 -18
  108. edsl/questions/QuestionMatrix.py +265 -0
  109. edsl/questions/QuestionMultipleChoice.py +67 -23
  110. edsl/questions/QuestionNumerical.py +2 -4
  111. edsl/questions/QuestionRank.py +7 -17
  112. edsl/questions/SimpleAskMixin.py +4 -3
  113. edsl/questions/__init__.py +2 -1
  114. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +47 -2
  115. edsl/questions/data_structures.py +20 -0
  116. edsl/questions/derived/QuestionLinearScale.py +6 -3
  117. edsl/questions/derived/QuestionTopK.py +1 -1
  118. edsl/questions/descriptors.py +17 -3
  119. edsl/questions/loop_processor.py +149 -0
  120. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +57 -50
  121. edsl/questions/question_registry.py +1 -1
  122. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +40 -26
  123. edsl/questions/response_validator_factory.py +34 -0
  124. edsl/questions/templates/matrix/__init__.py +1 -0
  125. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  126. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  127. edsl/results/CSSParameterizer.py +1 -1
  128. edsl/results/Dataset.py +170 -7
  129. edsl/results/DatasetExportMixin.py +168 -305
  130. edsl/results/DatasetTree.py +28 -8
  131. edsl/results/MarkdownToDocx.py +122 -0
  132. edsl/results/MarkdownToPDF.py +111 -0
  133. edsl/results/Result.py +298 -206
  134. edsl/results/Results.py +149 -131
  135. edsl/results/ResultsExportMixin.py +2 -0
  136. edsl/results/TableDisplay.py +98 -171
  137. edsl/results/TextEditor.py +50 -0
  138. edsl/results/__init__.py +1 -1
  139. edsl/results/file_exports.py +252 -0
  140. edsl/results/{Selector.py → results_selector.py} +23 -13
  141. edsl/results/smart_objects.py +96 -0
  142. edsl/results/table_data_class.py +12 -0
  143. edsl/results/table_renderers.py +118 -0
  144. edsl/scenarios/ConstructDownloadLink.py +109 -0
  145. edsl/scenarios/DocumentChunker.py +102 -0
  146. edsl/scenarios/DocxScenario.py +16 -0
  147. edsl/scenarios/FileStore.py +150 -239
  148. edsl/scenarios/PdfExtractor.py +40 -0
  149. edsl/scenarios/Scenario.py +90 -193
  150. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  151. edsl/scenarios/ScenarioList.py +415 -244
  152. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  153. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  154. edsl/scenarios/__init__.py +1 -2
  155. edsl/scenarios/directory_scanner.py +96 -0
  156. edsl/scenarios/file_methods.py +85 -0
  157. edsl/scenarios/handlers/__init__.py +13 -0
  158. edsl/scenarios/handlers/csv.py +49 -0
  159. edsl/scenarios/handlers/docx.py +76 -0
  160. edsl/scenarios/handlers/html.py +37 -0
  161. edsl/scenarios/handlers/json.py +111 -0
  162. edsl/scenarios/handlers/latex.py +5 -0
  163. edsl/scenarios/handlers/md.py +51 -0
  164. edsl/scenarios/handlers/pdf.py +68 -0
  165. edsl/scenarios/handlers/png.py +39 -0
  166. edsl/scenarios/handlers/pptx.py +105 -0
  167. edsl/scenarios/handlers/py.py +294 -0
  168. edsl/scenarios/handlers/sql.py +313 -0
  169. edsl/scenarios/handlers/sqlite.py +149 -0
  170. edsl/scenarios/handlers/txt.py +33 -0
  171. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +10 -6
  172. edsl/scenarios/scenario_selector.py +156 -0
  173. edsl/study/ObjectEntry.py +1 -1
  174. edsl/study/SnapShot.py +1 -1
  175. edsl/study/Study.py +5 -12
  176. edsl/surveys/ConstructDAG.py +92 -0
  177. edsl/surveys/EditSurvey.py +221 -0
  178. edsl/surveys/InstructionHandler.py +100 -0
  179. edsl/surveys/MemoryManagement.py +72 -0
  180. edsl/surveys/Rule.py +5 -4
  181. edsl/surveys/RuleCollection.py +25 -27
  182. edsl/surveys/RuleManager.py +172 -0
  183. edsl/surveys/Simulator.py +75 -0
  184. edsl/surveys/Survey.py +270 -791
  185. edsl/surveys/SurveyCSS.py +20 -8
  186. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  187. edsl/surveys/SurveyToApp.py +141 -0
  188. edsl/surveys/__init__.py +4 -2
  189. edsl/surveys/descriptors.py +6 -2
  190. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  191. edsl/surveys/instructions/Instruction.py +4 -13
  192. edsl/surveys/instructions/InstructionCollection.py +11 -6
  193. edsl/templates/error_reporting/interview_details.html +1 -1
  194. edsl/templates/error_reporting/report.html +1 -1
  195. edsl/tools/plotting.py +1 -1
  196. edsl/utilities/PrettyList.py +56 -0
  197. edsl/utilities/is_notebook.py +18 -0
  198. edsl/utilities/is_valid_variable_name.py +11 -0
  199. edsl/utilities/remove_edsl_version.py +24 -0
  200. edsl/utilities/utilities.py +35 -23
  201. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/METADATA +12 -10
  202. edsl-0.1.39.dist-info/RECORD +358 -0
  203. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
  204. edsl/language_models/KeyLookup.py +0 -30
  205. edsl/language_models/registry.py +0 -190
  206. edsl/language_models/unused/ReplicateBase.py +0 -83
  207. edsl/results/ResultsDBMixin.py +0 -238
  208. edsl-0.1.38.dev4.dist-info/RECORD +0 -277
  209. /edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
  210. /edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
  211. /edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
  212. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/LICENSE +0 -0
@@ -2,54 +2,65 @@
2
2
 
3
3
  from __future__ import annotations
4
4
  import copy
5
- import hashlib
6
5
  import os
7
6
  import json
8
7
  from collections import UserDict
9
- from typing import Union, List, Optional, Generator
8
+ from typing import Union, List, Optional, TYPE_CHECKING, Collection
10
9
  from uuid import uuid4
11
10
 
12
11
  from edsl.Base import Base
13
12
  from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
14
- from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
13
+ from edsl.utilities.remove_edsl_version import remove_edsl_version
15
14
  from edsl.exceptions.scenarios import ScenarioError
16
15
 
16
+ if TYPE_CHECKING:
17
+ from edsl.scenarios.ScenarioList import ScenarioList
18
+ from edsl.results.Dataset import Dataset
19
+
17
20
 
18
21
  class DisplayJSON:
19
- def __init__(self, dict):
20
- self.text = json.dumps(dict, indent=4)
22
+ """Display a dictionary as JSON."""
23
+
24
+ def __init__(self, input_dict: dict):
25
+ self.text = json.dumps(input_dict, indent=4)
21
26
 
22
27
  def __repr__(self):
23
28
  return self.text
24
29
 
25
30
 
26
31
  class DisplayYAML:
27
- def __init__(self, dict):
32
+ """Display a dictionary as YAML."""
33
+
34
+ def __init__(self, input_dict: dict):
28
35
  import yaml
29
36
 
30
- self.text = yaml.dump(dict)
37
+ self.text = yaml.dump(input_dict)
31
38
 
32
39
  def __repr__(self):
33
40
  return self.text
34
41
 
35
42
 
36
43
  class Scenario(Base, UserDict, ScenarioHtmlMixin):
37
- """A Scenario is a dictionary of keys/values.
38
-
39
- They can be used parameterize EDSL questions."""
44
+ """A Scenario is a dictionary of keys/values that can be used to parameterize questions."""
40
45
 
41
46
  __documentation__ = "https://docs.expectedparrot.com/en/latest/scenarios.html"
42
47
 
43
- def __init__(self, data: Union[dict, None] = None, name: str = None):
48
+ def __init__(self, data: Optional[dict] = None, name: Optional[str] = None):
44
49
  """Initialize a new Scenario.
45
50
 
46
- # :param data: A dictionary of keys/values for parameterizing questions.
47
- #"""
51
+ :param data: A dictionary of keys/values for parameterizing questions.
52
+ :param name: The name of the scenario.
53
+ """
48
54
  if not isinstance(data, dict) and data is not None:
49
- raise EDSLScenarioError(
50
- "You must pass in a dictionary to initialize a Scenario."
51
- )
55
+ try:
56
+ data = dict(data)
57
+ except Exception as e:
58
+ raise ScenarioError(
59
+ f"You must pass in a dictionary to initialize a Scenario. You passed in {data}",
60
+ "Exception message:" + str(e),
61
+ )
52
62
 
63
+ super().__init__()
53
64
  self.data = data if data is not None else {}
54
65
  self.name = name
55
66
 
@@ -59,7 +70,6 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
59
70
  :param n: The number of times to replicate the scenario.
60
71
 
61
72
  Example:
62
-
63
73
  >>> s = Scenario({"food": "wood chips"})
64
74
  >>> s.replicate(2)
65
75
  ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood chips'})])
@@ -82,13 +92,13 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
82
92
  return True
83
93
  return False
84
94
 
85
- def convert_jinja_braces(
86
- self, replacement_left="<<", replacement_right=">>"
95
+ def _convert_jinja_braces(
96
+ self, replacement_left: str = "<<", replacement_right: str = ">>"
87
97
  ) -> Scenario:
88
98
  """Convert Jinja braces to some other character.
89
99
 
90
100
  >>> s = Scenario({"food": "I love {{wood chips}}"})
91
- >>> s.convert_jinja_braces()
101
+ >>> s._convert_jinja_braces()
92
102
  Scenario({'food': 'I love <<wood chips>>'})
93
103
 
94
104
  """
@@ -102,7 +112,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
102
112
  new_scenario[key] = value
103
113
  return new_scenario
104
114
 
105
- def __add__(self, other_scenario: "Scenario") -> "Scenario":
115
+ def __add__(self, other_scenario: Scenario) -> Scenario:
106
116
  """Combine two scenarios by taking the union of their keys
107
117
 
108
118
  If the other scenario is None, then just return self.
@@ -127,11 +137,14 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
127
137
  return s
128
138
 
129
139
  def rename(
130
- self, old_name_or_replacement_dict: dict, new_name: Optional[str] = None
131
- ) -> "Scenario":
140
+ self,
141
+ old_name_or_replacement_dict: Union[str, dict[str, str]],
142
+ new_name: Optional[str] = None,
143
+ ) -> Scenario:
132
144
  """Rename the keys of a scenario.
133
145
 
134
- :param replacement_dict: A dictionary of old keys to new keys.
146
+ :param old_name_or_replacement_dict: A dictionary of old keys to new keys *OR* a string of the old key.
147
+ :param new_name: The new name of the key.
135
148
 
136
149
  Example:
137
150
 
@@ -156,13 +169,26 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
156
169
  new_scenario[key] = value
157
170
  return new_scenario
158
171
 
159
- def table(self, tablefmt: str = "grid") -> str:
160
- from edsl.results.Dataset import Dataset
172
+ def new_column_names(self, new_names: List[str]) -> Scenario:
173
+ """Rename the keys of a scenario.
174
+
175
+ >>> s = Scenario({"food": "wood chips"})
176
+ >>> s.new_column_names(["food_preference"])
177
+ Scenario({'food_preference': 'wood chips'})
178
+ """
179
+ try:
180
+ assert len(new_names) == len(self.keys())
181
+ except AssertionError:
182
+ print("The number of new names must match the number of keys.")
183
+
184
+ new_scenario = Scenario()
185
+ for new_names, value in zip(new_names, self.values()):
186
+ new_scenario[new_names] = value
187
+ return new_scenario
161
188
 
162
- keys = [key for key, value in self.items()]
163
- values = [value for key, value in self.items()]
164
- d = Dataset([{"key": keys}, {"value": values}])
165
- return d.table(tablefmt=tablefmt)
189
+ def table(self, tablefmt: str = "grid") -> str:
190
+ """Display a scenario as a table."""
191
+ return self.to_dataset().table(tablefmt=tablefmt)
166
192
 
167
193
  def json(self):
168
194
  return DisplayJSON(self.to_dict(add_edsl_version=False))
@@ -172,7 +198,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
172
198
 
173
199
  return DisplayYAML(self.to_dict(add_edsl_version=False))
174
200
 
175
- def to_dict(self, add_edsl_version=True) -> dict:
201
+ def to_dict(self, add_edsl_version: bool = True) -> dict:
176
202
  """Convert a scenario to a dictionary.
177
203
 
178
204
  Example:
@@ -200,8 +226,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
200
226
  return d
201
227
 
202
228
  def __hash__(self) -> int:
203
- """
204
- Return a hash of the scenario.
229
+ """Return a hash of the scenario.
205
230
 
206
231
  Example:
207
232
 
@@ -213,44 +238,23 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
213
238
 
214
239
  return dict_hash(self.to_dict(add_edsl_version=False))
215
240
 
216
- def print(self):
217
- from rich import print_json
218
- import json
219
-
220
- print_json(json.dumps(self.to_dict()))
221
-
222
241
  def __repr__(self):
223
242
  return "Scenario(" + repr(self.data) + ")"
224
243
 
225
244
  def to_dataset(self) -> "Dataset":
226
- # d = Dataset([{'a.b':[1,2,3,4]}])
245
+ """Convert a scenario to a dataset.
246
+
247
+ >>> s = Scenario({"food": "wood chips"})
248
+ >>> s.to_dataset()
249
+ Dataset([{'key': ['food']}, {'value': ['wood chips']}])
250
+ """
227
251
  from edsl.results.Dataset import Dataset
228
252
 
229
- keys = [key for key, value in self.items()]
230
- values = [value for key, value in self.items()]
253
+ keys = list(self.keys())
254
+ values = list(self.values())
231
255
  return Dataset([{"key": keys}, {"value": values}])
232
256
 
233
- def _repr_html_(self):
234
- from tabulate import tabulate
235
- import reprlib
236
-
237
- d = self.to_dict(add_edsl_version=False)
238
- # return self.to_dataset()
239
- r = reprlib.Repr()
240
- r.maxstring = 70
241
-
242
- data = [[k, r.repr(v)] for k, v in d.items()]
243
- from tabulate import tabulate
244
-
245
- if hasattr(self, "__documentation__"):
246
- footer = f"<a href='{self.__documentation__}'>(docs)</a></p>"
247
- else:
248
- footer = ""
249
-
250
- table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
251
- return f"<pre>{table}</pre>" + footer
252
-
253
- def select(self, list_of_keys: List[str]) -> "Scenario":
257
+ def select(self, list_of_keys: Collection[str]) -> "Scenario":
254
258
  """Select a subset of keys from a scenario.
255
259
 
256
260
  :param list_of_keys: The keys to select.
@@ -266,7 +270,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
266
270
  new_scenario[key] = self[key]
267
271
  return new_scenario
268
272
 
269
- def drop(self, list_of_keys: List[str]) -> "Scenario":
273
+ def drop(self, list_of_keys: Collection[str]) -> "Scenario":
270
274
  """Drop a subset of keys from a scenario.
271
275
 
272
276
  :param list_of_keys: The keys to drop.
@@ -320,7 +324,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
320
324
  ... _ = f.flush()
321
325
  ... s = Scenario.from_file(f.name, "file")
322
326
  >>> s
323
- Scenario({'file': FileStore(path='...')})
327
+ Scenario({'file': FileStore(path='...', ...)})
324
328
 
325
329
  """
326
330
  from edsl.scenarios.FileStore import FileStore
@@ -351,35 +355,10 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
351
355
  return cls.from_file(image_path, image_name)
352
356
 
353
357
  @classmethod
354
- def from_pdf(cls, pdf_path):
355
- # Ensure the file exists
356
- import fitz
357
-
358
- if not os.path.exists(pdf_path):
359
- raise FileNotFoundError(f"The file {pdf_path} does not exist.")
360
-
361
- # Open the PDF file
362
- document = fitz.open(pdf_path)
363
-
364
- # Get the filename from the path
365
- filename = os.path.basename(pdf_path)
358
+ def from_pdf(cls, pdf_path: str):
359
+ from edsl.scenarios.PdfExtractor import PdfExtractor
366
360
 
367
- # Iterate through each page and extract text
368
- text = ""
369
- for page_num in range(len(document)):
370
- page = document.load_page(page_num)
371
- blocks = page.get_text("blocks") # Extract text blocks
372
-
373
- # Sort blocks by their vertical position (y0) to maintain reading order
374
- blocks.sort(key=lambda b: (b[1], b[0])) # Sort by y0 first, then x0
375
-
376
- # Combine the text blocks in order
377
- for block in blocks:
378
- text += block[4] + "\n"
379
-
380
- # Create a dictionary for the combined text
381
- page_info = {"filename": filename, "text": text}
382
- return Scenario(page_info)
361
+ return PdfExtractor(pdf_path, cls).get_object()
383
362
 
384
363
  @classmethod
385
364
  def from_docx(cls, docx_path: str) -> "Scenario":
@@ -399,52 +378,9 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
399
378
  Scenario({'file_path': 'test.docx', 'text': 'EDSL Survey\\nThis is a test.'})
400
379
  >>> import os; os.remove("test.docx")
401
380
  """
402
- from docx import Document
403
-
404
- doc = Document(docx_path)
381
+ from edsl.scenarios.DocxScenario import DocxScenario
405
382
 
406
- # Extract all text
407
- full_text = []
408
- for para in doc.paragraphs:
409
- full_text.append(para.text)
410
-
411
- # Join the text from all paragraphs
412
- text = "\n".join(full_text)
413
- return Scenario({"file_path": docx_path, "text": text})
414
-
415
- @staticmethod
416
- def _line_chunks(text, num_lines: int) -> Generator[str, None, None]:
417
- """Split a text into chunks of a given size.
418
-
419
- :param text: The text to split.
420
- :param num_lines: The number of lines in each chunk.
421
-
422
- Example:
423
-
424
- >>> list(Scenario._line_chunks("This is a test.\\nThis is a test. This is a test.", 1))
425
- ['This is a test.', 'This is a test. This is a test.']
426
- """
427
- lines = text.split("\n")
428
- for i in range(0, len(lines), num_lines):
429
- chunk = "\n".join(lines[i : i + num_lines])
430
- yield chunk
431
-
432
- @staticmethod
433
- def _word_chunks(text, num_words: int) -> Generator[str, None, None]:
434
- """Split a text into chunks of a given size.
435
-
436
- :param text: The text to split.
437
- :param num_words: The number of words in each chunk.
438
-
439
- Example:
440
-
441
- >>> list(Scenario._word_chunks("This is a test.", 2))
442
- ['This is', 'a test.']
443
- """
444
- words = text.split()
445
- for i in range(0, len(words), num_words):
446
- chunk = " ".join(words[i : i + num_words])
447
- yield chunk
383
+ return Scenario(DocxScenario(docx_path).get_scenario_dict())
448
384
 
449
385
  def chunk(
450
386
  self,
@@ -495,36 +431,11 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
495
431
  ...
496
432
  ValueError: You must specify either num_words or num_lines, but not both.
497
433
  """
498
- from edsl.scenarios.ScenarioList import ScenarioList
434
+ from edsl.scenarios.DocumentChunker import DocumentChunker
499
435
 
500
- if num_words is not None:
501
- chunks = list(self._word_chunks(self[field], num_words))
502
-
503
- if num_lines is not None:
504
- chunks = list(self._line_chunks(self[field], num_lines))
505
-
506
- if num_words is None and num_lines is None:
507
- raise ValueError("You must specify either num_words or num_lines.")
508
-
509
- if num_words is not None and num_lines is not None:
510
- raise ValueError(
511
- "You must specify either num_words or num_lines, but not both."
512
- )
513
-
514
- scenarios = []
515
- for i, chunk in enumerate(chunks):
516
- new_scenario = copy.deepcopy(self)
517
- new_scenario[field] = chunk
518
- new_scenario[field + "_chunk"] = i
519
- if include_original:
520
- if hash_original:
521
- new_scenario[field + "_original"] = hashlib.md5(
522
- self[field].encode()
523
- ).hexdigest()
524
- else:
525
- new_scenario[field + "_original"] = self[field]
526
- scenarios.append(new_scenario)
527
- return ScenarioList(scenarios)
436
+ return DocumentChunker(self).chunk(
437
+ field, num_words, num_lines, include_original, hash_original
438
+ )
528
439
 
529
440
  @classmethod
530
441
  @remove_edsl_version
@@ -547,44 +458,30 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
547
458
  return cls(d)
548
459
 
549
460
  def _table(self) -> tuple[dict, list]:
550
- """Prepare generic table data."""
461
+ """Prepare generic table data.
462
+ >>> s = Scenario({"food": "wood chips"})
463
+ >>> s._table()
464
+ ([{'Attribute': 'data', 'Value': "{'food': 'wood chips'}"}, {'Attribute': 'name', 'Value': 'None'}], ['Attribute', 'Value'])
465
+ """
551
466
  table_data = []
552
467
  for attr_name, attr_value in self.__dict__.items():
553
468
  table_data.append({"Attribute": attr_name, "Value": repr(attr_value)})
554
469
  column_names = ["Attribute", "Value"]
555
470
  return table_data, column_names
556
471
 
557
- def rich_print(self) -> "Table":
558
- """Display an object as a rich table."""
559
- from rich.table import Table
560
-
561
- table_data, column_names = self._table()
562
- table = Table(title=f"{self.__class__.__name__} Attributes")
563
- for column in column_names:
564
- table.add_column(column, style="bold")
565
-
566
- for row in table_data:
567
- row_data = [row[column] for column in column_names]
568
- table.add_row(*row_data)
569
-
570
- return table
571
-
572
472
  @classmethod
573
- def example(cls, randomize: bool = False, has_image=False) -> Scenario:
473
+ def example(cls, randomize: bool = False) -> Scenario:
574
474
  """
575
475
  Returns an example Scenario instance.
576
476
 
577
477
  :param randomize: If True, adds a random string to the value of the example key.
578
478
  """
579
- if not has_image:
580
- addition = "" if not randomize else str(uuid4())
581
- return cls(
582
- {
583
- "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
584
- }
585
- )
586
- else:
587
- return cls.from_image(cls.example_image())
479
+ addition = "" if not randomize else str(uuid4())
480
+ return cls(
481
+ {
482
+ "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
483
+ }
484
+ )
588
485
 
589
486
  def code(self) -> List[str]:
590
487
  """Return the code for the scenario."""
@@ -1,7 +1,4 @@
1
- import requests
2
1
  from typing import Optional
3
- from requests.adapters import HTTPAdapter
4
- from requests.packages.urllib3.util.retry import Retry
5
2
 
6
3
 
7
4
  class ScenarioHtmlMixin:
@@ -22,6 +19,10 @@ class ScenarioHtmlMixin:
22
19
 
23
20
  def fetch_html(url):
24
21
  # Define the user-agent to mimic a browser
22
+ import requests
23
+ from requests.adapters import HTTPAdapter
24
+ from requests.packages.urllib3.util.retry import Retry
25
+
25
26
  headers = {
26
27
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
27
28
  }