edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. edsl/Base.py +116 -197
  2. edsl/__init__.py +7 -15
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +147 -351
  5. edsl/agents/AgentList.py +73 -211
  6. edsl/agents/Invigilator.py +50 -101
  7. edsl/agents/InvigilatorBase.py +70 -62
  8. edsl/agents/PromptConstructor.py +225 -143
  9. edsl/agents/__init__.py +1 -0
  10. edsl/agents/prompt_helpers.py +3 -3
  11. edsl/auto/AutoStudy.py +5 -18
  12. edsl/auto/StageBase.py +40 -53
  13. edsl/auto/StageQuestions.py +1 -2
  14. edsl/auto/utilities.py +6 -0
  15. edsl/config.py +2 -22
  16. edsl/conversation/car_buying.py +1 -2
  17. edsl/coop/PriceFetcher.py +1 -1
  18. edsl/coop/coop.py +47 -125
  19. edsl/coop/utils.py +14 -14
  20. edsl/data/Cache.py +27 -45
  21. edsl/data/CacheEntry.py +15 -12
  22. edsl/data/CacheHandler.py +12 -31
  23. edsl/data/RemoteCacheSync.py +46 -154
  24. edsl/data/__init__.py +3 -4
  25. edsl/data_transfer_models.py +1 -2
  26. edsl/enums.py +0 -27
  27. edsl/exceptions/__init__.py +50 -50
  28. edsl/exceptions/agents.py +0 -12
  29. edsl/exceptions/questions.py +6 -24
  30. edsl/exceptions/scenarios.py +0 -7
  31. edsl/inference_services/AnthropicService.py +19 -38
  32. edsl/inference_services/AwsBedrock.py +2 -0
  33. edsl/inference_services/AzureAI.py +2 -0
  34. edsl/inference_services/GoogleService.py +12 -7
  35. edsl/inference_services/InferenceServiceABC.py +85 -18
  36. edsl/inference_services/InferenceServicesCollection.py +79 -120
  37. edsl/inference_services/MistralAIService.py +3 -0
  38. edsl/inference_services/OpenAIService.py +35 -47
  39. edsl/inference_services/PerplexityService.py +3 -0
  40. edsl/inference_services/TestService.py +10 -11
  41. edsl/inference_services/TogetherAIService.py +3 -5
  42. edsl/jobs/Answers.py +14 -1
  43. edsl/jobs/Jobs.py +431 -356
  44. edsl/jobs/JobsChecks.py +10 -35
  45. edsl/jobs/JobsPrompts.py +4 -6
  46. edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
  47. edsl/jobs/buckets/BucketCollection.py +3 -44
  48. edsl/jobs/buckets/TokenBucket.py +21 -53
  49. edsl/jobs/interviews/Interview.py +408 -143
  50. edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
  51. edsl/jobs/runners/JobsRunnerStatus.py +165 -133
  52. edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
  53. edsl/jobs/tasks/TaskHistory.py +18 -38
  54. edsl/jobs/tasks/task_status_enum.py +2 -0
  55. edsl/language_models/KeyLookup.py +30 -0
  56. edsl/language_models/LanguageModel.py +236 -194
  57. edsl/language_models/ModelList.py +19 -28
  58. edsl/language_models/__init__.py +2 -1
  59. edsl/language_models/registry.py +190 -0
  60. edsl/language_models/repair.py +2 -2
  61. edsl/language_models/unused/ReplicateBase.py +83 -0
  62. edsl/language_models/utilities.py +4 -5
  63. edsl/notebooks/Notebook.py +14 -19
  64. edsl/prompts/Prompt.py +39 -29
  65. edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
  66. edsl/questions/QuestionBase.py +214 -68
  67. edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
  68. edsl/questions/QuestionBasePromptsMixin.py +3 -7
  69. edsl/questions/QuestionBudget.py +1 -1
  70. edsl/questions/QuestionCheckBox.py +3 -3
  71. edsl/questions/QuestionExtract.py +7 -5
  72. edsl/questions/QuestionFreeText.py +3 -2
  73. edsl/questions/QuestionList.py +18 -10
  74. edsl/questions/QuestionMultipleChoice.py +23 -67
  75. edsl/questions/QuestionNumerical.py +4 -2
  76. edsl/questions/QuestionRank.py +17 -7
  77. edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
  78. edsl/questions/SimpleAskMixin.py +3 -4
  79. edsl/questions/__init__.py +1 -2
  80. edsl/questions/derived/QuestionLinearScale.py +3 -6
  81. edsl/questions/derived/QuestionTopK.py +1 -1
  82. edsl/questions/descriptors.py +3 -17
  83. edsl/questions/question_registry.py +1 -1
  84. edsl/results/CSSParameterizer.py +1 -1
  85. edsl/results/Dataset.py +7 -170
  86. edsl/results/DatasetExportMixin.py +305 -168
  87. edsl/results/DatasetTree.py +8 -28
  88. edsl/results/Result.py +206 -298
  89. edsl/results/Results.py +131 -149
  90. edsl/results/ResultsDBMixin.py +238 -0
  91. edsl/results/ResultsExportMixin.py +0 -2
  92. edsl/results/{results_selector.py → Selector.py} +13 -23
  93. edsl/results/TableDisplay.py +171 -98
  94. edsl/results/__init__.py +1 -1
  95. edsl/scenarios/FileStore.py +239 -150
  96. edsl/scenarios/Scenario.py +193 -90
  97. edsl/scenarios/ScenarioHtmlMixin.py +3 -4
  98. edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
  99. edsl/scenarios/ScenarioList.py +244 -415
  100. edsl/scenarios/ScenarioListExportMixin.py +7 -0
  101. edsl/scenarios/ScenarioListPdfMixin.py +37 -15
  102. edsl/scenarios/__init__.py +2 -1
  103. edsl/study/ObjectEntry.py +1 -1
  104. edsl/study/SnapShot.py +1 -1
  105. edsl/study/Study.py +12 -5
  106. edsl/surveys/Rule.py +4 -5
  107. edsl/surveys/RuleCollection.py +27 -25
  108. edsl/surveys/Survey.py +791 -270
  109. edsl/surveys/SurveyCSS.py +8 -20
  110. edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
  111. edsl/surveys/__init__.py +2 -4
  112. edsl/surveys/descriptors.py +2 -6
  113. edsl/surveys/instructions/ChangeInstruction.py +2 -1
  114. edsl/surveys/instructions/Instruction.py +13 -4
  115. edsl/surveys/instructions/InstructionCollection.py +6 -11
  116. edsl/templates/error_reporting/interview_details.html +1 -1
  117. edsl/templates/error_reporting/report.html +1 -1
  118. edsl/tools/plotting.py +1 -1
  119. edsl/utilities/utilities.py +23 -35
  120. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
  121. edsl-0.1.39.dev1.dist-info/RECORD +277 -0
  122. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
  123. edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
  124. edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
  125. edsl/agents/question_option_processor.py +0 -172
  126. edsl/coop/CoopFunctionsMixin.py +0 -15
  127. edsl/coop/ExpectedParrotKeyHandler.py +0 -125
  128. edsl/exceptions/inference_services.py +0 -5
  129. edsl/inference_services/AvailableModelCacheHandler.py +0 -184
  130. edsl/inference_services/AvailableModelFetcher.py +0 -215
  131. edsl/inference_services/ServiceAvailability.py +0 -135
  132. edsl/inference_services/data_structures.py +0 -134
  133. edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
  134. edsl/jobs/FetchInvigilator.py +0 -47
  135. edsl/jobs/InterviewTaskManager.py +0 -98
  136. edsl/jobs/InterviewsConstructor.py +0 -50
  137. edsl/jobs/JobsComponentConstructor.py +0 -189
  138. edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
  139. edsl/jobs/RequestTokenEstimator.py +0 -30
  140. edsl/jobs/async_interview_runner.py +0 -138
  141. edsl/jobs/buckets/TokenBucketAPI.py +0 -211
  142. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  143. edsl/jobs/check_survey_scenario_compatibility.py +0 -85
  144. edsl/jobs/data_structures.py +0 -120
  145. edsl/jobs/decorators.py +0 -35
  146. edsl/jobs/jobs_status_enums.py +0 -9
  147. edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
  148. edsl/jobs/results_exceptions_handler.py +0 -98
  149. edsl/language_models/ComputeCost.py +0 -63
  150. edsl/language_models/PriceManager.py +0 -127
  151. edsl/language_models/RawResponseHandler.py +0 -106
  152. edsl/language_models/ServiceDataSources.py +0 -0
  153. edsl/language_models/key_management/KeyLookup.py +0 -63
  154. edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
  155. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  156. edsl/language_models/key_management/__init__.py +0 -0
  157. edsl/language_models/key_management/models.py +0 -131
  158. edsl/language_models/model.py +0 -256
  159. edsl/notebooks/NotebookToLaTeX.py +0 -142
  160. edsl/questions/ExceptionExplainer.py +0 -77
  161. edsl/questions/HTMLQuestion.py +0 -103
  162. edsl/questions/QuestionMatrix.py +0 -265
  163. edsl/questions/data_structures.py +0 -20
  164. edsl/questions/loop_processor.py +0 -149
  165. edsl/questions/response_validator_factory.py +0 -34
  166. edsl/questions/templates/matrix/__init__.py +0 -1
  167. edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
  168. edsl/questions/templates/matrix/question_presentation.jinja +0 -20
  169. edsl/results/MarkdownToDocx.py +0 -122
  170. edsl/results/MarkdownToPDF.py +0 -111
  171. edsl/results/TextEditor.py +0 -50
  172. edsl/results/file_exports.py +0 -252
  173. edsl/results/smart_objects.py +0 -96
  174. edsl/results/table_data_class.py +0 -12
  175. edsl/results/table_renderers.py +0 -118
  176. edsl/scenarios/ConstructDownloadLink.py +0 -109
  177. edsl/scenarios/DocumentChunker.py +0 -102
  178. edsl/scenarios/DocxScenario.py +0 -16
  179. edsl/scenarios/PdfExtractor.py +0 -40
  180. edsl/scenarios/directory_scanner.py +0 -96
  181. edsl/scenarios/file_methods.py +0 -85
  182. edsl/scenarios/handlers/__init__.py +0 -13
  183. edsl/scenarios/handlers/csv.py +0 -49
  184. edsl/scenarios/handlers/docx.py +0 -76
  185. edsl/scenarios/handlers/html.py +0 -37
  186. edsl/scenarios/handlers/json.py +0 -111
  187. edsl/scenarios/handlers/latex.py +0 -5
  188. edsl/scenarios/handlers/md.py +0 -51
  189. edsl/scenarios/handlers/pdf.py +0 -68
  190. edsl/scenarios/handlers/png.py +0 -39
  191. edsl/scenarios/handlers/pptx.py +0 -105
  192. edsl/scenarios/handlers/py.py +0 -294
  193. edsl/scenarios/handlers/sql.py +0 -313
  194. edsl/scenarios/handlers/sqlite.py +0 -149
  195. edsl/scenarios/handlers/txt.py +0 -33
  196. edsl/scenarios/scenario_selector.py +0 -156
  197. edsl/surveys/ConstructDAG.py +0 -92
  198. edsl/surveys/EditSurvey.py +0 -221
  199. edsl/surveys/InstructionHandler.py +0 -100
  200. edsl/surveys/MemoryManagement.py +0 -72
  201. edsl/surveys/RuleManager.py +0 -172
  202. edsl/surveys/Simulator.py +0 -75
  203. edsl/surveys/SurveyToApp.py +0 -141
  204. edsl/utilities/PrettyList.py +0 -56
  205. edsl/utilities/is_notebook.py +0 -18
  206. edsl/utilities/is_valid_variable_name.py +0 -11
  207. edsl/utilities/remove_edsl_version.py +0 -24
  208. edsl-0.1.39.dist-info/RECORD +0 -358
  209. /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
  210. /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
  211. /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
  212. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
@@ -2,65 +2,54 @@
2
2
 
3
3
  from __future__ import annotations
4
4
  import copy
5
+ import hashlib
5
6
  import os
6
7
  import json
7
8
  from collections import UserDict
8
- from typing import Union, List, Optional, TYPE_CHECKING, Collection
9
+ from typing import Union, List, Optional, Generator
9
10
  from uuid import uuid4
10
11
 
11
12
  from edsl.Base import Base
12
13
  from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
13
- from edsl.utilities.remove_edsl_version import remove_edsl_version
14
+ from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
14
15
  from edsl.exceptions.scenarios import ScenarioError
15
16
 
16
- if TYPE_CHECKING:
17
- from edsl.scenarios.ScenarioList import ScenarioList
18
- from edsl.results.Dataset import Dataset
19
-
20
17
 
21
18
  class DisplayJSON:
22
- """Display a dictionary as JSON."""
23
-
24
- def __init__(self, input_dict: dict):
25
- self.text = json.dumps(input_dict, indent=4)
19
+ def __init__(self, dict):
20
+ self.text = json.dumps(dict, indent=4)
26
21
 
27
22
  def __repr__(self):
28
23
  return self.text
29
24
 
30
25
 
31
26
  class DisplayYAML:
32
- """Display a dictionary as YAML."""
33
-
34
- def __init__(self, input_dict: dict):
27
+ def __init__(self, dict):
35
28
  import yaml
36
29
 
37
- self.text = yaml.dump(input_dict)
30
+ self.text = yaml.dump(dict)
38
31
 
39
32
  def __repr__(self):
40
33
  return self.text
41
34
 
42
35
 
43
36
  class Scenario(Base, UserDict, ScenarioHtmlMixin):
44
- """A Scenario is a dictionary of keys/values that can be used to parameterize questions."""
37
+ """A Scenario is a dictionary of keys/values.
38
+
39
+ They can be used parameterize EDSL questions."""
45
40
 
46
41
  __documentation__ = "https://docs.expectedparrot.com/en/latest/scenarios.html"
47
42
 
48
- def __init__(self, data: Optional[dict] = None, name: Optional[str] = None):
43
+ def __init__(self, data: Union[dict, None] = None, name: str = None):
49
44
  """Initialize a new Scenario.
50
45
 
51
- :param data: A dictionary of keys/values for parameterizing questions.
52
- :param name: The name of the scenario.
53
- """
46
+ # :param data: A dictionary of keys/values for parameterizing questions.
47
+ #"""
54
48
  if not isinstance(data, dict) and data is not None:
55
- try:
56
- data = dict(data)
57
- except Exception as e:
58
- raise ScenarioError(
59
- f"You must pass in a dictionary to initialize a Scenario. You passed in {data}",
60
- "Exception message:" + str(e),
61
- )
49
+ raise EDSLScenarioError(
50
+ "You must pass in a dictionary to initialize a Scenario."
51
+ )
62
52
 
63
- super().__init__()
64
53
  self.data = data if data is not None else {}
65
54
  self.name = name
66
55
 
@@ -70,6 +59,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
70
59
  :param n: The number of times to replicate the scenario.
71
60
 
72
61
  Example:
62
+
73
63
  >>> s = Scenario({"food": "wood chips"})
74
64
  >>> s.replicate(2)
75
65
  ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood chips'})])
@@ -92,13 +82,13 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
92
82
  return True
93
83
  return False
94
84
 
95
- def _convert_jinja_braces(
96
- self, replacement_left: str = "<<", replacement_right: str = ">>"
85
+ def convert_jinja_braces(
86
+ self, replacement_left="<<", replacement_right=">>"
97
87
  ) -> Scenario:
98
88
  """Convert Jinja braces to some other character.
99
89
 
100
90
  >>> s = Scenario({"food": "I love {{wood chips}}"})
101
- >>> s._convert_jinja_braces()
91
+ >>> s.convert_jinja_braces()
102
92
  Scenario({'food': 'I love <<wood chips>>'})
103
93
 
104
94
  """
@@ -112,7 +102,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
112
102
  new_scenario[key] = value
113
103
  return new_scenario
114
104
 
115
- def __add__(self, other_scenario: Scenario) -> Scenario:
105
+ def __add__(self, other_scenario: "Scenario") -> "Scenario":
116
106
  """Combine two scenarios by taking the union of their keys
117
107
 
118
108
  If the other scenario is None, then just return self.
@@ -137,14 +127,11 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
137
127
  return s
138
128
 
139
129
  def rename(
140
- self,
141
- old_name_or_replacement_dict: Union[str, dict[str, str]],
142
- new_name: Optional[str] = None,
143
- ) -> Scenario:
130
+ self, old_name_or_replacement_dict: dict, new_name: Optional[str] = None
131
+ ) -> "Scenario":
144
132
  """Rename the keys of a scenario.
145
133
 
146
- :param old_name_or_replacement_dict: A dictionary of old keys to new keys *OR* a string of the old key.
147
- :param new_name: The new name of the key.
134
+ :param replacement_dict: A dictionary of old keys to new keys.
148
135
 
149
136
  Example:
150
137
 
@@ -169,26 +156,13 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
169
156
  new_scenario[key] = value
170
157
  return new_scenario
171
158
 
172
- def new_column_names(self, new_names: List[str]) -> Scenario:
173
- """Rename the keys of a scenario.
174
-
175
- >>> s = Scenario({"food": "wood chips"})
176
- >>> s.new_column_names(["food_preference"])
177
- Scenario({'food_preference': 'wood chips'})
178
- """
179
- try:
180
- assert len(new_names) == len(self.keys())
181
- except AssertionError:
182
- print("The number of new names must match the number of keys.")
183
-
184
- new_scenario = Scenario()
185
- for new_names, value in zip(new_names, self.values()):
186
- new_scenario[new_names] = value
187
- return new_scenario
188
-
189
159
  def table(self, tablefmt: str = "grid") -> str:
190
- """Display a scenario as a table."""
191
- return self.to_dataset().table(tablefmt=tablefmt)
160
+ from edsl.results.Dataset import Dataset
161
+
162
+ keys = [key for key, value in self.items()]
163
+ values = [value for key, value in self.items()]
164
+ d = Dataset([{"key": keys}, {"value": values}])
165
+ return d.table(tablefmt=tablefmt)
192
166
 
193
167
  def json(self):
194
168
  return DisplayJSON(self.to_dict(add_edsl_version=False))
@@ -198,7 +172,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
198
172
 
199
173
  return DisplayYAML(self.to_dict(add_edsl_version=False))
200
174
 
201
- def to_dict(self, add_edsl_version: bool = True) -> dict:
175
+ def to_dict(self, add_edsl_version=True) -> dict:
202
176
  """Convert a scenario to a dictionary.
203
177
 
204
178
  Example:
@@ -226,7 +200,8 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
226
200
  return d
227
201
 
228
202
  def __hash__(self) -> int:
229
- """Return a hash of the scenario.
203
+ """
204
+ Return a hash of the scenario.
230
205
 
231
206
  Example:
232
207
 
@@ -238,23 +213,44 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
238
213
 
239
214
  return dict_hash(self.to_dict(add_edsl_version=False))
240
215
 
216
+ def print(self):
217
+ from rich import print_json
218
+ import json
219
+
220
+ print_json(json.dumps(self.to_dict()))
221
+
241
222
  def __repr__(self):
242
223
  return "Scenario(" + repr(self.data) + ")"
243
224
 
244
225
  def to_dataset(self) -> "Dataset":
245
- """Convert a scenario to a dataset.
246
-
247
- >>> s = Scenario({"food": "wood chips"})
248
- >>> s.to_dataset()
249
- Dataset([{'key': ['food']}, {'value': ['wood chips']}])
250
- """
226
+ # d = Dataset([{'a.b':[1,2,3,4]}])
251
227
  from edsl.results.Dataset import Dataset
252
228
 
253
- keys = list(self.keys())
254
- values = list(self.values())
229
+ keys = [key for key, value in self.items()]
230
+ values = [value for key, value in self.items()]
255
231
  return Dataset([{"key": keys}, {"value": values}])
256
232
 
257
- def select(self, list_of_keys: Collection[str]) -> "Scenario":
233
+ def _repr_html_(self):
234
+ from tabulate import tabulate
235
+ import reprlib
236
+
237
+ d = self.to_dict(add_edsl_version=False)
238
+ # return self.to_dataset()
239
+ r = reprlib.Repr()
240
+ r.maxstring = 70
241
+
242
+ data = [[k, r.repr(v)] for k, v in d.items()]
243
+ from tabulate import tabulate
244
+
245
+ if hasattr(self, "__documentation__"):
246
+ footer = f"<a href='{self.__documentation__}'>(docs)</a></p>"
247
+ else:
248
+ footer = ""
249
+
250
+ table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
251
+ return f"<pre>{table}</pre>" + footer
252
+
253
+ def select(self, list_of_keys: List[str]) -> "Scenario":
258
254
  """Select a subset of keys from a scenario.
259
255
 
260
256
  :param list_of_keys: The keys to select.
@@ -270,7 +266,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
270
266
  new_scenario[key] = self[key]
271
267
  return new_scenario
272
268
 
273
- def drop(self, list_of_keys: Collection[str]) -> "Scenario":
269
+ def drop(self, list_of_keys: List[str]) -> "Scenario":
274
270
  """Drop a subset of keys from a scenario.
275
271
 
276
272
  :param list_of_keys: The keys to drop.
@@ -324,7 +320,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
324
320
  ... _ = f.flush()
325
321
  ... s = Scenario.from_file(f.name, "file")
326
322
  >>> s
327
- Scenario({'file': FileStore(path='...', ...)})
323
+ Scenario({'file': FileStore(path='...')})
328
324
 
329
325
  """
330
326
  from edsl.scenarios.FileStore import FileStore
@@ -355,10 +351,35 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
355
351
  return cls.from_file(image_path, image_name)
356
352
 
357
353
  @classmethod
358
- def from_pdf(cls, pdf_path: str):
359
- from edsl.scenarios.PdfExtractor import PdfExtractor
354
+ def from_pdf(cls, pdf_path):
355
+ # Ensure the file exists
356
+ import fitz
357
+
358
+ if not os.path.exists(pdf_path):
359
+ raise FileNotFoundError(f"The file {pdf_path} does not exist.")
360
+
361
+ # Open the PDF file
362
+ document = fitz.open(pdf_path)
363
+
364
+ # Get the filename from the path
365
+ filename = os.path.basename(pdf_path)
360
366
 
361
- return PdfExtractor(pdf_path, cls).get_object()
367
+ # Iterate through each page and extract text
368
+ text = ""
369
+ for page_num in range(len(document)):
370
+ page = document.load_page(page_num)
371
+ blocks = page.get_text("blocks") # Extract text blocks
372
+
373
+ # Sort blocks by their vertical position (y0) to maintain reading order
374
+ blocks.sort(key=lambda b: (b[1], b[0])) # Sort by y0 first, then x0
375
+
376
+ # Combine the text blocks in order
377
+ for block in blocks:
378
+ text += block[4] + "\n"
379
+
380
+ # Create a dictionary for the combined text
381
+ page_info = {"filename": filename, "text": text}
382
+ return Scenario(page_info)
362
383
 
363
384
  @classmethod
364
385
  def from_docx(cls, docx_path: str) -> "Scenario":
@@ -378,9 +399,52 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
378
399
  Scenario({'file_path': 'test.docx', 'text': 'EDSL Survey\\nThis is a test.'})
379
400
  >>> import os; os.remove("test.docx")
380
401
  """
381
- from edsl.scenarios.DocxScenario import DocxScenario
402
+ from docx import Document
403
+
404
+ doc = Document(docx_path)
382
405
 
383
- return Scenario(DocxScenario(docx_path).get_scenario_dict())
406
+ # Extract all text
407
+ full_text = []
408
+ for para in doc.paragraphs:
409
+ full_text.append(para.text)
410
+
411
+ # Join the text from all paragraphs
412
+ text = "\n".join(full_text)
413
+ return Scenario({"file_path": docx_path, "text": text})
414
+
415
+ @staticmethod
416
+ def _line_chunks(text, num_lines: int) -> Generator[str, None, None]:
417
+ """Split a text into chunks of a given size.
418
+
419
+ :param text: The text to split.
420
+ :param num_lines: The number of lines in each chunk.
421
+
422
+ Example:
423
+
424
+ >>> list(Scenario._line_chunks("This is a test.\\nThis is a test. This is a test.", 1))
425
+ ['This is a test.', 'This is a test. This is a test.']
426
+ """
427
+ lines = text.split("\n")
428
+ for i in range(0, len(lines), num_lines):
429
+ chunk = "\n".join(lines[i : i + num_lines])
430
+ yield chunk
431
+
432
+ @staticmethod
433
+ def _word_chunks(text, num_words: int) -> Generator[str, None, None]:
434
+ """Split a text into chunks of a given size.
435
+
436
+ :param text: The text to split.
437
+ :param num_words: The number of words in each chunk.
438
+
439
+ Example:
440
+
441
+ >>> list(Scenario._word_chunks("This is a test.", 2))
442
+ ['This is', 'a test.']
443
+ """
444
+ words = text.split()
445
+ for i in range(0, len(words), num_words):
446
+ chunk = " ".join(words[i : i + num_words])
447
+ yield chunk
384
448
 
385
449
  def chunk(
386
450
  self,
@@ -431,11 +495,36 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
431
495
  ...
432
496
  ValueError: You must specify either num_words or num_lines, but not both.
433
497
  """
434
- from edsl.scenarios.DocumentChunker import DocumentChunker
498
+ from edsl.scenarios.ScenarioList import ScenarioList
435
499
 
436
- return DocumentChunker(self).chunk(
437
- field, num_words, num_lines, include_original, hash_original
438
- )
500
+ if num_words is not None:
501
+ chunks = list(self._word_chunks(self[field], num_words))
502
+
503
+ if num_lines is not None:
504
+ chunks = list(self._line_chunks(self[field], num_lines))
505
+
506
+ if num_words is None and num_lines is None:
507
+ raise ValueError("You must specify either num_words or num_lines.")
508
+
509
+ if num_words is not None and num_lines is not None:
510
+ raise ValueError(
511
+ "You must specify either num_words or num_lines, but not both."
512
+ )
513
+
514
+ scenarios = []
515
+ for i, chunk in enumerate(chunks):
516
+ new_scenario = copy.deepcopy(self)
517
+ new_scenario[field] = chunk
518
+ new_scenario[field + "_chunk"] = i
519
+ if include_original:
520
+ if hash_original:
521
+ new_scenario[field + "_original"] = hashlib.md5(
522
+ self[field].encode()
523
+ ).hexdigest()
524
+ else:
525
+ new_scenario[field + "_original"] = self[field]
526
+ scenarios.append(new_scenario)
527
+ return ScenarioList(scenarios)
439
528
 
440
529
  @classmethod
441
530
  @remove_edsl_version
@@ -458,30 +547,44 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
458
547
  return cls(d)
459
548
 
460
549
  def _table(self) -> tuple[dict, list]:
461
- """Prepare generic table data.
462
- >>> s = Scenario({"food": "wood chips"})
463
- >>> s._table()
464
- ([{'Attribute': 'data', 'Value': "{'food': 'wood chips'}"}, {'Attribute': 'name', 'Value': 'None'}], ['Attribute', 'Value'])
465
- """
550
+ """Prepare generic table data."""
466
551
  table_data = []
467
552
  for attr_name, attr_value in self.__dict__.items():
468
553
  table_data.append({"Attribute": attr_name, "Value": repr(attr_value)})
469
554
  column_names = ["Attribute", "Value"]
470
555
  return table_data, column_names
471
556
 
557
+ def rich_print(self) -> "Table":
558
+ """Display an object as a rich table."""
559
+ from rich.table import Table
560
+
561
+ table_data, column_names = self._table()
562
+ table = Table(title=f"{self.__class__.__name__} Attributes")
563
+ for column in column_names:
564
+ table.add_column(column, style="bold")
565
+
566
+ for row in table_data:
567
+ row_data = [row[column] for column in column_names]
568
+ table.add_row(*row_data)
569
+
570
+ return table
571
+
472
572
  @classmethod
473
- def example(cls, randomize: bool = False) -> Scenario:
573
+ def example(cls, randomize: bool = False, has_image=False) -> Scenario:
474
574
  """
475
575
  Returns an example Scenario instance.
476
576
 
477
577
  :param randomize: If True, adds a random string to the value of the example key.
478
578
  """
479
- addition = "" if not randomize else str(uuid4())
480
- return cls(
481
- {
482
- "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
483
- }
484
- )
579
+ if not has_image:
580
+ addition = "" if not randomize else str(uuid4())
581
+ return cls(
582
+ {
583
+ "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
584
+ }
585
+ )
586
+ else:
587
+ return cls.from_image(cls.example_image())
485
588
 
486
589
  def code(self) -> List[str]:
487
590
  """Return the code for the scenario."""
@@ -1,4 +1,7 @@
1
+ import requests
1
2
  from typing import Optional
3
+ from requests.adapters import HTTPAdapter
4
+ from requests.packages.urllib3.util.retry import Retry
2
5
 
3
6
 
4
7
  class ScenarioHtmlMixin:
@@ -19,10 +22,6 @@ class ScenarioHtmlMixin:
19
22
 
20
23
  def fetch_html(url):
21
24
  # Define the user-agent to mimic a browser
22
- import requests
23
- from requests.adapters import HTTPAdapter
24
- from requests.packages.urllib3.util.retry import Retry
25
-
26
25
  headers = {
27
26
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
28
27
  }
@@ -1,9 +1,9 @@
1
1
  from __future__ import annotations
2
2
  from typing import Union, TYPE_CHECKING
3
3
 
4
- if TYPE_CHECKING:
5
- from edsl.scenarios.ScenarioList import ScenarioList
6
- from edsl.scenarios.Scenario import Scenario
4
+ # if TYPE_CHECKING:
5
+ from edsl.scenarios.ScenarioList import ScenarioList
6
+ from edsl.scenarios.Scenario import Scenario
7
7
 
8
8
 
9
9
  class ScenarioJoin:
@@ -23,7 +23,7 @@ class ScenarioJoin:
23
23
  self.left = left
24
24
  self.right = right
25
25
 
26
- def left_join(self, by: Union[str, list[str]]) -> "ScenarioList":
26
+ def left_join(self, by: Union[str, list[str]]) -> ScenarioList:
27
27
  """Perform a left join between the two ScenarioLists.
28
28
 
29
29
  Args:
@@ -35,8 +35,6 @@ class ScenarioJoin:
35
35
  Raises:
36
36
  ValueError: If by is empty or if any join keys don't exist in both ScenarioLists
37
37
  """
38
- from edsl.scenarios.ScenarioList import ScenarioList
39
-
40
38
  self._validate_join_keys(by)
41
39
  by_keys = [by] if isinstance(by, str) else by
42
40
 
@@ -88,8 +86,6 @@ class ScenarioJoin:
88
86
  self, by_keys: list[str], other_dict: dict, all_keys: set
89
87
  ) -> list[Scenario]:
90
88
  """Create the joined scenarios."""
91
- from edsl.scenarios.Scenario import Scenario
92
-
93
89
  new_scenarios = []
94
90
 
95
91
  for scenario in self.left:
@@ -109,8 +105,8 @@ class ScenarioJoin:
109
105
  def _handle_matching_scenario(
110
106
  self,
111
107
  new_scenario: dict,
112
- left_scenario: "Scenario",
113
- right_scenario: "Scenario",
108
+ left_scenario: Scenario,
109
+ right_scenario: Scenario,
114
110
  by_keys: list[str],
115
111
  ) -> None:
116
112
  """Handle merging of matching scenarios and conflict warnings."""