edsl 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +136 -221
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +154 -85
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +48 -47
  23. edsl/conjure/Conjure.py +6 -0
  24. edsl/coop/PriceFetcher.py +58 -0
  25. edsl/coop/coop.py +50 -7
  26. edsl/data/Cache.py +35 -1
  27. edsl/data/CacheHandler.py +3 -4
  28. edsl/data_transfer_models.py +73 -38
  29. edsl/enums.py +8 -0
  30. edsl/exceptions/general.py +10 -8
  31. edsl/exceptions/language_models.py +25 -1
  32. edsl/exceptions/questions.py +62 -5
  33. edsl/exceptions/results.py +4 -0
  34. edsl/inference_services/AnthropicService.py +13 -11
  35. edsl/inference_services/AwsBedrock.py +112 -0
  36. edsl/inference_services/AzureAI.py +214 -0
  37. edsl/inference_services/DeepInfraService.py +4 -3
  38. edsl/inference_services/GoogleService.py +16 -12
  39. edsl/inference_services/GroqService.py +5 -4
  40. edsl/inference_services/InferenceServiceABC.py +58 -3
  41. edsl/inference_services/InferenceServicesCollection.py +13 -8
  42. edsl/inference_services/MistralAIService.py +120 -0
  43. edsl/inference_services/OllamaService.py +18 -0
  44. edsl/inference_services/OpenAIService.py +55 -56
  45. edsl/inference_services/TestService.py +80 -0
  46. edsl/inference_services/TogetherAIService.py +170 -0
  47. edsl/inference_services/models_available_cache.py +25 -0
  48. edsl/inference_services/registry.py +19 -1
  49. edsl/jobs/Answers.py +10 -12
  50. edsl/jobs/FailedQuestion.py +78 -0
  51. edsl/jobs/Jobs.py +137 -41
  52. edsl/jobs/buckets/BucketCollection.py +24 -15
  53. edsl/jobs/buckets/TokenBucket.py +105 -18
  54. edsl/jobs/interviews/Interview.py +393 -83
  55. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +22 -18
  56. edsl/jobs/interviews/InterviewExceptionEntry.py +167 -0
  57. edsl/jobs/runners/JobsRunnerAsyncio.py +152 -160
  58. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  59. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  60. edsl/jobs/tasks/TaskCreators.py +1 -1
  61. edsl/jobs/tasks/TaskHistory.py +205 -126
  62. edsl/language_models/LanguageModel.py +297 -177
  63. edsl/language_models/ModelList.py +2 -2
  64. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  65. edsl/language_models/fake_openai_call.py +15 -0
  66. edsl/language_models/fake_openai_service.py +61 -0
  67. edsl/language_models/registry.py +25 -8
  68. edsl/language_models/repair.py +0 -19
  69. edsl/language_models/utilities.py +61 -0
  70. edsl/notebooks/Notebook.py +20 -2
  71. edsl/prompts/Prompt.py +52 -2
  72. edsl/questions/AnswerValidatorMixin.py +23 -26
  73. edsl/questions/QuestionBase.py +330 -249
  74. edsl/questions/QuestionBaseGenMixin.py +133 -0
  75. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  76. edsl/questions/QuestionBudget.py +99 -42
  77. edsl/questions/QuestionCheckBox.py +227 -36
  78. edsl/questions/QuestionExtract.py +98 -28
  79. edsl/questions/QuestionFreeText.py +47 -31
  80. edsl/questions/QuestionFunctional.py +7 -0
  81. edsl/questions/QuestionList.py +141 -23
  82. edsl/questions/QuestionMultipleChoice.py +159 -66
  83. edsl/questions/QuestionNumerical.py +88 -47
  84. edsl/questions/QuestionRank.py +182 -25
  85. edsl/questions/Quick.py +41 -0
  86. edsl/questions/RegisterQuestionsMeta.py +31 -12
  87. edsl/questions/ResponseValidatorABC.py +170 -0
  88. edsl/questions/__init__.py +3 -4
  89. edsl/questions/decorators.py +21 -0
  90. edsl/questions/derived/QuestionLikertFive.py +10 -5
  91. edsl/questions/derived/QuestionLinearScale.py +15 -2
  92. edsl/questions/derived/QuestionTopK.py +10 -1
  93. edsl/questions/derived/QuestionYesNo.py +24 -3
  94. edsl/questions/descriptors.py +43 -7
  95. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  96. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  97. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  98. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  99. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  100. edsl/questions/prompt_templates/question_list.jinja +17 -0
  101. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  102. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  103. edsl/questions/question_registry.py +6 -2
  104. edsl/questions/templates/__init__.py +0 -0
  105. edsl/questions/templates/budget/__init__.py +0 -0
  106. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  107. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  108. edsl/questions/templates/checkbox/__init__.py +0 -0
  109. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  110. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  111. edsl/questions/templates/extract/__init__.py +0 -0
  112. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  113. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  114. edsl/questions/templates/free_text/__init__.py +0 -0
  115. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  116. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  117. edsl/questions/templates/likert_five/__init__.py +0 -0
  118. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  119. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  120. edsl/questions/templates/linear_scale/__init__.py +0 -0
  121. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  122. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  123. edsl/questions/templates/list/__init__.py +0 -0
  124. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  125. edsl/questions/templates/list/question_presentation.jinja +5 -0
  126. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  127. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  128. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  129. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  130. edsl/questions/templates/numerical/__init__.py +0 -0
  131. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  132. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  133. edsl/questions/templates/rank/__init__.py +0 -0
  134. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  135. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  136. edsl/questions/templates/top_k/__init__.py +0 -0
  137. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  138. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  139. edsl/questions/templates/yes_no/__init__.py +0 -0
  140. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  141. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  142. edsl/results/Dataset.py +20 -0
  143. edsl/results/DatasetExportMixin.py +58 -30
  144. edsl/results/DatasetTree.py +145 -0
  145. edsl/results/Result.py +32 -5
  146. edsl/results/Results.py +135 -46
  147. edsl/results/ResultsDBMixin.py +3 -3
  148. edsl/results/Selector.py +118 -0
  149. edsl/results/tree_explore.py +115 -0
  150. edsl/scenarios/FileStore.py +71 -10
  151. edsl/scenarios/Scenario.py +109 -24
  152. edsl/scenarios/ScenarioImageMixin.py +2 -2
  153. edsl/scenarios/ScenarioList.py +546 -21
  154. edsl/scenarios/ScenarioListExportMixin.py +24 -4
  155. edsl/scenarios/ScenarioListPdfMixin.py +153 -4
  156. edsl/study/SnapShot.py +8 -1
  157. edsl/study/Study.py +32 -0
  158. edsl/surveys/Rule.py +15 -3
  159. edsl/surveys/RuleCollection.py +21 -5
  160. edsl/surveys/Survey.py +707 -298
  161. edsl/surveys/SurveyExportMixin.py +71 -9
  162. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  163. edsl/surveys/SurveyQualtricsImport.py +284 -0
  164. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  165. edsl/surveys/instructions/Instruction.py +34 -0
  166. edsl/surveys/instructions/InstructionCollection.py +77 -0
  167. edsl/surveys/instructions/__init__.py +0 -0
  168. edsl/templates/error_reporting/base.html +24 -0
  169. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  170. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  171. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  172. edsl/templates/error_reporting/interview_details.html +116 -0
  173. edsl/templates/error_reporting/interviews.html +10 -0
  174. edsl/templates/error_reporting/overview.html +5 -0
  175. edsl/templates/error_reporting/performance_plot.html +2 -0
  176. edsl/templates/error_reporting/report.css +74 -0
  177. edsl/templates/error_reporting/report.html +118 -0
  178. edsl/templates/error_reporting/report.js +25 -0
  179. edsl/utilities/utilities.py +40 -1
  180. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/METADATA +8 -2
  181. edsl-0.1.33.dist-info/RECORD +295 -0
  182. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -271
  183. edsl/jobs/interviews/retry_management.py +0 -37
  184. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -303
  185. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  186. edsl-0.1.31.dev4.dist-info/RECORD +0 -204
  187. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  188. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -0,0 +1,115 @@
1
+ from collections import defaultdict
2
+ from typing import List, Dict, Any
3
+ import json
4
+
5
+
6
class FoldableHTMLTableGenerator:
    """Render flat records as a nested, collapsible HTML table.

    Records are grouped by an ordered list of "fold" attributes. Each
    distinct attribute value becomes a collapsible section, and the fields
    that are neither folded on nor dropped are shown as table rows at the
    innermost level.
    """

    def __init__(self, data: List[Dict[str, Any]]):
        """Store the records to group.

        :param data: One dictionary per record.
        """
        self.data = data

    def tree(self, fold_attributes: List[str], drop: List[str] = None) -> Dict:
        """Group the records into a nested mapping keyed by the fold attributes.

        :param fold_attributes: Record keys to nest by, outermost first.
        :param drop: Record keys to leave out of the leaf rows (optional).
        :return: Nested ``defaultdict``; each innermost level holds a
            ``"_rows"`` list with the remaining fields of every record that
            shares that path.
        :raises KeyError: If a record lacks one of ``fold_attributes``.
        """

        def nested_dict():
            return defaultdict(nested_dict)

        result = nested_dict()
        drop = drop or []  # Use an empty list if drop is None

        for item in self.data:
            current = result
            for attr in fold_attributes:
                current = current[item[attr]]

            row = {
                k: v
                for k, v in item.items()
                if k not in fold_attributes and k not in drop
            }
            # setdefault does not trigger the defaultdict factory, so the
            # leaf gets a plain list rather than another nested dict.
            current.setdefault("_rows", []).append(row)

        return result

    @staticmethod
    def _json_for_script(value) -> str:
        """Serialize *value* as JSON that is safe inside an inline ``<script>``.

        ``<`` can only occur inside JSON string literals, so replacing it
        with its ``\\u003c`` escape keeps the decoded data identical while
        preventing a value such as ``</script>`` from terminating the
        script element early.
        """
        return json.dumps(value).replace("<", "\\u003c")

    def generate_html(self, tree, fold_attributes: List[str]) -> str:
        """Build a standalone HTML page that displays *tree* with foldable sections.

        :param tree: Nested mapping as produced by :meth:`tree`.
        :param fold_attributes: Attribute names, outermost first; used to
            label each nesting level.
        :return: The complete HTML document as a string.
        """
        # NOTE: the template is filled with %-formatting, so it must not
        # contain any literal percent sign other than the two %s slots.
        html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Foldable Nested Table</title>
    <style>
        .folding-section { margin-left: 20px; }
        .fold-button { cursor: pointer; margin: 5px 0; }
        table { border-collapse: collapse; margin-top: 10px; }
        th, td { border: 1px solid black; padding: 5px; }
        .attribute-label { font-weight: bold; }
    </style>
</head>
<body>
    <div id="root"></div>
    <script>
        function toggleFold(id) {
            const element = document.getElementById(id);
            element.style.display = element.style.display === 'none' ? 'block' : 'none';
        }

        function createFoldableSection(data, path = [], attributes = %s) {
            const container = document.createElement('div');
            container.className = 'folding-section';

            for (const [key, value] of Object.entries(data)) {
                if (key === '_rows') {
                    const table = document.createElement('table');
                    const headerRow = table.insertRow();
                    const headers = Object.keys(value[0]);
                    headers.forEach(header => {
                        const th = document.createElement('th');
                        th.textContent = header;
                        headerRow.appendChild(th);
                    });
                    value.forEach(row => {
                        const tableRow = table.insertRow();
                        headers.forEach(header => {
                            const cell = tableRow.insertCell();
                            cell.textContent = row[header];
                        });
                    });
                    container.appendChild(table);
                } else {
                    const button = document.createElement('button');
                    const attributeType = attributes[path.length];
                    // Build the label from DOM nodes so that keys taken from
                    // the data are always treated as text, never as markup.
                    const label = document.createElement('span');
                    label.className = 'attribute-label';
                    label.textContent = `${attributeType}:`;
                    button.appendChild(label);
                    button.appendChild(document.createTextNode(` ${key}`));
                    button.className = 'fold-button';
                    const sectionId = `section-${path.join('-')}-${key}`;
                    button.onclick = () => toggleFold(sectionId);
                    container.appendChild(button);

                    const section = document.createElement('div');
                    section.id = sectionId;
                    section.style.display = 'none';
                    section.appendChild(createFoldableSection(value, [...path, key], attributes));
                    container.appendChild(section);
                }
            }

            return container;
        }

        const treeData = %s;
        document.getElementById('root').appendChild(createFoldableSection(treeData));
    </script>
</body>
</html>
"""

        return html_content % (
            self._json_for_script(fold_attributes),
            self._json_for_script(tree),
        )

    def save_html(
        self,
        fold_attributes: List[str],
        filename: str = "output.html",
        drop: List[str] = None,
    ):
        """Group the stored records and write the resulting page to *filename*.

        :param fold_attributes: Record keys to nest by, outermost first.
        :param filename: Destination path; written as UTF-8.
        :param drop: Record keys to leave out of the table rows (optional).
        """
        tree = self.tree(fold_attributes, drop)
        html_content = self.generate_html(tree, fold_attributes)

        with open(filename, "w", encoding="utf-8") as f:
            f.write(html_content)

        print(f"HTML file has been generated: {filename}")
@@ -120,14 +120,22 @@ class FileStore(Scenario):
120
120
  return info
121
121
 
122
122
  @classmethod
123
- def pull(cls, uuid):
124
- scenario_version = Scenario.pull(uuid)
123
+ def pull(cls, uuid, expected_parrot_url: Optional[str] = None):
124
+ scenario_version = Scenario.pull(uuid, expected_parrot_url=expected_parrot_url)
125
125
  return cls.from_dict(scenario_version.to_dict())
126
126
 
127
127
 
128
128
  class CSVFileStore(FileStore):
129
- def __init__(self, filename):
130
- super().__init__(filename, suffix=".csv")
129
+ def __init__(
130
+ self,
131
+ filename,
132
+ binary: Optional[bool] = None,
133
+ suffix: Optional[str] = None,
134
+ base64_string: Optional[str] = None,
135
+ ):
136
+ super().__init__(
137
+ filename, binary=binary, base64_string=base64_string, suffix=".csv"
138
+ )
131
139
 
132
140
  @classmethod
133
141
  def example(cls):
@@ -147,8 +155,16 @@ class CSVFileStore(FileStore):
147
155
 
148
156
 
149
157
  class PDFFileStore(FileStore):
150
- def __init__(self, filename):
151
- super().__init__(filename, suffix=".pdf")
158
+ def __init__(
159
+ self,
160
+ filename,
161
+ binary: Optional[bool] = None,
162
+ suffix: Optional[str] = None,
163
+ base64_string: Optional[str] = None,
164
+ ):
165
+ super().__init__(
166
+ filename, binary=binary, base64_string=base64_string, suffix=".pdf"
167
+ )
152
168
 
153
169
  def view(self):
154
170
  pdf_path = self.to_tempfile()
@@ -225,8 +241,16 @@ class PDFFileStore(FileStore):
225
241
 
226
242
 
227
243
  class PNGFileStore(FileStore):
228
- def __init__(self, filename):
229
- super().__init__(filename, suffix=".png")
244
+ def __init__(
245
+ self,
246
+ filename,
247
+ binary: Optional[bool] = None,
248
+ suffix: Optional[str] = None,
249
+ base64_string: Optional[str] = None,
250
+ ):
251
+ super().__init__(
252
+ filename, binary=binary, base64_string=base64_string, suffix=".png"
253
+ )
230
254
 
231
255
  @classmethod
232
256
  def example(cls):
@@ -251,8 +275,16 @@ class PNGFileStore(FileStore):
251
275
 
252
276
 
253
277
  class SQLiteFileStore(FileStore):
254
- def __init__(self, filename):
255
- super().__init__(filename, suffix=".sqlite")
278
+ def __init__(
279
+ self,
280
+ filename,
281
+ binary: Optional[bool] = None,
282
+ suffix: Optional[str] = None,
283
+ base64_string: Optional[str] = None,
284
+ ):
285
+ super().__init__(
286
+ filename, binary=binary, base64_string=base64_string, suffix=".sqlite"
287
+ )
256
288
 
257
289
  @classmethod
258
290
  def example(cls):
@@ -265,6 +297,8 @@ class SQLiteFileStore(FileStore):
265
297
  c.execute("""CREATE TABLE stocks (date text)""")
266
298
  conn.commit()
267
299
 
300
+ return cls(f.name)
301
+
268
302
  def view(self):
269
303
  import subprocess
270
304
  import os
@@ -273,6 +307,33 @@ class SQLiteFileStore(FileStore):
273
307
  os.system(f"sqlite3 {sqlite_path}")
274
308
 
275
309
 
310
class HTMLFileStore(FileStore):
    """FileStore variant for HTML documents; the suffix is always ``.html``."""

    def __init__(
        self,
        filename,
        binary: Optional[bool] = None,
        suffix: Optional[str] = None,
        base64_string: Optional[str] = None,
    ):
        """Initialize the store for *filename*.

        ``suffix`` is accepted for signature compatibility but is not
        forwarded: the parent is always given ``".html"``.
        """
        super().__init__(
            filename, suffix=".html", binary=binary, base64_string=base64_string
        )

    @classmethod
    def example(cls):
        """Return an instance backed by a small temporary HTML file."""
        import tempfile

        markup = "<html><body><h1>Test</h1></body></html>"
        # delete=False keeps the file on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as handle:
            handle.write(markup.encode())
        return cls(handle.name)

    def view(self):
        """Write the content to a temporary file and open it in the web browser."""
        import webbrowser

        webbrowser.open("file://" + self.to_tempfile())
335
+
336
+
276
337
  if __name__ == "__main__":
277
338
  # file_path = "../conjure/examples/Ex11-2.sav"
278
339
  # fs = FileStore(file_path)
@@ -5,6 +5,10 @@ import copy
5
5
  import base64
6
6
  import hashlib
7
7
  import os
8
+ import reprlib
9
+ import imghdr
10
+
11
+
8
12
  from collections import UserDict
9
13
  from typing import Union, List, Optional, Generator
10
14
  from uuid import uuid4
@@ -13,6 +17,8 @@ from edsl.scenarios.ScenarioImageMixin import ScenarioImageMixin
13
17
  from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
14
18
  from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
15
19
 
20
+ from edsl.data_transfer_models import ImageInfo
21
+
16
22
 
17
23
  class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
18
24
  """A Scenario is a dictionary of keys/values.
@@ -49,6 +55,39 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
49
55
  self._has_image = False
50
56
  return self._has_image
51
57
 
58
@property
def has_jinja_braces(self) -> bool:
    """Return whether the scenario has jinja braces. This matters for rendering.

    A value counts when its string form contains both ``{{`` and ``}}``.
    Both delimiters are looked up in the same string form, so non-string
    values (lists, numbers, arbitrary objects) are handled uniformly and
    never raise.

    >>> s = Scenario({"food": "I love {{wood chips}}"})
    >>> s.has_jinja_braces
    True
    """
    for value in self.values():
        text = str(value)
        if "{{" in text and "}}" in text:
            return True
    return False
70
+
71
+ def convert_jinja_braces(
72
+ self, replacement_left="<<", replacement_right=">>"
73
+ ) -> Scenario:
74
+ """Convert Jinja braces to some other character.
75
+
76
+ >>> s = Scenario({"food": "I love {{wood chips}}"})
77
+ >>> s.convert_jinja_braces()
78
+ Scenario({'food': 'I love <<wood chips>>'})
79
+
80
+ """
81
+ new_scenario = Scenario()
82
+ for key, value in self.items():
83
+ if isinstance(value, str):
84
+ new_scenario[key] = value.replace("{{", replacement_left).replace(
85
+ "}}", replacement_right
86
+ )
87
+ else:
88
+ new_scenario[key] = value
89
+ return new_scenario
90
+
52
91
  @has_image.setter
53
92
  def has_image(self, value):
54
93
  self._has_image = value
@@ -142,6 +181,7 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
142
181
  print_json(json.dumps(self.to_dict()))
143
182
 
144
183
  def __repr__(self):
184
+ # return "Scenario(" + reprlib.repr(self.data) + ")"
145
185
  return "Scenario(" + repr(self.data) + ")"
146
186
 
147
187
  def _repr_html_(self):
@@ -183,32 +223,67 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
183
223
  return new_scenario
184
224
 
185
225
  @classmethod
186
- def from_image(cls, image_path: str) -> str:
187
- """Creates a scenario with a base64 encoding of an image.
226
def from_url(cls, url: str, field_name: Optional[str] = "text") -> "Scenario":
    """Build a scenario from the body of an HTTP GET on *url*.

    :param url: The URL to fetch.
    :param field_name: Key under which the response text is stored.
    :return: A scenario holding the URL under ``"url"`` and the response
        text under ``field_name``.
    """
    import requests

    response = requests.get(url)
    contents = {"url": url}
    # Assigned second so that field_name == "url" overrides the URL entry,
    # as it would in a single dict literal.
    contents[field_name] = response.text
    return cls(contents)
237
+
238
+ @classmethod
239
+ def from_image(
240
+ cls, image_path: str, image_name: Optional[str] = None
241
+ ) -> "Scenario":
242
+ """
243
+ Creates a scenario with a base64 encoding of an image.
244
+
245
+ Args:
246
+ image_path (str): Path to the image file.
247
+
248
+ Returns:
249
+ Scenario: A new Scenario instance with image information.
190
250
 
251
+ Example:
191
252
  >>> s = Scenario.from_image(Scenario.example_image())
192
253
  >>> s
193
- Scenario({'file_path': '...', 'encoded_image': '...'})
254
+ Scenario({'logo': ...})
194
255
  """
256
+ if not os.path.exists(image_path):
257
+ raise FileNotFoundError(f"Image file not found: {image_path}")
258
+
195
259
  with open(image_path, "rb") as image_file:
196
- s = cls(
197
- {
198
- "file_path": image_path,
199
- "encoded_image": base64.b64encode(image_file.read()).decode(
200
- "utf-8"
201
- ),
202
- }
203
- )
204
- s.has_image = True
205
- return s
260
+ file_content = image_file.read()
261
+
262
+ file_name = os.path.basename(image_path)
263
+ file_size = os.path.getsize(image_path)
264
+ image_format = imghdr.what(image_path) or "unknown"
265
+
266
+ if image_name is None:
267
+ image_name = file_name.split(".")[0]
268
+
269
+ image_info = ImageInfo(
270
+ file_path=image_path,
271
+ file_name=file_name,
272
+ image_format=image_format,
273
+ file_size=file_size,
274
+ encoded_image=base64.b64encode(file_content).decode("utf-8"),
275
+ )
276
+
277
+ scenario_data = {image_name: image_info}
278
+ s = cls(scenario_data)
279
+ s.has_image = True
280
+ return s
206
281
 
207
282
  @classmethod
208
283
  def from_pdf(cls, pdf_path):
209
- import fitz # PyMuPDF
210
-
211
284
  # Ensure the file exists
285
+ import fitz
286
+
212
287
  if not os.path.exists(pdf_path):
213
288
  raise FileNotFoundError(f"The file {pdf_path} does not exist.")
214
289
 
@@ -222,7 +297,14 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
222
297
  text = ""
223
298
  for page_num in range(len(document)):
224
299
  page = document.load_page(page_num)
225
- text = text + page.get_text()
300
+ blocks = page.get_text("blocks") # Extract text blocks
301
+
302
+ # Sort blocks by their vertical position (y0) to maintain reading order
303
+ blocks.sort(key=lambda b: (b[1], b[0])) # Sort by y0 first, then x0
304
+
305
+ # Combine the text blocks in order
306
+ for block in blocks:
307
+ text += block[4] + "\n"
226
308
 
227
309
  # Create a dictionary for the combined text
228
310
  page_info = {"filename": filename, "text": text}
@@ -409,18 +491,21 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
409
491
  return table
410
492
 
411
493
  @classmethod
412
- def example(cls, randomize: bool = False) -> Scenario:
494
+ def example(cls, randomize: bool = False, has_image=False) -> Scenario:
413
495
  """
414
496
  Returns an example Scenario instance.
415
497
 
416
498
  :param randomize: If True, adds a random string to the value of the example key.
417
499
  """
418
- addition = "" if not randomize else str(uuid4())
419
- return cls(
420
- {
421
- "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
422
- }
423
- )
500
+ if not has_image:
501
+ addition = "" if not randomize else str(uuid4())
502
+ return cls(
503
+ {
504
+ "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
505
+ }
506
+ )
507
+ else:
508
+ return cls.from_image(cls.example_image())
424
509
 
425
510
  def code(self) -> List[str]:
426
511
  """Return the code for the scenario."""
@@ -13,7 +13,7 @@ class ScenarioImageMixin:
13
13
  >>> from edsl.scenarios.Scenario import Scenario
14
14
  >>> s = Scenario({"food": "wood chips"})
15
15
  >>> s.add_image(Scenario.example_image())
16
- Scenario({'food': 'wood chips', 'file_path': '...', 'encoded_image': '...'})
16
+ Scenario({'food': 'wood chips', 'logo': ...})
17
17
  """
18
18
  new_scenario = self.from_image(image_path)
19
19
  return self + new_scenario
@@ -33,7 +33,7 @@ class ScenarioImageMixin:
33
33
  >>> from edsl.scenarios.Scenario import Scenario
34
34
  >>> s = Scenario.from_image(Scenario.example_image())
35
35
  >>> s
36
- Scenario({'file_path': '...', 'encoded_image': '...'})
36
+ Scenario({'logo': ...})
37
37
  """
38
38
 
39
39
  if image_path.startswith("http://") or image_path.startswith("https://"):