edsl 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +135 -219
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +138 -89
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +47 -56
  23. edsl/coop/PriceFetcher.py +58 -0
  24. edsl/coop/coop.py +50 -7
  25. edsl/data/Cache.py +35 -1
  26. edsl/data_transfer_models.py +73 -38
  27. edsl/enums.py +4 -0
  28. edsl/exceptions/language_models.py +25 -1
  29. edsl/exceptions/questions.py +62 -5
  30. edsl/exceptions/results.py +4 -0
  31. edsl/inference_services/AnthropicService.py +13 -11
  32. edsl/inference_services/AwsBedrock.py +19 -17
  33. edsl/inference_services/AzureAI.py +37 -20
  34. edsl/inference_services/GoogleService.py +16 -12
  35. edsl/inference_services/GroqService.py +2 -0
  36. edsl/inference_services/InferenceServiceABC.py +58 -3
  37. edsl/inference_services/MistralAIService.py +120 -0
  38. edsl/inference_services/OpenAIService.py +48 -54
  39. edsl/inference_services/TestService.py +80 -0
  40. edsl/inference_services/TogetherAIService.py +170 -0
  41. edsl/inference_services/models_available_cache.py +0 -6
  42. edsl/inference_services/registry.py +6 -0
  43. edsl/jobs/Answers.py +10 -12
  44. edsl/jobs/FailedQuestion.py +78 -0
  45. edsl/jobs/Jobs.py +37 -22
  46. edsl/jobs/buckets/BucketCollection.py +24 -15
  47. edsl/jobs/buckets/TokenBucket.py +93 -14
  48. edsl/jobs/interviews/Interview.py +366 -78
  49. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +14 -68
  50. edsl/jobs/interviews/InterviewExceptionEntry.py +85 -19
  51. edsl/jobs/runners/JobsRunnerAsyncio.py +146 -175
  52. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  53. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  54. edsl/jobs/tasks/TaskHistory.py +148 -213
  55. edsl/language_models/LanguageModel.py +261 -156
  56. edsl/language_models/ModelList.py +2 -2
  57. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  58. edsl/language_models/fake_openai_call.py +15 -0
  59. edsl/language_models/fake_openai_service.py +61 -0
  60. edsl/language_models/registry.py +23 -6
  61. edsl/language_models/repair.py +0 -19
  62. edsl/language_models/utilities.py +61 -0
  63. edsl/notebooks/Notebook.py +20 -2
  64. edsl/prompts/Prompt.py +52 -2
  65. edsl/questions/AnswerValidatorMixin.py +23 -26
  66. edsl/questions/QuestionBase.py +330 -249
  67. edsl/questions/QuestionBaseGenMixin.py +133 -0
  68. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  69. edsl/questions/QuestionBudget.py +99 -41
  70. edsl/questions/QuestionCheckBox.py +227 -35
  71. edsl/questions/QuestionExtract.py +98 -27
  72. edsl/questions/QuestionFreeText.py +52 -29
  73. edsl/questions/QuestionFunctional.py +7 -0
  74. edsl/questions/QuestionList.py +141 -22
  75. edsl/questions/QuestionMultipleChoice.py +159 -65
  76. edsl/questions/QuestionNumerical.py +88 -46
  77. edsl/questions/QuestionRank.py +182 -24
  78. edsl/questions/Quick.py +41 -0
  79. edsl/questions/RegisterQuestionsMeta.py +31 -12
  80. edsl/questions/ResponseValidatorABC.py +170 -0
  81. edsl/questions/__init__.py +3 -4
  82. edsl/questions/decorators.py +21 -0
  83. edsl/questions/derived/QuestionLikertFive.py +10 -5
  84. edsl/questions/derived/QuestionLinearScale.py +15 -2
  85. edsl/questions/derived/QuestionTopK.py +10 -1
  86. edsl/questions/derived/QuestionYesNo.py +24 -3
  87. edsl/questions/descriptors.py +43 -7
  88. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  89. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  90. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  91. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  92. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  93. edsl/questions/prompt_templates/question_list.jinja +17 -0
  94. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  95. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  96. edsl/questions/question_registry.py +6 -2
  97. edsl/questions/templates/__init__.py +0 -0
  98. edsl/questions/templates/budget/__init__.py +0 -0
  99. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  100. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  101. edsl/questions/templates/checkbox/__init__.py +0 -0
  102. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  103. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  104. edsl/questions/templates/extract/__init__.py +0 -0
  105. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  106. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  107. edsl/questions/templates/free_text/__init__.py +0 -0
  108. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  109. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  110. edsl/questions/templates/likert_five/__init__.py +0 -0
  111. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  112. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  113. edsl/questions/templates/linear_scale/__init__.py +0 -0
  114. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  115. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  116. edsl/questions/templates/list/__init__.py +0 -0
  117. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  118. edsl/questions/templates/list/question_presentation.jinja +5 -0
  119. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  120. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  121. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  122. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  123. edsl/questions/templates/numerical/__init__.py +0 -0
  124. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  125. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  126. edsl/questions/templates/rank/__init__.py +0 -0
  127. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  128. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  129. edsl/questions/templates/top_k/__init__.py +0 -0
  130. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  131. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  132. edsl/questions/templates/yes_no/__init__.py +0 -0
  133. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  134. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  135. edsl/results/Dataset.py +20 -0
  136. edsl/results/DatasetExportMixin.py +46 -48
  137. edsl/results/DatasetTree.py +145 -0
  138. edsl/results/Result.py +32 -5
  139. edsl/results/Results.py +135 -46
  140. edsl/results/ResultsDBMixin.py +3 -3
  141. edsl/results/Selector.py +118 -0
  142. edsl/results/tree_explore.py +115 -0
  143. edsl/scenarios/FileStore.py +71 -10
  144. edsl/scenarios/Scenario.py +96 -25
  145. edsl/scenarios/ScenarioImageMixin.py +2 -2
  146. edsl/scenarios/ScenarioList.py +361 -39
  147. edsl/scenarios/ScenarioListExportMixin.py +9 -0
  148. edsl/scenarios/ScenarioListPdfMixin.py +150 -4
  149. edsl/study/SnapShot.py +8 -1
  150. edsl/study/Study.py +32 -0
  151. edsl/surveys/Rule.py +10 -1
  152. edsl/surveys/RuleCollection.py +21 -5
  153. edsl/surveys/Survey.py +637 -311
  154. edsl/surveys/SurveyExportMixin.py +71 -9
  155. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  156. edsl/surveys/SurveyQualtricsImport.py +75 -4
  157. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  158. edsl/surveys/instructions/Instruction.py +34 -0
  159. edsl/surveys/instructions/InstructionCollection.py +77 -0
  160. edsl/surveys/instructions/__init__.py +0 -0
  161. edsl/templates/error_reporting/base.html +24 -0
  162. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  163. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  164. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  165. edsl/templates/error_reporting/interview_details.html +116 -0
  166. edsl/templates/error_reporting/interviews.html +10 -0
  167. edsl/templates/error_reporting/overview.html +5 -0
  168. edsl/templates/error_reporting/performance_plot.html +2 -0
  169. edsl/templates/error_reporting/report.css +74 -0
  170. edsl/templates/error_reporting/report.html +118 -0
  171. edsl/templates/error_reporting/report.js +25 -0
  172. edsl/utilities/utilities.py +9 -1
  173. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/METADATA +5 -2
  174. edsl-0.1.33.dist-info/RECORD +295 -0
  175. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
  176. edsl/jobs/interviews/retry_management.py +0 -37
  177. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -333
  178. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  179. edsl-0.1.32.dist-info/RECORD +0 -209
  180. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  181. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -0,0 +1,115 @@
1
+ from collections import defaultdict
2
+ from typing import List, Dict, Any
3
+ import json
4
+
5
+
6
class FoldableHTMLTableGenerator:
    """Render a list of flat records as a nested, collapsible HTML page.

    Records are grouped level by level on the values of the chosen
    ``fold_attributes``; the remaining fields of each record are shown as a
    table at the innermost level.
    """

    def __init__(self, data: List[Dict[str, Any]]):
        """Store the records to be grouped.

        :param data: One dictionary per row; every row must contain each
            attribute later passed as a fold attribute.
        """
        self.data = data

    def tree(self, fold_attributes: List[str], drop: List[str] = None) -> Dict:
        """Group the records into a nested mapping keyed by the fold values.

        :param fold_attributes: Attribute names, outermost grouping first.
        :param drop: Attribute names to leave out of the leaf rows
            (``None`` means drop nothing).
        :return: Nested ``defaultdict`` whose innermost level stores the
            remaining fields of each record in a list under ``"_rows"``.
        :raises KeyError: If a record lacks one of the fold attributes.

        ``"_rows"`` is a reserved key: a record whose fold value is the
        literal string ``"_rows"`` would clash with the row list.
        """

        def nested_dict():
            return defaultdict(nested_dict)

        result = nested_dict()
        # Build the exclusion set once instead of scanning two lists per cell.
        excluded = set(fold_attributes) | set(drop or [])

        for item in self.data:
            current = result
            for attr in fold_attributes:
                current = current[item[attr]]

            row = {k: v for k, v in item.items() if k not in excluded}
            current.setdefault("_rows", []).append(row)

        return result

    @staticmethod
    def _json_for_script(value) -> str:
        """Serialize ``value`` as JSON that is safe inside an inline <script>.

        ``<``, ``>`` and ``&`` can only occur inside JSON string literals, so
        swapping them for ``\\uXXXX`` escapes keeps the decoded data identical
        while making it impossible for a value such as ``"</script>"`` to end
        the script element early.
        """
        return (
            json.dumps(value)
            .replace("<", "\\u003c")
            .replace(">", "\\u003e")
            .replace("&", "\\u0026")
        )

    def generate_html(self, tree, fold_attributes: List[str]) -> str:
        """Return a standalone HTML document that renders ``tree``.

        :param tree: Nested mapping as produced by :meth:`tree`.
        :param fold_attributes: The attribute names used to build ``tree``;
            they label the fold buttons at each nesting depth.
        :return: The complete HTML page as a string.
        """
        # NOTE: the template is filled with %-formatting, so it must not
        # contain any literal percent sign other than the two placeholders.
        html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Foldable Nested Table</title>
    <style>
        .folding-section { margin-left: 20px; }
        .fold-button { cursor: pointer; margin: 5px 0; }
        table { border-collapse: collapse; margin-top: 10px; }
        th, td { border: 1px solid black; padding: 5px; }
        .attribute-label { font-weight: bold; }
    </style>
</head>
<body>
    <div id="root"></div>
    <script>
        function toggleFold(section) {
            section.style.display = section.style.display === 'none' ? 'block' : 'none';
        }

        function createFoldableSection(data, path = [], attributes = %s) {
            const container = document.createElement('div');
            container.className = 'folding-section';

            for (const [key, value] of Object.entries(data)) {
                if (key === '_rows') {
                    const table = document.createElement('table');
                    const headerRow = table.insertRow();
                    const headers = Object.keys(value[0]);
                    headers.forEach(header => {
                        const th = document.createElement('th');
                        th.textContent = header;
                        headerRow.appendChild(th);
                    });
                    value.forEach(row => {
                        const tableRow = table.insertRow();
                        headers.forEach(header => {
                            const cell = tableRow.insertCell();
                            cell.textContent = row[header];
                        });
                    });
                    container.appendChild(table);
                } else {
                    const section = document.createElement('div');
                    section.id = `section-${path.join('-')}-${key}`;
                    section.style.display = 'none';

                    const button = document.createElement('button');
                    const attributeType = attributes[path.length];
                    // Build the label from text nodes so keys are never parsed as HTML.
                    const label = document.createElement('span');
                    label.className = 'attribute-label';
                    label.textContent = `${attributeType}:`;
                    button.appendChild(label);
                    button.appendChild(document.createTextNode(` ${key}`));
                    button.className = 'fold-button';
                    // Toggle the element itself; ids built from joined keys can collide.
                    button.onclick = () => toggleFold(section);
                    container.appendChild(button);

                    section.appendChild(createFoldableSection(value, [...path, key], attributes));
                    container.appendChild(section);
                }
            }

            return container;
        }

        const treeData = %s;
        document.getElementById('root').appendChild(createFoldableSection(treeData));
    </script>
</body>
</html>
"""

        return html_content % (
            self._json_for_script(fold_attributes),
            self._json_for_script(tree),
        )

    def save_html(
        self,
        fold_attributes: List[str],
        filename: str = "output.html",
        drop: List[str] = None,
    ):
        """Build the tree, render it and write the page to ``filename``.

        :param fold_attributes: Attribute names to group on, outermost first.
        :param filename: Destination path of the generated HTML file.
        :param drop: Attribute names to omit from the tables; forwarded to
            :meth:`tree`.
        """
        tree = self.tree(fold_attributes, drop=drop)
        html_content = self.generate_html(tree, fold_attributes)

        with open(filename, "w", encoding="utf-8") as f:
            f.write(html_content)

        print(f"HTML file has been generated: {filename}")
@@ -120,14 +120,22 @@ class FileStore(Scenario):
120
120
  return info
121
121
 
122
122
  @classmethod
123
- def pull(cls, uuid):
124
- scenario_version = Scenario.pull(uuid)
123
+ def pull(cls, uuid, expected_parrot_url: Optional[str] = None):
124
+ scenario_version = Scenario.pull(uuid, expected_parrot_url=expected_parrot_url)
125
125
  return cls.from_dict(scenario_version.to_dict())
126
126
 
127
127
 
128
128
  class CSVFileStore(FileStore):
129
- def __init__(self, filename):
130
- super().__init__(filename, suffix=".csv")
129
+ def __init__(
130
+ self,
131
+ filename,
132
+ binary: Optional[bool] = None,
133
+ suffix: Optional[str] = None,
134
+ base64_string: Optional[str] = None,
135
+ ):
136
+ super().__init__(
137
+ filename, binary=binary, base64_string=base64_string, suffix=".csv"
138
+ )
131
139
 
132
140
  @classmethod
133
141
  def example(cls):
@@ -147,8 +155,16 @@ class CSVFileStore(FileStore):
147
155
 
148
156
 
149
157
  class PDFFileStore(FileStore):
150
- def __init__(self, filename):
151
- super().__init__(filename, suffix=".pdf")
158
+ def __init__(
159
+ self,
160
+ filename,
161
+ binary: Optional[bool] = None,
162
+ suffix: Optional[str] = None,
163
+ base64_string: Optional[str] = None,
164
+ ):
165
+ super().__init__(
166
+ filename, binary=binary, base64_string=base64_string, suffix=".pdf"
167
+ )
152
168
 
153
169
  def view(self):
154
170
  pdf_path = self.to_tempfile()
@@ -225,8 +241,16 @@ class PDFFileStore(FileStore):
225
241
 
226
242
 
227
243
  class PNGFileStore(FileStore):
228
- def __init__(self, filename):
229
- super().__init__(filename, suffix=".png")
244
+ def __init__(
245
+ self,
246
+ filename,
247
+ binary: Optional[bool] = None,
248
+ suffix: Optional[str] = None,
249
+ base64_string: Optional[str] = None,
250
+ ):
251
+ super().__init__(
252
+ filename, binary=binary, base64_string=base64_string, suffix=".png"
253
+ )
230
254
 
231
255
  @classmethod
232
256
  def example(cls):
@@ -251,8 +275,16 @@ class PNGFileStore(FileStore):
251
275
 
252
276
 
253
277
  class SQLiteFileStore(FileStore):
254
- def __init__(self, filename):
255
- super().__init__(filename, suffix=".sqlite")
278
+ def __init__(
279
+ self,
280
+ filename,
281
+ binary: Optional[bool] = None,
282
+ suffix: Optional[str] = None,
283
+ base64_string: Optional[str] = None,
284
+ ):
285
+ super().__init__(
286
+ filename, binary=binary, base64_string=base64_string, suffix=".sqlite"
287
+ )
256
288
 
257
289
  @classmethod
258
290
  def example(cls):
@@ -265,6 +297,8 @@ class SQLiteFileStore(FileStore):
265
297
  c.execute("""CREATE TABLE stocks (date text)""")
266
298
  conn.commit()
267
299
 
300
+ return cls(f.name)
301
+
268
302
  def view(self):
269
303
  import subprocess
270
304
  import os
@@ -273,6 +307,33 @@ class SQLiteFileStore(FileStore):
273
307
  os.system(f"sqlite3 {sqlite_path}")
274
308
 
275
309
 
310
class HTMLFileStore(FileStore):
    """A ``FileStore`` specialised for HTML documents."""

    def __init__(
        self,
        filename,
        binary: Optional[bool] = None,
        suffix: Optional[str] = None,
        base64_string: Optional[str] = None,
    ):
        """Create the store, always using the ``.html`` suffix.

        :param filename: Path of the HTML file.
        :param binary: Forwarded unchanged to ``FileStore``.
        :param suffix: Accepted but ignored; ``".html"`` is always used.
            NOTE(review): presumably kept so callers that pass the generic
            ``FileStore`` keyword set keep working -- confirm.
        :param base64_string: Forwarded unchanged to ``FileStore``.
        """
        super().__init__(
            filename, binary=binary, base64_string=base64_string, suffix=".html"
        )

    @classmethod
    def example(cls):
        """Return an instance backed by a small temporary HTML file."""
        import tempfile

        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
            f.write("<html><body><h1>Test</h1></body></html>".encode())
        # Construct only after the handle is closed so the content is flushed
        # to disk before the file is read back.
        return cls(f.name)

    def view(self):
        """Write the content to a temporary file and open it in a browser."""
        import webbrowser
        from pathlib import Path

        html_path = self.to_tempfile()
        # as_uri() yields a well-formed file URI (three slashes, drive letters,
        # percent-encoding) where plain "file://" + path does not.
        webbrowser.open(Path(html_path).resolve().as_uri())
335
+
336
+
276
337
  if __name__ == "__main__":
277
338
  # file_path = "../conjure/examples/Ex11-2.sav"
278
339
  # fs = FileStore(file_path)
@@ -5,6 +5,10 @@ import copy
5
5
  import base64
6
6
  import hashlib
7
7
  import os
8
+ import reprlib
9
+ import imghdr
10
+
11
+
8
12
  from collections import UserDict
9
13
  from typing import Union, List, Optional, Generator
10
14
  from uuid import uuid4
@@ -13,6 +17,8 @@ from edsl.scenarios.ScenarioImageMixin import ScenarioImageMixin
13
17
  from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
14
18
  from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
15
19
 
20
+ from edsl.data_transfer_models import ImageInfo
21
+
16
22
 
17
23
  class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
18
24
  """A Scenario is a dictionary of keys/values.
@@ -49,6 +55,39 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
49
55
  self._has_image = False
50
56
  return self._has_image
51
57
 
58
@property
def has_jinja_braces(self) -> bool:
    """Return whether any value in the scenario contains Jinja braces.

    A value counts when its string form contains both ``{{`` and ``}}``.
    Both markers are looked up in the same string form, so braces nested
    inside lists or dicts are detected too. This matters for rendering.

    >>> s = Scenario({"food": "I love {{wood chips}}"})
    >>> s.has_jinja_braces
    True
    >>> Scenario({"food": "wood chips", "count": 3}).has_jinja_braces
    False
    >>> Scenario({"foods": ["{{ a }}", "b"]}).has_jinja_braces
    True
    """
    for value in self.values():
        # Stringify once: testing "}}" against the raw value would be a
        # membership/key test for lists and dicts, not a substring test.
        text = str(value)
        if "{{" in text and "}}" in text:
            return True
    return False
70
+
71
def convert_jinja_braces(
    self, replacement_left="<<", replacement_right=">>"
) -> Scenario:
    """Return a copy of the scenario with Jinja braces swapped out.

    Every string value has ``{{`` replaced by ``replacement_left`` and
    ``}}`` replaced by ``replacement_right``; values that are not strings
    are carried over unchanged.

    >>> s = Scenario({"food": "I love {{wood chips}}"})
    >>> s.convert_jinja_braces()
    Scenario({'food': 'I love <<wood chips>>'})

    """
    converted = Scenario()
    for name, original in self.items():
        if not isinstance(original, str):
            converted[name] = original
            continue
        opened = original.replace("{{", replacement_left)
        converted[name] = opened.replace("}}", replacement_right)
    return converted
90
+
52
91
  @has_image.setter
53
92
  def has_image(self, value):
54
93
  self._has_image = value
@@ -142,6 +181,7 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
142
181
  print_json(json.dumps(self.to_dict()))
143
182
 
144
183
  def __repr__(self):
184
+ # return "Scenario(" + reprlib.repr(self.data) + ")"
145
185
  return "Scenario(" + repr(self.data) + ")"
146
186
 
147
187
  def _repr_html_(self):
@@ -196,33 +236,54 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
196
236
  return cls({"url": url, field_name: text})
197
237
 
198
238
  @classmethod
199
- def from_image(cls, image_path: str) -> str:
200
- """Creates a scenario with a base64 encoding of an image.
239
+ def from_image(
240
+ cls, image_path: str, image_name: Optional[str] = None
241
+ ) -> "Scenario":
242
+ """
243
+ Creates a scenario with a base64 encoding of an image.
201
244
 
202
- Example:
245
+ Args:
246
+ image_path (str): Path to the image file.
247
+
248
+ Returns:
249
+ Scenario: A new Scenario instance with image information.
203
250
 
251
+ Example:
204
252
  >>> s = Scenario.from_image(Scenario.example_image())
205
253
  >>> s
206
- Scenario({'file_path': '...', 'encoded_image': '...'})
254
+ Scenario({'logo': ...})
207
255
  """
256
+ if not os.path.exists(image_path):
257
+ raise FileNotFoundError(f"Image file not found: {image_path}")
258
+
208
259
  with open(image_path, "rb") as image_file:
209
- s = cls(
210
- {
211
- "file_path": image_path,
212
- "encoded_image": base64.b64encode(image_file.read()).decode(
213
- "utf-8"
214
- ),
215
- }
216
- )
217
- s.has_image = True
218
- return s
260
+ file_content = image_file.read()
261
+
262
+ file_name = os.path.basename(image_path)
263
+ file_size = os.path.getsize(image_path)
264
+ image_format = imghdr.what(image_path) or "unknown"
265
+
266
+ if image_name is None:
267
+ image_name = file_name.split(".")[0]
268
+
269
+ image_info = ImageInfo(
270
+ file_path=image_path,
271
+ file_name=file_name,
272
+ image_format=image_format,
273
+ file_size=file_size,
274
+ encoded_image=base64.b64encode(file_content).decode("utf-8"),
275
+ )
276
+
277
+ scenario_data = {image_name: image_info}
278
+ s = cls(scenario_data)
279
+ s.has_image = True
280
+ return s
219
281
 
220
282
  @classmethod
221
283
  def from_pdf(cls, pdf_path):
222
- import fitz # PyMuPDF
223
- from edsl import Scenario
224
-
225
284
  # Ensure the file exists
285
+ import fitz
286
+
226
287
  if not os.path.exists(pdf_path):
227
288
  raise FileNotFoundError(f"The file {pdf_path} does not exist.")
228
289
 
@@ -236,7 +297,14 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
236
297
  text = ""
237
298
  for page_num in range(len(document)):
238
299
  page = document.load_page(page_num)
239
- text = text + page.get_text()
300
+ blocks = page.get_text("blocks") # Extract text blocks
301
+
302
+ # Sort blocks by their vertical position (y0) to maintain reading order
303
+ blocks.sort(key=lambda b: (b[1], b[0])) # Sort by y0 first, then x0
304
+
305
+ # Combine the text blocks in order
306
+ for block in blocks:
307
+ text += block[4] + "\n"
240
308
 
241
309
  # Create a dictionary for the combined text
242
310
  page_info = {"filename": filename, "text": text}
@@ -423,18 +491,21 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
423
491
  return table
424
492
 
425
493
  @classmethod
426
- def example(cls, randomize: bool = False) -> Scenario:
494
+ def example(cls, randomize: bool = False, has_image=False) -> Scenario:
427
495
  """
428
496
  Returns an example Scenario instance.
429
497
 
430
498
  :param randomize: If True, adds a random string to the value of the example key.
431
499
  """
432
- addition = "" if not randomize else str(uuid4())
433
- return cls(
434
- {
435
- "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
436
- }
437
- )
500
+ if not has_image:
501
+ addition = "" if not randomize else str(uuid4())
502
+ return cls(
503
+ {
504
+ "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
505
+ }
506
+ )
507
+ else:
508
+ return cls.from_image(cls.example_image())
438
509
 
439
510
  def code(self) -> List[str]:
440
511
  """Return the code for the scenario."""
@@ -13,7 +13,7 @@ class ScenarioImageMixin:
13
13
  >>> from edsl.scenarios.Scenario import Scenario
14
14
  >>> s = Scenario({"food": "wood chips"})
15
15
  >>> s.add_image(Scenario.example_image())
16
- Scenario({'food': 'wood chips', 'file_path': '...', 'encoded_image': '...'})
16
+ Scenario({'food': 'wood chips', 'logo': ...})
17
17
  """
18
18
  new_scenario = self.from_image(image_path)
19
19
  return self + new_scenario
@@ -33,7 +33,7 @@ class ScenarioImageMixin:
33
33
  >>> from edsl.scenarios.Scenario import Scenario
34
34
  >>> s = Scenario.from_image(Scenario.example_image())
35
35
  >>> s
36
- Scenario({'file_path': '...', 'encoded_image': '...'})
36
+ Scenario({'logo': ...})
37
37
  """
38
38
 
39
39
  if image_path.startswith("http://") or image_path.startswith("https://"):