edsl 0.1.33__py3-none-any.whl → 0.1.33.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. edsl/Base.py +3 -9
  2. edsl/__init__.py +3 -8
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +8 -40
  5. edsl/agents/AgentList.py +0 -43
  6. edsl/agents/Invigilator.py +219 -135
  7. edsl/agents/InvigilatorBase.py +59 -148
  8. edsl/agents/{PromptConstructor.py → PromptConstructionMixin.py} +89 -138
  9. edsl/agents/__init__.py +0 -1
  10. edsl/config.py +56 -47
  11. edsl/coop/coop.py +7 -50
  12. edsl/data/Cache.py +1 -35
  13. edsl/data_transfer_models.py +38 -73
  14. edsl/enums.py +0 -4
  15. edsl/exceptions/language_models.py +1 -25
  16. edsl/exceptions/questions.py +5 -62
  17. edsl/exceptions/results.py +0 -4
  18. edsl/inference_services/AnthropicService.py +11 -13
  19. edsl/inference_services/AwsBedrock.py +17 -19
  20. edsl/inference_services/AzureAI.py +20 -37
  21. edsl/inference_services/GoogleService.py +12 -16
  22. edsl/inference_services/GroqService.py +0 -2
  23. edsl/inference_services/InferenceServiceABC.py +3 -58
  24. edsl/inference_services/OpenAIService.py +54 -48
  25. edsl/inference_services/models_available_cache.py +6 -0
  26. edsl/inference_services/registry.py +0 -6
  27. edsl/jobs/Answers.py +12 -10
  28. edsl/jobs/Jobs.py +21 -36
  29. edsl/jobs/buckets/BucketCollection.py +15 -24
  30. edsl/jobs/buckets/TokenBucket.py +14 -93
  31. edsl/jobs/interviews/Interview.py +78 -366
  32. edsl/jobs/interviews/InterviewExceptionEntry.py +19 -85
  33. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +286 -0
  34. edsl/jobs/interviews/{InterviewExceptionCollection.py → interview_exception_tracking.py} +68 -14
  35. edsl/jobs/interviews/retry_management.py +37 -0
  36. edsl/jobs/runners/JobsRunnerAsyncio.py +175 -146
  37. edsl/jobs/runners/JobsRunnerStatusMixin.py +333 -0
  38. edsl/jobs/tasks/QuestionTaskCreator.py +23 -30
  39. edsl/jobs/tasks/TaskHistory.py +213 -148
  40. edsl/language_models/LanguageModel.py +156 -261
  41. edsl/language_models/ModelList.py +2 -2
  42. edsl/language_models/RegisterLanguageModelsMeta.py +29 -14
  43. edsl/language_models/registry.py +6 -23
  44. edsl/language_models/repair.py +19 -0
  45. edsl/prompts/Prompt.py +2 -52
  46. edsl/questions/AnswerValidatorMixin.py +26 -23
  47. edsl/questions/QuestionBase.py +249 -329
  48. edsl/questions/QuestionBudget.py +41 -99
  49. edsl/questions/QuestionCheckBox.py +35 -227
  50. edsl/questions/QuestionExtract.py +27 -98
  51. edsl/questions/QuestionFreeText.py +29 -52
  52. edsl/questions/QuestionFunctional.py +0 -7
  53. edsl/questions/QuestionList.py +22 -141
  54. edsl/questions/QuestionMultipleChoice.py +65 -159
  55. edsl/questions/QuestionNumerical.py +46 -88
  56. edsl/questions/QuestionRank.py +24 -182
  57. edsl/questions/RegisterQuestionsMeta.py +12 -31
  58. edsl/questions/__init__.py +4 -3
  59. edsl/questions/derived/QuestionLikertFive.py +5 -10
  60. edsl/questions/derived/QuestionLinearScale.py +2 -15
  61. edsl/questions/derived/QuestionTopK.py +1 -10
  62. edsl/questions/derived/QuestionYesNo.py +3 -24
  63. edsl/questions/descriptors.py +7 -43
  64. edsl/questions/question_registry.py +2 -6
  65. edsl/results/Dataset.py +0 -20
  66. edsl/results/DatasetExportMixin.py +48 -46
  67. edsl/results/Result.py +5 -32
  68. edsl/results/Results.py +46 -135
  69. edsl/results/ResultsDBMixin.py +3 -3
  70. edsl/scenarios/FileStore.py +10 -71
  71. edsl/scenarios/Scenario.py +25 -96
  72. edsl/scenarios/ScenarioImageMixin.py +2 -2
  73. edsl/scenarios/ScenarioList.py +39 -361
  74. edsl/scenarios/ScenarioListExportMixin.py +0 -9
  75. edsl/scenarios/ScenarioListPdfMixin.py +4 -150
  76. edsl/study/SnapShot.py +1 -8
  77. edsl/study/Study.py +0 -32
  78. edsl/surveys/Rule.py +1 -10
  79. edsl/surveys/RuleCollection.py +5 -21
  80. edsl/surveys/Survey.py +310 -636
  81. edsl/surveys/SurveyExportMixin.py +9 -71
  82. edsl/surveys/SurveyFlowVisualizationMixin.py +1 -2
  83. edsl/surveys/SurveyQualtricsImport.py +4 -75
  84. edsl/utilities/gcp_bucket/simple_example.py +9 -0
  85. edsl/utilities/utilities.py +1 -9
  86. {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/METADATA +2 -5
  87. edsl-0.1.33.dev1.dist-info/RECORD +209 -0
  88. edsl/TemplateLoader.py +0 -24
  89. edsl/auto/AutoStudy.py +0 -117
  90. edsl/auto/StageBase.py +0 -230
  91. edsl/auto/StageGenerateSurvey.py +0 -178
  92. edsl/auto/StageLabelQuestions.py +0 -125
  93. edsl/auto/StagePersona.py +0 -61
  94. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  95. edsl/auto/StagePersonaDimensionValues.py +0 -74
  96. edsl/auto/StagePersonaDimensions.py +0 -69
  97. edsl/auto/StageQuestions.py +0 -73
  98. edsl/auto/SurveyCreatorPipeline.py +0 -21
  99. edsl/auto/utilities.py +0 -224
  100. edsl/coop/PriceFetcher.py +0 -58
  101. edsl/inference_services/MistralAIService.py +0 -120
  102. edsl/inference_services/TestService.py +0 -80
  103. edsl/inference_services/TogetherAIService.py +0 -170
  104. edsl/jobs/FailedQuestion.py +0 -78
  105. edsl/jobs/runners/JobsRunnerStatus.py +0 -331
  106. edsl/language_models/fake_openai_call.py +0 -15
  107. edsl/language_models/fake_openai_service.py +0 -61
  108. edsl/language_models/utilities.py +0 -61
  109. edsl/questions/QuestionBaseGenMixin.py +0 -133
  110. edsl/questions/QuestionBasePromptsMixin.py +0 -266
  111. edsl/questions/Quick.py +0 -41
  112. edsl/questions/ResponseValidatorABC.py +0 -170
  113. edsl/questions/decorators.py +0 -21
  114. edsl/questions/prompt_templates/question_budget.jinja +0 -13
  115. edsl/questions/prompt_templates/question_checkbox.jinja +0 -32
  116. edsl/questions/prompt_templates/question_extract.jinja +0 -11
  117. edsl/questions/prompt_templates/question_free_text.jinja +0 -3
  118. edsl/questions/prompt_templates/question_linear_scale.jinja +0 -11
  119. edsl/questions/prompt_templates/question_list.jinja +0 -17
  120. edsl/questions/prompt_templates/question_multiple_choice.jinja +0 -33
  121. edsl/questions/prompt_templates/question_numerical.jinja +0 -37
  122. edsl/questions/templates/__init__.py +0 -0
  123. edsl/questions/templates/budget/__init__.py +0 -0
  124. edsl/questions/templates/budget/answering_instructions.jinja +0 -7
  125. edsl/questions/templates/budget/question_presentation.jinja +0 -7
  126. edsl/questions/templates/checkbox/__init__.py +0 -0
  127. edsl/questions/templates/checkbox/answering_instructions.jinja +0 -10
  128. edsl/questions/templates/checkbox/question_presentation.jinja +0 -22
  129. edsl/questions/templates/extract/__init__.py +0 -0
  130. edsl/questions/templates/extract/answering_instructions.jinja +0 -7
  131. edsl/questions/templates/extract/question_presentation.jinja +0 -1
  132. edsl/questions/templates/free_text/__init__.py +0 -0
  133. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  134. edsl/questions/templates/free_text/question_presentation.jinja +0 -1
  135. edsl/questions/templates/likert_five/__init__.py +0 -0
  136. edsl/questions/templates/likert_five/answering_instructions.jinja +0 -10
  137. edsl/questions/templates/likert_five/question_presentation.jinja +0 -12
  138. edsl/questions/templates/linear_scale/__init__.py +0 -0
  139. edsl/questions/templates/linear_scale/answering_instructions.jinja +0 -5
  140. edsl/questions/templates/linear_scale/question_presentation.jinja +0 -5
  141. edsl/questions/templates/list/__init__.py +0 -0
  142. edsl/questions/templates/list/answering_instructions.jinja +0 -4
  143. edsl/questions/templates/list/question_presentation.jinja +0 -5
  144. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  145. edsl/questions/templates/multiple_choice/answering_instructions.jinja +0 -9
  146. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  147. edsl/questions/templates/multiple_choice/question_presentation.jinja +0 -12
  148. edsl/questions/templates/numerical/__init__.py +0 -0
  149. edsl/questions/templates/numerical/answering_instructions.jinja +0 -8
  150. edsl/questions/templates/numerical/question_presentation.jinja +0 -7
  151. edsl/questions/templates/rank/__init__.py +0 -0
  152. edsl/questions/templates/rank/answering_instructions.jinja +0 -11
  153. edsl/questions/templates/rank/question_presentation.jinja +0 -15
  154. edsl/questions/templates/top_k/__init__.py +0 -0
  155. edsl/questions/templates/top_k/answering_instructions.jinja +0 -8
  156. edsl/questions/templates/top_k/question_presentation.jinja +0 -22
  157. edsl/questions/templates/yes_no/__init__.py +0 -0
  158. edsl/questions/templates/yes_no/answering_instructions.jinja +0 -6
  159. edsl/questions/templates/yes_no/question_presentation.jinja +0 -12
  160. edsl/results/DatasetTree.py +0 -145
  161. edsl/results/Selector.py +0 -118
  162. edsl/results/tree_explore.py +0 -115
  163. edsl/surveys/instructions/ChangeInstruction.py +0 -47
  164. edsl/surveys/instructions/Instruction.py +0 -34
  165. edsl/surveys/instructions/InstructionCollection.py +0 -77
  166. edsl/surveys/instructions/__init__.py +0 -0
  167. edsl/templates/error_reporting/base.html +0 -24
  168. edsl/templates/error_reporting/exceptions_by_model.html +0 -35
  169. edsl/templates/error_reporting/exceptions_by_question_name.html +0 -17
  170. edsl/templates/error_reporting/exceptions_by_type.html +0 -17
  171. edsl/templates/error_reporting/interview_details.html +0 -116
  172. edsl/templates/error_reporting/interviews.html +0 -10
  173. edsl/templates/error_reporting/overview.html +0 -5
  174. edsl/templates/error_reporting/performance_plot.html +0 -2
  175. edsl/templates/error_reporting/report.css +0 -74
  176. edsl/templates/error_reporting/report.html +0 -118
  177. edsl/templates/error_reporting/report.js +0 -25
  178. edsl-0.1.33.dist-info/RECORD +0 -295
  179. {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/LICENSE +0 -0
  180. {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/WHEEL +0 -0
edsl/results/Results.py CHANGED
@@ -17,7 +17,6 @@ from edsl.exceptions.results import (
17
17
  ResultsInvalidNameError,
18
18
  ResultsMutateError,
19
19
  ResultsFilterError,
20
- ResultsDeserializationError,
21
20
  )
22
21
 
23
22
  from edsl.results.ResultsExportMixin import ResultsExportMixin
@@ -78,7 +77,6 @@ class Results(UserList, Mixins, Base):
78
77
  "question_options",
79
78
  "question_type",
80
79
  "comment",
81
- "generated_tokens",
82
80
  ]
83
81
 
84
82
  def __init__(
@@ -110,81 +108,6 @@ class Results(UserList, Mixins, Base):
110
108
  if hasattr(self, "_add_output_functions"):
111
109
  self._add_output_functions()
112
110
 
113
- def leaves(self):
114
- leaves = []
115
- for result in self:
116
- leaves.extend(result.leaves())
117
- return leaves
118
-
119
- def tree(
120
- self,
121
- fold_attributes: Optional[List[str]] = None,
122
- drop: Optional[List[str]] = None,
123
- open_file=True,
124
- ) -> dict:
125
- """Return the results as a tree."""
126
- from edsl.results.tree_explore import FoldableHTMLTableGenerator
127
-
128
- if drop is None:
129
- drop = []
130
-
131
- valid_attributes = [
132
- "model",
133
- "scenario",
134
- "agent",
135
- "answer",
136
- "question",
137
- "iteration",
138
- ]
139
- if fold_attributes is None:
140
- fold_attributes = []
141
-
142
- for attribute in fold_attributes:
143
- if attribute not in valid_attributes:
144
- raise ValueError(
145
- f"Invalid fold attribute: {attribute}; must be in {valid_attributes}"
146
- )
147
- data = self.leaves()
148
- generator = FoldableHTMLTableGenerator(data)
149
- tree = generator.tree(fold_attributes=fold_attributes, drop=drop)
150
- html_content = generator.generate_html(tree, fold_attributes)
151
- import tempfile
152
- from edsl.utilities.utilities import is_notebook
153
-
154
- from IPython.display import display, HTML
155
-
156
- if is_notebook():
157
- import html
158
- from IPython.display import display, HTML
159
-
160
- height = 1000
161
- width = 1000
162
- escaped_output = html.escape(html_content)
163
- # escaped_output = rendered_html
164
- iframe = f""""
165
- <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
166
- """
167
- display(HTML(iframe))
168
- return None
169
-
170
- with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
171
- f.write(html_content.encode())
172
- print(f"HTML file has been generated: {f.name}")
173
-
174
- if open_file:
175
- import webbrowser
176
- import time
177
-
178
- time.sleep(1) # Wait for 1 second
179
- # webbrowser.open(f.name)
180
- import os
181
-
182
- filename = f.name
183
- webbrowser.open(f"file://{os.path.abspath(filename)}")
184
-
185
- else:
186
- return html_content
187
-
188
111
  def code(self):
189
112
  raise NotImplementedError
190
113
 
@@ -245,9 +168,7 @@ class Results(UserList, Mixins, Base):
245
168
  )
246
169
 
247
170
  def __repr__(self) -> str:
248
- import reprlib
249
-
250
- return f"Results(data = {reprlib.repr(self.data)}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
171
+ return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
251
172
 
252
173
  def _repr_html_(self) -> str:
253
174
  from IPython.display import HTML
@@ -369,7 +290,8 @@ class Results(UserList, Mixins, Base):
369
290
  ),
370
291
  )
371
292
  except Exception as e:
372
- raise ResultsDeserializationError(f"Error in Results.from_dict: {e}")
293
+ print(e)
294
+ # breakpoint()
373
295
  return results
374
296
 
375
297
  ######################
@@ -473,7 +395,7 @@ class Results(UserList, Mixins, Base):
473
395
 
474
396
  >>> r = Results.example()
475
397
  >>> r.models[0]
476
- Model(model_name = ...)
398
+ Model(model_name = 'gpt-4-1106-preview', temperature = 0.5, max_tokens = 1000, top_p = 1, frequency_penalty = 0, presence_penalty = 0, logprobs = False, top_logprobs = 3)
477
399
  """
478
400
  return [r.model for r in self.data]
479
401
 
@@ -555,6 +477,39 @@ class Results(UserList, Mixins, Base):
555
477
  )
556
478
  return sorted(list(all_keys))
557
479
 
480
+ def _parse_column(self, column: str) -> tuple[str, str]:
481
+ """
482
+ Parses a column name into a tuple containing a data type and a key.
483
+
484
+ >>> r = Results.example()
485
+ >>> r._parse_column("answer.how_feeling")
486
+ ('answer', 'how_feeling')
487
+
488
+ The standard way a column is specified is with a dot-separated string, e.g. _parse_column("agent.status")
489
+ But you can also specify a single key, e.g. "status", in which case it will look up the data type.
490
+ """
491
+ if "." in column:
492
+ data_type, key = column.split(".")
493
+ else:
494
+ try:
495
+ data_type, key = self._key_to_data_type[column], column
496
+ except KeyError:
497
+ import difflib
498
+
499
+ close_matches = difflib.get_close_matches(
500
+ column, self._key_to_data_type.keys()
501
+ )
502
+ if close_matches:
503
+ suggestions = ", ".join(close_matches)
504
+ raise ResultsColumnNotFoundError(
505
+ f"Column '{column}' not found in data. Did you mean: {suggestions}?"
506
+ )
507
+ else:
508
+ raise ResultsColumnNotFoundError(
509
+ f"Column {column} not found in data"
510
+ )
511
+ return data_type, key
512
+
558
513
  def first(self) -> "Result":
559
514
  """Return the first observation in the results.
560
515
 
@@ -677,11 +632,9 @@ class Results(UserList, Mixins, Base):
677
632
  """
678
633
  if functions_dict is None:
679
634
  functions_dict = {}
680
- evaluator = EvalWithCompoundTypes(
635
+ return EvalWithCompoundTypes(
681
636
  names=result.combined_dict, functions=functions_dict
682
637
  )
683
- evaluator.functions.update(int=int, float=float)
684
- return evaluator
685
638
 
686
639
  def mutate(
687
640
  self, new_var_string: str, functions_dict: Optional[dict] = None
@@ -768,8 +721,8 @@ class Results(UserList, Mixins, Base):
768
721
 
769
722
  def sample(
770
723
  self,
771
- n: Optional[int] = None,
772
- frac: Optional[float] = None,
724
+ n: int = None,
725
+ frac: float = None,
773
726
  with_replacement: bool = True,
774
727
  seed: Optional[str] = "edsl",
775
728
  ) -> Results:
@@ -818,17 +771,13 @@ class Results(UserList, Mixins, Base):
818
771
  Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
819
772
 
820
773
  >>> results.select('how_feeling', 'model', 'how_feeling')
821
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'model.model': ['...', '...', '...', '...']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
822
-
823
- >>> from edsl import Results; r = Results.example(); r.select('answer.how_feeling_y')
824
- Dataset([{'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
774
+ Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'model.model': ['gpt-4-1106-preview', 'gpt-4-1106-preview', 'gpt-4-1106-preview', 'gpt-4-1106-preview']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
825
775
  """
826
776
 
827
- # if len(self) == 0:
828
- # raise Exception("No data to select from---the Results object is empty.")
777
+ if len(self) == 0:
778
+ raise Exception("No data to select from---the Results object is empty.")
829
779
 
830
780
  if not columns or columns == ("*",) or columns == (None,):
831
- # is the users passes nothing, then we'll return all the columns
832
781
  columns = ("*.*",)
833
782
 
834
783
  if isinstance(columns[0], list):
@@ -852,16 +801,6 @@ class Results(UserList, Mixins, Base):
852
801
  # iterate through the passed columns
853
802
  for column in columns:
854
803
  # a user could pass 'result.how_feeling' or just 'how_feeling'
855
- matches = self._matching_columns(column)
856
- if len(matches) > 1:
857
- raise Exception(
858
- f"Column '{column}' is ambiguous. Did you mean one of {matches}?"
859
- )
860
- if len(matches) == 0 and ".*" not in column:
861
- raise Exception(f"Column '{column}' not found in data.")
862
- if len(matches) == 1:
863
- column = matches[0]
864
-
865
804
  parsed_data_type, parsed_key = self._parse_column(column)
866
805
  data_types = get_data_types_to_return(parsed_data_type)
867
806
  found_once = False # we need to track this to make sure we found the key at least once
@@ -904,21 +843,6 @@ class Results(UserList, Mixins, Base):
904
843
 
905
844
  return Dataset(sorted_new_data)
906
845
 
907
- def select(self, *columns: Union[str, list[str]]) -> "Results":
908
- from edsl.results.Selector import Selector
909
-
910
- if len(self) == 0:
911
- raise Exception("No data to select from---the Results object is empty.")
912
-
913
- selector = Selector(
914
- known_data_types=self.known_data_types,
915
- data_type_to_keys=self._data_type_to_keys,
916
- key_to_data_type=self._key_to_data_type,
917
- fetch_list_func=self._fetch_list,
918
- columns=self.columns,
919
- )
920
- return selector.select(*columns)
921
-
922
846
  def sort_by(self, *columns: str, reverse: bool = False) -> Results:
923
847
  import warnings
924
848
 
@@ -927,11 +851,6 @@ class Results(UserList, Mixins, Base):
927
851
  )
928
852
  return self.order_by(*columns, reverse=reverse)
929
853
 
930
- def _parse_column(self, column: str) -> tuple[str, str]:
931
- if "." in column:
932
- return column.split(".")
933
- return self._key_to_data_type[column], column
934
-
935
854
  def order_by(self, *columns: str, reverse: bool = False) -> Results:
936
855
  """Sort the results by one or more columns.
937
856
 
@@ -1029,9 +948,7 @@ class Results(UserList, Mixins, Base):
1029
948
  def has_single_equals(string):
1030
949
  if "!=" in string:
1031
950
  return False
1032
- if "=" in string and not (
1033
- "==" in string or "<=" in string or ">=" in string
1034
- ):
951
+ if "=" in string and not "==" in string:
1035
952
  return True
1036
953
 
1037
954
  if has_single_equals(expression):
@@ -1072,7 +989,7 @@ class Results(UserList, Mixins, Base):
1072
989
  return Results(survey=self.survey, data=new_data, created_columns=None)
1073
990
 
1074
991
  @classmethod
1075
- def example(cls, randomize: bool = False) -> Results:
992
+ def example(cls, debug: bool = False, randomize: bool = False) -> Results:
1076
993
  """Return an example `Results` object.
1077
994
 
1078
995
  Example usage:
@@ -1086,13 +1003,7 @@ class Results(UserList, Mixins, Base):
1086
1003
 
1087
1004
  c = Cache()
1088
1005
  job = Jobs.example(randomize=randomize)
1089
- results = job.run(
1090
- cache=c,
1091
- stop_on_exception=True,
1092
- skip_retry=True,
1093
- raise_validation_errors=True,
1094
- disable_remote_inference=True,
1095
- )
1006
+ results = job.run(cache=c, debug=debug)
1096
1007
  return results
1097
1008
 
1098
1009
  def rich_print(self):
edsl/results/ResultsDBMixin.py CHANGED
@@ -136,9 +136,9 @@ class ResultsDBMixin:
136
136
 
137
137
  >>> from edsl.results import Results
138
138
  >>> r = Results.example()
139
- >>> d = r.sql("select data_type, key, value from self where data_type = 'answer' order by value limit 3", shape="long")
140
- >>> sorted(list(d['value']))
141
- ['Good', 'Great', 'Great']
139
+ >>> d = r.sql("select data_type, key, value from self where data_type = 'answer' limit 3", shape="long")
140
+ >>> list(d['value'])
141
+ ['OK', 'This is a real survey response from a human.', 'Great']
142
142
 
143
143
  We can also return the data in wide format.
144
144
  Note the use of single quotes to escape the column names, as required by sql.
edsl/scenarios/FileStore.py CHANGED
@@ -120,22 +120,14 @@ class FileStore(Scenario):
120
120
  return info
121
121
 
122
122
  @classmethod
123
- def pull(cls, uuid, expected_parrot_url: Optional[str] = None):
124
- scenario_version = Scenario.pull(uuid, expected_parrot_url=expected_parrot_url)
123
+ def pull(cls, uuid):
124
+ scenario_version = Scenario.pull(uuid)
125
125
  return cls.from_dict(scenario_version.to_dict())
126
126
 
127
127
 
128
128
  class CSVFileStore(FileStore):
129
- def __init__(
130
- self,
131
- filename,
132
- binary: Optional[bool] = None,
133
- suffix: Optional[str] = None,
134
- base64_string: Optional[str] = None,
135
- ):
136
- super().__init__(
137
- filename, binary=binary, base64_string=base64_string, suffix=".csv"
138
- )
129
+ def __init__(self, filename):
130
+ super().__init__(filename, suffix=".csv")
139
131
 
140
132
  @classmethod
141
133
  def example(cls):
@@ -155,16 +147,8 @@ class CSVFileStore(FileStore):
155
147
 
156
148
 
157
149
  class PDFFileStore(FileStore):
158
- def __init__(
159
- self,
160
- filename,
161
- binary: Optional[bool] = None,
162
- suffix: Optional[str] = None,
163
- base64_string: Optional[str] = None,
164
- ):
165
- super().__init__(
166
- filename, binary=binary, base64_string=base64_string, suffix=".pdf"
167
- )
150
+ def __init__(self, filename):
151
+ super().__init__(filename, suffix=".pdf")
168
152
 
169
153
  def view(self):
170
154
  pdf_path = self.to_tempfile()
@@ -241,16 +225,8 @@ class PDFFileStore(FileStore):
241
225
 
242
226
 
243
227
  class PNGFileStore(FileStore):
244
- def __init__(
245
- self,
246
- filename,
247
- binary: Optional[bool] = None,
248
- suffix: Optional[str] = None,
249
- base64_string: Optional[str] = None,
250
- ):
251
- super().__init__(
252
- filename, binary=binary, base64_string=base64_string, suffix=".png"
253
- )
228
+ def __init__(self, filename):
229
+ super().__init__(filename, suffix=".png")
254
230
 
255
231
  @classmethod
256
232
  def example(cls):
@@ -275,16 +251,8 @@ class PNGFileStore(FileStore):
275
251
 
276
252
 
277
253
  class SQLiteFileStore(FileStore):
278
- def __init__(
279
- self,
280
- filename,
281
- binary: Optional[bool] = None,
282
- suffix: Optional[str] = None,
283
- base64_string: Optional[str] = None,
284
- ):
285
- super().__init__(
286
- filename, binary=binary, base64_string=base64_string, suffix=".sqlite"
287
- )
254
+ def __init__(self, filename):
255
+ super().__init__(filename, suffix=".sqlite")
288
256
 
289
257
  @classmethod
290
258
  def example(cls):
@@ -297,8 +265,6 @@ class SQLiteFileStore(FileStore):
297
265
  c.execute("""CREATE TABLE stocks (date text)""")
298
266
  conn.commit()
299
267
 
300
- return cls(f.name)
301
-
302
268
  def view(self):
303
269
  import subprocess
304
270
  import os
@@ -307,33 +273,6 @@ class SQLiteFileStore(FileStore):
307
273
  os.system(f"sqlite3 {sqlite_path}")
308
274
 
309
275
 
310
- class HTMLFileStore(FileStore):
311
- def __init__(
312
- self,
313
- filename,
314
- binary: Optional[bool] = None,
315
- suffix: Optional[str] = None,
316
- base64_string: Optional[str] = None,
317
- ):
318
- super().__init__(
319
- filename, binary=binary, base64_string=base64_string, suffix=".html"
320
- )
321
-
322
- @classmethod
323
- def example(cls):
324
- import tempfile
325
-
326
- with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
327
- f.write("<html><body><h1>Test</h1></body></html>".encode())
328
- return cls(f.name)
329
-
330
- def view(self):
331
- import webbrowser
332
-
333
- html_path = self.to_tempfile()
334
- webbrowser.open("file://" + html_path)
335
-
336
-
337
276
  if __name__ == "__main__":
338
277
  # file_path = "../conjure/examples/Ex11-2.sav"
339
278
  # fs = FileStore(file_path)
edsl/scenarios/Scenario.py CHANGED
@@ -5,10 +5,6 @@ import copy
5
5
  import base64
6
6
  import hashlib
7
7
  import os
8
- import reprlib
9
- import imghdr
10
-
11
-
12
8
  from collections import UserDict
13
9
  from typing import Union, List, Optional, Generator
14
10
  from uuid import uuid4
@@ -17,8 +13,6 @@ from edsl.scenarios.ScenarioImageMixin import ScenarioImageMixin
17
13
  from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
18
14
  from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
19
15
 
20
- from edsl.data_transfer_models import ImageInfo
21
-
22
16
 
23
17
  class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
24
18
  """A Scenario is a dictionary of keys/values.
@@ -55,39 +49,6 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
55
49
  self._has_image = False
56
50
  return self._has_image
57
51
 
58
- @property
59
- def has_jinja_braces(self) -> bool:
60
- """Return whether the scenario has jinja braces. This matters for rendering.
61
-
62
- >>> s = Scenario({"food": "I love {{wood chips}}"})
63
- >>> s.has_jinja_braces
64
- True
65
- """
66
- for key, value in self.items():
67
- if "{{" in str(value) and "}}" in value:
68
- return True
69
- return False
70
-
71
- def convert_jinja_braces(
72
- self, replacement_left="<<", replacement_right=">>"
73
- ) -> Scenario:
74
- """Convert Jinja braces to some other character.
75
-
76
- >>> s = Scenario({"food": "I love {{wood chips}}"})
77
- >>> s.convert_jinja_braces()
78
- Scenario({'food': 'I love <<wood chips>>'})
79
-
80
- """
81
- new_scenario = Scenario()
82
- for key, value in self.items():
83
- if isinstance(value, str):
84
- new_scenario[key] = value.replace("{{", replacement_left).replace(
85
- "}}", replacement_right
86
- )
87
- else:
88
- new_scenario[key] = value
89
- return new_scenario
90
-
91
52
  @has_image.setter
92
53
  def has_image(self, value):
93
54
  self._has_image = value
@@ -181,7 +142,6 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
181
142
  print_json(json.dumps(self.to_dict()))
182
143
 
183
144
  def __repr__(self):
184
- # return "Scenario(" + reprlib.repr(self.data) + ")"
185
145
  return "Scenario(" + repr(self.data) + ")"
186
146
 
187
147
  def _repr_html_(self):
@@ -236,54 +196,33 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
236
196
  return cls({"url": url, field_name: text})
237
197
 
238
198
  @classmethod
239
- def from_image(
240
- cls, image_path: str, image_name: Optional[str] = None
241
- ) -> "Scenario":
242
- """
243
- Creates a scenario with a base64 encoding of an image.
244
-
245
- Args:
246
- image_path (str): Path to the image file.
247
-
248
- Returns:
249
- Scenario: A new Scenario instance with image information.
199
+ def from_image(cls, image_path: str) -> str:
200
+ """Creates a scenario with a base64 encoding of an image.
250
201
 
251
202
  Example:
203
+
252
204
  >>> s = Scenario.from_image(Scenario.example_image())
253
205
  >>> s
254
- Scenario({'logo': ...})
206
+ Scenario({'file_path': '...', 'encoded_image': '...'})
255
207
  """
256
- if not os.path.exists(image_path):
257
- raise FileNotFoundError(f"Image file not found: {image_path}")
258
-
259
208
  with open(image_path, "rb") as image_file:
260
- file_content = image_file.read()
261
-
262
- file_name = os.path.basename(image_path)
263
- file_size = os.path.getsize(image_path)
264
- image_format = imghdr.what(image_path) or "unknown"
265
-
266
- if image_name is None:
267
- image_name = file_name.split(".")[0]
268
-
269
- image_info = ImageInfo(
270
- file_path=image_path,
271
- file_name=file_name,
272
- image_format=image_format,
273
- file_size=file_size,
274
- encoded_image=base64.b64encode(file_content).decode("utf-8"),
275
- )
276
-
277
- scenario_data = {image_name: image_info}
278
- s = cls(scenario_data)
279
- s.has_image = True
280
- return s
209
+ s = cls(
210
+ {
211
+ "file_path": image_path,
212
+ "encoded_image": base64.b64encode(image_file.read()).decode(
213
+ "utf-8"
214
+ ),
215
+ }
216
+ )
217
+ s.has_image = True
218
+ return s
281
219
 
282
220
  @classmethod
283
221
  def from_pdf(cls, pdf_path):
284
- # Ensure the file exists
285
- import fitz
222
+ import fitz # PyMuPDF
223
+ from edsl import Scenario
286
224
 
225
+ # Ensure the file exists
287
226
  if not os.path.exists(pdf_path):
288
227
  raise FileNotFoundError(f"The file {pdf_path} does not exist.")
289
228
 
@@ -297,14 +236,7 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
297
236
  text = ""
298
237
  for page_num in range(len(document)):
299
238
  page = document.load_page(page_num)
300
- blocks = page.get_text("blocks") # Extract text blocks
301
-
302
- # Sort blocks by their vertical position (y0) to maintain reading order
303
- blocks.sort(key=lambda b: (b[1], b[0])) # Sort by y0 first, then x0
304
-
305
- # Combine the text blocks in order
306
- for block in blocks:
307
- text += block[4] + "\n"
239
+ text = text + page.get_text()
308
240
 
309
241
  # Create a dictionary for the combined text
310
242
  page_info = {"filename": filename, "text": text}
@@ -491,21 +423,18 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
491
423
  return table
492
424
 
493
425
  @classmethod
494
- def example(cls, randomize: bool = False, has_image=False) -> Scenario:
426
+ def example(cls, randomize: bool = False) -> Scenario:
495
427
  """
496
428
  Returns an example Scenario instance.
497
429
 
498
430
  :param randomize: If True, adds a random string to the value of the example key.
499
431
  """
500
- if not has_image:
501
- addition = "" if not randomize else str(uuid4())
502
- return cls(
503
- {
504
- "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
505
- }
506
- )
507
- else:
508
- return cls.from_image(cls.example_image())
432
+ addition = "" if not randomize else str(uuid4())
433
+ return cls(
434
+ {
435
+ "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
436
+ }
437
+ )
509
438
 
510
439
  def code(self) -> List[str]:
511
440
  """Return the code for the scenario."""
edsl/scenarios/ScenarioImageMixin.py CHANGED
@@ -13,7 +13,7 @@ class ScenarioImageMixin:
13
13
  >>> from edsl.scenarios.Scenario import Scenario
14
14
  >>> s = Scenario({"food": "wood chips"})
15
15
  >>> s.add_image(Scenario.example_image())
16
- Scenario({'food': 'wood chips', 'logo': ...})
16
+ Scenario({'food': 'wood chips', 'file_path': '...', 'encoded_image': '...'})
17
17
  """
18
18
  new_scenario = self.from_image(image_path)
19
19
  return self + new_scenario
@@ -33,7 +33,7 @@ class ScenarioImageMixin:
33
33
  >>> from edsl.scenarios.Scenario import Scenario
34
34
  >>> s = Scenario.from_image(Scenario.example_image())
35
35
  >>> s
36
- Scenario({'logo': ...})
36
+ Scenario({'file_path': '...', 'encoded_image': '...'})
37
37
  """
38
38
 
39
39
  if image_path.startswith("http://") or image_path.startswith("https://"):