edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. edsl/Base.py +197 -116
  2. edsl/__init__.py +15 -7
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +351 -147
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +101 -50
  7. edsl/agents/InvigilatorBase.py +62 -70
  8. edsl/agents/PromptConstructor.py +143 -225
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  11. edsl/agents/__init__.py +0 -1
  12. edsl/agents/prompt_helpers.py +3 -3
  13. edsl/agents/question_option_processor.py +172 -0
  14. edsl/auto/AutoStudy.py +18 -5
  15. edsl/auto/StageBase.py +53 -40
  16. edsl/auto/StageQuestions.py +2 -1
  17. edsl/auto/utilities.py +0 -6
  18. edsl/config.py +22 -2
  19. edsl/conversation/car_buying.py +2 -1
  20. edsl/coop/CoopFunctionsMixin.py +15 -0
  21. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  22. edsl/coop/PriceFetcher.py +1 -1
  23. edsl/coop/coop.py +125 -47
  24. edsl/coop/utils.py +14 -14
  25. edsl/data/Cache.py +45 -27
  26. edsl/data/CacheEntry.py +12 -15
  27. edsl/data/CacheHandler.py +31 -12
  28. edsl/data/RemoteCacheSync.py +154 -46
  29. edsl/data/__init__.py +4 -3
  30. edsl/data_transfer_models.py +2 -1
  31. edsl/enums.py +27 -0
  32. edsl/exceptions/__init__.py +50 -50
  33. edsl/exceptions/agents.py +12 -0
  34. edsl/exceptions/inference_services.py +5 -0
  35. edsl/exceptions/questions.py +24 -6
  36. edsl/exceptions/scenarios.py +7 -0
  37. edsl/inference_services/AnthropicService.py +38 -19
  38. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  39. edsl/inference_services/AvailableModelFetcher.py +215 -0
  40. edsl/inference_services/AwsBedrock.py +0 -2
  41. edsl/inference_services/AzureAI.py +0 -2
  42. edsl/inference_services/GoogleService.py +7 -12
  43. edsl/inference_services/InferenceServiceABC.py +18 -85
  44. edsl/inference_services/InferenceServicesCollection.py +120 -79
  45. edsl/inference_services/MistralAIService.py +0 -3
  46. edsl/inference_services/OpenAIService.py +47 -35
  47. edsl/inference_services/PerplexityService.py +0 -3
  48. edsl/inference_services/ServiceAvailability.py +135 -0
  49. edsl/inference_services/TestService.py +11 -10
  50. edsl/inference_services/TogetherAIService.py +5 -3
  51. edsl/inference_services/data_structures.py +134 -0
  52. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  53. edsl/jobs/Answers.py +1 -14
  54. edsl/jobs/FetchInvigilator.py +47 -0
  55. edsl/jobs/InterviewTaskManager.py +98 -0
  56. edsl/jobs/InterviewsConstructor.py +50 -0
  57. edsl/jobs/Jobs.py +356 -431
  58. edsl/jobs/JobsChecks.py +35 -10
  59. edsl/jobs/JobsComponentConstructor.py +189 -0
  60. edsl/jobs/JobsPrompts.py +6 -4
  61. edsl/jobs/JobsRemoteInferenceHandler.py +205 -133
  62. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  63. edsl/jobs/RequestTokenEstimator.py +30 -0
  64. edsl/jobs/async_interview_runner.py +138 -0
  65. edsl/jobs/buckets/BucketCollection.py +44 -3
  66. edsl/jobs/buckets/TokenBucket.py +53 -21
  67. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  68. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  69. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  70. edsl/jobs/data_structures.py +120 -0
  71. edsl/jobs/decorators.py +35 -0
  72. edsl/jobs/interviews/Interview.py +143 -408
  73. edsl/jobs/jobs_status_enums.py +9 -0
  74. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  75. edsl/jobs/results_exceptions_handler.py +98 -0
  76. edsl/jobs/runners/JobsRunnerAsyncio.py +88 -403
  77. edsl/jobs/runners/JobsRunnerStatus.py +133 -165
  78. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  79. edsl/jobs/tasks/TaskHistory.py +38 -18
  80. edsl/jobs/tasks/task_status_enum.py +0 -2
  81. edsl/language_models/ComputeCost.py +63 -0
  82. edsl/language_models/LanguageModel.py +194 -236
  83. edsl/language_models/ModelList.py +28 -19
  84. edsl/language_models/PriceManager.py +127 -0
  85. edsl/language_models/RawResponseHandler.py +106 -0
  86. edsl/language_models/ServiceDataSources.py +0 -0
  87. edsl/language_models/__init__.py +1 -2
  88. edsl/language_models/key_management/KeyLookup.py +63 -0
  89. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  90. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  91. edsl/language_models/key_management/__init__.py +0 -0
  92. edsl/language_models/key_management/models.py +131 -0
  93. edsl/language_models/model.py +256 -0
  94. edsl/language_models/repair.py +2 -2
  95. edsl/language_models/utilities.py +5 -4
  96. edsl/notebooks/Notebook.py +19 -14
  97. edsl/notebooks/NotebookToLaTeX.py +142 -0
  98. edsl/prompts/Prompt.py +29 -39
  99. edsl/questions/ExceptionExplainer.py +77 -0
  100. edsl/questions/HTMLQuestion.py +103 -0
  101. edsl/questions/QuestionBase.py +68 -214
  102. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  103. edsl/questions/QuestionBudget.py +1 -1
  104. edsl/questions/QuestionCheckBox.py +3 -3
  105. edsl/questions/QuestionExtract.py +5 -7
  106. edsl/questions/QuestionFreeText.py +2 -3
  107. edsl/questions/QuestionList.py +10 -18
  108. edsl/questions/QuestionMatrix.py +265 -0
  109. edsl/questions/QuestionMultipleChoice.py +67 -23
  110. edsl/questions/QuestionNumerical.py +2 -4
  111. edsl/questions/QuestionRank.py +7 -17
  112. edsl/questions/SimpleAskMixin.py +4 -3
  113. edsl/questions/__init__.py +2 -1
  114. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +47 -2
  115. edsl/questions/data_structures.py +20 -0
  116. edsl/questions/derived/QuestionLinearScale.py +6 -3
  117. edsl/questions/derived/QuestionTopK.py +1 -1
  118. edsl/questions/descriptors.py +17 -3
  119. edsl/questions/loop_processor.py +149 -0
  120. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +57 -50
  121. edsl/questions/question_registry.py +1 -1
  122. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +40 -26
  123. edsl/questions/response_validator_factory.py +34 -0
  124. edsl/questions/templates/matrix/__init__.py +1 -0
  125. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  126. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  127. edsl/results/CSSParameterizer.py +1 -1
  128. edsl/results/Dataset.py +170 -7
  129. edsl/results/DatasetExportMixin.py +168 -305
  130. edsl/results/DatasetTree.py +28 -8
  131. edsl/results/MarkdownToDocx.py +122 -0
  132. edsl/results/MarkdownToPDF.py +111 -0
  133. edsl/results/Result.py +298 -206
  134. edsl/results/Results.py +149 -131
  135. edsl/results/ResultsExportMixin.py +2 -0
  136. edsl/results/TableDisplay.py +98 -171
  137. edsl/results/TextEditor.py +50 -0
  138. edsl/results/__init__.py +1 -1
  139. edsl/results/file_exports.py +252 -0
  140. edsl/results/{Selector.py → results_selector.py} +23 -13
  141. edsl/results/smart_objects.py +96 -0
  142. edsl/results/table_data_class.py +12 -0
  143. edsl/results/table_renderers.py +118 -0
  144. edsl/scenarios/ConstructDownloadLink.py +109 -0
  145. edsl/scenarios/DocumentChunker.py +102 -0
  146. edsl/scenarios/DocxScenario.py +16 -0
  147. edsl/scenarios/FileStore.py +150 -239
  148. edsl/scenarios/PdfExtractor.py +40 -0
  149. edsl/scenarios/Scenario.py +90 -193
  150. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  151. edsl/scenarios/ScenarioList.py +415 -244
  152. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  153. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  154. edsl/scenarios/__init__.py +1 -2
  155. edsl/scenarios/directory_scanner.py +96 -0
  156. edsl/scenarios/file_methods.py +85 -0
  157. edsl/scenarios/handlers/__init__.py +13 -0
  158. edsl/scenarios/handlers/csv.py +49 -0
  159. edsl/scenarios/handlers/docx.py +76 -0
  160. edsl/scenarios/handlers/html.py +37 -0
  161. edsl/scenarios/handlers/json.py +111 -0
  162. edsl/scenarios/handlers/latex.py +5 -0
  163. edsl/scenarios/handlers/md.py +51 -0
  164. edsl/scenarios/handlers/pdf.py +68 -0
  165. edsl/scenarios/handlers/png.py +39 -0
  166. edsl/scenarios/handlers/pptx.py +105 -0
  167. edsl/scenarios/handlers/py.py +294 -0
  168. edsl/scenarios/handlers/sql.py +313 -0
  169. edsl/scenarios/handlers/sqlite.py +149 -0
  170. edsl/scenarios/handlers/txt.py +33 -0
  171. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +10 -6
  172. edsl/scenarios/scenario_selector.py +156 -0
  173. edsl/study/ObjectEntry.py +1 -1
  174. edsl/study/SnapShot.py +1 -1
  175. edsl/study/Study.py +5 -12
  176. edsl/surveys/ConstructDAG.py +92 -0
  177. edsl/surveys/EditSurvey.py +221 -0
  178. edsl/surveys/InstructionHandler.py +100 -0
  179. edsl/surveys/MemoryManagement.py +72 -0
  180. edsl/surveys/Rule.py +5 -4
  181. edsl/surveys/RuleCollection.py +25 -27
  182. edsl/surveys/RuleManager.py +172 -0
  183. edsl/surveys/Simulator.py +75 -0
  184. edsl/surveys/Survey.py +270 -791
  185. edsl/surveys/SurveyCSS.py +20 -8
  186. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  187. edsl/surveys/SurveyToApp.py +141 -0
  188. edsl/surveys/__init__.py +4 -2
  189. edsl/surveys/descriptors.py +6 -2
  190. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  191. edsl/surveys/instructions/Instruction.py +4 -13
  192. edsl/surveys/instructions/InstructionCollection.py +11 -6
  193. edsl/templates/error_reporting/interview_details.html +1 -1
  194. edsl/templates/error_reporting/report.html +1 -1
  195. edsl/tools/plotting.py +1 -1
  196. edsl/utilities/PrettyList.py +56 -0
  197. edsl/utilities/is_notebook.py +18 -0
  198. edsl/utilities/is_valid_variable_name.py +11 -0
  199. edsl/utilities/remove_edsl_version.py +24 -0
  200. edsl/utilities/utilities.py +35 -23
  201. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/METADATA +12 -10
  202. edsl-0.1.39.dist-info/RECORD +358 -0
  203. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
  204. edsl/language_models/KeyLookup.py +0 -30
  205. edsl/language_models/registry.py +0 -190
  206. edsl/language_models/unused/ReplicateBase.py +0 -83
  207. edsl/results/ResultsDBMixin.py +0 -238
  208. edsl-0.1.38.dev4.dist-info/RECORD +0 -277
  209. /edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
  210. /edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
  211. /edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
  212. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/LICENSE +0 -0
@@ -1,12 +1,11 @@
1
1
  """Mixin class for exporting results."""
2
2
 
3
- import base64
4
- import csv
5
3
  import io
6
- import html
7
- from typing import Optional
4
+ import warnings
5
+ import textwrap
6
+ from typing import Optional, Tuple, Union, List
8
7
 
9
- from typing import Literal, Optional, Union, List
8
+ from edsl.results.file_exports import CSVExport, ExcelExport, JSONLExport, SQLiteExport
10
9
 
11
10
 
12
11
  class DatasetExportMixin:
@@ -37,7 +36,7 @@ class DatasetExportMixin:
37
36
 
38
37
  >>> from edsl.results import Results
39
38
  >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
40
- ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
39
+ ['model.frequency_penalty', ...]
41
40
 
42
41
  >>> Results.example().relevant_columns(data_type = "flimflam")
43
42
  Traceback (most recent call last):
@@ -72,7 +71,7 @@ class DatasetExportMixin:
72
71
  def num_observations(self):
73
72
  """Return the number of observations in the dataset.
74
73
 
75
- >>> from edsl.results import Results
74
+ >>> from edsl.results.Results import Results
76
75
  >>> Results.example().num_observations()
77
76
  4
78
77
  """
@@ -144,299 +143,148 @@ class DatasetExportMixin:
144
143
  for value in list_of_values:
145
144
  print(f"{key}: {value}")
146
145
 
147
- # def print(
148
- # self,
149
- # pretty_labels: Optional[dict] = None,
150
- # filename: Optional[str] = None,
151
- # format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
152
- # interactive: bool = False,
153
- # split_at_dot: bool = True,
154
- # max_rows=None,
155
- # tee=False,
156
- # iframe=False,
157
- # iframe_height: int = 200,
158
- # iframe_width: int = 600,
159
- # web=False,
160
- # return_string: bool = False,
161
- # ) -> Union[None, str, "Results"]:
162
- # """Print the results in a pretty format.
163
-
164
- # :param pretty_labels: A dictionary of pretty labels for the columns.
165
- # :param filename: The filename to save the results to.
166
- # :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
167
- # :param interactive: Whether to print the results interactively in a Jupyter notebook.
168
- # :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
169
- # :param max_rows: The maximum number of rows to print.
170
- # :param tee: Whether to return the dataset.
171
- # :param iframe: Whether to display the table in an iframe.
172
- # :param iframe_height: The height of the iframe.
173
- # :param iframe_width: The width of the iframe.
174
- # :param web: Whether to display the table in a web browser.
175
- # :param return_string: Whether to return the output as a string instead of printing.
176
-
177
- # :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
178
-
179
- # Example: Print in rich format at the terminal
180
-
181
- # >>> from edsl.results import Results
182
- # >>> r = Results.example()
183
- # >>> r.select('how_feeling').print(format = "rich")
184
- # ┏━━━━━━━━━━━━━━┓
185
- # ┃ answer ┃
186
- # ┃ .how_feeling ┃
187
- # ┡━━━━━━━━━━━━━━┩
188
- # │ OK │
189
- # ├──────────────┤
190
- # │ Great │
191
- # ├──────────────┤
192
- # │ Terrible │
193
- # ├──────────────┤
194
- # │ OK │
195
- # └──────────────┘
196
-
197
- # >>> r = Results.example()
198
- # >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
199
- # ┏━━━━━━━━━━━━━━┓
200
- # ┃ answer ┃
201
- # ┃ .how_feeling ┃
202
- # ┡━━━━━━━━━━━━━━┩
203
- # │ OK │
204
- # ├──────────────┤
205
- # │ Great │
206
- # └──────────────┘
207
- # >>> r2
208
- # Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
209
-
210
- # >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
211
- # ┏━━━━━━━━━━━━━━┓
212
- # ┃ answer ┃
213
- # ┃ .how_feeling ┃
214
- # ┡━━━━━━━━━━━━━━┩
215
- # │ OK │
216
- # ├──────────────┤
217
- # │ Great │
218
- # └──────────────┘
219
-
220
- # >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
221
- # ┏━━━━━━━━━━━━━━━━━━━━┓
222
- # ┃ answer.how_feeling ┃
223
- # ┡━━━━━━━━━━━━━━━━━━━━┩
224
- # │ OK │
225
- # ├────────────────────┤
226
- # │ Great │
227
- # ├────────────────────┤
228
- # │ Terrible │
229
- # ├────────────────────┤
230
- # │ OK │
231
- # └────────────────────┘
232
-
233
- # Example: using the pretty_labels parameter
234
-
235
- # >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
236
- # ┏━━━━━━━━━━━━━━━━━━━━━┓
237
- # ┃ How are you feeling ┃
238
- # ┡━━━━━━━━━━━━━━━━━━━━━┩
239
- # │ OK │
240
- # ├─────────────────────┤
241
- # │ Great │
242
- # ├─────────────────────┤
243
- # │ Terrible │
244
- # ├─────────────────────┤
245
- # │ OK │
246
- # └─────────────────────┘
247
-
248
- # Example: printing in markdown format
249
-
250
- # >>> r.select('how_feeling').print(format='markdown')
251
- # | answer.how_feeling |
252
- # |--|
253
- # | OK |
254
- # | Great |
255
- # | Terrible |
256
- # | OK |
257
- # ...
258
-
259
- # >>> r.select('how_feeling').print(format='latex')
260
- # \\begin{tabular}{l}
261
- # ...
262
- # \\end{tabular}
263
- # <BLANKLINE>
264
- # """
265
- # from IPython.display import HTML, display
266
- # from edsl.utilities.utilities import is_notebook
267
- # import io
268
- # import sys
269
-
270
- # def _determine_format(format):
271
- # if format is None:
272
- # if is_notebook():
273
- # format = "html"
274
- # else:
275
- # format = "rich"
276
- # if format not in ["rich", "html", "markdown", "latex"]:
277
- # raise ValueError(
278
- # "format must be one of 'rich', 'html', 'markdown', or 'latex'."
279
- # )
280
-
281
- # return format
282
-
283
- # format = _determine_format(format)
284
-
285
- # if pretty_labels is None:
286
- # pretty_labels = {}
287
-
288
- # if pretty_labels != {}: # only split at dot if there are no pretty labels
289
- # split_at_dot = False
290
-
291
- # def _create_data():
292
- # for index, entry in enumerate(self):
293
- # key, list_of_values = list(entry.items())[0]
294
- # yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
295
-
296
- # new_data = list(_create_data())
297
-
298
- # # Capture output if return_string is True
299
- # if return_string:
300
- # old_stdout = sys.stdout
301
- # sys.stdout = io.StringIO()
302
-
303
- # output = None
304
-
305
- # if format == "rich":
306
- # from edsl.utilities.interface import print_dataset_with_rich
307
-
308
- # output = print_dataset_with_rich(
309
- # new_data, filename=filename, split_at_dot=split_at_dot
310
- # )
311
- # elif format == "markdown":
312
- # from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
313
-
314
- # output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
315
- # elif format == "latex":
316
- # df = self.to_pandas()
317
- # df.columns = [col.replace("_", " ") for col in df.columns]
318
- # latex_string = df.to_latex(index=False)
319
-
320
- # if filename is not None:
321
- # with open(filename, "w") as f:
322
- # f.write(latex_string)
323
- # else:
324
- # print(latex_string)
325
- # output = latex_string
326
- # elif format == "html":
327
- # from edsl.utilities.interface import print_list_of_dicts_as_html_table
328
-
329
- # html_source = print_list_of_dicts_as_html_table(
330
- # new_data, interactive=interactive
331
- # )
332
-
333
- # if iframe:
334
- # iframe = f""""
335
- # <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
336
- # """
337
- # display(HTML(iframe))
338
- # elif is_notebook():
339
- # display(HTML(html_source))
340
- # else:
341
- # from edsl.utilities.interface import view_html
342
-
343
- # view_html(html_source)
344
-
345
- # output = html_source
346
-
347
- # # Restore stdout and get captured output if return_string is True
348
- # if return_string:
349
- # captured_output = sys.stdout.getvalue()
350
- # sys.stdout = old_stdout
351
- # return captured_output or output
352
-
353
- # if tee:
354
- # return self
355
-
356
- # return None
146
+ def _get_tabular_data(
147
+ self,
148
+ remove_prefix: bool = False,
149
+ pretty_labels: Optional[dict] = None,
150
+ ) -> Tuple[List[str], List[List]]:
151
+ """Internal method to get tabular data in a standard format.
152
+
153
+ Args:
154
+ remove_prefix: Whether to remove the prefix from column names
155
+ pretty_labels: Dictionary mapping original column names to pretty labels
156
+
157
+ Returns:
158
+ Tuple containing (header_row, data_rows)
159
+ """
160
+ if pretty_labels is None:
161
+ pretty_labels = {}
162
+
163
+ return self._make_tabular(
164
+ remove_prefix=remove_prefix, pretty_labels=pretty_labels
165
+ )
166
+
167
+ def to_jsonl(self, filename: Optional[str] = None) -> Optional["FileStore"]:
168
+ """Export the results to a FileStore instance containing JSONL data."""
169
+ exporter = JSONLExport(data=self, filename=filename)
170
+ return exporter.export()
171
+
172
+ def to_sqlite(
173
+ self,
174
+ filename: Optional[str] = None,
175
+ remove_prefix: bool = False,
176
+ pretty_labels: Optional[dict] = None,
177
+ table_name: str = "results",
178
+ if_exists: str = "replace",
179
+ ) -> Optional["FileStore"]:
180
+ """Export the results to a SQLite database file."""
181
+ exporter = SQLiteExport(
182
+ data=self,
183
+ filename=filename,
184
+ remove_prefix=remove_prefix,
185
+ pretty_labels=pretty_labels,
186
+ table_name=table_name,
187
+ if_exists=if_exists,
188
+ )
189
+ return exporter.export()
357
190
 
358
191
  def to_csv(
359
192
  self,
360
193
  filename: Optional[str] = None,
361
194
  remove_prefix: bool = False,
362
- download_link: bool = False,
363
195
  pretty_labels: Optional[dict] = None,
364
- ):
365
- """Export the results to a CSV file.
196
+ ) -> Optional["FileStore"]:
197
+ """Export the results to a FileStore instance containing CSV data."""
198
+ exporter = CSVExport(
199
+ data=self,
200
+ filename=filename,
201
+ remove_prefix=remove_prefix,
202
+ pretty_labels=pretty_labels,
203
+ )
204
+ return exporter.export()
366
205
 
367
- :param filename: The filename to save the CSV file to.
368
- :param remove_prefix: Whether to remove the prefix from the column names.
369
- :param download_link: Whether to display a download link in a Jupyter notebook.
206
+ def to_excel(
207
+ self,
208
+ filename: Optional[str] = None,
209
+ remove_prefix: bool = False,
210
+ pretty_labels: Optional[dict] = None,
211
+ sheet_name: Optional[str] = None,
212
+ ) -> Optional["FileStore"]:
213
+ """Export the results to a FileStore instance containing Excel data."""
214
+ exporter = ExcelExport(
215
+ data=self,
216
+ filename=filename,
217
+ remove_prefix=remove_prefix,
218
+ pretty_labels=pretty_labels,
219
+ sheet_name=sheet_name,
220
+ )
221
+ return exporter.export()
370
222
 
371
- Example:
223
+ def _db(self, remove_prefix: bool = True):
224
+ """Create a SQLite database in memory and return the connection.
372
225
 
373
- >>> from edsl.results import Results
374
- >>> r = Results.example()
375
- >>> r.select('how_feeling').to_csv()
376
- 'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
377
-
378
- >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
379
- 'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
380
-
381
- >>> import tempfile
382
- >>> filename = tempfile.NamedTemporaryFile(delete=False).name
383
- >>> r.select('how_feeling').to_csv(filename = filename)
384
- >>> import os
385
- >>> import csv
386
- >>> with open(filename, newline='') as f:
387
- ... reader = csv.reader(f)
388
- ... for row in reader:
389
- ... print(row)
390
- ['answer.how_feeling']
391
- ['OK']
392
- ['Great']
393
- ['Terrible']
394
- ['OK']
226
+ Args:
227
+ shape: The shape of the data in the database (wide or long)
228
+ remove_prefix: Whether to remove the prefix from the column names
395
229
 
230
+ Returns:
231
+ A database connection
396
232
  """
397
- if pretty_labels is None:
398
- pretty_labels = {}
399
- header, rows = self._make_tabular(
400
- remove_prefix=remove_prefix, pretty_labels=pretty_labels
401
- )
233
+ from sqlalchemy import create_engine
402
234
 
403
- if filename is not None:
404
- with open(filename, "w") as f:
405
- writer = csv.writer(f)
406
- writer.writerow(header)
407
- writer.writerows(rows)
408
- # print(f"Saved to {filename}")
235
+ engine = create_engine("sqlite:///:memory:")
236
+ if remove_prefix:
237
+ df = self.remove_prefix().to_pandas(lists_as_strings=True)
409
238
  else:
410
- output = io.StringIO()
411
- writer = csv.writer(output)
412
- writer.writerow(header)
413
- writer.writerows(rows)
414
-
415
- if download_link:
416
- from IPython.display import HTML, display
417
-
418
- csv_file = output.getvalue()
419
- b64 = base64.b64encode(csv_file.encode()).decode()
420
- download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
421
- display(HTML(download_link))
422
- else:
423
- return output.getvalue()
424
-
425
- def download_link(self, pretty_labels: Optional[dict] = None) -> str:
426
- """Return a download link for the results.
239
+ df = self.to_pandas(lists_as_strings=True)
240
+ df.to_sql(
241
+ "self",
242
+ engine,
243
+ index=False,
244
+ if_exists="replace",
245
+ )
246
+ return engine.connect()
427
247
 
428
- :param pretty_labels: A dictionary of pretty labels for the columns.
248
+ def sql(
249
+ self,
250
+ query: str,
251
+ transpose: bool = None,
252
+ transpose_by: str = None,
253
+ remove_prefix: bool = True,
254
+ ) -> Union["pd.DataFrame", str]:
255
+ """Execute a SQL query and return the results as a DataFrame.
256
+
257
+ Args:
258
+ query: The SQL query to execute
259
+ shape: The shape of the data in the database (wide or long)
260
+ remove_prefix: Whether to remove the prefix from the column names
261
+ transpose: Whether to transpose the DataFrame
262
+ transpose_by: The column to use as the index when transposing
263
+ csv: Whether to return the DataFrame as a CSV string
264
+ to_list: Whether to return the results as a list
265
+ to_latex: Whether to return the results as LaTeX
266
+ filename: Optional filename to save the results to
267
+
268
+ Returns:
269
+ DataFrame, CSV string, list, or LaTeX string depending on parameters
429
270
 
430
- >>> from edsl.results import Results
431
- >>> r = Results.example()
432
- >>> r.select('how_feeling').download_link()
433
- '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
434
271
  """
435
- import base64
272
+ import pandas as pd
273
+
274
+ conn = self._db(remove_prefix=remove_prefix)
275
+ df = pd.read_sql_query(query, conn)
276
+
277
+ # Transpose the DataFrame if transpose is True
278
+ if transpose or transpose_by:
279
+ df = pd.DataFrame(df)
280
+ if transpose_by:
281
+ df = df.set_index(transpose_by)
282
+ else:
283
+ df = df.set_index(df.columns[0])
284
+ df = df.transpose()
285
+ from edsl.results.Dataset import Dataset
436
286
 
437
- csv_string = self.to_csv(pretty_labels=pretty_labels)
438
- b64 = base64.b64encode(csv_string.encode()).decode()
439
- return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
287
+ return Dataset.from_pandas_dataframe(df)
440
288
 
441
289
  def to_pandas(
442
290
  self, remove_prefix: bool = False, lists_as_strings=False
@@ -447,19 +295,6 @@ class DatasetExportMixin:
447
295
 
448
296
  """
449
297
  return self._to_pandas_strings(remove_prefix)
450
- # if lists_as_strings:
451
- # return self._to_pandas_strings(remove_prefix=remove_prefix)
452
-
453
- # import pandas as pd
454
-
455
- # df = pd.DataFrame(self.data)
456
-
457
- # if remove_prefix:
458
- # # Optionally remove prefixes from column names
459
- # df.columns = [col.split(".")[-1] for col in df.columns]
460
-
461
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
462
- # return df_sorted
463
298
 
464
299
  def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
465
300
  """Convert the results to a pandas DataFrame.
@@ -478,12 +313,32 @@ class DatasetExportMixin:
478
313
 
479
314
  import pandas as pd
480
315
 
481
- csv_string = self.to_csv(remove_prefix=remove_prefix)
316
+ csv_string = self.to_csv(remove_prefix=remove_prefix).text
482
317
  csv_buffer = io.StringIO(csv_string)
483
318
  df = pd.read_csv(csv_buffer)
484
319
  # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
485
320
  return df
486
321
 
322
+ def to_polars(
323
+ self, remove_prefix: bool = False, lists_as_strings=False
324
+ ) -> "pl.DataFrame":
325
+ """Convert the results to a Polars DataFrame.
326
+
327
+ :param remove_prefix: Whether to remove the prefix from the column names.
328
+ """
329
+ return self._to_polars_strings(remove_prefix)
330
+
331
+ def _to_polars_strings(self, remove_prefix: bool = False) -> "pl.DataFrame":
332
+ """Convert the results to a Polars DataFrame.
333
+
334
+ :param remove_prefix: Whether to remove the prefix from the column names.
335
+ """
336
+ import polars as pl
337
+
338
+ csv_string = self.to_csv(remove_prefix=remove_prefix).text
339
+ df = pl.read_csv(io.StringIO(csv_string))
340
+ return df
341
+
487
342
  def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
488
343
  """Convert the results to a list of dictionaries, one per scenario.
489
344
 
@@ -494,14 +349,14 @@ class DatasetExportMixin:
494
349
  >>> r.select('how_feeling').to_scenario_list()
495
350
  ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
496
351
  """
497
- from edsl import ScenarioList, Scenario
352
+ from edsl.scenarios.ScenarioList import ScenarioList
353
+ from edsl.scenarios.Scenario import Scenario
498
354
 
499
355
  list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
500
356
  scenarios = []
501
357
  for d in list_of_dicts:
502
358
  scenarios.append(Scenario(d))
503
359
  return ScenarioList(scenarios)
504
- # return ScenarioList([Scenario(d) for d in list_of_dicts])
505
360
 
506
361
  def to_agent_list(self, remove_prefix: bool = True):
507
362
  """Convert the results to a list of dictionaries, one per agent.
@@ -513,7 +368,8 @@ class DatasetExportMixin:
513
368
  >>> r.select('how_feeling').to_agent_list()
514
369
  AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
515
370
  """
516
- from edsl import AgentList, Agent
371
+ from edsl.agents import Agent
372
+ from edsl.agents.AgentList import AgentList
517
373
 
518
374
  list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
519
375
  agents = []
@@ -521,6 +377,11 @@ class DatasetExportMixin:
521
377
  if "name" in d:
522
378
  d["agent_name"] = d.pop("name")
523
379
  agents.append(Agent(d, name=d["agent_name"]))
380
+ if "agent_parameters" in d:
381
+ agent_parameters = d.pop("agent_parameters")
382
+ agent_name = agent_parameters.get("name", None)
383
+ instruction = agent_parameters.get("instruction", None)
384
+ agents.append(Agent(d, name=agent_name, instruction=instruction))
524
385
  else:
525
386
  agents.append(Agent(d))
526
387
  return AgentList(agents)
@@ -608,7 +469,9 @@ class DatasetExportMixin:
608
469
  new_list.append(item)
609
470
  list_to_return = new_list
610
471
 
611
- return list_to_return
472
+ from edsl.utilities.PrettyList import PrettyList
473
+
474
+ return PrettyList(list_to_return)
612
475
 
613
476
  def html(
614
477
  self,
@@ -658,8 +521,10 @@ class DatasetExportMixin:
658
521
  >>> r = Results.example()
659
522
  >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
660
523
  {'OK': 2, 'Great': 1, 'Terrible': 1}
661
- >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
662
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
524
+ >>> from edsl.results.Dataset import Dataset
525
+ >>> expected = Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
526
+ >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset") == expected
527
+ True
663
528
  """
664
529
  from collections import Counter
665
530
 
@@ -691,8 +556,6 @@ class DatasetExportMixin:
691
556
  if top_n is not None:
692
557
  sorted_tally = dict(list(sorted_tally.items())[:top_n])
693
558
 
694
- import warnings
695
- import textwrap
696
559
  from edsl.results.Dataset import Dataset
697
560
 
698
561
  if output == "dict":
@@ -1,8 +1,4 @@
1
1
  from typing import Dict, List, Any, Optional, List
2
- from docx import Document
3
- from docx.shared import Inches, Pt
4
- from docx.enum.text import WD_ALIGN_PARAGRAPH
5
- from docx.enum.style import WD_STYLE_TYPE
6
2
 
7
3
 
8
4
  def is_hashable(v):
@@ -98,6 +94,11 @@ class Tree:
98
94
  if filename is None:
99
95
  filename = "tree_structure.docx"
100
96
 
97
+ from docx import Document
98
+ from docx.shared import Inches, Pt
99
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
100
+ from docx.enum.style import WD_STYLE_TYPE
101
+
101
102
  doc = Document()
102
103
 
103
104
  # Create styles for headings
@@ -115,10 +116,29 @@ class Tree:
115
116
  body_style.font.size = Pt(11)
116
117
 
117
118
  self._add_to_docx(doc, self.root, 0)
118
- doc.save(filename)
119
- from edsl.utilities.utilities import file_notice
120
-
121
- file_notice(filename)
119
+ import base64
120
+ from io import BytesIO
121
+ import base64
122
+
123
+ # Save document to bytes buffer
124
+ doc_buffer = BytesIO()
125
+ doc.save(doc_buffer)
126
+ doc_buffer.seek(0)
127
+
128
+ base64_string = base64.b64encode(doc_buffer.getvalue()).decode("utf-8")
129
+ from edsl.scenarios.FileStore import FileStore
130
+
131
+ # Create and return FileStore instance
132
+ return FileStore(
133
+ path="tree_structure.docx", # Default name
134
+ mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
135
+ binary=True,
136
+ suffix="docx",
137
+ base64_string=base64_string,
138
+ )
139
+ # doc.save(filename)
140
+ # from edsl.utilities.utilities import file_notice
141
+ # file_notice(filename)
122
142
 
123
143
  def _repr_html_(self):
124
144
  """Returns an interactive HTML representation of the tree with collapsible sections."""