edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. edsl/Base.py +169 -116
  2. edsl/__init__.py +14 -6
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +358 -146
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +88 -36
  7. edsl/agents/InvigilatorBase.py +59 -70
  8. edsl/agents/PromptConstructor.py +117 -219
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionOptionProcessor.py +172 -0
  11. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  12. edsl/agents/__init__.py +0 -1
  13. edsl/agents/prompt_helpers.py +3 -3
  14. edsl/config.py +22 -2
  15. edsl/conversation/car_buying.py +2 -1
  16. edsl/coop/CoopFunctionsMixin.py +15 -0
  17. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  18. edsl/coop/PriceFetcher.py +1 -1
  19. edsl/coop/coop.py +104 -42
  20. edsl/coop/utils.py +14 -14
  21. edsl/data/Cache.py +21 -14
  22. edsl/data/CacheEntry.py +12 -15
  23. edsl/data/CacheHandler.py +33 -12
  24. edsl/data/__init__.py +4 -3
  25. edsl/data_transfer_models.py +2 -1
  26. edsl/enums.py +20 -0
  27. edsl/exceptions/__init__.py +50 -50
  28. edsl/exceptions/agents.py +12 -0
  29. edsl/exceptions/inference_services.py +5 -0
  30. edsl/exceptions/questions.py +24 -6
  31. edsl/exceptions/scenarios.py +7 -0
  32. edsl/inference_services/AnthropicService.py +0 -3
  33. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  34. edsl/inference_services/AvailableModelFetcher.py +209 -0
  35. edsl/inference_services/AwsBedrock.py +0 -2
  36. edsl/inference_services/AzureAI.py +0 -2
  37. edsl/inference_services/GoogleService.py +2 -11
  38. edsl/inference_services/InferenceServiceABC.py +18 -85
  39. edsl/inference_services/InferenceServicesCollection.py +105 -80
  40. edsl/inference_services/MistralAIService.py +0 -3
  41. edsl/inference_services/OpenAIService.py +1 -4
  42. edsl/inference_services/PerplexityService.py +0 -3
  43. edsl/inference_services/ServiceAvailability.py +135 -0
  44. edsl/inference_services/TestService.py +11 -8
  45. edsl/inference_services/data_structures.py +62 -0
  46. edsl/jobs/AnswerQuestionFunctionConstructor.py +188 -0
  47. edsl/jobs/Answers.py +1 -14
  48. edsl/jobs/FetchInvigilator.py +40 -0
  49. edsl/jobs/InterviewTaskManager.py +98 -0
  50. edsl/jobs/InterviewsConstructor.py +48 -0
  51. edsl/jobs/Jobs.py +102 -243
  52. edsl/jobs/JobsChecks.py +35 -10
  53. edsl/jobs/JobsComponentConstructor.py +189 -0
  54. edsl/jobs/JobsPrompts.py +5 -3
  55. edsl/jobs/JobsRemoteInferenceHandler.py +128 -80
  56. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  57. edsl/jobs/RequestTokenEstimator.py +30 -0
  58. edsl/jobs/buckets/BucketCollection.py +44 -3
  59. edsl/jobs/buckets/TokenBucket.py +53 -21
  60. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  61. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  62. edsl/jobs/decorators.py +35 -0
  63. edsl/jobs/interviews/Interview.py +77 -380
  64. edsl/jobs/jobs_status_enums.py +9 -0
  65. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  66. edsl/jobs/runners/JobsRunnerAsyncio.py +4 -49
  67. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  68. edsl/jobs/tasks/TaskHistory.py +14 -15
  69. edsl/jobs/tasks/task_status_enum.py +0 -2
  70. edsl/language_models/ComputeCost.py +63 -0
  71. edsl/language_models/LanguageModel.py +137 -234
  72. edsl/language_models/ModelList.py +11 -13
  73. edsl/language_models/PriceManager.py +127 -0
  74. edsl/language_models/RawResponseHandler.py +106 -0
  75. edsl/language_models/ServiceDataSources.py +0 -0
  76. edsl/language_models/__init__.py +0 -1
  77. edsl/language_models/key_management/KeyLookup.py +63 -0
  78. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  79. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  80. edsl/language_models/key_management/__init__.py +0 -0
  81. edsl/language_models/key_management/models.py +131 -0
  82. edsl/language_models/registry.py +49 -59
  83. edsl/language_models/repair.py +2 -2
  84. edsl/language_models/utilities.py +5 -4
  85. edsl/notebooks/Notebook.py +19 -14
  86. edsl/notebooks/NotebookToLaTeX.py +142 -0
  87. edsl/prompts/Prompt.py +29 -39
  88. edsl/questions/AnswerValidatorMixin.py +47 -2
  89. edsl/questions/ExceptionExplainer.py +77 -0
  90. edsl/questions/HTMLQuestion.py +103 -0
  91. edsl/questions/LoopProcessor.py +149 -0
  92. edsl/questions/QuestionBase.py +37 -192
  93. edsl/questions/QuestionBaseGenMixin.py +52 -48
  94. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  95. edsl/questions/QuestionCheckBox.py +1 -1
  96. edsl/questions/QuestionExtract.py +1 -1
  97. edsl/questions/QuestionFreeText.py +1 -2
  98. edsl/questions/QuestionList.py +3 -5
  99. edsl/questions/QuestionMatrix.py +265 -0
  100. edsl/questions/QuestionMultipleChoice.py +66 -22
  101. edsl/questions/QuestionNumerical.py +1 -3
  102. edsl/questions/QuestionRank.py +6 -16
  103. edsl/questions/ResponseValidatorABC.py +37 -11
  104. edsl/questions/ResponseValidatorFactory.py +28 -0
  105. edsl/questions/SimpleAskMixin.py +4 -3
  106. edsl/questions/__init__.py +1 -0
  107. edsl/questions/derived/QuestionLinearScale.py +6 -3
  108. edsl/questions/derived/QuestionTopK.py +1 -1
  109. edsl/questions/descriptors.py +17 -3
  110. edsl/questions/question_registry.py +1 -1
  111. edsl/questions/templates/matrix/__init__.py +1 -0
  112. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  113. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  114. edsl/results/CSSParameterizer.py +1 -1
  115. edsl/results/Dataset.py +170 -7
  116. edsl/results/DatasetExportMixin.py +224 -302
  117. edsl/results/DatasetTree.py +28 -8
  118. edsl/results/MarkdownToDocx.py +122 -0
  119. edsl/results/MarkdownToPDF.py +111 -0
  120. edsl/results/Result.py +192 -206
  121. edsl/results/Results.py +120 -113
  122. edsl/results/ResultsExportMixin.py +2 -0
  123. edsl/results/Selector.py +23 -13
  124. edsl/results/TableDisplay.py +98 -171
  125. edsl/results/TextEditor.py +50 -0
  126. edsl/results/__init__.py +1 -1
  127. edsl/results/smart_objects.py +96 -0
  128. edsl/results/table_data_class.py +12 -0
  129. edsl/results/table_renderers.py +118 -0
  130. edsl/scenarios/ConstructDownloadLink.py +109 -0
  131. edsl/scenarios/DirectoryScanner.py +96 -0
  132. edsl/scenarios/DocumentChunker.py +102 -0
  133. edsl/scenarios/DocxScenario.py +16 -0
  134. edsl/scenarios/FileStore.py +118 -239
  135. edsl/scenarios/PdfExtractor.py +40 -0
  136. edsl/scenarios/Scenario.py +90 -193
  137. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  138. edsl/scenarios/ScenarioJoin.py +10 -6
  139. edsl/scenarios/ScenarioList.py +383 -240
  140. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  141. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  142. edsl/scenarios/ScenarioSelector.py +156 -0
  143. edsl/scenarios/__init__.py +1 -2
  144. edsl/scenarios/file_methods.py +85 -0
  145. edsl/scenarios/handlers/__init__.py +13 -0
  146. edsl/scenarios/handlers/csv.py +38 -0
  147. edsl/scenarios/handlers/docx.py +76 -0
  148. edsl/scenarios/handlers/html.py +37 -0
  149. edsl/scenarios/handlers/json.py +111 -0
  150. edsl/scenarios/handlers/latex.py +5 -0
  151. edsl/scenarios/handlers/md.py +51 -0
  152. edsl/scenarios/handlers/pdf.py +68 -0
  153. edsl/scenarios/handlers/png.py +39 -0
  154. edsl/scenarios/handlers/pptx.py +105 -0
  155. edsl/scenarios/handlers/py.py +294 -0
  156. edsl/scenarios/handlers/sql.py +313 -0
  157. edsl/scenarios/handlers/sqlite.py +149 -0
  158. edsl/scenarios/handlers/txt.py +33 -0
  159. edsl/study/ObjectEntry.py +1 -1
  160. edsl/study/SnapShot.py +1 -1
  161. edsl/study/Study.py +5 -12
  162. edsl/surveys/ConstructDAG.py +92 -0
  163. edsl/surveys/EditSurvey.py +221 -0
  164. edsl/surveys/InstructionHandler.py +100 -0
  165. edsl/surveys/MemoryManagement.py +72 -0
  166. edsl/surveys/Rule.py +5 -4
  167. edsl/surveys/RuleCollection.py +25 -27
  168. edsl/surveys/RuleManager.py +172 -0
  169. edsl/surveys/Simulator.py +75 -0
  170. edsl/surveys/Survey.py +199 -771
  171. edsl/surveys/SurveyCSS.py +20 -8
  172. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  173. edsl/surveys/SurveyToApp.py +141 -0
  174. edsl/surveys/__init__.py +4 -2
  175. edsl/surveys/descriptors.py +6 -2
  176. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  177. edsl/surveys/instructions/Instruction.py +4 -13
  178. edsl/surveys/instructions/InstructionCollection.py +11 -6
  179. edsl/templates/error_reporting/interview_details.html +1 -1
  180. edsl/templates/error_reporting/report.html +1 -1
  181. edsl/tools/plotting.py +1 -1
  182. edsl/utilities/PrettyList.py +56 -0
  183. edsl/utilities/is_notebook.py +18 -0
  184. edsl/utilities/is_valid_variable_name.py +11 -0
  185. edsl/utilities/remove_edsl_version.py +24 -0
  186. edsl/utilities/utilities.py +35 -23
  187. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +12 -10
  188. edsl-0.1.39.dev2.dist-info/RECORD +352 -0
  189. edsl/language_models/KeyLookup.py +0 -30
  190. edsl/language_models/unused/ReplicateBase.py +0 -83
  191. edsl/results/ResultsDBMixin.py +0 -238
  192. edsl-0.1.39.dev1.dist-info/RECORD +0 -277
  193. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
  194. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +0 -0
edsl/results/DatasetExportMixin.py +224 -302
@@ -3,10 +3,9 @@
3
3
  import base64
4
4
  import csv
5
5
  import io
6
- import html
7
- from typing import Optional
8
-
9
- from typing import Literal, Optional, Union, List
6
+ import warnings
7
+ import textwrap
8
+ from typing import Optional, Tuple, Union, List
10
9
 
11
10
 
12
11
  class DatasetExportMixin:
@@ -37,7 +36,7 @@ class DatasetExportMixin:
37
36
 
38
37
  >>> from edsl.results import Results
39
38
  >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
40
- ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
39
+ ['model.frequency_penalty', ...]
41
40
 
42
41
  >>> Results.example().relevant_columns(data_type = "flimflam")
43
42
  Traceback (most recent call last):
@@ -72,7 +71,7 @@ class DatasetExportMixin:
72
71
  def num_observations(self):
73
72
  """Return the number of observations in the dataset.
74
73
 
75
- >>> from edsl.results import Results
74
+ >>> from edsl.results.Results import Results
76
75
  >>> Results.example().num_observations()
77
76
  4
78
77
  """
@@ -144,299 +143,227 @@ class DatasetExportMixin:
144
143
  for value in list_of_values:
145
144
  print(f"{key}: {value}")
146
145
 
147
- # def print(
148
- # self,
149
- # pretty_labels: Optional[dict] = None,
150
- # filename: Optional[str] = None,
151
- # format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
152
- # interactive: bool = False,
153
- # split_at_dot: bool = True,
154
- # max_rows=None,
155
- # tee=False,
156
- # iframe=False,
157
- # iframe_height: int = 200,
158
- # iframe_width: int = 600,
159
- # web=False,
160
- # return_string: bool = False,
161
- # ) -> Union[None, str, "Results"]:
162
- # """Print the results in a pretty format.
163
-
164
- # :param pretty_labels: A dictionary of pretty labels for the columns.
165
- # :param filename: The filename to save the results to.
166
- # :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
167
- # :param interactive: Whether to print the results interactively in a Jupyter notebook.
168
- # :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
169
- # :param max_rows: The maximum number of rows to print.
170
- # :param tee: Whether to return the dataset.
171
- # :param iframe: Whether to display the table in an iframe.
172
- # :param iframe_height: The height of the iframe.
173
- # :param iframe_width: The width of the iframe.
174
- # :param web: Whether to display the table in a web browser.
175
- # :param return_string: Whether to return the output as a string instead of printing.
176
-
177
- # :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
178
-
179
- # Example: Print in rich format at the terminal
180
-
181
- # >>> from edsl.results import Results
182
- # >>> r = Results.example()
183
- # >>> r.select('how_feeling').print(format = "rich")
184
- # ┏━━━━━━━━━━━━━━┓
185
- # ┃ answer ┃
186
- # .how_feeling
187
- # ┡━━━━━━━━━━━━━━┩
188
- # │ OK │
189
- # ├──────────────┤
190
- # │ Great │
191
- # ├──────────────┤
192
- # │ Terrible │
193
- # ├──────────────┤
194
- # │ OK │
195
- # └──────────────┘
196
-
197
- # >>> r = Results.example()
198
- # >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
199
- # ┏━━━━━━━━━━━━━━┓
200
- # ┃ answer ┃
201
- # ┃ .how_feeling ┃
202
- # ┡━━━━━━━━━━━━━━┩
203
- # │ OK │
204
- # ├──────────────┤
205
- # │ Great │
206
- # └──────────────┘
207
- # >>> r2
208
- # Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
209
-
210
- # >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
211
- # ┏━━━━━━━━━━━━━━┓
212
- # ┃ answer ┃
213
- # ┃ .how_feeling ┃
214
- # ┡━━━━━━━━━━━━━━┩
215
- # │ OK │
216
- # ├──────────────┤
217
- # │ Great │
218
- # └──────────────┘
219
-
220
- # >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
221
- # ┏━━━━━━━━━━━━━━━━━━━━┓
222
- # ┃ answer.how_feeling ┃
223
- # ┡━━━━━━━━━━━━━━━━━━━━┩
224
- # │ OK │
225
- # ├────────────────────┤
226
- # │ Great │
227
- # ├────────────────────┤
228
- # │ Terrible │
229
- # ├────────────────────┤
230
- # │ OK │
231
- # └────────────────────┘
232
-
233
- # Example: using the pretty_labels parameter
234
-
235
- # >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
236
- # ┏━━━━━━━━━━━━━━━━━━━━━┓
237
- # ┃ How are you feeling ┃
238
- # ┡━━━━━━━━━━━━━━━━━━━━━┩
239
- # │ OK │
240
- # ├─────────────────────┤
241
- # │ Great │
242
- # ├─────────────────────┤
243
- # │ Terrible │
244
- # ├─────────────────────┤
245
- # │ OK │
246
- # └─────────────────────┘
247
-
248
- # Example: printing in markdown format
249
-
250
- # >>> r.select('how_feeling').print(format='markdown')
251
- # | answer.how_feeling |
252
- # |--|
253
- # | OK |
254
- # | Great |
255
- # | Terrible |
256
- # | OK |
257
- # ...
258
-
259
- # >>> r.select('how_feeling').print(format='latex')
260
- # \\begin{tabular}{l}
261
- # ...
262
- # \\end{tabular}
263
- # <BLANKLINE>
264
- # """
265
- # from IPython.display import HTML, display
266
- # from edsl.utilities.utilities import is_notebook
267
- # import io
268
- # import sys
269
-
270
- # def _determine_format(format):
271
- # if format is None:
272
- # if is_notebook():
273
- # format = "html"
274
- # else:
275
- # format = "rich"
276
- # if format not in ["rich", "html", "markdown", "latex"]:
277
- # raise ValueError(
278
- # "format must be one of 'rich', 'html', 'markdown', or 'latex'."
279
- # )
280
-
281
- # return format
282
-
283
- # format = _determine_format(format)
284
-
285
- # if pretty_labels is None:
286
- # pretty_labels = {}
287
-
288
- # if pretty_labels != {}: # only split at dot if there are no pretty labels
289
- # split_at_dot = False
290
-
291
- # def _create_data():
292
- # for index, entry in enumerate(self):
293
- # key, list_of_values = list(entry.items())[0]
294
- # yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
295
-
296
- # new_data = list(_create_data())
297
-
298
- # # Capture output if return_string is True
299
- # if return_string:
300
- # old_stdout = sys.stdout
301
- # sys.stdout = io.StringIO()
302
-
303
- # output = None
304
-
305
- # if format == "rich":
306
- # from edsl.utilities.interface import print_dataset_with_rich
307
-
308
- # output = print_dataset_with_rich(
309
- # new_data, filename=filename, split_at_dot=split_at_dot
310
- # )
311
- # elif format == "markdown":
312
- # from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
313
-
314
- # output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
315
- # elif format == "latex":
316
- # df = self.to_pandas()
317
- # df.columns = [col.replace("_", " ") for col in df.columns]
318
- # latex_string = df.to_latex(index=False)
319
-
320
- # if filename is not None:
321
- # with open(filename, "w") as f:
322
- # f.write(latex_string)
323
- # else:
324
- # print(latex_string)
325
- # output = latex_string
326
- # elif format == "html":
327
- # from edsl.utilities.interface import print_list_of_dicts_as_html_table
328
-
329
- # html_source = print_list_of_dicts_as_html_table(
330
- # new_data, interactive=interactive
331
- # )
332
-
333
- # if iframe:
334
- # iframe = f""""
335
- # <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
336
- # """
337
- # display(HTML(iframe))
338
- # elif is_notebook():
339
- # display(HTML(html_source))
340
- # else:
341
- # from edsl.utilities.interface import view_html
342
-
343
- # view_html(html_source)
344
-
345
- # output = html_source
346
-
347
- # # Restore stdout and get captured output if return_string is True
348
- # if return_string:
349
- # captured_output = sys.stdout.getvalue()
350
- # sys.stdout = old_stdout
351
- # return captured_output or output
352
-
353
- # if tee:
354
- # return self
355
-
356
- # return None
146
+ def _get_tabular_data(
147
+ self,
148
+ remove_prefix: bool = False,
149
+ pretty_labels: Optional[dict] = None,
150
+ ) -> Tuple[List[str], List[List]]:
151
+ """Internal method to get tabular data in a standard format.
152
+
153
+ Args:
154
+ remove_prefix: Whether to remove the prefix from column names
155
+ pretty_labels: Dictionary mapping original column names to pretty labels
156
+
157
+ Returns:
158
+ Tuple containing (header_row, data_rows)
159
+ """
160
+ if pretty_labels is None:
161
+ pretty_labels = {}
162
+
163
+ return self._make_tabular(
164
+ remove_prefix=remove_prefix, pretty_labels=pretty_labels
165
+ )
166
+
167
+ def to_jsonl(self, filename: Optional[str] = None) -> "FileStore":
168
+ """Export the results to a FileStore instance containing JSONL data.
169
+
170
+ Args:
171
+ filename: Optional filename for the JSONL file (defaults to "results.jsonl")
172
+
173
+ Returns:
174
+ FileStore: Instance containing the JSONL data
175
+ """
176
+ if filename is None:
177
+ filename = "results.jsonl"
178
+
179
+ # Write to string buffer
180
+ output = io.StringIO()
181
+ for entry in self:
182
+ key, values = list(entry.items())[0]
183
+ output.write(f'{{"{key}": {values}}}\n')
184
+
185
+ # Get the CSV string and encode to base64
186
+ jsonl_string = output.getvalue()
187
+ base64_string = base64.b64encode(jsonl_string.encode()).decode()
188
+ from edsl.scenarios.FileStore import FileStore
189
+
190
+ return FileStore(
191
+ path=filename,
192
+ mime_type="application/jsonl",
193
+ binary=False,
194
+ suffix="jsonl",
195
+ base64_string=base64_string,
196
+ )
357
197
 
358
198
  def to_csv(
359
199
  self,
360
200
  filename: Optional[str] = None,
361
201
  remove_prefix: bool = False,
362
- download_link: bool = False,
363
202
  pretty_labels: Optional[dict] = None,
364
- ):
365
- """Export the results to a CSV file.
203
+ ) -> "FileStore":
204
+ """Export the results to a FileStore instance containing CSV data.
366
205
 
367
- :param filename: The filename to save the CSV file to.
368
- :param remove_prefix: Whether to remove the prefix from the column names.
369
- :param download_link: Whether to display a download link in a Jupyter notebook.
206
+ Args:
207
+ filename: Optional filename for the CSV (defaults to "results.csv")
208
+ remove_prefix: Whether to remove the prefix from column names
209
+ pretty_labels: Dictionary mapping original column names to pretty labels
210
+
211
+ Returns:
212
+ FileStore: Instance containing the CSV data
213
+ """
214
+ if filename is None:
215
+ filename = "results.csv"
370
216
 
371
- Example:
217
+ # Get the tabular data
218
+ header, rows = self._get_tabular_data(
219
+ remove_prefix=remove_prefix, pretty_labels=pretty_labels
220
+ )
372
221
 
373
- >>> from edsl.results import Results
374
- >>> r = Results.example()
375
- >>> r.select('how_feeling').to_csv()
376
- 'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
377
-
378
- >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
379
- 'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
380
-
381
- >>> import tempfile
382
- >>> filename = tempfile.NamedTemporaryFile(delete=False).name
383
- >>> r.select('how_feeling').to_csv(filename = filename)
384
- >>> import os
385
- >>> import csv
386
- >>> with open(filename, newline='') as f:
387
- ... reader = csv.reader(f)
388
- ... for row in reader:
389
- ... print(row)
390
- ['answer.how_feeling']
391
- ['OK']
392
- ['Great']
393
- ['Terrible']
394
- ['OK']
222
+ # Write to string buffer
223
+ output = io.StringIO()
224
+ writer = csv.writer(output)
225
+ writer.writerow(header)
226
+ writer.writerows(rows)
227
+
228
+ # Get the CSV string and encode to base64
229
+ csv_string = output.getvalue()
230
+ base64_string = base64.b64encode(csv_string.encode()).decode()
231
+ from edsl.scenarios.FileStore import FileStore
232
+
233
+ return FileStore(
234
+ path=filename,
235
+ mime_type="text/csv",
236
+ binary=False,
237
+ suffix="csv",
238
+ base64_string=base64_string,
239
+ )
395
240
 
241
+ def to_excel(
242
+ self,
243
+ filename: Optional[str] = None,
244
+ remove_prefix: bool = False,
245
+ pretty_labels: Optional[dict] = None,
246
+ sheet_name: Optional[str] = None,
247
+ ) -> "FileStore":
248
+ """Export the results to a FileStore instance containing Excel data.
249
+
250
+ Args:
251
+ filename: Optional filename for the Excel file (defaults to "results.xlsx")
252
+ remove_prefix: Whether to remove the prefix from column names
253
+ pretty_labels: Dictionary mapping original column names to pretty labels
254
+ sheet_name: Name of the worksheet (defaults to "Results")
255
+
256
+ Returns:
257
+ FileStore: Instance containing the Excel data
396
258
  """
397
- if pretty_labels is None:
398
- pretty_labels = {}
399
- header, rows = self._make_tabular(
259
+ from openpyxl import Workbook
260
+
261
+ if filename is None:
262
+ filename = "results.xlsx"
263
+ if sheet_name is None:
264
+ sheet_name = "Results"
265
+
266
+ # Get the tabular data
267
+ header, rows = self._get_tabular_data(
400
268
  remove_prefix=remove_prefix, pretty_labels=pretty_labels
401
269
  )
402
270
 
403
- if filename is not None:
404
- with open(filename, "w") as f:
405
- writer = csv.writer(f)
406
- writer.writerow(header)
407
- writer.writerows(rows)
408
- # print(f"Saved to {filename}")
409
- else:
410
- output = io.StringIO()
411
- writer = csv.writer(output)
412
- writer.writerow(header)
413
- writer.writerows(rows)
414
-
415
- if download_link:
416
- from IPython.display import HTML, display
417
-
418
- csv_file = output.getvalue()
419
- b64 = base64.b64encode(csv_file.encode()).decode()
420
- download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
421
- display(HTML(download_link))
422
- else:
423
- return output.getvalue()
271
+ # Create Excel workbook in memory
272
+ wb = Workbook()
273
+ ws = wb.active
274
+ ws.title = sheet_name
275
+
276
+ # Write header
277
+ for col, value in enumerate(header, 1):
278
+ ws.cell(row=1, column=col, value=value)
279
+
280
+ # Write data rows
281
+ for row_idx, row_data in enumerate(rows, 2):
282
+ for col, value in enumerate(row_data, 1):
283
+ ws.cell(row=row_idx, column=col, value=value)
284
+
285
+ # Save to bytes buffer
286
+ buffer = io.BytesIO()
287
+ wb.save(buffer)
288
+ buffer.seek(0)
289
+
290
+ # Convert to base64
291
+ base64_string = base64.b64encode(buffer.getvalue()).decode()
292
+ from edsl.scenarios.FileStore import FileStore
293
+
294
+ return FileStore(
295
+ path=filename,
296
+ mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
297
+ binary=True,
298
+ suffix="xlsx",
299
+ base64_string=base64_string,
300
+ )
424
301
 
425
- def download_link(self, pretty_labels: Optional[dict] = None) -> str:
426
- """Return a download link for the results.
302
+ def _db(self, remove_prefix: bool = True):
303
+ """Create a SQLite database in memory and return the connection.
427
304
 
428
- :param pretty_labels: A dictionary of pretty labels for the columns.
305
+ Args:
306
+ shape: The shape of the data in the database (wide or long)
307
+ remove_prefix: Whether to remove the prefix from the column names
429
308
 
430
- >>> from edsl.results import Results
431
- >>> r = Results.example()
432
- >>> r.select('how_feeling').download_link()
433
- '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
309
+ Returns:
310
+ A database connection
434
311
  """
435
- import base64
312
+ from sqlalchemy import create_engine
436
313
 
437
- csv_string = self.to_csv(pretty_labels=pretty_labels)
438
- b64 = base64.b64encode(csv_string.encode()).decode()
439
- return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
314
+ engine = create_engine("sqlite:///:memory:")
315
+ if remove_prefix:
316
+ df = self.remove_prefix().to_pandas(lists_as_strings=True)
317
+ else:
318
+ df = self.to_pandas(lists_as_strings=True)
319
+ df.to_sql(
320
+ "self",
321
+ engine,
322
+ index=False,
323
+ if_exists="replace",
324
+ )
325
+ return engine.connect()
326
+
327
+ def sql(
328
+ self,
329
+ query: str,
330
+ transpose: bool = None,
331
+ transpose_by: str = None,
332
+ remove_prefix: bool = True,
333
+ ) -> Union["pd.DataFrame", str]:
334
+ """Execute a SQL query and return the results as a DataFrame.
335
+
336
+ Args:
337
+ query: The SQL query to execute
338
+ shape: The shape of the data in the database (wide or long)
339
+ remove_prefix: Whether to remove the prefix from the column names
340
+ transpose: Whether to transpose the DataFrame
341
+ transpose_by: The column to use as the index when transposing
342
+ csv: Whether to return the DataFrame as a CSV string
343
+ to_list: Whether to return the results as a list
344
+ to_latex: Whether to return the results as LaTeX
345
+ filename: Optional filename to save the results to
346
+
347
+ Returns:
348
+ DataFrame, CSV string, list, or LaTeX string depending on parameters
349
+
350
+ """
351
+ import pandas as pd
352
+
353
+ conn = self._db(remove_prefix=remove_prefix)
354
+ df = pd.read_sql_query(query, conn)
355
+
356
+ # Transpose the DataFrame if transpose is True
357
+ if transpose or transpose_by:
358
+ df = pd.DataFrame(df)
359
+ if transpose_by:
360
+ df = df.set_index(transpose_by)
361
+ else:
362
+ df = df.set_index(df.columns[0])
363
+ df = df.transpose()
364
+ from edsl.results.Dataset import Dataset
365
+
366
+ return Dataset.from_pandas_dataframe(df)
440
367
 
441
368
  def to_pandas(
442
369
  self, remove_prefix: bool = False, lists_as_strings=False
@@ -447,19 +374,6 @@ class DatasetExportMixin:
447
374
 
448
375
  """
449
376
  return self._to_pandas_strings(remove_prefix)
450
- # if lists_as_strings:
451
- # return self._to_pandas_strings(remove_prefix=remove_prefix)
452
-
453
- # import pandas as pd
454
-
455
- # df = pd.DataFrame(self.data)
456
-
457
- # if remove_prefix:
458
- # # Optionally remove prefixes from column names
459
- # df.columns = [col.split(".")[-1] for col in df.columns]
460
-
461
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
462
- # return df_sorted
463
377
 
464
378
  def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
465
379
  """Convert the results to a pandas DataFrame.
@@ -478,7 +392,7 @@ class DatasetExportMixin:
478
392
 
479
393
  import pandas as pd
480
394
 
481
- csv_string = self.to_csv(remove_prefix=remove_prefix)
395
+ csv_string = self.to_csv(remove_prefix=remove_prefix).text
482
396
  csv_buffer = io.StringIO(csv_string)
483
397
  df = pd.read_csv(csv_buffer)
484
398
  # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
@@ -494,14 +408,14 @@ class DatasetExportMixin:
494
408
  >>> r.select('how_feeling').to_scenario_list()
495
409
  ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
496
410
  """
497
- from edsl import ScenarioList, Scenario
411
+ from edsl.scenarios.ScenarioList import ScenarioList
412
+ from edsl.scenarios.Scenario import Scenario
498
413
 
499
414
  list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
500
415
  scenarios = []
501
416
  for d in list_of_dicts:
502
417
  scenarios.append(Scenario(d))
503
418
  return ScenarioList(scenarios)
504
- # return ScenarioList([Scenario(d) for d in list_of_dicts])
505
419
 
506
420
  def to_agent_list(self, remove_prefix: bool = True):
507
421
  """Convert the results to a list of dictionaries, one per agent.
@@ -513,7 +427,8 @@ class DatasetExportMixin:
513
427
  >>> r.select('how_feeling').to_agent_list()
514
428
  AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
515
429
  """
516
- from edsl import AgentList, Agent
430
+ from edsl.agents import Agent
431
+ from edsl.agents.AgentList import AgentList
517
432
 
518
433
  list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
519
434
  agents = []
@@ -521,6 +436,11 @@ class DatasetExportMixin:
521
436
  if "name" in d:
522
437
  d["agent_name"] = d.pop("name")
523
438
  agents.append(Agent(d, name=d["agent_name"]))
439
+ if "agent_parameters" in d:
440
+ agent_parameters = d.pop("agent_parameters")
441
+ agent_name = agent_parameters.get("name", None)
442
+ instruction = agent_parameters.get("instruction", None)
443
+ agents.append(Agent(d, name=agent_name, instruction=instruction))
524
444
  else:
525
445
  agents.append(Agent(d))
526
446
  return AgentList(agents)
@@ -608,7 +528,9 @@ class DatasetExportMixin:
608
528
  new_list.append(item)
609
529
  list_to_return = new_list
610
530
 
611
- return list_to_return
531
+ from edsl.utilities.PrettyList import PrettyList
532
+
533
+ return PrettyList(list_to_return)
612
534
 
613
535
  def html(
614
536
  self,
@@ -658,8 +580,10 @@ class DatasetExportMixin:
658
580
  >>> r = Results.example()
659
581
  >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
660
582
  {'OK': 2, 'Great': 1, 'Terrible': 1}
661
- >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
662
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
583
+ >>> from edsl.results.Dataset import Dataset
584
+ >>> expected = Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
585
+ >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset") == expected
586
+ True
663
587
  """
664
588
  from collections import Counter
665
589
 
@@ -691,8 +615,6 @@ class DatasetExportMixin:
691
615
  if top_n is not None:
692
616
  sorted_tally = dict(list(sorted_tally.items())[:top_n])
693
617
 
694
- import warnings
695
- import textwrap
696
618
  from edsl.results.Dataset import Dataset
697
619
 
698
620
  if output == "dict":