edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +197 -116
- edsl/__init__.py +15 -7
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +351 -147
- edsl/agents/AgentList.py +211 -73
- edsl/agents/Invigilator.py +101 -50
- edsl/agents/InvigilatorBase.py +62 -70
- edsl/agents/PromptConstructor.py +143 -225
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +0 -1
- edsl/agents/prompt_helpers.py +3 -3
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +18 -5
- edsl/auto/StageBase.py +53 -40
- edsl/auto/StageQuestions.py +2 -1
- edsl/auto/utilities.py +0 -6
- edsl/config.py +22 -2
- edsl/conversation/car_buying.py +2 -1
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +125 -47
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +45 -27
- edsl/data/CacheEntry.py +12 -15
- edsl/data/CacheHandler.py +31 -12
- edsl/data/RemoteCacheSync.py +154 -46
- edsl/data/__init__.py +4 -3
- edsl/data_transfer_models.py +2 -1
- edsl/enums.py +27 -0
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +12 -0
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/questions.py +24 -6
- edsl/exceptions/scenarios.py +7 -0
- edsl/inference_services/AnthropicService.py +38 -19
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +0 -2
- edsl/inference_services/AzureAI.py +0 -2
- edsl/inference_services/GoogleService.py +7 -12
- edsl/inference_services/InferenceServiceABC.py +18 -85
- edsl/inference_services/InferenceServicesCollection.py +120 -79
- edsl/inference_services/MistralAIService.py +0 -3
- edsl/inference_services/OpenAIService.py +47 -35
- edsl/inference_services/PerplexityService.py +0 -3
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +11 -10
- edsl/inference_services/TogetherAIService.py +5 -3
- edsl/inference_services/data_structures.py +134 -0
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +1 -14
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +356 -431
- edsl/jobs/JobsChecks.py +35 -10
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +6 -4
- edsl/jobs/JobsRemoteInferenceHandler.py +205 -133
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +44 -3
- edsl/jobs/buckets/TokenBucket.py +53 -21
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +143 -408
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +88 -403
- edsl/jobs/runners/JobsRunnerStatus.py +133 -165
- edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
- edsl/jobs/tasks/TaskHistory.py +38 -18
- edsl/jobs/tasks/task_status_enum.py +0 -2
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +194 -236
- edsl/language_models/ModelList.py +28 -19
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +1 -2
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +2 -2
- edsl/language_models/utilities.py +5 -4
- edsl/notebooks/Notebook.py +19 -14
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/prompts/Prompt.py +29 -39
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +68 -214
- edsl/questions/QuestionBasePromptsMixin.py +7 -3
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +3 -3
- edsl/questions/QuestionExtract.py +5 -7
- edsl/questions/QuestionFreeText.py +2 -3
- edsl/questions/QuestionList.py +10 -18
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +67 -23
- edsl/questions/QuestionNumerical.py +2 -4
- edsl/questions/QuestionRank.py +7 -17
- edsl/questions/SimpleAskMixin.py +4 -3
- edsl/questions/__init__.py +2 -1
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +47 -2
- edsl/questions/data_structures.py +20 -0
- edsl/questions/derived/QuestionLinearScale.py +6 -3
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +17 -3
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +57 -50
- edsl/questions/question_registry.py +1 -1
- edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +40 -26
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +170 -7
- edsl/results/DatasetExportMixin.py +168 -305
- edsl/results/DatasetTree.py +28 -8
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +298 -206
- edsl/results/Results.py +149 -131
- edsl/results/ResultsExportMixin.py +2 -0
- edsl/results/TableDisplay.py +98 -171
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +1 -1
- edsl/results/file_exports.py +252 -0
- edsl/results/{Selector.py → results_selector.py} +23 -13
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_renderers.py +118 -0
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +150 -239
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +90 -193
- edsl/scenarios/ScenarioHtmlMixin.py +4 -3
- edsl/scenarios/ScenarioList.py +415 -244
- edsl/scenarios/ScenarioListExportMixin.py +0 -7
- edsl/scenarios/ScenarioListPdfMixin.py +15 -37
- edsl/scenarios/__init__.py +1 -2
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +49 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +10 -6
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +5 -12
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/Rule.py +5 -4
- edsl/surveys/RuleCollection.py +25 -27
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +270 -791
- edsl/surveys/SurveyCSS.py +20 -8
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +4 -2
- edsl/surveys/descriptors.py +6 -2
- edsl/surveys/instructions/ChangeInstruction.py +1 -2
- edsl/surveys/instructions/Instruction.py +4 -13
- edsl/surveys/instructions/InstructionCollection.py +11 -6
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/utilities.py +35 -23
- {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/METADATA +12 -10
- edsl-0.1.39.dist-info/RECORD +358 -0
- {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/registry.py +0 -190
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.38.dev4.dist-info/RECORD +0 -277
- /edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
- /edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
- /edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
- {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/LICENSE +0 -0
@@ -1,12 +1,11 @@
|
|
1
1
|
"""Mixin class for exporting results."""
|
2
2
|
|
3
|
-
import base64
|
4
|
-
import csv
|
5
3
|
import io
|
6
|
-
import
|
7
|
-
|
4
|
+
import warnings
|
5
|
+
import textwrap
|
6
|
+
from typing import Optional, Tuple, Union, List
|
8
7
|
|
9
|
-
from
|
8
|
+
from edsl.results.file_exports import CSVExport, ExcelExport, JSONLExport, SQLiteExport
|
10
9
|
|
11
10
|
|
12
11
|
class DatasetExportMixin:
|
@@ -37,7 +36,7 @@ class DatasetExportMixin:
|
|
37
36
|
|
38
37
|
>>> from edsl.results import Results
|
39
38
|
>>> sorted(Results.example().select().relevant_columns(data_type = "model"))
|
40
|
-
['model.frequency_penalty',
|
39
|
+
['model.frequency_penalty', ...]
|
41
40
|
|
42
41
|
>>> Results.example().relevant_columns(data_type = "flimflam")
|
43
42
|
Traceback (most recent call last):
|
@@ -72,7 +71,7 @@ class DatasetExportMixin:
|
|
72
71
|
def num_observations(self):
|
73
72
|
"""Return the number of observations in the dataset.
|
74
73
|
|
75
|
-
>>> from edsl.results import Results
|
74
|
+
>>> from edsl.results.Results import Results
|
76
75
|
>>> Results.example().num_observations()
|
77
76
|
4
|
78
77
|
"""
|
@@ -144,299 +143,148 @@ class DatasetExportMixin:
|
|
144
143
|
for value in list_of_values:
|
145
144
|
print(f"{key}: {value}")
|
146
145
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
# ├──────────────┤
|
192
|
-
# │ Terrible │
|
193
|
-
# ├──────────────┤
|
194
|
-
# │ OK │
|
195
|
-
# └──────────────┘
|
196
|
-
|
197
|
-
# >>> r = Results.example()
|
198
|
-
# >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
|
199
|
-
# ┏━━━━━━━━━━━━━━┓
|
200
|
-
# ┃ answer ┃
|
201
|
-
# ┃ .how_feeling ┃
|
202
|
-
# ┡━━━━━━━━━━━━━━┩
|
203
|
-
# │ OK │
|
204
|
-
# ├──────────────┤
|
205
|
-
# │ Great │
|
206
|
-
# └──────────────┘
|
207
|
-
# >>> r2
|
208
|
-
# Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
|
209
|
-
|
210
|
-
# >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
|
211
|
-
# ┏━━━━━━━━━━━━━━┓
|
212
|
-
# ┃ answer ┃
|
213
|
-
# ┃ .how_feeling ┃
|
214
|
-
# ┡━━━━━━━━━━━━━━┩
|
215
|
-
# │ OK │
|
216
|
-
# ├──────────────┤
|
217
|
-
# │ Great │
|
218
|
-
# └──────────────┘
|
219
|
-
|
220
|
-
# >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
|
221
|
-
# ┏━━━━━━━━━━━━━━━━━━━━┓
|
222
|
-
# ┃ answer.how_feeling ┃
|
223
|
-
# ┡━━━━━━━━━━━━━━━━━━━━┩
|
224
|
-
# │ OK │
|
225
|
-
# ├────────────────────┤
|
226
|
-
# │ Great │
|
227
|
-
# ├────────────────────┤
|
228
|
-
# │ Terrible │
|
229
|
-
# ├────────────────────┤
|
230
|
-
# │ OK │
|
231
|
-
# └────────────────────┘
|
232
|
-
|
233
|
-
# Example: using the pretty_labels parameter
|
234
|
-
|
235
|
-
# >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
|
236
|
-
# ┏━━━━━━━━━━━━━━━━━━━━━┓
|
237
|
-
# ┃ How are you feeling ┃
|
238
|
-
# ┡━━━━━━━━━━━━━━━━━━━━━┩
|
239
|
-
# │ OK │
|
240
|
-
# ├─────────────────────┤
|
241
|
-
# │ Great │
|
242
|
-
# ├─────────────────────┤
|
243
|
-
# │ Terrible │
|
244
|
-
# ├─────────────────────┤
|
245
|
-
# │ OK │
|
246
|
-
# └─────────────────────┘
|
247
|
-
|
248
|
-
# Example: printing in markdown format
|
249
|
-
|
250
|
-
# >>> r.select('how_feeling').print(format='markdown')
|
251
|
-
# | answer.how_feeling |
|
252
|
-
# |--|
|
253
|
-
# | OK |
|
254
|
-
# | Great |
|
255
|
-
# | Terrible |
|
256
|
-
# | OK |
|
257
|
-
# ...
|
258
|
-
|
259
|
-
# >>> r.select('how_feeling').print(format='latex')
|
260
|
-
# \\begin{tabular}{l}
|
261
|
-
# ...
|
262
|
-
# \\end{tabular}
|
263
|
-
# <BLANKLINE>
|
264
|
-
# """
|
265
|
-
# from IPython.display import HTML, display
|
266
|
-
# from edsl.utilities.utilities import is_notebook
|
267
|
-
# import io
|
268
|
-
# import sys
|
269
|
-
|
270
|
-
# def _determine_format(format):
|
271
|
-
# if format is None:
|
272
|
-
# if is_notebook():
|
273
|
-
# format = "html"
|
274
|
-
# else:
|
275
|
-
# format = "rich"
|
276
|
-
# if format not in ["rich", "html", "markdown", "latex"]:
|
277
|
-
# raise ValueError(
|
278
|
-
# "format must be one of 'rich', 'html', 'markdown', or 'latex'."
|
279
|
-
# )
|
280
|
-
|
281
|
-
# return format
|
282
|
-
|
283
|
-
# format = _determine_format(format)
|
284
|
-
|
285
|
-
# if pretty_labels is None:
|
286
|
-
# pretty_labels = {}
|
287
|
-
|
288
|
-
# if pretty_labels != {}: # only split at dot if there are no pretty labels
|
289
|
-
# split_at_dot = False
|
290
|
-
|
291
|
-
# def _create_data():
|
292
|
-
# for index, entry in enumerate(self):
|
293
|
-
# key, list_of_values = list(entry.items())[0]
|
294
|
-
# yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
|
295
|
-
|
296
|
-
# new_data = list(_create_data())
|
297
|
-
|
298
|
-
# # Capture output if return_string is True
|
299
|
-
# if return_string:
|
300
|
-
# old_stdout = sys.stdout
|
301
|
-
# sys.stdout = io.StringIO()
|
302
|
-
|
303
|
-
# output = None
|
304
|
-
|
305
|
-
# if format == "rich":
|
306
|
-
# from edsl.utilities.interface import print_dataset_with_rich
|
307
|
-
|
308
|
-
# output = print_dataset_with_rich(
|
309
|
-
# new_data, filename=filename, split_at_dot=split_at_dot
|
310
|
-
# )
|
311
|
-
# elif format == "markdown":
|
312
|
-
# from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
|
313
|
-
|
314
|
-
# output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
|
315
|
-
# elif format == "latex":
|
316
|
-
# df = self.to_pandas()
|
317
|
-
# df.columns = [col.replace("_", " ") for col in df.columns]
|
318
|
-
# latex_string = df.to_latex(index=False)
|
319
|
-
|
320
|
-
# if filename is not None:
|
321
|
-
# with open(filename, "w") as f:
|
322
|
-
# f.write(latex_string)
|
323
|
-
# else:
|
324
|
-
# print(latex_string)
|
325
|
-
# output = latex_string
|
326
|
-
# elif format == "html":
|
327
|
-
# from edsl.utilities.interface import print_list_of_dicts_as_html_table
|
328
|
-
|
329
|
-
# html_source = print_list_of_dicts_as_html_table(
|
330
|
-
# new_data, interactive=interactive
|
331
|
-
# )
|
332
|
-
|
333
|
-
# if iframe:
|
334
|
-
# iframe = f""""
|
335
|
-
# <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
|
336
|
-
# """
|
337
|
-
# display(HTML(iframe))
|
338
|
-
# elif is_notebook():
|
339
|
-
# display(HTML(html_source))
|
340
|
-
# else:
|
341
|
-
# from edsl.utilities.interface import view_html
|
342
|
-
|
343
|
-
# view_html(html_source)
|
344
|
-
|
345
|
-
# output = html_source
|
346
|
-
|
347
|
-
# # Restore stdout and get captured output if return_string is True
|
348
|
-
# if return_string:
|
349
|
-
# captured_output = sys.stdout.getvalue()
|
350
|
-
# sys.stdout = old_stdout
|
351
|
-
# return captured_output or output
|
352
|
-
|
353
|
-
# if tee:
|
354
|
-
# return self
|
355
|
-
|
356
|
-
# return None
|
146
|
+
def _get_tabular_data(
|
147
|
+
self,
|
148
|
+
remove_prefix: bool = False,
|
149
|
+
pretty_labels: Optional[dict] = None,
|
150
|
+
) -> Tuple[List[str], List[List]]:
|
151
|
+
"""Internal method to get tabular data in a standard format.
|
152
|
+
|
153
|
+
Args:
|
154
|
+
remove_prefix: Whether to remove the prefix from column names
|
155
|
+
pretty_labels: Dictionary mapping original column names to pretty labels
|
156
|
+
|
157
|
+
Returns:
|
158
|
+
Tuple containing (header_row, data_rows)
|
159
|
+
"""
|
160
|
+
if pretty_labels is None:
|
161
|
+
pretty_labels = {}
|
162
|
+
|
163
|
+
return self._make_tabular(
|
164
|
+
remove_prefix=remove_prefix, pretty_labels=pretty_labels
|
165
|
+
)
|
166
|
+
|
167
|
+
def to_jsonl(self, filename: Optional[str] = None) -> Optional["FileStore"]:
|
168
|
+
"""Export the results to a FileStore instance containing JSONL data."""
|
169
|
+
exporter = JSONLExport(data=self, filename=filename)
|
170
|
+
return exporter.export()
|
171
|
+
|
172
|
+
def to_sqlite(
|
173
|
+
self,
|
174
|
+
filename: Optional[str] = None,
|
175
|
+
remove_prefix: bool = False,
|
176
|
+
pretty_labels: Optional[dict] = None,
|
177
|
+
table_name: str = "results",
|
178
|
+
if_exists: str = "replace",
|
179
|
+
) -> Optional["FileStore"]:
|
180
|
+
"""Export the results to a SQLite database file."""
|
181
|
+
exporter = SQLiteExport(
|
182
|
+
data=self,
|
183
|
+
filename=filename,
|
184
|
+
remove_prefix=remove_prefix,
|
185
|
+
pretty_labels=pretty_labels,
|
186
|
+
table_name=table_name,
|
187
|
+
if_exists=if_exists,
|
188
|
+
)
|
189
|
+
return exporter.export()
|
357
190
|
|
358
191
|
def to_csv(
|
359
192
|
self,
|
360
193
|
filename: Optional[str] = None,
|
361
194
|
remove_prefix: bool = False,
|
362
|
-
download_link: bool = False,
|
363
195
|
pretty_labels: Optional[dict] = None,
|
364
|
-
):
|
365
|
-
"""Export the results to a CSV
|
196
|
+
) -> Optional["FileStore"]:
|
197
|
+
"""Export the results to a FileStore instance containing CSV data."""
|
198
|
+
exporter = CSVExport(
|
199
|
+
data=self,
|
200
|
+
filename=filename,
|
201
|
+
remove_prefix=remove_prefix,
|
202
|
+
pretty_labels=pretty_labels,
|
203
|
+
)
|
204
|
+
return exporter.export()
|
366
205
|
|
367
|
-
|
368
|
-
|
369
|
-
:
|
206
|
+
def to_excel(
|
207
|
+
self,
|
208
|
+
filename: Optional[str] = None,
|
209
|
+
remove_prefix: bool = False,
|
210
|
+
pretty_labels: Optional[dict] = None,
|
211
|
+
sheet_name: Optional[str] = None,
|
212
|
+
) -> Optional["FileStore"]:
|
213
|
+
"""Export the results to a FileStore instance containing Excel data."""
|
214
|
+
exporter = ExcelExport(
|
215
|
+
data=self,
|
216
|
+
filename=filename,
|
217
|
+
remove_prefix=remove_prefix,
|
218
|
+
pretty_labels=pretty_labels,
|
219
|
+
sheet_name=sheet_name,
|
220
|
+
)
|
221
|
+
return exporter.export()
|
370
222
|
|
371
|
-
|
223
|
+
def _db(self, remove_prefix: bool = True):
|
224
|
+
"""Create a SQLite database in memory and return the connection.
|
372
225
|
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
|
377
|
-
|
378
|
-
>>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
|
379
|
-
'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
|
380
|
-
|
381
|
-
>>> import tempfile
|
382
|
-
>>> filename = tempfile.NamedTemporaryFile(delete=False).name
|
383
|
-
>>> r.select('how_feeling').to_csv(filename = filename)
|
384
|
-
>>> import os
|
385
|
-
>>> import csv
|
386
|
-
>>> with open(filename, newline='') as f:
|
387
|
-
... reader = csv.reader(f)
|
388
|
-
... for row in reader:
|
389
|
-
... print(row)
|
390
|
-
['answer.how_feeling']
|
391
|
-
['OK']
|
392
|
-
['Great']
|
393
|
-
['Terrible']
|
394
|
-
['OK']
|
226
|
+
Args:
|
227
|
+
shape: The shape of the data in the database (wide or long)
|
228
|
+
remove_prefix: Whether to remove the prefix from the column names
|
395
229
|
|
230
|
+
Returns:
|
231
|
+
A database connection
|
396
232
|
"""
|
397
|
-
|
398
|
-
pretty_labels = {}
|
399
|
-
header, rows = self._make_tabular(
|
400
|
-
remove_prefix=remove_prefix, pretty_labels=pretty_labels
|
401
|
-
)
|
233
|
+
from sqlalchemy import create_engine
|
402
234
|
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
writer.writerow(header)
|
407
|
-
writer.writerows(rows)
|
408
|
-
# print(f"Saved to {filename}")
|
235
|
+
engine = create_engine("sqlite:///:memory:")
|
236
|
+
if remove_prefix:
|
237
|
+
df = self.remove_prefix().to_pandas(lists_as_strings=True)
|
409
238
|
else:
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
csv_file = output.getvalue()
|
419
|
-
b64 = base64.b64encode(csv_file.encode()).decode()
|
420
|
-
download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
|
421
|
-
display(HTML(download_link))
|
422
|
-
else:
|
423
|
-
return output.getvalue()
|
424
|
-
|
425
|
-
def download_link(self, pretty_labels: Optional[dict] = None) -> str:
|
426
|
-
"""Return a download link for the results.
|
239
|
+
df = self.to_pandas(lists_as_strings=True)
|
240
|
+
df.to_sql(
|
241
|
+
"self",
|
242
|
+
engine,
|
243
|
+
index=False,
|
244
|
+
if_exists="replace",
|
245
|
+
)
|
246
|
+
return engine.connect()
|
427
247
|
|
428
|
-
|
248
|
+
def sql(
|
249
|
+
self,
|
250
|
+
query: str,
|
251
|
+
transpose: bool = None,
|
252
|
+
transpose_by: str = None,
|
253
|
+
remove_prefix: bool = True,
|
254
|
+
) -> Union["pd.DataFrame", str]:
|
255
|
+
"""Execute a SQL query and return the results as a DataFrame.
|
256
|
+
|
257
|
+
Args:
|
258
|
+
query: The SQL query to execute
|
259
|
+
shape: The shape of the data in the database (wide or long)
|
260
|
+
remove_prefix: Whether to remove the prefix from the column names
|
261
|
+
transpose: Whether to transpose the DataFrame
|
262
|
+
transpose_by: The column to use as the index when transposing
|
263
|
+
csv: Whether to return the DataFrame as a CSV string
|
264
|
+
to_list: Whether to return the results as a list
|
265
|
+
to_latex: Whether to return the results as LaTeX
|
266
|
+
filename: Optional filename to save the results to
|
267
|
+
|
268
|
+
Returns:
|
269
|
+
DataFrame, CSV string, list, or LaTeX string depending on parameters
|
429
270
|
|
430
|
-
>>> from edsl.results import Results
|
431
|
-
>>> r = Results.example()
|
432
|
-
>>> r.select('how_feeling').download_link()
|
433
|
-
'<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
|
434
271
|
"""
|
435
|
-
import
|
272
|
+
import pandas as pd
|
273
|
+
|
274
|
+
conn = self._db(remove_prefix=remove_prefix)
|
275
|
+
df = pd.read_sql_query(query, conn)
|
276
|
+
|
277
|
+
# Transpose the DataFrame if transpose is True
|
278
|
+
if transpose or transpose_by:
|
279
|
+
df = pd.DataFrame(df)
|
280
|
+
if transpose_by:
|
281
|
+
df = df.set_index(transpose_by)
|
282
|
+
else:
|
283
|
+
df = df.set_index(df.columns[0])
|
284
|
+
df = df.transpose()
|
285
|
+
from edsl.results.Dataset import Dataset
|
436
286
|
|
437
|
-
|
438
|
-
b64 = base64.b64encode(csv_string.encode()).decode()
|
439
|
-
return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
|
287
|
+
return Dataset.from_pandas_dataframe(df)
|
440
288
|
|
441
289
|
def to_pandas(
|
442
290
|
self, remove_prefix: bool = False, lists_as_strings=False
|
@@ -447,19 +295,6 @@ class DatasetExportMixin:
|
|
447
295
|
|
448
296
|
"""
|
449
297
|
return self._to_pandas_strings(remove_prefix)
|
450
|
-
# if lists_as_strings:
|
451
|
-
# return self._to_pandas_strings(remove_prefix=remove_prefix)
|
452
|
-
|
453
|
-
# import pandas as pd
|
454
|
-
|
455
|
-
# df = pd.DataFrame(self.data)
|
456
|
-
|
457
|
-
# if remove_prefix:
|
458
|
-
# # Optionally remove prefixes from column names
|
459
|
-
# df.columns = [col.split(".")[-1] for col in df.columns]
|
460
|
-
|
461
|
-
# df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
|
462
|
-
# return df_sorted
|
463
298
|
|
464
299
|
def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
|
465
300
|
"""Convert the results to a pandas DataFrame.
|
@@ -478,12 +313,32 @@ class DatasetExportMixin:
|
|
478
313
|
|
479
314
|
import pandas as pd
|
480
315
|
|
481
|
-
csv_string = self.to_csv(remove_prefix=remove_prefix)
|
316
|
+
csv_string = self.to_csv(remove_prefix=remove_prefix).text
|
482
317
|
csv_buffer = io.StringIO(csv_string)
|
483
318
|
df = pd.read_csv(csv_buffer)
|
484
319
|
# df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
|
485
320
|
return df
|
486
321
|
|
322
|
+
def to_polars(
|
323
|
+
self, remove_prefix: bool = False, lists_as_strings=False
|
324
|
+
) -> "pl.DataFrame":
|
325
|
+
"""Convert the results to a Polars DataFrame.
|
326
|
+
|
327
|
+
:param remove_prefix: Whether to remove the prefix from the column names.
|
328
|
+
"""
|
329
|
+
return self._to_polars_strings(remove_prefix)
|
330
|
+
|
331
|
+
def _to_polars_strings(self, remove_prefix: bool = False) -> "pl.DataFrame":
|
332
|
+
"""Convert the results to a Polars DataFrame.
|
333
|
+
|
334
|
+
:param remove_prefix: Whether to remove the prefix from the column names.
|
335
|
+
"""
|
336
|
+
import polars as pl
|
337
|
+
|
338
|
+
csv_string = self.to_csv(remove_prefix=remove_prefix).text
|
339
|
+
df = pl.read_csv(io.StringIO(csv_string))
|
340
|
+
return df
|
341
|
+
|
487
342
|
def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
|
488
343
|
"""Convert the results to a list of dictionaries, one per scenario.
|
489
344
|
|
@@ -494,14 +349,14 @@ class DatasetExportMixin:
|
|
494
349
|
>>> r.select('how_feeling').to_scenario_list()
|
495
350
|
ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
|
496
351
|
"""
|
497
|
-
from edsl import ScenarioList
|
352
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
353
|
+
from edsl.scenarios.Scenario import Scenario
|
498
354
|
|
499
355
|
list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
|
500
356
|
scenarios = []
|
501
357
|
for d in list_of_dicts:
|
502
358
|
scenarios.append(Scenario(d))
|
503
359
|
return ScenarioList(scenarios)
|
504
|
-
# return ScenarioList([Scenario(d) for d in list_of_dicts])
|
505
360
|
|
506
361
|
def to_agent_list(self, remove_prefix: bool = True):
|
507
362
|
"""Convert the results to a list of dictionaries, one per agent.
|
@@ -513,7 +368,8 @@ class DatasetExportMixin:
|
|
513
368
|
>>> r.select('how_feeling').to_agent_list()
|
514
369
|
AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
|
515
370
|
"""
|
516
|
-
from edsl import
|
371
|
+
from edsl.agents import Agent
|
372
|
+
from edsl.agents.AgentList import AgentList
|
517
373
|
|
518
374
|
list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
|
519
375
|
agents = []
|
@@ -521,6 +377,11 @@ class DatasetExportMixin:
|
|
521
377
|
if "name" in d:
|
522
378
|
d["agent_name"] = d.pop("name")
|
523
379
|
agents.append(Agent(d, name=d["agent_name"]))
|
380
|
+
if "agent_parameters" in d:
|
381
|
+
agent_parameters = d.pop("agent_parameters")
|
382
|
+
agent_name = agent_parameters.get("name", None)
|
383
|
+
instruction = agent_parameters.get("instruction", None)
|
384
|
+
agents.append(Agent(d, name=agent_name, instruction=instruction))
|
524
385
|
else:
|
525
386
|
agents.append(Agent(d))
|
526
387
|
return AgentList(agents)
|
@@ -608,7 +469,9 @@ class DatasetExportMixin:
|
|
608
469
|
new_list.append(item)
|
609
470
|
list_to_return = new_list
|
610
471
|
|
611
|
-
|
472
|
+
from edsl.utilities.PrettyList import PrettyList
|
473
|
+
|
474
|
+
return PrettyList(list_to_return)
|
612
475
|
|
613
476
|
def html(
|
614
477
|
self,
|
@@ -658,8 +521,10 @@ class DatasetExportMixin:
|
|
658
521
|
>>> r = Results.example()
|
659
522
|
>>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
|
660
523
|
{'OK': 2, 'Great': 1, 'Terrible': 1}
|
661
|
-
>>>
|
662
|
-
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
|
524
|
+
>>> from edsl.results.Dataset import Dataset
|
525
|
+
>>> expected = Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
|
526
|
+
>>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset") == expected
|
527
|
+
True
|
663
528
|
"""
|
664
529
|
from collections import Counter
|
665
530
|
|
@@ -691,8 +556,6 @@ class DatasetExportMixin:
|
|
691
556
|
if top_n is not None:
|
692
557
|
sorted_tally = dict(list(sorted_tally.items())[:top_n])
|
693
558
|
|
694
|
-
import warnings
|
695
|
-
import textwrap
|
696
559
|
from edsl.results.Dataset import Dataset
|
697
560
|
|
698
561
|
if output == "dict":
|
edsl/results/DatasetTree.py
CHANGED
@@ -1,8 +1,4 @@
|
|
1
1
|
from typing import Dict, List, Any, Optional, List
|
2
|
-
from docx import Document
|
3
|
-
from docx.shared import Inches, Pt
|
4
|
-
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
5
|
-
from docx.enum.style import WD_STYLE_TYPE
|
6
2
|
|
7
3
|
|
8
4
|
def is_hashable(v):
|
@@ -98,6 +94,11 @@ class Tree:
|
|
98
94
|
if filename is None:
|
99
95
|
filename = "tree_structure.docx"
|
100
96
|
|
97
|
+
from docx import Document
|
98
|
+
from docx.shared import Inches, Pt
|
99
|
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
100
|
+
from docx.enum.style import WD_STYLE_TYPE
|
101
|
+
|
101
102
|
doc = Document()
|
102
103
|
|
103
104
|
# Create styles for headings
|
@@ -115,10 +116,29 @@ class Tree:
|
|
115
116
|
body_style.font.size = Pt(11)
|
116
117
|
|
117
118
|
self._add_to_docx(doc, self.root, 0)
|
118
|
-
|
119
|
-
from
|
120
|
-
|
121
|
-
|
119
|
+
import base64
|
120
|
+
from io import BytesIO
|
121
|
+
import base64
|
122
|
+
|
123
|
+
# Save document to bytes buffer
|
124
|
+
doc_buffer = BytesIO()
|
125
|
+
doc.save(doc_buffer)
|
126
|
+
doc_buffer.seek(0)
|
127
|
+
|
128
|
+
base64_string = base64.b64encode(doc_buffer.getvalue()).decode("utf-8")
|
129
|
+
from edsl.scenarios.FileStore import FileStore
|
130
|
+
|
131
|
+
# Create and return FileStore instance
|
132
|
+
return FileStore(
|
133
|
+
path="tree_structure.docx", # Default name
|
134
|
+
mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
135
|
+
binary=True,
|
136
|
+
suffix="docx",
|
137
|
+
base64_string=base64_string,
|
138
|
+
)
|
139
|
+
# doc.save(filename)
|
140
|
+
# from edsl.utilities.utilities import file_notice
|
141
|
+
# file_notice(filename)
|
122
142
|
|
123
143
|
def _repr_html_(self):
|
124
144
|
"""Returns an interactive HTML representation of the tree with collapsible sections."""
|