edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +169 -116
- edsl/__init__.py +14 -6
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +358 -146
- edsl/agents/AgentList.py +211 -73
- edsl/agents/Invigilator.py +88 -36
- edsl/agents/InvigilatorBase.py +59 -70
- edsl/agents/PromptConstructor.py +117 -219
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionOptionProcessor.py +172 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +0 -1
- edsl/agents/prompt_helpers.py +3 -3
- edsl/config.py +22 -2
- edsl/conversation/car_buying.py +2 -1
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +104 -42
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +21 -14
- edsl/data/CacheEntry.py +12 -15
- edsl/data/CacheHandler.py +33 -12
- edsl/data/__init__.py +4 -3
- edsl/data_transfer_models.py +2 -1
- edsl/enums.py +20 -0
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +12 -0
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/questions.py +24 -6
- edsl/exceptions/scenarios.py +7 -0
- edsl/inference_services/AnthropicService.py +0 -3
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +209 -0
- edsl/inference_services/AwsBedrock.py +0 -2
- edsl/inference_services/AzureAI.py +0 -2
- edsl/inference_services/GoogleService.py +2 -11
- edsl/inference_services/InferenceServiceABC.py +18 -85
- edsl/inference_services/InferenceServicesCollection.py +105 -80
- edsl/inference_services/MistralAIService.py +0 -3
- edsl/inference_services/OpenAIService.py +1 -4
- edsl/inference_services/PerplexityService.py +0 -3
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +11 -8
- edsl/inference_services/data_structures.py +62 -0
- edsl/jobs/AnswerQuestionFunctionConstructor.py +188 -0
- edsl/jobs/Answers.py +1 -14
- edsl/jobs/FetchInvigilator.py +40 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +48 -0
- edsl/jobs/Jobs.py +102 -243
- edsl/jobs/JobsChecks.py +35 -10
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +5 -3
- edsl/jobs/JobsRemoteInferenceHandler.py +128 -80
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/buckets/BucketCollection.py +44 -3
- edsl/jobs/buckets/TokenBucket.py +53 -21
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +77 -380
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +4 -49
- edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
- edsl/jobs/tasks/TaskHistory.py +14 -15
- edsl/jobs/tasks/task_status_enum.py +0 -2
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +137 -234
- edsl/language_models/ModelList.py +11 -13
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +0 -1
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/registry.py +49 -59
- edsl/language_models/repair.py +2 -2
- edsl/language_models/utilities.py +5 -4
- edsl/notebooks/Notebook.py +19 -14
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/prompts/Prompt.py +29 -39
- edsl/questions/AnswerValidatorMixin.py +47 -2
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/LoopProcessor.py +149 -0
- edsl/questions/QuestionBase.py +37 -192
- edsl/questions/QuestionBaseGenMixin.py +52 -48
- edsl/questions/QuestionBasePromptsMixin.py +7 -3
- edsl/questions/QuestionCheckBox.py +1 -1
- edsl/questions/QuestionExtract.py +1 -1
- edsl/questions/QuestionFreeText.py +1 -2
- edsl/questions/QuestionList.py +3 -5
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +66 -22
- edsl/questions/QuestionNumerical.py +1 -3
- edsl/questions/QuestionRank.py +6 -16
- edsl/questions/ResponseValidatorABC.py +37 -11
- edsl/questions/ResponseValidatorFactory.py +28 -0
- edsl/questions/SimpleAskMixin.py +4 -3
- edsl/questions/__init__.py +1 -0
- edsl/questions/derived/QuestionLinearScale.py +6 -3
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +17 -3
- edsl/questions/question_registry.py +1 -1
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +170 -7
- edsl/results/DatasetExportMixin.py +224 -302
- edsl/results/DatasetTree.py +28 -8
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +192 -206
- edsl/results/Results.py +120 -113
- edsl/results/ResultsExportMixin.py +2 -0
- edsl/results/Selector.py +23 -13
- edsl/results/TableDisplay.py +98 -171
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +1 -1
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_renderers.py +118 -0
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DirectoryScanner.py +96 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +118 -239
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +90 -193
- edsl/scenarios/ScenarioHtmlMixin.py +4 -3
- edsl/scenarios/ScenarioJoin.py +10 -6
- edsl/scenarios/ScenarioList.py +383 -240
- edsl/scenarios/ScenarioListExportMixin.py +0 -7
- edsl/scenarios/ScenarioListPdfMixin.py +15 -37
- edsl/scenarios/ScenarioSelector.py +156 -0
- edsl/scenarios/__init__.py +1 -2
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +38 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +5 -12
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/Rule.py +5 -4
- edsl/surveys/RuleCollection.py +25 -27
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +199 -771
- edsl/surveys/SurveyCSS.py +20 -8
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +4 -2
- edsl/surveys/descriptors.py +6 -2
- edsl/surveys/instructions/ChangeInstruction.py +1 -2
- edsl/surveys/instructions/Instruction.py +4 -13
- edsl/surveys/instructions/InstructionCollection.py +11 -6
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/utilities.py +35 -23
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +12 -10
- edsl-0.1.39.dev2.dist-info/RECORD +352 -0
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.39.dev1.dist-info/RECORD +0 -277
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +0 -0
@@ -3,10 +3,9 @@
|
|
3
3
|
import base64
|
4
4
|
import csv
|
5
5
|
import io
|
6
|
-
import
|
7
|
-
|
8
|
-
|
9
|
-
from typing import Literal, Optional, Union, List
|
6
|
+
import warnings
|
7
|
+
import textwrap
|
8
|
+
from typing import Optional, Tuple, Union, List
|
10
9
|
|
11
10
|
|
12
11
|
class DatasetExportMixin:
|
@@ -37,7 +36,7 @@ class DatasetExportMixin:
|
|
37
36
|
|
38
37
|
>>> from edsl.results import Results
|
39
38
|
>>> sorted(Results.example().select().relevant_columns(data_type = "model"))
|
40
|
-
['model.frequency_penalty',
|
39
|
+
['model.frequency_penalty', ...]
|
41
40
|
|
42
41
|
>>> Results.example().relevant_columns(data_type = "flimflam")
|
43
42
|
Traceback (most recent call last):
|
@@ -72,7 +71,7 @@ class DatasetExportMixin:
|
|
72
71
|
def num_observations(self):
|
73
72
|
"""Return the number of observations in the dataset.
|
74
73
|
|
75
|
-
>>> from edsl.results import Results
|
74
|
+
>>> from edsl.results.Results import Results
|
76
75
|
>>> Results.example().num_observations()
|
77
76
|
4
|
78
77
|
"""
|
@@ -144,299 +143,227 @@ class DatasetExportMixin:
|
|
144
143
|
for value in list_of_values:
|
145
144
|
print(f"{key}: {value}")
|
146
145
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
# >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
|
199
|
-
# ┏━━━━━━━━━━━━━━┓
|
200
|
-
# ┃ answer ┃
|
201
|
-
# ┃ .how_feeling ┃
|
202
|
-
# ┡━━━━━━━━━━━━━━┩
|
203
|
-
# │ OK │
|
204
|
-
# ├──────────────┤
|
205
|
-
# │ Great │
|
206
|
-
# └──────────────┘
|
207
|
-
# >>> r2
|
208
|
-
# Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
|
209
|
-
|
210
|
-
# >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
|
211
|
-
# ┏━━━━━━━━━━━━━━┓
|
212
|
-
# ┃ answer ┃
|
213
|
-
# ┃ .how_feeling ┃
|
214
|
-
# ┡━━━━━━━━━━━━━━┩
|
215
|
-
# │ OK │
|
216
|
-
# ├──────────────┤
|
217
|
-
# │ Great │
|
218
|
-
# └──────────────┘
|
219
|
-
|
220
|
-
# >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
|
221
|
-
# ┏━━━━━━━━━━━━━━━━━━━━┓
|
222
|
-
# ┃ answer.how_feeling ┃
|
223
|
-
# ┡━━━━━━━━━━━━━━━━━━━━┩
|
224
|
-
# │ OK │
|
225
|
-
# ├────────────────────┤
|
226
|
-
# │ Great │
|
227
|
-
# ├────────────────────┤
|
228
|
-
# │ Terrible │
|
229
|
-
# ├────────────────────┤
|
230
|
-
# │ OK │
|
231
|
-
# └────────────────────┘
|
232
|
-
|
233
|
-
# Example: using the pretty_labels parameter
|
234
|
-
|
235
|
-
# >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
|
236
|
-
# ┏━━━━━━━━━━━━━━━━━━━━━┓
|
237
|
-
# ┃ How are you feeling ┃
|
238
|
-
# ┡━━━━━━━━━━━━━━━━━━━━━┩
|
239
|
-
# │ OK │
|
240
|
-
# ├─────────────────────┤
|
241
|
-
# │ Great │
|
242
|
-
# ├─────────────────────┤
|
243
|
-
# │ Terrible │
|
244
|
-
# ├─────────────────────┤
|
245
|
-
# │ OK │
|
246
|
-
# └─────────────────────┘
|
247
|
-
|
248
|
-
# Example: printing in markdown format
|
249
|
-
|
250
|
-
# >>> r.select('how_feeling').print(format='markdown')
|
251
|
-
# | answer.how_feeling |
|
252
|
-
# |--|
|
253
|
-
# | OK |
|
254
|
-
# | Great |
|
255
|
-
# | Terrible |
|
256
|
-
# | OK |
|
257
|
-
# ...
|
258
|
-
|
259
|
-
# >>> r.select('how_feeling').print(format='latex')
|
260
|
-
# \\begin{tabular}{l}
|
261
|
-
# ...
|
262
|
-
# \\end{tabular}
|
263
|
-
# <BLANKLINE>
|
264
|
-
# """
|
265
|
-
# from IPython.display import HTML, display
|
266
|
-
# from edsl.utilities.utilities import is_notebook
|
267
|
-
# import io
|
268
|
-
# import sys
|
269
|
-
|
270
|
-
# def _determine_format(format):
|
271
|
-
# if format is None:
|
272
|
-
# if is_notebook():
|
273
|
-
# format = "html"
|
274
|
-
# else:
|
275
|
-
# format = "rich"
|
276
|
-
# if format not in ["rich", "html", "markdown", "latex"]:
|
277
|
-
# raise ValueError(
|
278
|
-
# "format must be one of 'rich', 'html', 'markdown', or 'latex'."
|
279
|
-
# )
|
280
|
-
|
281
|
-
# return format
|
282
|
-
|
283
|
-
# format = _determine_format(format)
|
284
|
-
|
285
|
-
# if pretty_labels is None:
|
286
|
-
# pretty_labels = {}
|
287
|
-
|
288
|
-
# if pretty_labels != {}: # only split at dot if there are no pretty labels
|
289
|
-
# split_at_dot = False
|
290
|
-
|
291
|
-
# def _create_data():
|
292
|
-
# for index, entry in enumerate(self):
|
293
|
-
# key, list_of_values = list(entry.items())[0]
|
294
|
-
# yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
|
295
|
-
|
296
|
-
# new_data = list(_create_data())
|
297
|
-
|
298
|
-
# # Capture output if return_string is True
|
299
|
-
# if return_string:
|
300
|
-
# old_stdout = sys.stdout
|
301
|
-
# sys.stdout = io.StringIO()
|
302
|
-
|
303
|
-
# output = None
|
304
|
-
|
305
|
-
# if format == "rich":
|
306
|
-
# from edsl.utilities.interface import print_dataset_with_rich
|
307
|
-
|
308
|
-
# output = print_dataset_with_rich(
|
309
|
-
# new_data, filename=filename, split_at_dot=split_at_dot
|
310
|
-
# )
|
311
|
-
# elif format == "markdown":
|
312
|
-
# from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
|
313
|
-
|
314
|
-
# output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
|
315
|
-
# elif format == "latex":
|
316
|
-
# df = self.to_pandas()
|
317
|
-
# df.columns = [col.replace("_", " ") for col in df.columns]
|
318
|
-
# latex_string = df.to_latex(index=False)
|
319
|
-
|
320
|
-
# if filename is not None:
|
321
|
-
# with open(filename, "w") as f:
|
322
|
-
# f.write(latex_string)
|
323
|
-
# else:
|
324
|
-
# print(latex_string)
|
325
|
-
# output = latex_string
|
326
|
-
# elif format == "html":
|
327
|
-
# from edsl.utilities.interface import print_list_of_dicts_as_html_table
|
328
|
-
|
329
|
-
# html_source = print_list_of_dicts_as_html_table(
|
330
|
-
# new_data, interactive=interactive
|
331
|
-
# )
|
332
|
-
|
333
|
-
# if iframe:
|
334
|
-
# iframe = f""""
|
335
|
-
# <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
|
336
|
-
# """
|
337
|
-
# display(HTML(iframe))
|
338
|
-
# elif is_notebook():
|
339
|
-
# display(HTML(html_source))
|
340
|
-
# else:
|
341
|
-
# from edsl.utilities.interface import view_html
|
342
|
-
|
343
|
-
# view_html(html_source)
|
344
|
-
|
345
|
-
# output = html_source
|
346
|
-
|
347
|
-
# # Restore stdout and get captured output if return_string is True
|
348
|
-
# if return_string:
|
349
|
-
# captured_output = sys.stdout.getvalue()
|
350
|
-
# sys.stdout = old_stdout
|
351
|
-
# return captured_output or output
|
352
|
-
|
353
|
-
# if tee:
|
354
|
-
# return self
|
355
|
-
|
356
|
-
# return None
|
146
|
+
def _get_tabular_data(
|
147
|
+
self,
|
148
|
+
remove_prefix: bool = False,
|
149
|
+
pretty_labels: Optional[dict] = None,
|
150
|
+
) -> Tuple[List[str], List[List]]:
|
151
|
+
"""Internal method to get tabular data in a standard format.
|
152
|
+
|
153
|
+
Args:
|
154
|
+
remove_prefix: Whether to remove the prefix from column names
|
155
|
+
pretty_labels: Dictionary mapping original column names to pretty labels
|
156
|
+
|
157
|
+
Returns:
|
158
|
+
Tuple containing (header_row, data_rows)
|
159
|
+
"""
|
160
|
+
if pretty_labels is None:
|
161
|
+
pretty_labels = {}
|
162
|
+
|
163
|
+
return self._make_tabular(
|
164
|
+
remove_prefix=remove_prefix, pretty_labels=pretty_labels
|
165
|
+
)
|
166
|
+
|
167
|
+
def to_jsonl(self, filename: Optional[str] = None) -> "FileStore":
|
168
|
+
"""Export the results to a FileStore instance containing JSONL data.
|
169
|
+
|
170
|
+
Args:
|
171
|
+
filename: Optional filename for the JSONL file (defaults to "results.jsonl")
|
172
|
+
|
173
|
+
Returns:
|
174
|
+
FileStore: Instance containing the JSONL data
|
175
|
+
"""
|
176
|
+
if filename is None:
|
177
|
+
filename = "results.jsonl"
|
178
|
+
|
179
|
+
# Write to string buffer
|
180
|
+
output = io.StringIO()
|
181
|
+
for entry in self:
|
182
|
+
key, values = list(entry.items())[0]
|
183
|
+
output.write(f'{{"{key}": {values}}}\n')
|
184
|
+
|
185
|
+
# Get the CSV string and encode to base64
|
186
|
+
jsonl_string = output.getvalue()
|
187
|
+
base64_string = base64.b64encode(jsonl_string.encode()).decode()
|
188
|
+
from edsl.scenarios.FileStore import FileStore
|
189
|
+
|
190
|
+
return FileStore(
|
191
|
+
path=filename,
|
192
|
+
mime_type="application/jsonl",
|
193
|
+
binary=False,
|
194
|
+
suffix="jsonl",
|
195
|
+
base64_string=base64_string,
|
196
|
+
)
|
357
197
|
|
358
198
|
def to_csv(
|
359
199
|
self,
|
360
200
|
filename: Optional[str] = None,
|
361
201
|
remove_prefix: bool = False,
|
362
|
-
download_link: bool = False,
|
363
202
|
pretty_labels: Optional[dict] = None,
|
364
|
-
):
|
365
|
-
"""Export the results to a CSV
|
203
|
+
) -> "FileStore":
|
204
|
+
"""Export the results to a FileStore instance containing CSV data.
|
366
205
|
|
367
|
-
:
|
368
|
-
|
369
|
-
|
206
|
+
Args:
|
207
|
+
filename: Optional filename for the CSV (defaults to "results.csv")
|
208
|
+
remove_prefix: Whether to remove the prefix from column names
|
209
|
+
pretty_labels: Dictionary mapping original column names to pretty labels
|
210
|
+
|
211
|
+
Returns:
|
212
|
+
FileStore: Instance containing the CSV data
|
213
|
+
"""
|
214
|
+
if filename is None:
|
215
|
+
filename = "results.csv"
|
370
216
|
|
371
|
-
|
217
|
+
# Get the tabular data
|
218
|
+
header, rows = self._get_tabular_data(
|
219
|
+
remove_prefix=remove_prefix, pretty_labels=pretty_labels
|
220
|
+
)
|
372
221
|
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
['OK']
|
392
|
-
['Great']
|
393
|
-
['Terrible']
|
394
|
-
['OK']
|
222
|
+
# Write to string buffer
|
223
|
+
output = io.StringIO()
|
224
|
+
writer = csv.writer(output)
|
225
|
+
writer.writerow(header)
|
226
|
+
writer.writerows(rows)
|
227
|
+
|
228
|
+
# Get the CSV string and encode to base64
|
229
|
+
csv_string = output.getvalue()
|
230
|
+
base64_string = base64.b64encode(csv_string.encode()).decode()
|
231
|
+
from edsl.scenarios.FileStore import FileStore
|
232
|
+
|
233
|
+
return FileStore(
|
234
|
+
path=filename,
|
235
|
+
mime_type="text/csv",
|
236
|
+
binary=False,
|
237
|
+
suffix="csv",
|
238
|
+
base64_string=base64_string,
|
239
|
+
)
|
395
240
|
|
241
|
+
def to_excel(
|
242
|
+
self,
|
243
|
+
filename: Optional[str] = None,
|
244
|
+
remove_prefix: bool = False,
|
245
|
+
pretty_labels: Optional[dict] = None,
|
246
|
+
sheet_name: Optional[str] = None,
|
247
|
+
) -> "FileStore":
|
248
|
+
"""Export the results to a FileStore instance containing Excel data.
|
249
|
+
|
250
|
+
Args:
|
251
|
+
filename: Optional filename for the Excel file (defaults to "results.xlsx")
|
252
|
+
remove_prefix: Whether to remove the prefix from column names
|
253
|
+
pretty_labels: Dictionary mapping original column names to pretty labels
|
254
|
+
sheet_name: Name of the worksheet (defaults to "Results")
|
255
|
+
|
256
|
+
Returns:
|
257
|
+
FileStore: Instance containing the Excel data
|
396
258
|
"""
|
397
|
-
|
398
|
-
|
399
|
-
|
259
|
+
from openpyxl import Workbook
|
260
|
+
|
261
|
+
if filename is None:
|
262
|
+
filename = "results.xlsx"
|
263
|
+
if sheet_name is None:
|
264
|
+
sheet_name = "Results"
|
265
|
+
|
266
|
+
# Get the tabular data
|
267
|
+
header, rows = self._get_tabular_data(
|
400
268
|
remove_prefix=remove_prefix, pretty_labels=pretty_labels
|
401
269
|
)
|
402
270
|
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
271
|
+
# Create Excel workbook in memory
|
272
|
+
wb = Workbook()
|
273
|
+
ws = wb.active
|
274
|
+
ws.title = sheet_name
|
275
|
+
|
276
|
+
# Write header
|
277
|
+
for col, value in enumerate(header, 1):
|
278
|
+
ws.cell(row=1, column=col, value=value)
|
279
|
+
|
280
|
+
# Write data rows
|
281
|
+
for row_idx, row_data in enumerate(rows, 2):
|
282
|
+
for col, value in enumerate(row_data, 1):
|
283
|
+
ws.cell(row=row_idx, column=col, value=value)
|
284
|
+
|
285
|
+
# Save to bytes buffer
|
286
|
+
buffer = io.BytesIO()
|
287
|
+
wb.save(buffer)
|
288
|
+
buffer.seek(0)
|
289
|
+
|
290
|
+
# Convert to base64
|
291
|
+
base64_string = base64.b64encode(buffer.getvalue()).decode()
|
292
|
+
from edsl.scenarios.FileStore import FileStore
|
293
|
+
|
294
|
+
return FileStore(
|
295
|
+
path=filename,
|
296
|
+
mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
297
|
+
binary=True,
|
298
|
+
suffix="xlsx",
|
299
|
+
base64_string=base64_string,
|
300
|
+
)
|
424
301
|
|
425
|
-
def
|
426
|
-
"""
|
302
|
+
def _db(self, remove_prefix: bool = True):
|
303
|
+
"""Create a SQLite database in memory and return the connection.
|
427
304
|
|
428
|
-
:
|
305
|
+
Args:
|
306
|
+
shape: The shape of the data in the database (wide or long)
|
307
|
+
remove_prefix: Whether to remove the prefix from the column names
|
429
308
|
|
430
|
-
|
431
|
-
|
432
|
-
>>> r.select('how_feeling').download_link()
|
433
|
-
'<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
|
309
|
+
Returns:
|
310
|
+
A database connection
|
434
311
|
"""
|
435
|
-
import
|
312
|
+
from sqlalchemy import create_engine
|
436
313
|
|
437
|
-
|
438
|
-
|
439
|
-
|
314
|
+
engine = create_engine("sqlite:///:memory:")
|
315
|
+
if remove_prefix:
|
316
|
+
df = self.remove_prefix().to_pandas(lists_as_strings=True)
|
317
|
+
else:
|
318
|
+
df = self.to_pandas(lists_as_strings=True)
|
319
|
+
df.to_sql(
|
320
|
+
"self",
|
321
|
+
engine,
|
322
|
+
index=False,
|
323
|
+
if_exists="replace",
|
324
|
+
)
|
325
|
+
return engine.connect()
|
326
|
+
|
327
|
+
def sql(
|
328
|
+
self,
|
329
|
+
query: str,
|
330
|
+
transpose: bool = None,
|
331
|
+
transpose_by: str = None,
|
332
|
+
remove_prefix: bool = True,
|
333
|
+
) -> Union["pd.DataFrame", str]:
|
334
|
+
"""Execute a SQL query and return the results as a DataFrame.
|
335
|
+
|
336
|
+
Args:
|
337
|
+
query: The SQL query to execute
|
338
|
+
shape: The shape of the data in the database (wide or long)
|
339
|
+
remove_prefix: Whether to remove the prefix from the column names
|
340
|
+
transpose: Whether to transpose the DataFrame
|
341
|
+
transpose_by: The column to use as the index when transposing
|
342
|
+
csv: Whether to return the DataFrame as a CSV string
|
343
|
+
to_list: Whether to return the results as a list
|
344
|
+
to_latex: Whether to return the results as LaTeX
|
345
|
+
filename: Optional filename to save the results to
|
346
|
+
|
347
|
+
Returns:
|
348
|
+
DataFrame, CSV string, list, or LaTeX string depending on parameters
|
349
|
+
|
350
|
+
"""
|
351
|
+
import pandas as pd
|
352
|
+
|
353
|
+
conn = self._db(remove_prefix=remove_prefix)
|
354
|
+
df = pd.read_sql_query(query, conn)
|
355
|
+
|
356
|
+
# Transpose the DataFrame if transpose is True
|
357
|
+
if transpose or transpose_by:
|
358
|
+
df = pd.DataFrame(df)
|
359
|
+
if transpose_by:
|
360
|
+
df = df.set_index(transpose_by)
|
361
|
+
else:
|
362
|
+
df = df.set_index(df.columns[0])
|
363
|
+
df = df.transpose()
|
364
|
+
from edsl.results.Dataset import Dataset
|
365
|
+
|
366
|
+
return Dataset.from_pandas_dataframe(df)
|
440
367
|
|
441
368
|
def to_pandas(
|
442
369
|
self, remove_prefix: bool = False, lists_as_strings=False
|
@@ -447,19 +374,6 @@ class DatasetExportMixin:
|
|
447
374
|
|
448
375
|
"""
|
449
376
|
return self._to_pandas_strings(remove_prefix)
|
450
|
-
# if lists_as_strings:
|
451
|
-
# return self._to_pandas_strings(remove_prefix=remove_prefix)
|
452
|
-
|
453
|
-
# import pandas as pd
|
454
|
-
|
455
|
-
# df = pd.DataFrame(self.data)
|
456
|
-
|
457
|
-
# if remove_prefix:
|
458
|
-
# # Optionally remove prefixes from column names
|
459
|
-
# df.columns = [col.split(".")[-1] for col in df.columns]
|
460
|
-
|
461
|
-
# df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
|
462
|
-
# return df_sorted
|
463
377
|
|
464
378
|
def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
|
465
379
|
"""Convert the results to a pandas DataFrame.
|
@@ -478,7 +392,7 @@ class DatasetExportMixin:
|
|
478
392
|
|
479
393
|
import pandas as pd
|
480
394
|
|
481
|
-
csv_string = self.to_csv(remove_prefix=remove_prefix)
|
395
|
+
csv_string = self.to_csv(remove_prefix=remove_prefix).text
|
482
396
|
csv_buffer = io.StringIO(csv_string)
|
483
397
|
df = pd.read_csv(csv_buffer)
|
484
398
|
# df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
|
@@ -494,14 +408,14 @@ class DatasetExportMixin:
|
|
494
408
|
>>> r.select('how_feeling').to_scenario_list()
|
495
409
|
ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
|
496
410
|
"""
|
497
|
-
from edsl import ScenarioList
|
411
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
412
|
+
from edsl.scenarios.Scenario import Scenario
|
498
413
|
|
499
414
|
list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
|
500
415
|
scenarios = []
|
501
416
|
for d in list_of_dicts:
|
502
417
|
scenarios.append(Scenario(d))
|
503
418
|
return ScenarioList(scenarios)
|
504
|
-
# return ScenarioList([Scenario(d) for d in list_of_dicts])
|
505
419
|
|
506
420
|
def to_agent_list(self, remove_prefix: bool = True):
|
507
421
|
"""Convert the results to a list of dictionaries, one per agent.
|
@@ -513,7 +427,8 @@ class DatasetExportMixin:
|
|
513
427
|
>>> r.select('how_feeling').to_agent_list()
|
514
428
|
AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
|
515
429
|
"""
|
516
|
-
from edsl import
|
430
|
+
from edsl.agents import Agent
|
431
|
+
from edsl.agents.AgentList import AgentList
|
517
432
|
|
518
433
|
list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
|
519
434
|
agents = []
|
@@ -521,6 +436,11 @@ class DatasetExportMixin:
|
|
521
436
|
if "name" in d:
|
522
437
|
d["agent_name"] = d.pop("name")
|
523
438
|
agents.append(Agent(d, name=d["agent_name"]))
|
439
|
+
if "agent_parameters" in d:
|
440
|
+
agent_parameters = d.pop("agent_parameters")
|
441
|
+
agent_name = agent_parameters.get("name", None)
|
442
|
+
instruction = agent_parameters.get("instruction", None)
|
443
|
+
agents.append(Agent(d, name=agent_name, instruction=instruction))
|
524
444
|
else:
|
525
445
|
agents.append(Agent(d))
|
526
446
|
return AgentList(agents)
|
@@ -608,7 +528,9 @@ class DatasetExportMixin:
|
|
608
528
|
new_list.append(item)
|
609
529
|
list_to_return = new_list
|
610
530
|
|
611
|
-
|
531
|
+
from edsl.utilities.PrettyList import PrettyList
|
532
|
+
|
533
|
+
return PrettyList(list_to_return)
|
612
534
|
|
613
535
|
def html(
|
614
536
|
self,
|
@@ -658,8 +580,10 @@ class DatasetExportMixin:
|
|
658
580
|
>>> r = Results.example()
|
659
581
|
>>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
|
660
582
|
{'OK': 2, 'Great': 1, 'Terrible': 1}
|
661
|
-
>>>
|
662
|
-
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
|
583
|
+
>>> from edsl.results.Dataset import Dataset
|
584
|
+
>>> expected = Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
|
585
|
+
>>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset") == expected
|
586
|
+
True
|
663
587
|
"""
|
664
588
|
from collections import Counter
|
665
589
|
|
@@ -691,8 +615,6 @@ class DatasetExportMixin:
|
|
691
615
|
if top_n is not None:
|
692
616
|
sorted_tally = dict(list(sorted_tally.items())[:top_n])
|
693
617
|
|
694
|
-
import warnings
|
695
|
-
import textwrap
|
696
618
|
from edsl.results.Dataset import Dataset
|
697
619
|
|
698
620
|
if output == "dict":
|