edsl 0.1.29__py3-none-any.whl → 0.1.29.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +18 -18
- edsl/__init__.py +23 -23
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +41 -77
- edsl/agents/AgentList.py +9 -19
- edsl/agents/Invigilator.py +1 -19
- edsl/agents/InvigilatorBase.py +10 -15
- edsl/agents/PromptConstructionMixin.py +100 -342
- edsl/agents/descriptors.py +1 -2
- edsl/config.py +1 -2
- edsl/conjure/InputData.py +8 -39
- edsl/coop/coop.py +150 -187
- edsl/coop/utils.py +75 -43
- edsl/data/Cache.py +5 -19
- edsl/data/SQLiteDict.py +3 -11
- edsl/jobs/Answers.py +1 -15
- edsl/jobs/Jobs.py +46 -90
- edsl/jobs/buckets/ModelBuckets.py +2 -4
- edsl/jobs/buckets/TokenBucket.py +2 -1
- edsl/jobs/interviews/Interview.py +9 -3
- edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +10 -15
- edsl/jobs/runners/JobsRunnerAsyncio.py +25 -21
- edsl/jobs/tasks/TaskHistory.py +3 -4
- edsl/language_models/LanguageModel.py +11 -5
- edsl/language_models/ModelList.py +1 -1
- edsl/language_models/repair.py +7 -8
- edsl/notebooks/Notebook.py +3 -40
- edsl/prompts/Prompt.py +19 -31
- edsl/questions/QuestionBase.py +13 -38
- edsl/questions/QuestionBudget.py +6 -5
- edsl/questions/QuestionCheckBox.py +3 -7
- edsl/questions/QuestionExtract.py +3 -5
- edsl/questions/QuestionFreeText.py +3 -3
- edsl/questions/QuestionFunctional.py +3 -0
- edsl/questions/QuestionList.py +4 -3
- edsl/questions/QuestionMultipleChoice.py +8 -16
- edsl/questions/QuestionNumerical.py +3 -4
- edsl/questions/QuestionRank.py +3 -5
- edsl/questions/__init__.py +3 -4
- edsl/questions/descriptors.py +2 -4
- edsl/questions/question_registry.py +31 -20
- edsl/questions/settings.py +1 -1
- edsl/results/Dataset.py +0 -31
- edsl/results/Result.py +74 -22
- edsl/results/Results.py +47 -97
- edsl/results/ResultsDBMixin.py +3 -7
- edsl/results/ResultsExportMixin.py +537 -22
- edsl/results/ResultsGGMixin.py +3 -3
- edsl/results/ResultsToolsMixin.py +5 -5
- edsl/scenarios/Scenario.py +6 -5
- edsl/scenarios/ScenarioList.py +11 -34
- edsl/scenarios/ScenarioListPdfMixin.py +1 -2
- edsl/scenarios/__init__.py +0 -1
- edsl/study/Study.py +9 -3
- edsl/surveys/MemoryPlan.py +4 -11
- edsl/surveys/Survey.py +7 -46
- edsl/surveys/SurveyExportMixin.py +2 -4
- edsl/surveys/SurveyFlowVisualizationMixin.py +4 -6
- edsl/tools/plotting.py +2 -4
- edsl/utilities/__init__.py +21 -21
- edsl/utilities/interface.py +45 -66
- edsl/utilities/utilities.py +13 -11
- {edsl-0.1.29.dist-info → edsl-0.1.29.dev2.dist-info}/METADATA +10 -11
- {edsl-0.1.29.dist-info → edsl-0.1.29.dev2.dist-info}/RECORD +68 -71
- edsl-0.1.29.dev2.dist-info/entry_points.txt +3 -0
- edsl/base/Base.py +0 -289
- edsl/results/DatasetExportMixin.py +0 -493
- edsl/scenarios/FileStore.py +0 -140
- edsl/scenarios/ScenarioListExportMixin.py +0 -32
- {edsl-0.1.29.dist-info → edsl-0.1.29.dev2.dist-info}/LICENSE +0 -0
- {edsl-0.1.29.dist-info → edsl-0.1.29.dev2.dist-info}/WHEEL +0 -0
@@ -1,493 +0,0 @@
|
|
1
|
-
"""Mixin class for exporting results."""
|
2
|
-
|
3
|
-
import base64
|
4
|
-
import csv
|
5
|
-
import io
|
6
|
-
|
7
|
-
from typing import Literal, Optional, Union
|
8
|
-
|
9
|
-
|
10
|
-
class DatasetExportMixin:
    """Export and summary helpers for Dataset-like objects.

    The host object is iterated as a sequence of single-entry dictionaries,
    each mapping a column name (for example ``'answer.how_feeling'``) to the
    list of values in that column. ``_make_tabular`` additionally reads
    ``self.data`` and ``tally`` calls ``self._key_to_value``; both have to be
    provided by the class this mixin is combined with.
    """

    def relevant_columns(
        self, data_type: Optional[str] = None, remove_prefix=False
    ) -> list:
        """Return the column names present in the dataset, in dataset order.

        :param data_type: If given, keep only the columns whose prefix (the text before the first dot) equals this value.
        :param remove_prefix: Whether to reduce each name to the text after its last dot.

        >>> from edsl.results.Dataset import Dataset
        >>> d = Dataset([{'a.b':[1,2,3,4]}])
        >>> d.relevant_columns()
        ['a.b']

        >>> d.relevant_columns(remove_prefix=True)
        ['b']

        >>> d.relevant_columns(data_type='a', remove_prefix=True)
        ['b']

        >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
        ['answer.how_feeling', 'answer.how_feeling_yesterday']
        """
        columns = [list(x.keys())[0] for x in self]

        # Filter on the prefix *before* stripping it: once the prefix is gone
        # there is nothing left for ``data_type`` to match against.
        if data_type:
            columns = [
                column for column in columns if column.split(".")[0] == data_type
            ]

        if remove_prefix:
            columns = [column.split(".")[-1] for column in columns]

        return columns

    def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
        """Turn the dataset into a ``(header, rows)`` pair.

        Columns are ordered alphabetically by their full (prefixed) name. An
        empty dataset yields ``([], [])``.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param pretty_labels: Optional mapping from header name (after any prefix removal) to display label.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
        (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])

        >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
        (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
        """
        full_header = sorted(self.relevant_columns())
        if not full_header:
            # Nothing to index into; avoids IndexError on self[0] below.
            return [], []

        values_by_column = {}
        for entry in self.data:
            key, list_of_values = list(entry.items())[0]
            values_by_column[key] = list_of_values

        if remove_prefix:
            header = [h.split(".")[-1] for h in full_header]
        else:
            header = full_header

        # The first column's length is taken as the number of rows.
        num_observations = len(list(self[0].values())[0])
        rows = [
            [values_by_column[h][i] for h in full_header]
            for i in range(num_observations)
        ]

        if pretty_labels is not None:
            header = [pretty_labels.get(h, h) for h in header]
        return header, rows

    def print_long(self):
        """Print every value on its own line as ``<column>: <value>``."""
        for entry in self:
            key, list_of_values = list(entry.items())[0]
            for value in list_of_values:
                print(f"{key}: {value}")

    def print(
        self,
        pretty_labels: Optional[dict] = None,
        filename: Optional[str] = None,
        format: Literal["rich", "html", "markdown", "latex"] = None,
        interactive: bool = False,
        split_at_dot: bool = True,
        max_rows=None,
        tee=False,
        iframe=False,
        iframe_height: int = 200,
        iframe_width: int = 600,
        web=False,
    ) -> Optional[Union[str, "DatasetExportMixin"]]:
        """Print the results in a pretty format.

        :param pretty_labels: A dictionary of pretty labels for the columns.
        :param filename: The filename to save the results to (used by the 'rich', 'markdown' and 'latex' formats).
        :param format: The format to print the results in. Options are 'rich', 'html', 'markdown' or 'latex'. When omitted, 'html' is used inside a notebook and 'rich' elsewhere.
        :param interactive: Whether to print the results interactively in a Jupyter notebook.
        :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
        :param max_rows: If given, show only the first ``max_rows`` values of each column (not applied to 'latex').
        :param tee: If true, return ``self`` after printing.
        :param iframe: For 'html', wrap the table in an iframe of ``iframe_width`` x ``iframe_height`` pixels.
        :param web: Currently unused.
        :return: The LaTeX source when ``format='latex'`` and no filename is given; ``self`` when ``tee`` is true; otherwise ``None``.
        :raises ValueError: If ``format`` is not one of the supported values.

        Example: Print in rich format at the terminal

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print(format = "rich")
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        ├──────────────┤
        │ Terrible     │
        ├──────────────┤
        │ OK           │
        └──────────────┘

        Example: using the pretty_labels parameter

        >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
        ┏━━━━━━━━━━━━━━━━━━━━━┓
        ┃ How are you feeling ┃
        ┡━━━━━━━━━━━━━━━━━━━━━┩
        │ OK                  │
        ├─────────────────────┤
        │ Great               │
        ├─────────────────────┤
        │ Terrible            │
        ├─────────────────────┤
        │ OK                  │
        └─────────────────────┘

        Example: printing in markdown format

        >>> r.select('how_feeling').print(format='markdown')
        | answer.how_feeling |
        |--|
        | OK |
        | Great |
        | Terrible |
        | OK |
        ...
        """
        if format is None:
            from edsl.utilities.utilities import is_notebook

            format = "html" if is_notebook() else "rich"

        if format not in ("rich", "html", "markdown", "latex"):
            raise ValueError(
                "format must be one of 'rich', 'html', 'markdown', or 'latex'."
            )

        if pretty_labels is None:
            pretty_labels = {}

        # Relabelled (and optionally truncated) copy; the dataset itself is
        # left untouched because slicing creates new lists.
        new_data = []
        for entry in self:
            key, list_of_values = list(entry.items())[0]
            if max_rows is not None:
                list_of_values = list_of_values[:max_rows]
            new_data.append({pretty_labels.get(key, key): list_of_values})

        if format == "rich":
            from edsl.utilities.interface import print_dataset_with_rich

            print_dataset_with_rich(
                new_data, filename=filename, split_at_dot=split_at_dot
            )
        elif format == "html":
            from edsl.utilities.interface import print_list_of_dicts_as_html_table
            from edsl.utilities.utilities import is_notebook

            notebook = is_notebook()
            html_source = print_list_of_dicts_as_html_table(
                new_data, interactive=interactive
            )
            if iframe or notebook:
                # IPython is only needed when something is actually displayed.
                from IPython.display import HTML, display

            if iframe:
                import html

                # srcdoc is an attribute value, so the table markup must be escaped.
                escaped_output = html.escape(html_source)
                iframe_markup = f"""
                <iframe srcdoc="{escaped_output}" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
                """
                display(HTML(iframe_markup))
            elif notebook:
                display(HTML(html_source))
            else:
                from edsl.utilities.interface import view_html

                view_html(html_source)
        elif format == "markdown":
            from edsl.utilities.interface import print_list_of_dicts_as_markdown_table

            print_list_of_dicts_as_markdown_table(new_data, filename=filename)
        else:  # format == "latex"
            # Built from the full dataset: pretty_labels and max_rows are not applied.
            df = self.to_pandas()
            df.columns = [col.replace("_", " ") for col in df.columns]
            latex_string = df.to_latex()
            if filename is None:
                return latex_string
            with open(filename, "w") as f:
                f.write(latex_string)

        if tee:
            return self

    def to_csv(
        self,
        filename: Optional[str] = None,
        remove_prefix: bool = False,
        download_link: bool = False,
        pretty_labels: Optional[dict] = None,
    ):
        """Export the results as CSV.

        :param filename: The filename to save the CSV file to. When given, the file is written and nothing is returned.
        :param remove_prefix: Whether to remove the prefix from the column names.
        :param download_link: Whether to display a download link in a Jupyter notebook instead of returning the CSV text (only consulted when no filename is given).
        :param pretty_labels: Optional mapping from column name to display label.
        :return: The CSV text when neither ``filename`` nor ``download_link`` is used, otherwise ``None``.

        Example:

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_csv()
        'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
        """
        if pretty_labels is None:
            pretty_labels = {}
        header, rows = self._make_tabular(
            remove_prefix=remove_prefix, pretty_labels=pretty_labels
        )

        if filename is not None:
            # newline="" lets the csv module control line endings itself, as
            # its documentation requires for file objects.
            with open(filename, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(header)
                writer.writerows(rows)
            return None

        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(header)
        writer.writerows(rows)
        csv_text = output.getvalue()

        if not download_link:
            return csv_text

        from IPython.display import HTML, display

        b64 = base64.b64encode(csv_text.encode()).decode()
        link_markup = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
        display(HTML(link_markup))

    def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
        """Convert the results to a pandas DataFrame.

        The frame is built by parsing the output of ``to_csv``, so column
        dtypes are whatever ``pandas.read_csv`` infers. Columns are sorted
        alphabetically.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_pandas()
          answer.how_feeling
        0                 OK
        1              Great
        2           Terrible
        3                 OK
        """
        import pandas as pd

        csv_buffer = io.StringIO(self.to_csv(remove_prefix=remove_prefix))
        return pd.read_csv(csv_buffer).sort_index(axis=1)

    def to_scenario_list(self, remove_prefix: bool = True) -> "ScenarioList":
        """Convert the results to a ScenarioList, one Scenario per row.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_scenario_list()
        ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
        """
        from edsl import ScenarioList, Scenario

        return ScenarioList(
            [Scenario(d) for d in self.to_dicts(remove_prefix=remove_prefix)]
        )

    def to_agent_list(self, remove_prefix: bool = True):
        """Convert the results to an AgentList, building one Agent from each row dictionary.

        :param remove_prefix: Whether to remove the prefix from the column names.
        """
        from edsl import AgentList, Agent

        return AgentList(
            [Agent(d) for d in self.to_dicts(remove_prefix=remove_prefix)]
        )

    def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
        """Convert the results to a list of dictionaries, one per row.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_dicts()
        [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

        """
        list_of_keys = []
        list_of_values = []
        for entry in self:
            key, values = list(entry.items())[0]
            list_of_keys.append(key)
            list_of_values.append(values)

        if remove_prefix:
            list_of_keys = [key.split(".")[-1] for key in list_of_keys]

        # zip(*columns) transposes the column lists into row tuples.
        return [dict(zip(list_of_keys, row)) for row in zip(*list_of_values)]

    def to_list(self, flatten=False, remove_none=False) -> list[list]:
        """Convert the results to a list.

        With one column selected the column's values are returned; with
        several, a list of row tuples.

        :param flatten: Expand list-valued items into the result (single column only).
        :param remove_none: Drop items that are ``None``.
        :raises ValueError: If ``flatten`` is requested with more than one column.

        >>> from edsl.results import Results
        >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

        >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
        [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

        >>> r = Results.example()
        >>> r.select('how_feeling').to_list()
        ['OK', 'Great', 'Terrible', 'OK']
        """
        keys = self.relevant_columns()

        if len(keys) > 1 and flatten:
            raise ValueError(
                "Cannot flatten a list of lists when there are multiple columns selected."
            )

        if len(keys) == 1:
            # Typical use: a single selected column.
            list_to_return = list(self[0].values())[0]
        else:
            list_to_return = [
                tuple(d[key] for key in keys)
                for d in self.to_dicts(remove_prefix=False)
            ]

        if remove_none:
            list_to_return = [item for item in list_to_return if item is not None]

        if flatten:
            new_list = []
            for item in list_to_return:
                if isinstance(item, list):
                    new_list.extend(item)
                else:
                    new_list.append(item)
            list_to_return = new_list

        return list_to_return

    def html(
        self, filename: str = None, cta: str = "Open in browser", return_link=False
    ):
        """Write the dataset as an HTML table and open or link to it.

        :param filename: Destination path. When omitted, a new ``.html`` file is created in the current working directory.
        :param cta: Link text shown when running inside a notebook.
        :param return_link: Whether to return the path of the written file.
        """
        import os
        import tempfile

        from edsl.utilities.utilities import is_notebook

        df = self.to_pandas()

        if filename is None:
            # Only the unique name is needed here; close the handle at once
            # because the file is reopened by name below, which is not
            # possible on every platform while the first handle is open.
            with tempfile.NamedTemporaryFile(
                "w", delete=False, suffix=".html", dir=os.getcwd()
            ) as placeholder:
                filename = placeholder.name

        with open(filename, "w") as f:
            f.write(df.to_html())

        if is_notebook():
            from IPython.display import HTML, display

            html_url = f"/files/{filename}"
            html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
            display(HTML(html_link))
        else:
            import webbrowser

            print(f"Saved to {filename}")
            webbrowser.open(f"file://{os.path.abspath(filename)}")

        if return_link:
            return filename

    def tally(
        self, *fields: Optional[str], top_n=None, output="dict"
    ) -> Union[dict, "Dataset"]:
        """Tally the values of a field or perform a cross-tab of multiple fields.

        List values are counted as tuples so that they can be used as keys.

        :param fields: The field(s) to tally, multiple fields for cross-tabulation. Defaults to every column.
        :param top_n: If given, keep only the ``top_n`` most frequent entries.
        :param output: ``'dict'`` for a plain dictionary (emits a warning about the upcoming default change) or ``'Dataset'`` for a Dataset with ``value`` and ``count`` columns.
        :raises ValueError: If a field is not in the dataset or ``output`` is not recognized.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').tally('answer.how_feeling')
        {'OK': 2, 'Great': 1, 'Terrible': 1}
        >>> r.select('how_feeling', 'period').tally('how_feeling', 'period')
        {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
        """
        from collections import Counter

        columns = self.relevant_columns()
        if len(fields) == 0:
            fields = columns

        # Fields may be given with or without their prefix.
        known_names = set(columns) | {column.split(".")[-1] for column in columns}
        if not all(f in known_names for f in fields):
            raise ValueError("One or more specified fields are not in the dataset.")

        if len(fields) == 1:
            values = self._key_to_value(fields[0])
        else:
            values = list(zip(*(self._key_to_value(field) for field in fields)))

        # Counter needs hashable keys; build a new list because rebinding the
        # loop variable alone would leave ``values`` unchanged.
        values = [tuple(v) if isinstance(v, list) else v for v in values]

        counts = dict(Counter(values))
        sorted_tally = dict(sorted(counts.items(), key=lambda item: -item[1]))
        if top_n is not None:
            sorted_tally = dict(list(sorted_tally.items())[:top_n])

        if output == "dict":
            import textwrap
            import warnings

            warnings.warn(
                textwrap.dedent(
                    """\
                        The default output from tally will change to Dataset in the future.
                        Use output='Dataset' to get the Dataset object for now.
                        """
                )
            )
            return sorted_tally
        if output == "Dataset":
            from edsl.results.Dataset import Dataset

            return Dataset(
                [
                    {"value": list(sorted_tally.keys())},
                    {"count": list(sorted_tally.values())},
                ]
            )
        raise ValueError(f"output must be 'dict' or 'Dataset', got {output!r}.")
|
488
|
-
|
489
|
-
|
490
|
-
if __name__ == "__main__":
    from doctest import ELLIPSIS, testmod

    # Run the docstring examples of this module; ELLIPSIS lets "..." in the
    # expected output stand for arbitrary text.
    testmod(optionflags=ELLIPSIS)
|
edsl/scenarios/FileStore.py
DELETED
@@ -1,140 +0,0 @@
|
|
1
|
-
from edsl import Scenario
|
2
|
-
import base64
|
3
|
-
import io
|
4
|
-
import tempfile
|
5
|
-
from typing import Optional
|
6
|
-
|
7
|
-
|
8
|
-
class FileStore(Scenario):
    """Scenario that carries the content of a file as a base64 string.

    The scenario data has four keys -- ``filename``, ``base64_string``,
    ``binary`` and ``suffix`` -- and the same values are also kept as
    instance attributes.
    """

    def __init__(
        self,
        filename: str,
        binary: Optional[bool] = None,
        suffix: Optional[str] = None,
        base64_string: Optional[str] = None,
    ):
        """Create a store for ``filename``.

        :param filename: Path of the file. It is read only when ``base64_string`` is not supplied.
        :param binary: Whether the content is binary. Switched to ``True`` automatically when the file read here is not valid UTF-8.
        :param suffix: File extension including the dot. Defaults to the extension of ``filename``.
        :param base64_string: Already-encoded content, e.g. when rebuilding from a dictionary.
        """
        # Function-scope import: os is not among this module's top-level imports.
        import os

        self.filename = filename
        # splitext looks only at the last path component and yields "" when
        # there is no extension, unlike a plain split on ".".
        self.suffix = suffix or os.path.splitext(filename)[1]
        # Must be set before encoding: the encoder may flip it to True.
        self.binary = binary or False
        self.base64_string = base64_string or self.encode_file_to_base64_string(
            filename
        )
        super().__init__(
            {
                "filename": self.filename,
                "base64_string": self.base64_string,
                "binary": self.binary,
                "suffix": self.suffix,
            }
        )

    @classmethod
    def from_dict(cls, d):
        """Rebuild a store from a dictionary with the four scenario keys."""
        return cls(d["filename"], d["binary"], d["suffix"], d["base64_string"])

    def encode_file_to_base64_string(self, file_path):
        """Return the content of ``file_path`` as a base64 string.

        The file is read as raw bytes so the stored content is byte-for-byte
        what is on disk. If those bytes are not valid UTF-8, ``self.binary``
        is set to ``True``.
        """
        with open(file_path, "rb") as source_file:
            raw_bytes = source_file.read()

        try:
            raw_bytes.decode("utf-8")
        except UnicodeDecodeError:
            self.binary = True

        return base64.b64encode(raw_bytes).decode("utf-8")

    def open(self):
        """Return the content as an in-memory file: BytesIO if binary, StringIO otherwise."""
        if self.binary:
            return self.base64_to_file(self["base64_string"], is_binary=True)
        return self.base64_to_text_file(self["base64_string"])

    @staticmethod
    def base64_to_text_file(base64_string):
        """Decode ``base64_string`` as UTF-8 text and wrap it in a StringIO."""
        text_data = base64.b64decode(base64_string).decode("utf-8")
        return io.StringIO(text_data)

    @staticmethod
    def base64_to_file(base64_string, is_binary=True):
        """Decode ``base64_string`` into a BytesIO, or a StringIO when ``is_binary`` is false."""
        file_data = base64.b64decode(base64_string)
        if is_binary:
            return io.BytesIO(file_data)
        return io.StringIO(file_data.decode("utf-8"))

    def to_tempfile(self, suffix=None):
        """Write the content to a new temporary file and return its path.

        The file is not deleted automatically.

        :param suffix: Extension for the temporary file. Defaults to ``self.suffix``.
        """
        if suffix is None:
            suffix = self.suffix

        # NamedTemporaryFile opens in binary mode by default, so always write
        # the decoded bytes; for text content these are its UTF-8 bytes.
        payload = base64.b64decode(self["base64_string"])
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(payload)

        return temp_file.name

    def push(self, description=None):
        """Push the content as a plain Scenario and return what ``Scenario.push`` returns.

        :param description: Defaults to ``"File: "`` followed by the filename.
        """
        scenario_version = Scenario.from_dict(self.to_dict())
        if description is None:
            description = "File: " + self["filename"]
        return scenario_version.push(description=description)

    @classmethod
    def pull(cls, uuid):
        """Pull the Scenario identified by ``uuid`` and rebuild a store from its dictionary."""
        scenario_version = Scenario.pull(uuid)
        return cls.from_dict(scenario_version.to_dict())
|
117
|
-
|
118
|
-
|
119
|
-
class CSVFileStore(FileStore):
    """FileStore whose suffix defaults to ``.csv``."""

    def __init__(self, filename, binary=None, suffix=".csv", base64_string=None):
        # The optional arguments mirror FileStore.__init__ so that the
        # inherited from_dict()/pull(), which call
        # cls(filename, binary, suffix, base64_string), work for this class.
        super().__init__(
            filename,
            binary=binary,
            suffix=suffix or ".csv",
            base64_string=base64_string,
        )
|
122
|
-
|
123
|
-
|
124
|
-
class PDFFileStore(FileStore):
    """FileStore whose suffix defaults to ``.pdf``."""

    def __init__(self, filename, binary=None, suffix=".pdf", base64_string=None):
        # The optional arguments mirror FileStore.__init__ so that the
        # inherited from_dict()/pull(), which call
        # cls(filename, binary, suffix, base64_string), work for this class.
        super().__init__(
            filename,
            binary=binary,
            suffix=suffix or ".pdf",
            base64_string=base64_string,
        )
|
127
|
-
|
128
|
-
|
129
|
-
if __name__ == "__main__":
    # Manual smoke test: wrap "paper.pdf" (looked up relative to the working
    # directory) in a PDFFileStore and push it.
    pdf_store = PDFFileStore("paper.pdf")
    pdf_store.push()
|
@@ -1,32 +0,0 @@
|
|
1
|
-
"""Mixin class for exporting results."""
|
2
|
-
|
3
|
-
from functools import wraps
|
4
|
-
from edsl.results.DatasetExportMixin import DatasetExportMixin
|
5
|
-
|
6
|
-
|
7
|
-
def to_dataset(func):
    """Wrap ``func`` so that it receives ``self.to_dataset()`` instead of ``self``.

    The wrapper only accepts objects whose class is named ``ScenarioList``.

    :raises TypeError: When the wrapped callable is invoked on any other object.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        class_name = self.__class__.__name__
        if class_name != "ScenarioList":
            raise TypeError(
                f"Class {class_name} not recognized as a ScenarioList object."
            )
        # Hand the converted dataset to the original function as its "self".
        return func(self.to_dataset(), *args, **kwargs)

    return wrapper
|
21
|
-
|
22
|
-
|
23
|
-
def decorate_all_methods(cls):
    """Class decorator: wrap the public callables of ``cls`` with ``to_dataset``.

    Callables inherited from base classes are included, so a subclass that
    defines nothing of its own still gets wrapped versions of its parents'
    methods. Names starting with a double underscore are left alone.

    :return: ``cls`` itself, modified in place.
    """
    # Collect first, wrap afterwards: wrapping while scanning would put the
    # wrappers into cls.__dict__ and they would then be wrapped a second time.
    targets = {}
    # Base classes first so the most-derived definition of a name wins.
    for klass in reversed(cls.__mro__):
        if klass is object:
            continue
        for attr_name, attr_value in vars(klass).items():
            if attr_name.startswith("__") or not callable(attr_value):
                continue
            targets[attr_name] = attr_value

    for attr_name, attr_value in targets.items():
        setattr(cls, attr_name, to_dataset(attr_value))
    return cls
|
28
|
-
|
29
|
-
|
30
|
-
@decorate_all_methods
class ScenarioListExportMixin(DatasetExportMixin):
    """Export mixin for ScenarioList, derived from DatasetExportMixin.

    Adds no members of its own; the class is passed through
    ``decorate_all_methods`` when it is defined.
    """
|
File without changes
|
File without changes
|