edsl 0.1.29.dev3__py3-none-any.whl → 0.1.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. edsl/Base.py +18 -18
  2. edsl/__init__.py +23 -23
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +79 -41
  5. edsl/agents/AgentList.py +26 -26
  6. edsl/agents/Invigilator.py +19 -2
  7. edsl/agents/InvigilatorBase.py +15 -10
  8. edsl/agents/PromptConstructionMixin.py +342 -100
  9. edsl/agents/descriptors.py +2 -1
  10. edsl/base/Base.py +289 -0
  11. edsl/config.py +2 -1
  12. edsl/conjure/InputData.py +39 -8
  13. edsl/conversation/car_buying.py +1 -1
  14. edsl/coop/coop.py +187 -150
  15. edsl/coop/utils.py +43 -75
  16. edsl/data/Cache.py +41 -18
  17. edsl/data/CacheEntry.py +6 -7
  18. edsl/data/SQLiteDict.py +11 -3
  19. edsl/data_transfer_models.py +4 -0
  20. edsl/jobs/Answers.py +15 -1
  21. edsl/jobs/Jobs.py +108 -49
  22. edsl/jobs/buckets/ModelBuckets.py +14 -2
  23. edsl/jobs/buckets/TokenBucket.py +32 -5
  24. edsl/jobs/interviews/Interview.py +99 -79
  25. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +19 -24
  26. edsl/jobs/runners/JobsRunnerAsyncio.py +16 -16
  27. edsl/jobs/tasks/QuestionTaskCreator.py +10 -6
  28. edsl/jobs/tasks/TaskHistory.py +4 -3
  29. edsl/language_models/LanguageModel.py +17 -17
  30. edsl/language_models/ModelList.py +1 -1
  31. edsl/language_models/repair.py +8 -7
  32. edsl/notebooks/Notebook.py +47 -10
  33. edsl/prompts/Prompt.py +31 -19
  34. edsl/questions/QuestionBase.py +38 -13
  35. edsl/questions/QuestionBudget.py +5 -6
  36. edsl/questions/QuestionCheckBox.py +7 -3
  37. edsl/questions/QuestionExtract.py +5 -3
  38. edsl/questions/QuestionFreeText.py +7 -5
  39. edsl/questions/QuestionFunctional.py +34 -5
  40. edsl/questions/QuestionList.py +3 -4
  41. edsl/questions/QuestionMultipleChoice.py +68 -12
  42. edsl/questions/QuestionNumerical.py +4 -3
  43. edsl/questions/QuestionRank.py +5 -3
  44. edsl/questions/__init__.py +4 -3
  45. edsl/questions/descriptors.py +46 -4
  46. edsl/questions/question_registry.py +20 -31
  47. edsl/questions/settings.py +1 -1
  48. edsl/results/Dataset.py +31 -0
  49. edsl/results/DatasetExportMixin.py +570 -0
  50. edsl/results/Result.py +66 -70
  51. edsl/results/Results.py +160 -68
  52. edsl/results/ResultsDBMixin.py +7 -3
  53. edsl/results/ResultsExportMixin.py +22 -537
  54. edsl/results/ResultsGGMixin.py +3 -3
  55. edsl/results/ResultsToolsMixin.py +5 -5
  56. edsl/scenarios/FileStore.py +299 -0
  57. edsl/scenarios/Scenario.py +16 -24
  58. edsl/scenarios/ScenarioList.py +42 -17
  59. edsl/scenarios/ScenarioListExportMixin.py +32 -0
  60. edsl/scenarios/ScenarioListPdfMixin.py +2 -1
  61. edsl/scenarios/__init__.py +1 -0
  62. edsl/study/Study.py +8 -16
  63. edsl/surveys/MemoryPlan.py +11 -4
  64. edsl/surveys/Survey.py +88 -17
  65. edsl/surveys/SurveyExportMixin.py +4 -2
  66. edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
  67. edsl/tools/plotting.py +4 -2
  68. edsl/utilities/__init__.py +21 -21
  69. edsl/utilities/interface.py +66 -45
  70. edsl/utilities/utilities.py +11 -13
  71. {edsl-0.1.29.dev3.dist-info → edsl-0.1.30.dist-info}/METADATA +11 -10
  72. {edsl-0.1.29.dev3.dist-info → edsl-0.1.30.dist-info}/RECORD +74 -71
  73. {edsl-0.1.29.dev3.dist-info → edsl-0.1.30.dist-info}/WHEEL +1 -1
  74. edsl-0.1.29.dev3.dist-info/entry_points.txt +0 -3
  75. {edsl-0.1.29.dev3.dist-info → edsl-0.1.30.dist-info}/LICENSE +0 -0
@@ -0,0 +1,570 @@
1
+ """Mixin class for exporting results."""
2
+
3
+ import base64
4
+ import csv
5
+ import io
6
+
7
+ from typing import Literal, Optional, Union
8
+
9
+
10
class DatasetExportMixin:
    """Mixin that adds display and export helpers to a dataset-like class.

    The methods below treat ``self`` as a sequence of single-key dictionaries,
    each mapping a column name (e.g. ``'answer.how_feeling'``) to the list of
    values in that column. The host class must also expose that sequence as
    ``self.data`` and provide ``_key_to_value`` (used by :meth:`tally`).
    """

    def relevant_columns(
        self, data_type: Optional[str] = None, remove_prefix=False
    ) -> list:
        """Return the column names present in the dataset, in dataset order.

        :param data_type: If given, keep only columns whose prefix (the text before the first dot) equals this value.
        :param remove_prefix: Whether to reduce each column name to the text after its last dot.

        >>> from edsl.results.Dataset import Dataset
        >>> d = Dataset([{'a.b':[1,2,3,4]}])
        >>> d.relevant_columns()
        ['a.b']

        >>> d.relevant_columns(remove_prefix=True)
        ['b']

        >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
        ['answer.how_feeling', 'answer.how_feeling_yesterday']
        """
        columns = [next(iter(entry.keys())) for entry in self]

        # Filter on the prefix *before* stripping it; once the prefix is gone
        # the data type can no longer be recovered from the column name.
        if data_type:
            columns = [
                column for column in columns if column.split(".")[0] == data_type
            ]

        if remove_prefix:
            columns = [column.split(".")[-1] for column in columns]

        return columns

    def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
        """Turn the results into a ``(header, rows)`` pair.

        Columns are ordered alphabetically by their full (prefixed) name. An
        empty dataset yields ``([], [])``.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param pretty_labels: Optional mapping from header name (after any prefix removal) to a display label.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
        (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])

        >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
        (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
        """
        full_header = sorted(self.relevant_columns())
        if not full_header:
            return [], []

        values_by_column = {}
        for entry in self.data:
            key, list_of_values = next(iter(entry.items()))
            values_by_column[key] = list_of_values

        if remove_prefix:
            header = [name.split(".")[-1] for name in full_header]
        else:
            header = full_header

        # The first column's length determines the number of rows.
        num_observations = len(next(iter(self[0].values())))
        rows = [
            [values_by_column[name][i] for name in full_header]
            for i in range(num_observations)
        ]

        if pretty_labels is not None:
            header = [pretty_labels.get(name, name) for name in header]
        return header, rows

    def print_long(self):
        """Print every value on its own line as ``column: value``.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print_long()
        answer.how_feeling: OK
        answer.how_feeling: Great
        answer.how_feeling: Terrible
        answer.how_feeling: OK
        """
        for entry in self:
            key, list_of_values = next(iter(entry.items()))
            for value in list_of_values:
                print(f"{key}: {value}")

    def print(
        self,
        pretty_labels: Optional[dict] = None,
        filename: Optional[str] = None,
        format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
        interactive: bool = False,
        split_at_dot: bool = True,
        max_rows=None,
        tee=False,
        iframe=False,
        iframe_height: int = 200,
        iframe_width: int = 600,
        web=False,
    ) -> Optional[Union[str, "Dataset"]]:
        """Print the results in a pretty format.

        :param pretty_labels: A dictionary of pretty labels for the columns. Passing it disables ``split_at_dot``.
        :param filename: Passed through to the rich and markdown printers; for 'latex', the file the LaTeX source is written to.
        :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'. Defaults to 'html' in a notebook and 'rich' otherwise.
        :param interactive: Whether to print the results interactively in a Jupyter notebook.
        :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
        :param max_rows: If given, show only the first ``max_rows`` values of each column (not applied to 'latex').
        :param tee: Whether to return the dataset itself after printing.
        :param iframe: For 'html', whether to wrap the table in an iframe.
        :param iframe_height: Height of the iframe in pixels.
        :param iframe_width: Width of the iframe in pixels.
        :param web: Currently unused.
        :raises ValueError: If ``format`` is not one of the supported formats.

        For 'latex' without a ``filename`` the LaTeX source is returned.

        Example: Print in rich format at the terminal

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print(format = "rich")
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        ├──────────────┤
        │ Terrible     │
        ├──────────────┤
        │ OK           │
        └──────────────┘

        >>> r = Results.example()
        >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        └──────────────┘
        >>> r2
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])

        >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        └──────────────┘

        >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
        ┏━━━━━━━━━━━━━━━━━━━━┓
        ┃ answer.how_feeling ┃
        ┡━━━━━━━━━━━━━━━━━━━━┩
        │ OK                 │
        ├────────────────────┤
        │ Great              │
        ├────────────────────┤
        │ Terrible           │
        ├────────────────────┤
        │ OK                 │
        └────────────────────┘

        Example: using the pretty_labels parameter

        >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
        ┏━━━━━━━━━━━━━━━━━━━━━┓
        ┃ How are you feeling ┃
        ┡━━━━━━━━━━━━━━━━━━━━━┩
        │ OK                  │
        ├─────────────────────┤
        │ Great               │
        ├─────────────────────┤
        │ Terrible            │
        ├─────────────────────┤
        │ OK                  │
        └─────────────────────┘

        Example: printing in markdown format

        >>> r.select('how_feeling').print(format='markdown')
        | answer.how_feeling |
        |--|
        | OK |
        | Great |
        | Terrible |
        | OK |
        ...
        """
        from edsl.utilities.utilities import is_notebook

        if format is None:
            format = "html" if is_notebook() else "rich"

        if pretty_labels is None:
            pretty_labels = {}
        else:
            # if the user passes in pretty_labels, we don't want to split at the dot
            split_at_dot = False

        if format not in ("rich", "html", "markdown", "latex"):
            raise ValueError(
                "format must be one of 'rich', 'html', 'markdown', or 'latex'."
            )

        # Relabel (and optionally truncate) the columns without touching self.
        new_data = []
        for entry in self:
            key, list_of_values = next(iter(entry.items()))
            if max_rows is not None:
                list_of_values = list_of_values[:max_rows]
            new_data.append({pretty_labels.get(key, key): list_of_values})

        if format == "rich":
            from edsl.utilities.interface import print_dataset_with_rich

            print_dataset_with_rich(
                new_data, filename=filename, split_at_dot=split_at_dot
            )
        elif format == "html":
            # IPython is only needed for HTML output, so import it lazily.
            from IPython.display import HTML, display
            from edsl.utilities.interface import print_list_of_dicts_as_html_table

            notebook = is_notebook()
            html_source = print_list_of_dicts_as_html_table(
                new_data, interactive=interactive
            )
            if iframe:
                import html

                # srcdoc is an attribute value, so the document must be escaped.
                escaped_output = html.escape(html_source)
                iframe_markup = (
                    f'<iframe srcdoc="{escaped_output}" '
                    f'style="width: {iframe_width}px; height: {iframe_height}px;">'
                    "</iframe>"
                )
                display(HTML(iframe_markup))
            elif notebook:
                display(HTML(html_source))
            else:
                from edsl.utilities.interface import view_html

                view_html(html_source)
        elif format == "markdown":
            from edsl.utilities.interface import print_list_of_dicts_as_markdown_table

            print_list_of_dicts_as_markdown_table(new_data, filename=filename)
        else:
            # format == "latex": built from the full DataFrame, so
            # pretty_labels and max_rows are not applied here.
            df = self.to_pandas()
            df.columns = [col.replace("_", " ") for col in df.columns]
            latex_string = df.to_latex()
            if filename is None:
                return latex_string
            with open(filename, "w") as f:
                f.write(latex_string)

        if tee:
            return self

    def to_csv(
        self,
        filename: Optional[str] = None,
        remove_prefix: bool = False,
        download_link: bool = False,
        pretty_labels: Optional[dict] = None,
    ):
        """Export the results to a CSV file.

        :param filename: The filename to save the CSV file to. When given, nothing is returned and ``download_link`` is ignored.
        :param remove_prefix: Whether to remove the prefix from the column names.
        :param download_link: Whether to display a download link in a Jupyter notebook instead of returning the CSV text.
        :param pretty_labels: Optional mapping from column name to display label.
        :return: The CSV text when neither ``filename`` nor ``download_link`` is set; otherwise ``None``.

        Example:

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_csv()
        'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

        >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
        'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

        """
        if pretty_labels is None:
            pretty_labels = {}
        header, rows = self._make_tabular(
            remove_prefix=remove_prefix, pretty_labels=pretty_labels
        )

        if filename is not None:
            # newline="" lets the csv module control line endings itself.
            with open(filename, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(header)
                writer.writerows(rows)
            return None

        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(header)
        writer.writerows(rows)
        csv_text = output.getvalue()

        if not download_link:
            return csv_text

        # Imported here because only the download link needs IPython.
        from IPython.display import HTML, display

        b64 = base64.b64encode(csv_text.encode()).decode()
        link_markup = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
        display(HTML(link_markup))

    def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
        """Convert the results to a pandas DataFrame.

        The frame is built by parsing the output of :meth:`to_csv`, and its
        columns are sorted alphabetically.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_pandas()
          answer.how_feeling
        0                 OK
        1              Great
        2           Terrible
        3                 OK
        """
        import pandas as pd

        csv_buffer = io.StringIO(self.to_csv(remove_prefix=remove_prefix))
        df = pd.read_csv(csv_buffer)
        return df.sort_index(axis=1)  # Sort columns alphabetically

    def to_scenario_list(self, remove_prefix: bool = True) -> "ScenarioList":
        """Convert the results to a ScenarioList, one Scenario per row.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_scenario_list()
        ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
        """
        from edsl import ScenarioList, Scenario

        list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
        return ScenarioList([Scenario(d) for d in list_of_dicts])

    def to_agent_list(self, remove_prefix: bool = True) -> "AgentList":
        """Convert the results to an AgentList, one Agent per row.

        Each row's values are passed to ``Agent`` as its first argument.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_agent_list()
        AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
        """
        from edsl import AgentList, Agent

        list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
        return AgentList([Agent(d) for d in list_of_dicts])

    def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
        """Convert the results to a list of dictionaries, one per row.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_dicts()
        [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

        """
        list_of_keys = []
        list_of_values = []
        for entry in self:
            key, values = next(iter(entry.items()))
            list_of_keys.append(key)
            list_of_values.append(values)

        if remove_prefix:
            list_of_keys = [key.split(".")[-1] for key in list_of_keys]

        # zip(*columns) transposes the column lists into rows.
        return [dict(zip(list_of_keys, row)) for row in zip(*list_of_values)]

    def to_list(self, flatten=False, remove_none=False) -> list:
        """Convert the results to a list.

        With one column the result is that column's values; with several
        columns it is a list of tuples, one per row.

        :param flatten: Whether to flatten the list of lists (single column only).
        :param remove_none: Whether to remove None values from the list.
        :raises ValueError: If ``flatten`` is requested with more than one column.

        >>> from edsl.results import Results
        >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

        >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
        [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

        >>> r = Results.example()
        >>> r.select('how_feeling').to_list()
        ['OK', 'Great', 'Terrible', 'OK']

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
        [1, 9, 2, 3, 4]

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
        Traceback (most recent call last):
        ...
        ValueError: Cannot flatten a list of lists when there are multiple columns selected.


        """
        keys = self.relevant_columns()

        if len(keys) > 1 and flatten:
            raise ValueError(
                "Cannot flatten a list of lists when there are multiple columns selected."
            )

        if len(keys) == 1:
            # if only one 'column' is selected (which is typical for this method)
            list_to_return = next(iter(self[0].values()))
        else:
            data = self.to_dicts(remove_prefix=False)
            list_to_return = [tuple(d[key] for key in keys) for d in data]

        if remove_none:
            list_to_return = [item for item in list_to_return if item is not None]

        if flatten:
            # Only one level is flattened: list items are spliced in place.
            new_list = []
            for item in list_to_return:
                if isinstance(item, list):
                    new_list.extend(item)
                else:
                    new_list.append(item)
            list_to_return = new_list

        return list_to_return

    def html(
        self,
        filename: Optional[str] = None,
        cta: str = "Open in browser",
        return_link: bool = False,
    ):
        """Write the dataset to an HTML file and open (or link to) it.

        In a notebook a link to the file is displayed; otherwise the path is
        printed and the file is opened with the default web browser.

        :param filename: Where to write the HTML. If omitted, a new ``.html`` file is created in the current working directory.
        :param cta: Link text used when running in a notebook.
        :param return_link: Whether to return the filename that was written.
        """
        import os
        import tempfile

        from edsl.utilities.utilities import is_notebook

        df = self.to_pandas()

        if filename is None:
            # Reserve a unique path, then close the handle right away so it is
            # not leaked; the file is reopened for writing just below.
            with tempfile.NamedTemporaryFile(
                "w", delete=False, suffix=".html", dir=os.getcwd()
            ) as tmp:
                filename = tmp.name

        with open(filename, "w") as f:
            f.write(df.to_html())

        if is_notebook():
            from IPython.display import HTML, display

            html_url = f"/files/{filename}"
            html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
            display(HTML(html_link))
        else:
            import webbrowser

            print(f"Saved to {filename}")
            webbrowser.open(f"file://{os.path.abspath(filename)}")

        if return_link:
            return filename

    def tally(
        self, *fields: Optional[str], top_n: Optional[int] = None, output="dict"
    ) -> Union[dict, "Dataset"]:
        """Tally the values of a field or perform a cross-tab of multiple fields.

        Counts are sorted from most to least frequent. List values are
        converted to tuples so they can be counted.

        :param fields: The field(s) to tally, multiple fields for cross-tabulation. Defaults to all columns.
        :param top_n: If given, keep only the ``top_n`` most frequent entries.
        :param output: ``'dict'`` for a plain dictionary or ``'Dataset'`` for a Dataset with ``value`` and ``count`` columns.
        :raises ValueError: If a field is not in the dataset or ``output`` is not recognized.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').tally('answer.how_feeling')
        {'OK': 2, 'Great': 1, 'Terrible': 1}
        >>> r.select('how_feeling', 'period').tally('how_feeling', 'period')
        {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
        """
        import textwrap
        import warnings
        from collections import Counter

        def _hashable(value):
            # Lists cannot be Counter keys; turn them (recursively) into tuples.
            if isinstance(value, (list, tuple)):
                return tuple(_hashable(item) for item in value)
            return value

        if output not in ("dict", "Dataset"):
            raise ValueError("output must be either 'dict' or 'Dataset'.")

        columns = self.relevant_columns()
        if len(fields) == 0:
            fields = columns

        # A field may be given with or without its prefix.
        known_names = set(columns) | {column.split(".")[-1] for column in columns}
        if not all(f in known_names for f in fields):
            raise ValueError("One or more specified fields are not in the dataset.")

        if len(fields) == 1:
            values = self._key_to_value(fields[0])
        else:
            values = list(zip(*(self._key_to_value(field) for field in fields)))

        counts = Counter(_hashable(value) for value in values)
        ranked = counts.most_common()  # most frequent first, ties in first-seen order
        if top_n is not None:
            ranked = ranked[:top_n]
        sorted_tally = dict(ranked)

        if output == "dict":
            warnings.warn(
                textwrap.dedent(
                    """\
                    The default output from tally will change to Dataset in the future.
                    Use output='Dataset' to get the Dataset object for now.
                    """
                )
            )
            return sorted_tally

        # Imported lazily: only needed when a Dataset is requested.
        from edsl.results.Dataset import Dataset

        return Dataset(
            [
                {"value": list(sorted_tally.keys())},
                {"count": list(sorted_tally.values())},
            ]
        )
565
+
566
+
567
if __name__ == "__main__":
    # Run this module's docstring examples when executed as a script.
    # ELLIPSIS lets "..." in expected output stand in for arbitrary text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)