edsl 0.1.29__py3-none-any.whl → 0.1.29.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. edsl/Base.py +18 -18
  2. edsl/__init__.py +23 -23
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +41 -77
  5. edsl/agents/AgentList.py +9 -19
  6. edsl/agents/Invigilator.py +1 -19
  7. edsl/agents/InvigilatorBase.py +10 -15
  8. edsl/agents/PromptConstructionMixin.py +100 -342
  9. edsl/agents/descriptors.py +1 -2
  10. edsl/config.py +1 -2
  11. edsl/conjure/InputData.py +8 -39
  12. edsl/coop/coop.py +150 -187
  13. edsl/coop/utils.py +75 -43
  14. edsl/data/Cache.py +5 -19
  15. edsl/data/SQLiteDict.py +3 -11
  16. edsl/jobs/Answers.py +1 -15
  17. edsl/jobs/Jobs.py +46 -90
  18. edsl/jobs/buckets/ModelBuckets.py +2 -4
  19. edsl/jobs/buckets/TokenBucket.py +2 -1
  20. edsl/jobs/interviews/Interview.py +9 -3
  21. edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
  22. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +10 -15
  23. edsl/jobs/runners/JobsRunnerAsyncio.py +25 -21
  24. edsl/jobs/tasks/TaskHistory.py +3 -4
  25. edsl/language_models/LanguageModel.py +11 -5
  26. edsl/language_models/ModelList.py +1 -1
  27. edsl/language_models/repair.py +7 -8
  28. edsl/notebooks/Notebook.py +3 -40
  29. edsl/prompts/Prompt.py +19 -31
  30. edsl/questions/QuestionBase.py +13 -38
  31. edsl/questions/QuestionBudget.py +6 -5
  32. edsl/questions/QuestionCheckBox.py +3 -7
  33. edsl/questions/QuestionExtract.py +3 -5
  34. edsl/questions/QuestionFreeText.py +3 -3
  35. edsl/questions/QuestionFunctional.py +3 -0
  36. edsl/questions/QuestionList.py +4 -3
  37. edsl/questions/QuestionMultipleChoice.py +8 -16
  38. edsl/questions/QuestionNumerical.py +3 -4
  39. edsl/questions/QuestionRank.py +3 -5
  40. edsl/questions/__init__.py +3 -4
  41. edsl/questions/descriptors.py +2 -4
  42. edsl/questions/question_registry.py +31 -20
  43. edsl/questions/settings.py +1 -1
  44. edsl/results/Dataset.py +0 -31
  45. edsl/results/Result.py +74 -22
  46. edsl/results/Results.py +47 -97
  47. edsl/results/ResultsDBMixin.py +3 -7
  48. edsl/results/ResultsExportMixin.py +537 -22
  49. edsl/results/ResultsGGMixin.py +3 -3
  50. edsl/results/ResultsToolsMixin.py +5 -5
  51. edsl/scenarios/Scenario.py +6 -5
  52. edsl/scenarios/ScenarioList.py +11 -34
  53. edsl/scenarios/ScenarioListPdfMixin.py +1 -2
  54. edsl/scenarios/__init__.py +0 -1
  55. edsl/study/Study.py +9 -3
  56. edsl/surveys/MemoryPlan.py +4 -11
  57. edsl/surveys/Survey.py +7 -46
  58. edsl/surveys/SurveyExportMixin.py +2 -4
  59. edsl/surveys/SurveyFlowVisualizationMixin.py +4 -6
  60. edsl/tools/plotting.py +2 -4
  61. edsl/utilities/__init__.py +21 -21
  62. edsl/utilities/interface.py +45 -66
  63. edsl/utilities/utilities.py +13 -11
  64. {edsl-0.1.29.dist-info → edsl-0.1.29.dev2.dist-info}/METADATA +10 -11
  65. {edsl-0.1.29.dist-info → edsl-0.1.29.dev2.dist-info}/RECORD +68 -71
  66. edsl-0.1.29.dev2.dist-info/entry_points.txt +3 -0
  67. edsl/base/Base.py +0 -289
  68. edsl/results/DatasetExportMixin.py +0 -493
  69. edsl/scenarios/FileStore.py +0 -140
  70. edsl/scenarios/ScenarioListExportMixin.py +0 -32
  71. {edsl-0.1.29.dist-info → edsl-0.1.29.dev2.dist-info}/LICENSE +0 -0
  72. {edsl-0.1.29.dist-info → edsl-0.1.29.dev2.dist-info}/WHEEL +0 -0
@@ -1,43 +1,558 @@
1
1
  """Mixin class for exporting results."""
2
2
 
3
+ import base64
4
+ import csv
5
+ import io
6
+ import random
3
7
  from functools import wraps
8
+
4
9
  from typing import Literal, Optional, Union
5
10
 
6
- from edsl.results.DatasetExportMixin import DatasetExportMixin
11
+ from edsl.utilities.utilities import is_notebook
12
+
13
+ from IPython.display import HTML, display
14
+ import pandas as pd
15
+ from edsl.utilities.interface import (
16
+ print_dataset_with_rich,
17
+ print_list_of_dicts_as_html_table,
18
+ print_list_of_dicts_as_markdown_table,
19
+ create_latex_table_from_data,
20
+ )
21
+
22
+
23
+ class ResultsExportMixin:
24
+ """Mixin class for exporting Results objects."""
25
+
26
def _convert_decorator(func):
    """Wrap ``func`` so it is called on a Dataset-like object.

    The wrapper inspects the class *name* of ``self`` and converts it first:

    * ``Results``      -> ``self.select()``
    * ``Dataset``      -> used unchanged
    * ``ScenarioList`` -> ``self.to_dataset()``

    Any other class name raises an ``Exception``.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        """Call the wrapped function on the converted object."""
        class_name = self.__class__.__name__
        if class_name == "Dataset":
            target = self
        elif class_name == "Results":
            target = self.select()
        elif class_name == "ScenarioList":
            target = self.to_dataset()
        else:
            raise Exception(
                f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
            )
        return func(target, *args, **kwargs)

    return wrapper
44
+
45
+ @_convert_decorator
46
+ def relevant_columns(
47
+ self, data_type: Optional[str] = None, remove_prefix=False
48
+ ) -> list:
49
+ """Return the set of keys that are present in the dataset.
50
+
51
+ >>> from edsl.results.Dataset import Dataset
52
+ >>> d = Dataset([{'a.b':[1,2,3,4]}])
53
+ >>> d.relevant_columns()
54
+ ['a.b']
55
+
56
+ >>> d.relevant_columns(remove_prefix=True)
57
+ ['b']
58
+
59
+ >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
60
+ ['answer.how_feeling', 'answer.how_feeling_yesterday']
61
+ """
62
+ columns = [list(x.keys())[0] for x in self]
63
+ # columns = set([list(result.keys())[0] for result in self.data])
64
+ if remove_prefix:
65
+ columns = [column.split(".")[-1] for column in columns]
7
66
 
67
+ if data_type:
68
+ columns = [
69
+ column for column in columns if column.split(".")[0] == data_type
70
+ ]
8
71
 
9
- def to_dataset(func):
10
- """Convert the Results object to a Dataset object before calling the function."""
72
+ return columns
11
73
 
12
- @wraps(func)
13
- def wrapper(self, *args, **kwargs):
14
- """Return the function with the Results object converted to a Dataset object."""
15
- if self.__class__.__name__ == "Results":
16
- return func(self.select(), *args, **kwargs)
74
+ # @_convert_decorator
75
def sample(self, n: int) -> "Results":
    """Return a random sample of the results.

    One set of row positions is drawn using the length of the first column
    and then applied to every column, so values stay aligned across
    columns.  The entries are modified in place and ``self`` is returned.

    :param n: The number of samples to return.
    :raises ValueError: If ``n`` is larger than the number of available rows.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> len(r.sample(2))
    2
    """
    sampled_indices = None

    for entry in self:
        key, values = list(entry.items())[0]
        if sampled_indices is None:  # first column: choose the positions once
            population = len(values)
            # Validate before sampling; otherwise random.sample raises its own
            # less specific error and this message is never reached.
            if n > population:
                raise ValueError(
                    f"Cannot sample {n} items from a list of length {population}."
                )
            sampled_indices = random.sample(range(population), n)
        entry[key] = [values[i] for i in sampled_indices]

    return self
99
+
100
+ @_convert_decorator
101
+ def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
102
+ """Turn the results into a tabular format.
103
+
104
+ :param remove_prefix: Whether to remove the prefix from the column names.
105
+
106
+ >>> from edsl.results import Results
107
+ >>> r = Results.example()
108
+ >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
109
+ (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
110
+
111
+ >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
112
+ (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
113
+ """
114
+ d = {}
115
+ full_header = sorted(list(self.relevant_columns()))
116
+ for entry in self.data:
117
+ key, list_of_values = list(entry.items())[0]
118
+ d[key] = list_of_values
119
+ if remove_prefix:
120
+ header = [h.split(".")[-1] for h in full_header]
17
121
  else:
18
- return func(self, *args, **kwargs)
122
+ header = full_header
123
+ num_observations = len(list(self[0].values())[0])
124
+ rows = []
125
+ # rows.append(header)
126
+ for i in range(num_observations):
127
+ row = [d[h][i] for h in full_header]
128
+ rows.append(row)
129
+ if pretty_labels is not None:
130
+ header = [pretty_labels.get(h, h) for h in header]
131
+ return header, rows
19
132
 
20
- wrapper._is_wrapped = True
21
- return wrapper
133
+ def print_long(self, max_rows=None) -> None:
134
+ """Print the results in long format.
22
135
 
136
+ >>> from edsl.results import Results
137
+ >>> r = Results.example()
138
+ >>> r.select('how_feeling').print_long(max_rows = 2)
139
+ ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┓
140
+ ┃ Result index ┃ Key ┃ Value ┃
141
+ ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━┩
142
+ │ 0 │ how_feeling │ OK │
143
+ │ 1 │ how_feeling │ Great │
144
+ └──────────────┴─────────────┴───────┘
145
+ """
146
+ from edsl.utilities.interface import print_results_long
23
147
 
24
- def decorate_methods_from_mixin(cls, mixin_cls):
25
- for attr_name, attr_value in mixin_cls.__dict__.items():
26
- if callable(attr_value) and not attr_name.startswith("__"):
27
- setattr(cls, attr_name, to_dataset(attr_value))
28
- return cls
148
+ print_results_long(self, max_rows=max_rows)
29
149
 
150
+ @_convert_decorator
151
+ def print(
152
+ self,
153
+ pretty_labels: Optional[dict] = None,
154
+ filename: Optional[str] = None,
155
+ format: Literal["rich", "html", "markdown", "latex"] = None,
156
+ interactive: bool = False,
157
+ split_at_dot: bool = True,
158
+ max_rows=None,
159
+ tee=False,
160
+ iframe=False,
161
+ iframe_height: int = 200,
162
+ iframe_width: int = 600,
163
+ web=False,
164
+ ) -> None:
165
+ """Print the results in a pretty format.
30
166
 
31
- class ResultsExportMixin(DatasetExportMixin):
32
- """Mixin class for exporting Results objects."""
167
+ :param pretty_labels: A dictionary of pretty labels for the columns.
168
+ :param filename: The filename to save the results to.
169
+ :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
170
+ :param interactive: Whether to print the results interactively in a Jupyter notebook.
171
+ :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
172
+
173
+ Example: Print in rich format at the terminal
174
+
175
+ >>> from edsl.results import Results
176
+ >>> r = Results.example()
177
+ >>> r.select('how_feeling').print(format = "rich")
178
+ ┏━━━━━━━━━━━━━━┓
179
+ ┃ answer ┃
180
+ ┃ .how_feeling ┃
181
+ ┡━━━━━━━━━━━━━━┩
182
+ │ OK │
183
+ ├──────────────┤
184
+ │ Great │
185
+ ├──────────────┤
186
+ │ Terrible │
187
+ ├──────────────┤
188
+ │ OK │
189
+ └──────────────┘
190
+
191
+ Example: using the pretty_labels parameter
192
+
193
+ >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
194
+ ┏━━━━━━━━━━━━━━━━━━━━━┓
195
+ ┃ How are you feeling ┃
196
+ ┡━━━━━━━━━━━━━━━━━━━━━┩
197
+ │ OK │
198
+ ├─────────────────────┤
199
+ │ Great │
200
+ ├─────────────────────┤
201
+ │ Terrible │
202
+ ├─────────────────────┤
203
+ │ OK │
204
+ └─────────────────────┘
205
+
206
+ Example: printing in markdown format
207
+
208
+ >>> r.select('how_feeling').print(format='markdown')
209
+ | answer.how_feeling |
210
+ |--|
211
+ | OK |
212
+ | Great |
213
+ | Terrible |
214
+ | OK |
215
+ ...
216
+ """
217
+ if format is None:
218
+ if is_notebook():
219
+ format = "html"
220
+ else:
221
+ format = "rich"
222
+
223
+ if pretty_labels is None:
224
+ pretty_labels = {}
225
+
226
+ if format not in ["rich", "html", "markdown", "latex"]:
227
+ raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
228
+
229
+ new_data = []
230
+ for index, entry in enumerate(self):
231
+ key, list_of_values = list(entry.items())[0]
232
+ new_data.append({pretty_labels.get(key, key): list_of_values})
233
+
234
+ if max_rows is not None:
235
+ for entry in new_data:
236
+ for key in entry:
237
+ actual_rows = len(entry[key])
238
+ entry[key] = entry[key][:max_rows]
239
+ # print(f"Showing only the first {max_rows} rows of {actual_rows} rows.")
240
+
241
+ if format == "rich":
242
+ print_dataset_with_rich(
243
+ new_data, filename=filename, split_at_dot=split_at_dot
244
+ )
245
+ elif format == "html":
246
+ notebook = is_notebook()
247
+ html_source = print_list_of_dicts_as_html_table(
248
+ new_data, interactive=interactive
249
+ )
250
+ if iframe:
251
+ import html
252
+
253
+ height = iframe_height
254
+ width = iframe_width
255
+ escaped_output = html.escape(html_source)
256
+ # escaped_output = html_source
257
+ iframe = f""""
258
+ <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
259
+ """
260
+ display(HTML(iframe))
261
+ elif notebook:
262
+ display(HTML(html_source))
263
+ else:
264
+ from edsl.utilities.interface import view_html
265
+
266
+ view_html(html_source)
267
+
268
+ elif format == "markdown":
269
+ print_list_of_dicts_as_markdown_table(new_data, filename=filename)
270
+ elif format == "latex":
271
+ df = self.to_pandas()
272
+ df.columns = [col.replace("_", " ") for col in df.columns]
273
+ latex_string = df.to_latex()
274
+ if filename is not None:
275
+ with open(filename, "w") as f:
276
+ f.write(latex_string)
277
+ else:
278
+ return latex_string
279
+ # raise NotImplementedError("Latex format not yet implemented.")
280
+ # latex_string = create_latex_table_from_data(new_data, filename=filename)
281
+ # if filename is None:
282
+ # return latex_string
283
+ # Not working quite
284
+
285
+ else:
286
+ raise ValueError("format not recognized.")
287
+
288
+ if tee:
289
+ return self
290
+
291
@_convert_decorator
def to_csv(
    self,
    filename: Optional[str] = None,
    remove_prefix: bool = False,
    download_link: bool = False,
    pretty_labels: Optional[dict] = None,
):
    """Export the results to a CSV file.

    :param filename: The filename to save the CSV file to.
    :param remove_prefix: Whether to remove the prefix from the column names.
    :param download_link: Whether to display a download link in a Jupyter notebook.
    :param pretty_labels: Optional mapping from column names to display labels.
    :return: The CSV text when neither ``filename`` nor ``download_link`` is
        given; otherwise ``None``.

    Example:

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_csv()
    'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
    """
    if pretty_labels is None:
        pretty_labels = {}
    header, rows = self._make_tabular(
        remove_prefix=remove_prefix, pretty_labels=pretty_labels
    )

    # Always render into memory first so the text is available to every
    # output path (file, download link, return value).
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(header)
    writer.writerows(rows)
    csv_text = output.getvalue()

    if filename is not None:
        # newline="" keeps the csv module's own line terminators untouched.
        with open(filename, "w", newline="") as f:
            f.write(csv_text)

    if download_link:
        b64 = base64.b64encode(csv_text.encode()).decode()
        link_html = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
        display(HTML(link_html))
        return None

    if filename is None:
        return csv_text
    return None
336
+
337
@_convert_decorator
def to_pandas(self, remove_prefix: bool = False) -> pd.DataFrame:
    """Convert the results to a pandas DataFrame.

    The data is serialised with ``to_csv`` and parsed back with
    ``pandas.read_csv``; the resulting columns are ordered alphabetically.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_pandas()
      answer.how_feeling
    0                 OK
    1              Great
    2           Terrible
    3                 OK
    """
    buffer = io.StringIO(self.to_csv(remove_prefix=remove_prefix))
    frame = pd.read_csv(buffer)
    # Sort columns alphabetically by label.
    return frame.sort_index(axis=1)
357
+
358
@_convert_decorator
def to_scenario_list(self, remove_prefix: bool = True) -> "ScenarioList":
    """Convert the results to a ScenarioList with one Scenario per row.

    Each row dictionary produced by ``to_dicts`` becomes one ``Scenario``.

    :param remove_prefix: Whether to remove the prefix from the column names.
    :return: A ``ScenarioList`` (not a plain list of dictionaries).

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_scenario_list()
    ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
    """
    # Imported here rather than at module level, as in the original.
    from edsl import ScenarioList, Scenario

    list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
    return ScenarioList([Scenario(d) for d in list_of_dicts])
373
+
374
def to_agent_list(self, remove_prefix: bool = True):
    """Build an AgentList with one Agent per row.

    Each row dictionary produced by ``to_dicts`` is passed to ``Agent``.

    :param remove_prefix: Forwarded to ``to_dicts``; whether to remove the
        prefix from the column names.
    """
    from edsl import AgentList, Agent

    rows = self.to_dicts(remove_prefix=remove_prefix)
    agents = [Agent(row) for row in rows]
    return AgentList(agents)
379
+
380
@_convert_decorator
def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
    """Convert the results to a list of dictionaries, one per row.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_dicts()
    [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

    """
    # Each entry holds a single column: take its (name, values) pair.
    columns = [list(entry.items())[0] for entry in self]
    names = [name for name, _ in columns]
    value_lists = [values for _, values in columns]

    if remove_prefix:
        names = [name.split(".")[-1] for name in names]

    # Transpose the column lists into rows and label each row.
    return [dict(zip(names, row)) for row in zip(*value_lists)]
409
+
410
@_convert_decorator
def to_list(self, flatten=False, remove_none=False) -> list[list]:
    """Convert the results to a list.

    With a single column selected the column's values are returned; with
    several columns each row is returned as a tuple.

    :param flatten: Expand list items into the output list (single column only).
    :param remove_none: Drop items that are ``None``.
    :raises ValueError: If ``flatten`` is requested with multiple columns.

    >>> from edsl.results import Results
    >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
    Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

    >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
    [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

    >>> r = Results.example()
    >>> r.select('how_feeling').to_list()
    ['OK', 'Great', 'Terrible', 'OK']
    """
    columns = self.relevant_columns()

    if flatten and len(columns) > 1:
        raise ValueError(
            "Cannot flatten a list of lists when there are multiple columns selected."
        )

    if len(columns) == 1:
        # Only one column selected, which is typical for this method.
        result = list(self[0].values())[0]
    else:
        records = self.to_dicts(remove_prefix=False)
        result = [tuple([record[name] for name in columns]) for record in records]

    if remove_none:
        result = [item for item in result if item is not None]

    if flatten:
        flattened = []
        for item in result:
            if isinstance(item, list):
                flattened.extend(item)
            else:
                flattened.append(item)
        result = flattened

    return result
453
+
454
@_convert_decorator
def html(
    self, filename: str = None, cta: str = "Open in browser", return_link=False
):
    """Write the data as an HTML table to a file and open or link to it.

    :param filename: Target file. When omitted, a ``.html`` temporary file is
        created in the current working directory and is not deleted.
    :param cta: Link text shown in a notebook.
    :param return_link: When true, return the filename that was written.
    :return: The filename if ``return_link`` is true, otherwise ``None``.
    """
    import os
    import tempfile

    df = self.to_pandas()

    if filename is None:
        current_directory = os.getcwd()
        # Close the handle right away; only the reserved name is needed and
        # delete=False keeps the file on disk.
        with tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".html", dir=current_directory
        ) as tmp:
            filename = tmp.name

    with open(filename, "w") as f:
        f.write(df.to_html())

    if is_notebook():
        html_url = f"/files/{filename}"
        html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
        display(HTML(html_link))
    else:
        print(f"Saved to {filename}")
        import webbrowser

        webbrowser.open(f"file://{os.path.abspath(filename)}")

    if return_link:
        return filename
486
+
487
+ @_convert_decorator
488
+ def tally(
489
+ self, *fields: Optional[str], top_n=None, output="dict"
490
+ ) -> Union[dict, "Dataset"]:
491
+ """Tally the values of a field or perform a cross-tab of multiple fields.
492
+
493
+ :param fields: The field(s) to tally, multiple fields for cross-tabulation.
494
+
495
+ >>> from edsl.results import Results
496
+ >>> r = Results.example()
497
+ >>> r.select('how_feeling').tally('answer.how_feeling')
498
+ {'OK': 2, 'Great': 1, 'Terrible': 1}
499
+ >>> r.select('how_feeling', 'period').tally('how_feeling', 'period')
500
+ {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
501
+ """
502
+ from collections import Counter
503
+
504
+ if len(fields) == 0:
505
+ fields = self.relevant_columns()
506
+
507
+ relevant_columns_without_prefix = [
508
+ column.split(".")[-1] for column in self.relevant_columns()
509
+ ]
510
+
511
+ if not all(
512
+ f in self.relevant_columns() or f in relevant_columns_without_prefix
513
+ for f in fields
514
+ ):
515
+ raise ValueError("One or more specified fields are not in the dataset.")
516
+
517
+ if len(fields) == 1:
518
+ field = fields[0]
519
+ values = self._key_to_value(field)
520
+ else:
521
+ values = list(zip(*(self._key_to_value(field) for field in fields)))
522
+
523
+ for value in values:
524
+ if isinstance(value, list):
525
+ value = tuple(value)
526
+
527
+ tally = dict(Counter(values))
528
+ sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
529
+ if top_n is not None:
530
+ sorted_tally = dict(list(sorted_tally.items())[:top_n])
531
+
532
+ import warnings
533
+ import textwrap
534
+ from edsl.results.Dataset import Dataset
33
535
 
34
- def __init_subclass__(cls, **kwargs):
35
- super().__init_subclass__(**kwargs)
36
- decorate_methods_from_mixin(cls, DatasetExportMixin)
536
+ if output == "dict":
537
+ warnings.warn(
538
+ textwrap.dedent(
539
+ """\
540
+ The default output from tally will change to Dataset in the future.
541
+ Use output='Dataset' to get the Dataset object for now.
542
+ """
543
+ )
544
+ )
545
+ return sorted_tally
546
+ elif output == "Dataset":
547
+ return Dataset(
548
+ [
549
+ {"value": list(sorted_tally.keys())},
550
+ {"count": list(sorted_tally.values())},
551
+ ]
552
+ )
37
553
 
38
554
 
39
555
  if __name__ == "__main__":
40
- # pass
41
556
  import doctest
42
557
 
43
558
  doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -1,8 +1,11 @@
1
1
  """Mixin class for ggplot2 plotting."""
2
2
 
3
3
  import subprocess
4
+ import pandas as pd
4
5
  import tempfile
5
6
  from typing import Optional
7
+ import matplotlib.pyplot as plt
8
+ import matplotlib.image as mpimg
6
9
 
7
10
 
8
11
  class ResultsGGMixin:
@@ -102,9 +105,6 @@ class ResultsGGMixin:
102
105
 
103
106
  def _display_plot(self, filename: str, width: float, height: float):
104
107
  """Display the plot in the notebook."""
105
- import matplotlib.pyplot as plt
106
- import matplotlib.image as mpimg
107
-
108
108
  if filename.endswith(".png"):
109
109
  img = mpimg.imread(filename)
110
110
  plt.figure(
@@ -1,3 +1,7 @@
1
+ # from edsl import ScenarioList
2
+ from edsl.questions import QuestionList, QuestionCheckBox
3
+
4
+
1
5
  class ResultsToolsMixin:
2
6
  def get_themes(
3
7
  self,
@@ -9,12 +13,8 @@ class ResultsToolsMixin:
9
13
  progress_bar=False,
10
14
  print_exceptions=False,
11
15
  ) -> list:
12
- values = [
13
- str(txt)[:1000]
14
- for txt in self.shuffle(seed=seed).select(field).to_list()[:max_values]
15
- ]
16
+ values = self.shuffle(seed=seed).select(field).to_list()[:max_values]
16
17
  from edsl import ScenarioList
17
- from edsl.questions import QuestionList, QuestionCheckBox
18
18
 
19
19
  q = QuestionList(
20
20
  question_text=f"""
@@ -1,12 +1,17 @@
1
1
  """A Scenario is a dictionary with a key/value to parameterize a question."""
2
2
 
3
- import time
4
3
  import copy
5
4
  from collections import UserDict
6
5
  from typing import Union, List, Optional, Generator
7
6
  import base64
8
7
  import hashlib
8
+ import json
9
+
10
+ import fitz # PyMuPDF
9
11
  import os
12
+ import subprocess
13
+
14
+ from rich.table import Table
10
15
 
11
16
  from edsl.Base import Base
12
17
  from edsl.scenarios.ScenarioImageMixin import ScenarioImageMixin
@@ -212,8 +217,6 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
212
217
 
213
218
  @classmethod
214
219
  def from_pdf(cls, pdf_path):
215
- import fitz # PyMuPDF
216
-
217
220
  # Ensure the file exists
218
221
  if not os.path.exists(pdf_path):
219
222
  raise FileNotFoundError(f"The file {pdf_path} does not exist.")
@@ -401,8 +404,6 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
401
404
 
402
405
  def rich_print(self) -> "Table":
403
406
  """Display an object as a rich table."""
404
- from rich.table import Table
405
-
406
407
  table_data, column_names = self._table()
407
408
  table = Table(title=f"{self.__class__.__name__} Attributes")
408
409
  for column in column_names: