edsl 0.1.30.dev4__py3-none-any.whl → 0.1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. edsl/__version__.py +1 -1
  2. edsl/agents/Invigilator.py +7 -2
  3. edsl/agents/PromptConstructionMixin.py +18 -1
  4. edsl/config.py +4 -0
  5. edsl/conjure/Conjure.py +6 -0
  6. edsl/coop/coop.py +4 -0
  7. edsl/coop/utils.py +9 -1
  8. edsl/data/CacheHandler.py +3 -4
  9. edsl/enums.py +2 -0
  10. edsl/inference_services/DeepInfraService.py +6 -91
  11. edsl/inference_services/GroqService.py +18 -0
  12. edsl/inference_services/InferenceServicesCollection.py +13 -5
  13. edsl/inference_services/OpenAIService.py +64 -21
  14. edsl/inference_services/registry.py +2 -1
  15. edsl/jobs/Jobs.py +80 -33
  16. edsl/jobs/buckets/TokenBucket.py +24 -5
  17. edsl/jobs/interviews/Interview.py +122 -75
  18. edsl/jobs/interviews/InterviewExceptionEntry.py +101 -0
  19. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +58 -52
  20. edsl/jobs/interviews/interview_exception_tracking.py +68 -10
  21. edsl/jobs/runners/JobsRunnerAsyncio.py +112 -81
  22. edsl/jobs/runners/JobsRunnerStatusData.py +0 -237
  23. edsl/jobs/runners/JobsRunnerStatusMixin.py +291 -35
  24. edsl/jobs/tasks/QuestionTaskCreator.py +1 -5
  25. edsl/jobs/tasks/TaskCreators.py +8 -2
  26. edsl/jobs/tasks/TaskHistory.py +145 -1
  27. edsl/language_models/LanguageModel.py +135 -75
  28. edsl/language_models/ModelList.py +8 -2
  29. edsl/language_models/registry.py +16 -0
  30. edsl/questions/QuestionFunctional.py +34 -2
  31. edsl/questions/QuestionMultipleChoice.py +58 -8
  32. edsl/questions/QuestionNumerical.py +0 -1
  33. edsl/questions/descriptors.py +42 -2
  34. edsl/results/DatasetExportMixin.py +258 -75
  35. edsl/results/Result.py +53 -5
  36. edsl/results/Results.py +66 -27
  37. edsl/results/ResultsToolsMixin.py +1 -1
  38. edsl/scenarios/Scenario.py +14 -0
  39. edsl/scenarios/ScenarioList.py +59 -21
  40. edsl/scenarios/ScenarioListExportMixin.py +16 -5
  41. edsl/scenarios/ScenarioListPdfMixin.py +3 -0
  42. edsl/study/Study.py +2 -2
  43. edsl/surveys/Survey.py +35 -1
  44. {edsl-0.1.30.dev4.dist-info → edsl-0.1.31.dist-info}/METADATA +4 -2
  45. {edsl-0.1.30.dev4.dist-info → edsl-0.1.31.dist-info}/RECORD +47 -45
  46. {edsl-0.1.30.dev4.dist-info → edsl-0.1.31.dist-info}/WHEEL +1 -1
  47. {edsl-0.1.30.dev4.dist-info → edsl-0.1.31.dist-info}/LICENSE +0 -0
@@ -3,18 +3,22 @@
3
3
  import base64
4
4
  import csv
5
5
  import io
6
+ import html
6
7
 
7
- from typing import Literal, Optional, Union
8
+ from typing import Literal, Optional, Union, List
8
9
 
9
10
 
10
11
  class DatasetExportMixin:
11
- """Mixin class"""
12
+ """Mixin class for exporting Dataset objects."""
12
13
 
13
14
  def relevant_columns(
14
15
  self, data_type: Optional[str] = None, remove_prefix=False
15
16
  ) -> list:
16
17
  """Return the set of keys that are present in the dataset.
17
18
 
19
+ :param data_type: The data type to filter by.
20
+ :param remove_prefix: Whether to remove the prefix from the column names.
21
+
18
22
  >>> from edsl.results.Dataset import Dataset
19
23
  >>> d = Dataset([{'a.b':[1,2,3,4]}])
20
24
  >>> d.relevant_columns()
@@ -23,22 +27,70 @@ class DatasetExportMixin:
23
27
  >>> d.relevant_columns(remove_prefix=True)
24
28
  ['b']
25
29
 
30
+ >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
31
+ >>> d.relevant_columns()
32
+ ['a', 'b']
33
+
26
34
  >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
27
35
  ['answer.how_feeling', 'answer.how_feeling_yesterday']
36
+
37
+ >>> from edsl.results import Results
38
+ >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
39
+ ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
40
+
41
+ >>> Results.example().relevant_columns(data_type = "flimflam")
42
+ Traceback (most recent call last):
43
+ ...
44
+ ValueError: No columns found for data type: flimflam. Available data types are: ['agent', 'answer', 'comment', 'model', 'prompt', 'question_options', 'question_text', 'question_type', 'raw_model_response', 'scenario'].
28
45
  """
29
46
  columns = [list(x.keys())[0] for x in self]
30
- # columns = set([list(result.keys())[0] for result in self.data])
31
47
  if remove_prefix:
32
48
  columns = [column.split(".")[-1] for column in columns]
33
49
 
50
+ def get_data_type(column):
51
+ if "." in column:
52
+ return column.split(".")[0]
53
+ else:
54
+ return None
55
+
34
56
  if data_type:
57
+ all_columns = columns[:]
35
58
  columns = [
36
- column for column in columns if column.split(".")[0] == data_type
59
+ column for column in columns if get_data_type(column) == data_type
37
60
  ]
61
+ if len(columns) == 0:
62
+ all_data_types = sorted(
63
+ list(set(get_data_type(column) for column in all_columns))
64
+ )
65
+ raise ValueError(
66
+ f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
67
+ )
38
68
 
39
69
  return columns
40
70
 
41
- def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
71
+ def num_observations(self):
72
+ """Return the number of observations in the dataset.
73
+
74
+ >>> from edsl.results import Results
75
+ >>> Results.example().num_observations()
76
+ 4
77
+ """
78
+ _num_observations = None
79
+ for entry in self:
80
+ key, values = list(entry.items())[0]
81
+ if _num_observations is None:
82
+ _num_observations = len(values)
83
+ else:
84
+ if len(values) != _num_observations:
85
+ raise ValueError(
86
+ "The number of observations is not consistent across columns."
87
+ )
88
+
89
+ return _num_observations
90
+
91
+ def _make_tabular(
92
+ self, remove_prefix: bool, pretty_labels: Optional[dict] = None
93
+ ) -> tuple[list, List[list]]:
42
94
  """Turn the results into a tabular format.
43
95
 
44
96
  :param remove_prefix: Whether to remove the prefix from the column names.
@@ -51,27 +103,41 @@ class DatasetExportMixin:
51
103
  >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
52
104
  (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
53
105
  """
54
- d = {}
55
- full_header = sorted(list(self.relevant_columns()))
56
- for entry in self.data:
57
- key, list_of_values = list(entry.items())[0]
58
- d[key] = list_of_values
106
+
107
+ def create_dict_from_list_of_dicts(list_of_dicts):
108
+ for entry in list_of_dicts:
109
+ key, list_of_values = list(entry.items())[0]
110
+ yield key, list_of_values
111
+
112
+ tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
113
+
114
+ full_header = [list(x.keys())[0] for x in self]
115
+
116
+ rows = []
117
+ for i in range(self.num_observations()):
118
+ row = [tabular_repr[h][i] for h in full_header]
119
+ rows.append(row)
120
+
59
121
  if remove_prefix:
60
122
  header = [h.split(".")[-1] for h in full_header]
61
123
  else:
62
124
  header = full_header
63
- num_observations = len(list(self[0].values())[0])
64
- rows = []
65
- # rows.append(header)
66
- for i in range(num_observations):
67
- row = [d[h][i] for h in full_header]
68
- rows.append(row)
125
+
69
126
  if pretty_labels is not None:
70
127
  header = [pretty_labels.get(h, h) for h in header]
128
+
71
129
  return header, rows
72
130
 
73
131
  def print_long(self):
74
- """Print the results in a long format."""
132
+ """Print the results in a long format.
133
+ >>> from edsl.results import Results
134
+ >>> r = Results.example()
135
+ >>> r.select('how_feeling').print_long()
136
+ answer.how_feeling: OK
137
+ answer.how_feeling: Great
138
+ answer.how_feeling: Terrible
139
+ answer.how_feeling: OK
140
+ """
75
141
  for entry in self:
76
142
  key, list_of_values = list(entry.items())[0]
77
143
  for value in list_of_values:
@@ -81,7 +147,7 @@ class DatasetExportMixin:
81
147
  self,
82
148
  pretty_labels: Optional[dict] = None,
83
149
  filename: Optional[str] = None,
84
- format: Literal["rich", "html", "markdown", "latex"] = None,
150
+ format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
85
151
  interactive: bool = False,
86
152
  split_at_dot: bool = True,
87
153
  max_rows=None,
@@ -98,6 +164,12 @@ class DatasetExportMixin:
98
164
  :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
99
165
  :param interactive: Whether to print the results interactively in a Jupyter notebook.
100
166
  :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
167
+ :param max_rows: The maximum number of rows to print.
168
+ :param tee: Whether to return the dataset.
169
+ :param iframe: Whether to display the table in an iframe.
170
+ :param iframe_height: The height of the iframe.
171
+ :param iframe_width: The width of the iframe.
172
+ :param web: Whether to display the table in a web browser.
101
173
 
102
174
  Example: Print in rich format at the terminal
103
175
 
@@ -117,6 +189,42 @@ class DatasetExportMixin:
117
189
  │ OK │
118
190
  └──────────────┘
119
191
 
192
+ >>> r = Results.example()
193
+ >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
194
+ ┏━━━━━━━━━━━━━━┓
195
+ ┃ answer ┃
196
+ ┃ .how_feeling ┃
197
+ ┡━━━━━━━━━━━━━━┩
198
+ │ OK │
199
+ ├──────────────┤
200
+ │ Great │
201
+ └──────────────┘
202
+ >>> r2
203
+ Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
204
+
205
+ >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
206
+ ┏━━━━━━━━━━━━━━┓
207
+ ┃ answer ┃
208
+ ┃ .how_feeling ┃
209
+ ┡━━━━━━━━━━━━━━┩
210
+ │ OK │
211
+ ├──────────────┤
212
+ │ Great │
213
+ └──────────────┘
214
+
215
+ >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
216
+ ┏━━━━━━━━━━━━━━━━━━━━┓
217
+ ┃ answer.how_feeling ┃
218
+ ┡━━━━━━━━━━━━━━━━━━━━┩
219
+ │ OK │
220
+ ├────────────────────┤
221
+ │ Great │
222
+ ├────────────────────┤
223
+ │ Terrible │
224
+ ├────────────────────┤
225
+ │ OK │
226
+ └────────────────────┘
227
+
120
228
  Example: using the pretty_labels parameter
121
229
 
122
230
  >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
@@ -142,33 +250,40 @@ class DatasetExportMixin:
142
250
  | Terrible |
143
251
  | OK |
144
252
  ...
253
+
254
+ >>> r.select('how_feeling').print(format='latex')
255
+ \\begin{tabular}{l}
256
+ \\toprule
257
+ ...
145
258
  """
146
259
  from IPython.display import HTML, display
147
260
  from edsl.utilities.utilities import is_notebook
148
261
 
149
- if format is None:
150
- if is_notebook():
151
- format = "html"
152
- else:
153
- format = "rich"
262
+ def _determine_format(format):
263
+ if format is None:
264
+ if is_notebook():
265
+ format = "html"
266
+ else:
267
+ format = "rich"
268
+ if format not in ["rich", "html", "markdown", "latex"]:
269
+ raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
270
+
271
+ return format
272
+
273
+ format = _determine_format(format)
154
274
 
155
275
  if pretty_labels is None:
156
276
  pretty_labels = {}
157
277
 
158
- if format not in ["rich", "html", "markdown", "latex"]:
159
- raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
278
+ if pretty_labels != {}: # only split at dot if there are no pretty labels
279
+ split_at_dot = False
160
280
 
161
- new_data = []
162
- for index, entry in enumerate(self):
163
- key, list_of_values = list(entry.items())[0]
164
- new_data.append({pretty_labels.get(key, key): list_of_values})
281
+ def _create_data():
282
+ for index, entry in enumerate(self):
283
+ key, list_of_values = list(entry.items())[0]
284
+ yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
165
285
 
166
- if max_rows is not None:
167
- for entry in new_data:
168
- for key in entry:
169
- actual_rows = len(entry[key])
170
- entry[key] = entry[key][:max_rows]
171
- # print(f"Showing only the first {max_rows} rows of {actual_rows} rows.")
286
+ new_data = list(_create_data())
172
287
 
173
288
  if format == "rich":
174
289
  from edsl.utilities.interface import print_dataset_with_rich
@@ -176,55 +291,54 @@ class DatasetExportMixin:
176
291
  print_dataset_with_rich(
177
292
  new_data, filename=filename, split_at_dot=split_at_dot
178
293
  )
179
- elif format == "html":
180
- notebook = is_notebook()
294
+ return self if tee else None
295
+
296
+ if format == "markdown":
297
+ from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
298
+
299
+ print_list_of_dicts_as_markdown_table(new_data, filename=filename)
300
+ return self if tee else None
301
+
302
+ if format == "latex":
303
+ df = self.to_pandas()
304
+ df.columns = [col.replace("_", " ") for col in df.columns]
305
+ latex_string = df.to_latex(index=False)
306
+
307
+ if filename is not None:
308
+ with open(filename, "w") as f:
309
+ f.write(latex_string)
310
+ else:
311
+ print(latex_string)
312
+
313
+ return self if tee else None
314
+
315
+ if format == "html":
181
316
  from edsl.utilities.interface import print_list_of_dicts_as_html_table
182
317
 
183
318
  html_source = print_list_of_dicts_as_html_table(
184
319
  new_data, interactive=interactive
185
320
  )
186
- if iframe:
187
- import html
188
321
 
189
- height = iframe_height
190
- width = iframe_width
191
- escaped_output = html.escape(html_source)
192
- # escaped_output = html_source
322
+ # if download_link:
323
+ # from IPython.display import HTML, display
324
+ # csv_file = output.getvalue()
325
+ # b64 = base64.b64encode(csv_file.encode()).decode()
326
+ # download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
327
+ # #display(HTML(download_link))
328
+
329
+ if iframe:
193
330
  iframe = f""""
194
- <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
331
+ <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
195
332
  """
196
333
  display(HTML(iframe))
197
- elif notebook:
334
+ elif is_notebook():
198
335
  display(HTML(html_source))
199
336
  else:
200
337
  from edsl.utilities.interface import view_html
201
338
 
202
339
  view_html(html_source)
203
340
 
204
- elif format == "markdown":
205
- from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
206
-
207
- print_list_of_dicts_as_markdown_table(new_data, filename=filename)
208
- elif format == "latex":
209
- df = self.to_pandas()
210
- df.columns = [col.replace("_", " ") for col in df.columns]
211
- latex_string = df.to_latex()
212
- if filename is not None:
213
- with open(filename, "w") as f:
214
- f.write(latex_string)
215
- else:
216
- return latex_string
217
- # raise NotImplementedError("Latex format not yet implemented.")
218
- # latex_string = create_latex_table_from_data(new_data, filename=filename)
219
- # if filename is None:
220
- # return latex_string
221
- # Not working quite
222
-
223
- else:
224
- raise ValueError("format not recognized.")
225
-
226
- if tee:
227
- return self
341
+ return self if tee else None
228
342
 
229
343
  def to_csv(
230
344
  self,
@@ -245,6 +359,25 @@ class DatasetExportMixin:
245
359
  >>> r = Results.example()
246
360
  >>> r.select('how_feeling').to_csv()
247
361
  'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
362
+
363
+ >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
364
+ 'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
365
+
366
+ >>> import tempfile
367
+ >>> filename = tempfile.NamedTemporaryFile(delete=False).name
368
+ >>> r.select('how_feeling').to_csv(filename = filename)
369
+ >>> import os
370
+ >>> import csv
371
+ >>> with open(filename, newline='') as f:
372
+ ... reader = csv.reader(f)
373
+ ... for row in reader:
374
+ ... print(row)
375
+ ['answer.how_feeling']
376
+ ['OK']
377
+ ['Great']
378
+ ['Terrible']
379
+ ['OK']
380
+
248
381
  """
249
382
  if pretty_labels is None:
250
383
  pretty_labels = {}
@@ -264,6 +397,8 @@ class DatasetExportMixin:
264
397
  writer.writerows(rows)
265
398
 
266
399
  if download_link:
400
+ from IPython.display import HTML, display
401
+
267
402
  csv_file = output.getvalue()
268
403
  b64 = base64.b64encode(csv_file.encode()).decode()
269
404
  download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
@@ -271,6 +406,22 @@ class DatasetExportMixin:
271
406
  else:
272
407
  return output.getvalue()
273
408
 
409
+ def download_link(self, pretty_labels: Optional[dict] = None) -> str:
410
+ """Return a download link for the results.
411
+
412
+ :param pretty_labels: A dictionary of pretty labels for the columns.
413
+
414
+ >>> from edsl.results import Results
415
+ >>> r = Results.example()
416
+ >>> r.select('how_feeling').download_link()
417
+ '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
418
+ """
419
+ import base64
420
+
421
+ csv_string = self.to_csv(pretty_labels=pretty_labels)
422
+ b64 = base64.b64encode(csv_string.encode()).decode()
423
+ return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
424
+
274
425
  def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
275
426
  """Convert the results to a pandas DataFrame.
276
427
 
@@ -290,8 +441,8 @@ class DatasetExportMixin:
290
441
  csv_string = self.to_csv(remove_prefix=remove_prefix)
291
442
  csv_buffer = io.StringIO(csv_string)
292
443
  df = pd.read_csv(csv_buffer)
293
- df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
294
- return df_sorted
444
+ # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
445
+ return df
295
446
 
296
447
  def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
297
448
  """Convert the results to a list of dictionaries, one per scenario.
@@ -309,6 +460,15 @@ class DatasetExportMixin:
309
460
  return ScenarioList([Scenario(d) for d in list_of_dicts])
310
461
 
311
462
  def to_agent_list(self, remove_prefix: bool = True):
463
+ """Convert the results to a list of dictionaries, one per agent.
464
+
465
+ :param remove_prefix: Whether to remove the prefix from the column names.
466
+
467
+ >>> from edsl.results import Results
468
+ >>> r = Results.example()
469
+ >>> r.select('how_feeling').to_agent_list()
470
+ AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
471
+ """
312
472
  from edsl import AgentList, Agent
313
473
 
314
474
  list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
@@ -344,6 +504,9 @@ class DatasetExportMixin:
344
504
  def to_list(self, flatten=False, remove_none=False) -> list[list]:
345
505
  """Convert the results to a list of lists.
346
506
 
507
+ :param flatten: Whether to flatten the list of lists.
508
+ :param remove_none: Whether to remove None values from the list.
509
+
347
510
  >>> from edsl.results import Results
348
511
  >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
349
512
  Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
@@ -354,6 +517,18 @@ class DatasetExportMixin:
354
517
  >>> r = Results.example()
355
518
  >>> r.select('how_feeling').to_list()
356
519
  ['OK', 'Great', 'Terrible', 'OK']
520
+
521
+ >>> from edsl.results.Dataset import Dataset
522
+ >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
523
+ [1, 9, 2, 3, 4]
524
+
525
+ >>> from edsl.results.Dataset import Dataset
526
+ >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
527
+ Traceback (most recent call last):
528
+ ...
529
+ ValueError: Cannot flatten a list of lists when there are multiple columns selected.
530
+
531
+
357
532
  """
358
533
  if len(self.relevant_columns()) > 1 and flatten:
359
534
  raise ValueError(
@@ -385,7 +560,10 @@ class DatasetExportMixin:
385
560
  return list_to_return
386
561
 
387
562
  def html(
388
- self, filename: str = None, cta: str = "Open in browser", return_link=False
563
+ self,
564
+ filename: Optional[str] = None,
565
+ cta: str = "Open in browser",
566
+ return_link: bool = False,
389
567
  ):
390
568
  import os
391
569
  import tempfile
@@ -419,7 +597,7 @@ class DatasetExportMixin:
419
597
  return filename
420
598
 
421
599
  def tally(
422
- self, *fields: Optional[str], top_n=None, output="dict"
600
+ self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
423
601
  ) -> Union[dict, "Dataset"]:
424
602
  """Tally the values of a field or perform a cross-tab of multiple fields.
425
603
 
@@ -427,9 +605,11 @@ class DatasetExportMixin:
427
605
 
428
606
  >>> from edsl.results import Results
429
607
  >>> r = Results.example()
430
- >>> r.select('how_feeling').tally('answer.how_feeling')
608
+ >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
431
609
  {'OK': 2, 'Great': 1, 'Terrible': 1}
432
- >>> r.select('how_feeling', 'period').tally('how_feeling', 'period')
610
+ >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
611
+ Dataset([{'value': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
612
+ >>> r.select('how_feeling', 'period').tally('how_feeling', 'period', output = "dict")
433
613
  {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
434
614
  """
435
615
  from collections import Counter
@@ -441,6 +621,8 @@ class DatasetExportMixin:
441
621
  column.split(".")[-1] for column in self.relevant_columns()
442
622
  ]
443
623
 
624
+ # breakpoint()
625
+
444
626
  if not all(
445
627
  f in self.relevant_columns() or f in relevant_columns_without_prefix
446
628
  for f in fields
@@ -467,6 +649,7 @@ class DatasetExportMixin:
467
649
  from edsl.results.Dataset import Dataset
468
650
 
469
651
  if output == "dict":
652
+ # why did I do this?
470
653
  warnings.warn(
471
654
  textwrap.dedent(
472
655
  """\
edsl/results/Result.py CHANGED
@@ -126,6 +126,9 @@ class Result(Base, UserDict):
126
126
  self.survey = survey
127
127
  self.question_to_attributes = question_to_attributes
128
128
 
129
+ self._combined_dict = None
130
+ self._problem_keys = None
131
+
129
132
  ###############
130
133
  # Used in Results
131
134
  ###############
@@ -164,25 +167,64 @@ class Result(Base, UserDict):
164
167
  "answer": self.answer,
165
168
  "prompt": self.prompt,
166
169
  "raw_model_response": self.raw_model_response,
167
- "iteration": {"iteration": self.iteration},
170
+ # "iteration": {"iteration": self.iteration},
168
171
  "question_text": question_text_dict,
169
172
  "question_options": question_options_dict,
170
173
  "question_type": question_type_dict,
171
174
  "comment": comments_dict,
172
175
  }
173
176
 
177
+ def check_expression(self, expression) -> None:
178
+ for key in self.problem_keys:
179
+ if key in expression and not key + "." in expression:
180
+ raise ValueError(
181
+ f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead."
182
+ )
183
+ return None
184
+
174
185
  def code(self):
175
186
  """Return a string of code that can be used to recreate the Result object."""
176
187
  raise NotImplementedError
177
188
 
178
189
  @property
179
- def combined_dict(self) -> dict[str, Any]:
180
- """Return a dictionary that includes all sub_dicts, but also puts the key-value pairs in each sub_dict as a key_value pair in the combined dictionary."""
190
+ def problem_keys(self):
191
+ """Return a list of keys that are problematic."""
192
+ return self._problem_keys
193
+
194
+ def _compute_combined_dict_and_problem_keys(self) -> None:
181
195
  combined = {}
196
+ problem_keys = []
182
197
  for key, sub_dict in self.sub_dicts.items():
183
198
  combined.update(sub_dict)
199
+ # in some cases, the sub_dict might have keys that conflict with the main dict
200
+ if key in combined:
201
+ # The key is already in the combined dict
202
+ problem_keys = problem_keys + [key]
203
+
184
204
  combined.update({key: sub_dict})
185
- return combined
205
+ # I *think* this allows us to do do things like "answer.how_feelling" i.e., that the evaluator can use
206
+ # dot notation to access the subdicts.
207
+ self._combined_dict = combined
208
+ self._problem_keys = problem_keys
209
+
210
+ @property
211
+ def combined_dict(self) -> dict[str, Any]:
212
+ """Return a dictionary that includes all sub_dicts, but also puts the key-value pairs in each sub_dict as a key_value pair in the combined dictionary.
213
+
214
+ >>> r = Result.example()
215
+ >>> r.combined_dict['how_feeling']
216
+ 'OK'
217
+ """
218
+ if self._combined_dict is None or self._problem_keys is None:
219
+ self._compute_combined_dict_and_problem_keys()
220
+ return self._combined_dict
221
+
222
+ @property
223
+ def problem_keys(self):
224
+ """Return a list of keys that are problematic."""
225
+ if self._combined_dict is None or self._problem_keys is None:
226
+ self._compute_combined_dict_and_problem_keys()
227
+ return self._problem_keys
186
228
 
187
229
  def get_value(self, data_type: str, key: str) -> Any:
188
230
  """Return the value for a given data type and key.
@@ -226,7 +268,13 @@ class Result(Base, UserDict):
226
268
  return Result.from_dict(self.to_dict())
227
269
 
228
270
  def __eq__(self, other) -> bool:
229
- """Return True if the Result object is equal to another Result object."""
271
+ """Return True if the Result object is equal to another Result object.
272
+
273
+ >>> r = Result.example()
274
+ >>> r == r
275
+ True
276
+
277
+ """
230
278
  return self.to_dict() == other.to_dict()
231
279
 
232
280
  ###############