edsl 0.1.30.dev5__py3-none-any.whl → 0.1.31.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,12 +3,13 @@
3
3
  import base64
4
4
  import csv
5
5
  import io
6
+ import html
6
7
 
7
- from typing import Literal, Optional, Union
8
+ from typing import Literal, Optional, Union, List
8
9
 
9
10
 
10
11
  class DatasetExportMixin:
11
- """Mixin class"""
12
+ """Mixin class for exporting Dataset objects."""
12
13
 
13
14
  def relevant_columns(
14
15
  self, data_type: Optional[str] = None, remove_prefix=False
@@ -28,19 +29,64 @@ class DatasetExportMixin:
28
29
 
29
30
  >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
30
31
  ['answer.how_feeling', 'answer.how_feeling_yesterday']
32
+
33
+ >>> from edsl.results import Results
34
+ >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
35
+ ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
36
+
37
+ >>> Results.example().relevant_columns(data_type = "flimflam")
38
+ Traceback (most recent call last):
39
+ ...
40
+ ValueError: No columns found for data type: flimflam. Available data types are: ['agent', 'answer', 'comment', 'model', 'prompt', 'question_options', 'question_text', 'question_type', 'raw_model_response', 'scenario'].
31
41
  """
32
42
  columns = [list(x.keys())[0] for x in self]
33
43
  if remove_prefix:
34
44
  columns = [column.split(".")[-1] for column in columns]
35
45
 
46
def get_data_type(column):
    """Return the data-type prefix of a column name.

    The prefix is the text before the first "." in ``column``;
    a column name without a dot has no data type and yields None.
    """
    prefix, dot, _rest = column.partition(".")
    return prefix if dot else None
51
+
36
52
  if data_type:
53
+ all_columns = columns[:]
37
54
  columns = [
38
- column for column in columns if column.split(".")[0] == data_type
55
+ column for column in columns if get_data_type(column) == data_type
39
56
  ]
57
+ if len(columns) == 0:
58
+ all_data_types = sorted(
59
+ list(set(get_data_type(column) for column in all_columns))
60
+ )
61
+ raise ValueError(
62
+ f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
63
+ )
40
64
 
41
65
  return columns
42
66
 
43
- def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
67
def num_observations(self):
    """Return the number of observations in the dataset.

    Each entry yielded by iterating the dataset is treated as a mapping
    whose first value is that column's list of observations. All columns
    must have the same length.

    :return: The shared column length, or 0 when the dataset has no columns.
    :raises ValueError: If the columns do not all have the same length.

    >>> from edsl.results import Results
    >>> Results.example().num_observations()
    4
    """
    # Only the values of each entry matter here; the column name is not needed.
    lengths = {len(list(entry.values())[0]) for entry in self}

    if len(lengths) > 1:
        raise ValueError(
            "The number of observations is not consistent across columns."
        )

    # An empty dataset has zero observations. Returning None here would make
    # callers such as ``range(self.num_observations())`` raise TypeError.
    return lengths.pop() if lengths else 0
86
+
87
+ def _make_tabular(
88
+ self, remove_prefix: bool, pretty_labels: Optional[dict] = None
89
+ ) -> tuple[list, List[list]]:
44
90
  """Turn the results into a tabular format.
45
91
 
46
92
  :param remove_prefix: Whether to remove the prefix from the column names.
@@ -53,23 +99,29 @@ class DatasetExportMixin:
53
99
  >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
54
100
  (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
55
101
  """
56
- d = {}
57
- full_header = sorted(list(self.relevant_columns()))
58
- for entry in self.data:
59
- key, list_of_values = list(entry.items())[0]
60
- d[key] = list_of_values
102
+
103
def create_dict_from_list_of_dicts(list_of_dicts):
    """Yield the first (key, values) pair of every dict in ``list_of_dicts``.

    The pairs can be fed straight to ``dict(...)`` to build a single
    column-name -> values mapping.
    """
    yield from (list(entry.items())[0] for entry in list_of_dicts)
107
+
108
+ tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
109
+
110
+ full_header = [list(x.keys())[0] for x in self]
111
+
112
+ rows = []
113
+ for i in range(self.num_observations()):
114
+ row = [tabular_repr[h][i] for h in full_header]
115
+ rows.append(row)
116
+
61
117
  if remove_prefix:
62
118
  header = [h.split(".")[-1] for h in full_header]
63
119
  else:
64
120
  header = full_header
65
- num_observations = len(list(self[0].values())[0])
66
- rows = []
67
- # rows.append(header)
68
- for i in range(num_observations):
69
- row = [d[h][i] for h in full_header]
70
- rows.append(row)
121
+
71
122
  if pretty_labels is not None:
72
123
  header = [pretty_labels.get(h, h) for h in header]
124
+
73
125
  return header, rows
74
126
 
75
127
  def print_long(self):
@@ -91,7 +143,7 @@ class DatasetExportMixin:
91
143
  self,
92
144
  pretty_labels: Optional[dict] = None,
93
145
  filename: Optional[str] = None,
94
- format: Literal["rich", "html", "markdown", "latex"] = None,
146
+ format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
95
147
  interactive: bool = False,
96
148
  split_at_dot: bool = True,
97
149
  max_rows=None,
@@ -108,6 +160,12 @@ class DatasetExportMixin:
108
160
  :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
109
161
  :param interactive: Whether to print the results interactively in a Jupyter notebook.
110
162
  :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
163
+ :param max_rows: The maximum number of rows to print.
164
+ :param tee: Whether to return the dataset.
165
+ :param iframe: Whether to display the table in an iframe.
166
+ :param iframe_height: The height of the iframe.
167
+ :param iframe_width: The width of the iframe.
168
+ :param web: Whether to display the table in a web browser.
111
169
 
112
170
  Example: Print in rich format at the terminal
113
171
 
@@ -188,91 +246,95 @@ class DatasetExportMixin:
188
246
  | Terrible |
189
247
  | OK |
190
248
  ...
249
+
250
+ >>> r.select('how_feeling').print(format='latex')
251
+ \\begin{tabular}{l}
252
+ \\toprule
253
+ ...
191
254
  """
192
255
  from IPython.display import HTML, display
193
256
  from edsl.utilities.utilities import is_notebook
194
257
 
195
- if format is None:
196
- if is_notebook():
197
- format = "html"
198
- else:
199
- format = "rich"
258
def _determine_format(format):
    """Resolve and validate the output format.

    :param format: The requested format, or None to pick one automatically.
    :return: One of 'rich', 'html', 'markdown' or 'latex'.
    :raises ValueError: If ``format`` is not one of the supported formats.
    """
    valid_formats = ("rich", "html", "markdown", "latex")

    if format is None:
        # No explicit choice: HTML inside a notebook, rich output otherwise.
        format = "html" if is_notebook() else "rich"

    if format not in valid_formats:
        # The message lists every accepted value, including 'latex'.
        raise ValueError(
            "format must be one of 'rich', 'html', 'markdown', or 'latex'."
        )

    return format
268
+
269
+ format = _determine_format(format)
200
270
 
201
271
  if pretty_labels is None:
202
272
  pretty_labels = {}
203
- else:
204
- # if the user passes in pretty_labels, we don't want to split at the dot
273
+
274
+ if pretty_labels != {}: # only split at dot if there are no pretty labels
205
275
  split_at_dot = False
206
276
 
207
- if format not in ["rich", "html", "markdown", "latex"]:
208
- raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
277
def _create_data():
    """Yield one ``{label: values}`` dict per column of the dataset.

    The label is the pretty label for the column when one was supplied,
    otherwise the original column name; the values are cut to ``max_rows``.
    """
    for column in self:
        name, values = list(column.items())[0]
        label = pretty_labels.get(name, name)
        # Slicing with max_rows=None keeps every value.
        yield {label: values[:max_rows]}
281
+
282
+ new_data = list(_create_data())
209
283
 
210
- new_data = []
211
- for index, entry in enumerate(self):
212
- key, list_of_values = list(entry.items())[0]
213
- new_data.append({pretty_labels.get(key, key): list_of_values})
214
-
215
- if max_rows is not None:
216
- for entry in new_data:
217
- for key in entry:
218
- actual_rows = len(entry[key])
219
- entry[key] = entry[key][:max_rows]
220
-
221
284
  if format == "rich":
222
285
  from edsl.utilities.interface import print_dataset_with_rich
223
286
 
224
287
  print_dataset_with_rich(
225
288
  new_data, filename=filename, split_at_dot=split_at_dot
226
289
  )
227
- elif format == "html":
228
- notebook = is_notebook()
290
+ return self if tee else None
291
+
292
+ if format == "markdown":
293
+ from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
294
+
295
+ print_list_of_dicts_as_markdown_table(new_data, filename=filename)
296
+ return self if tee else None
297
+
298
+ if format == "latex":
299
+ df = self.to_pandas()
300
+ df.columns = [col.replace("_", " ") for col in df.columns]
301
+ latex_string = df.to_latex(index=False)
302
+
303
+ if filename is not None:
304
+ with open(filename, "w") as f:
305
+ f.write(latex_string)
306
+ else:
307
+ print(latex_string)
308
+
309
+ return self if tee else None
310
+
311
+ if format == "html":
229
312
  from edsl.utilities.interface import print_list_of_dicts_as_html_table
230
313
 
231
314
  html_source = print_list_of_dicts_as_html_table(
232
315
  new_data, interactive=interactive
233
316
  )
234
- if iframe:
235
- import html
236
317
 
237
- height = iframe_height
238
- width = iframe_width
239
- escaped_output = html.escape(html_source)
240
- # escaped_output = html_source
318
+ # if download_link:
319
+ # from IPython.display import HTML, display
320
+ # csv_file = output.getvalue()
321
+ # b64 = base64.b64encode(csv_file.encode()).decode()
322
+ # download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
323
+ # #display(HTML(download_link))
324
+
325
+ if iframe:
241
326
  iframe = f""""
242
- <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
327
+ <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
243
328
  """
244
329
  display(HTML(iframe))
245
- elif notebook:
330
+ elif is_notebook():
246
331
  display(HTML(html_source))
247
332
  else:
248
333
  from edsl.utilities.interface import view_html
249
334
 
250
335
  view_html(html_source)
251
336
 
252
- elif format == "markdown":
253
- from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
254
-
255
- print_list_of_dicts_as_markdown_table(new_data, filename=filename)
256
- elif format == "latex":
257
- df = self.to_pandas()
258
- df.columns = [col.replace("_", " ") for col in df.columns]
259
- latex_string = df.to_latex()
260
- if filename is not None:
261
- with open(filename, "w") as f:
262
- f.write(latex_string)
263
- else:
264
- return latex_string
265
- # raise NotImplementedError("Latex format not yet implemented.")
266
- # latex_string = create_latex_table_from_data(new_data, filename=filename)
267
- # if filename is None:
268
- # return latex_string
269
- # Not working quite
270
-
271
- else:
272
- raise ValueError("format not recognized.")
273
-
274
- if tee:
275
- return self
337
+ return self if tee else None
276
338
 
277
339
  def to_csv(
278
340
  self,
@@ -293,10 +355,25 @@ class DatasetExportMixin:
293
355
  >>> r = Results.example()
294
356
  >>> r.select('how_feeling').to_csv()
295
357
  'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
296
-
358
+
297
359
  >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
298
360
  'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
299
361
 
362
+ >>> import tempfile
363
+ >>> filename = tempfile.NamedTemporaryFile(delete=False).name
364
+ >>> r.select('how_feeling').to_csv(filename = filename)
365
+ >>> import os
366
+ >>> import csv
367
+ >>> with open(filename, newline='') as f:
368
+ ... reader = csv.reader(f)
369
+ ... for row in reader:
370
+ ... print(row)
371
+ ['answer.how_feeling']
372
+ ['OK']
373
+ ['Great']
374
+ ['Terrible']
375
+ ['OK']
376
+
300
377
  """
301
378
  if pretty_labels is None:
302
379
  pretty_labels = {}
@@ -316,6 +393,8 @@ class DatasetExportMixin:
316
393
  writer.writerows(rows)
317
394
 
318
395
  if download_link:
396
+ from IPython.display import HTML, display
397
+
319
398
  csv_file = output.getvalue()
320
399
  b64 = base64.b64encode(csv_file.encode()).decode()
321
400
  download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
@@ -323,6 +402,22 @@ class DatasetExportMixin:
323
402
  else:
324
403
  return output.getvalue()
325
404
 
405
def download_link(self, pretty_labels: Optional[dict] = None) -> str:
    """Build an HTML anchor that downloads the results as a CSV file.

    The CSV text produced by ``to_csv`` is base64-encoded and embedded in
    the link as a ``data:`` URI.

    :param pretty_labels: A dictionary of pretty labels for the columns.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').download_link()
    '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
    """
    import base64

    csv_bytes = self.to_csv(pretty_labels=pretty_labels).encode()
    encoded_csv = base64.b64encode(csv_bytes).decode()
    return f'<a href="data:file/csv;base64,{encoded_csv}" download="my_data.csv">Download CSV file</a>'
420
+
326
421
  def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
327
422
  """Convert the results to a pandas DataFrame.
328
423
 
@@ -342,8 +437,8 @@ class DatasetExportMixin:
342
437
  csv_string = self.to_csv(remove_prefix=remove_prefix)
343
438
  csv_buffer = io.StringIO(csv_string)
344
439
  df = pd.read_csv(csv_buffer)
345
- df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
346
- return df_sorted
440
+ # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
441
+ return df
347
442
 
348
443
  def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
349
444
  """Convert the results to a list of dictionaries, one per scenario.
@@ -362,7 +457,7 @@ class DatasetExportMixin:
362
457
 
363
458
  def to_agent_list(self, remove_prefix: bool = True):
364
459
  """Convert the results to a list of dictionaries, one per agent.
365
-
460
+
366
461
  :param remove_prefix: Whether to remove the prefix from the column names.
367
462
 
368
463
  >>> from edsl.results import Results
@@ -461,7 +556,10 @@ class DatasetExportMixin:
461
556
  return list_to_return
462
557
 
463
558
  def html(
464
- self, filename: Optional[str] = None, cta: str = "Open in browser", return_link:bool=False
559
+ self,
560
+ filename: Optional[str] = None,
561
+ cta: str = "Open in browser",
562
+ return_link: bool = False,
465
563
  ):
466
564
  import os
467
565
  import tempfile
@@ -495,7 +593,7 @@ class DatasetExportMixin:
495
593
  return filename
496
594
 
497
595
  def tally(
498
- self, *fields: Optional[str], top_n:Optional[int]=None, output="dict"
596
+ self, *fields: Optional[str], top_n: Optional[int] = None, output="dict"
499
597
  ) -> Union[dict, "Dataset"]:
500
598
  """Tally the values of a field or perform a cross-tab of multiple fields.
501
599
 
edsl/results/Result.py CHANGED
@@ -167,28 +167,30 @@ class Result(Base, UserDict):
167
167
  "answer": self.answer,
168
168
  "prompt": self.prompt,
169
169
  "raw_model_response": self.raw_model_response,
170
- # "iteration": {"iteration": self.iteration},
170
+ # "iteration": {"iteration": self.iteration},
171
171
  "question_text": question_text_dict,
172
172
  "question_options": question_options_dict,
173
173
  "question_type": question_type_dict,
174
174
  "comment": comments_dict,
175
175
  }
176
-
176
+
177
177
  def check_expression(self, expression) -> None:
178
178
  for key in self.problem_keys:
179
179
  if key in expression and not key + "." in expression:
180
- raise ValueError(f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead.")
180
+ raise ValueError(
181
+ f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead."
182
+ )
181
183
  return None
182
184
 
183
185
  def code(self):
184
186
  """Return a string of code that can be used to recreate the Result object."""
185
187
  raise NotImplementedError
186
-
188
+
187
189
  @property
188
190
  def problem_keys(self):
189
191
  """Return a list of keys that are problematic."""
190
192
  return self._problem_keys
191
-
193
+
192
194
  def _compute_combined_dict_and_problem_keys(self) -> None:
193
195
  combined = {}
194
196
  problem_keys = []
@@ -198,9 +200,9 @@ class Result(Base, UserDict):
198
200
  if key in combined:
199
201
  # The key is already in the combined dict
200
202
  problem_keys = problem_keys + [key]
201
-
203
+
202
204
  combined.update({key: sub_dict})
203
- # I *think* this allows us to do do things like "answer.how_feelling" i.e., that the evaluator can use
205
+ # I *think* this allows us to do things like "answer.how_feeling", i.e., that the evaluator can use
204
206
  # dot notation to access the subdicts.
205
207
  self._combined_dict = combined
206
208
  self._problem_keys = problem_keys
@@ -208,7 +210,7 @@ class Result(Base, UserDict):
208
210
  @property
209
211
  def combined_dict(self) -> dict[str, Any]:
210
212
  """Return a dictionary that includes all sub_dicts, but also puts the key-value pairs in each sub_dict as a key_value pair in the combined dictionary.
211
-
213
+
212
214
  >>> r = Result.example()
213
215
  >>> r.combined_dict['how_feeling']
214
216
  'OK'
@@ -216,7 +218,7 @@ class Result(Base, UserDict):
216
218
  if self._combined_dict is None or self._problem_keys is None:
217
219
  self._compute_combined_dict_and_problem_keys()
218
220
  return self._combined_dict
219
-
221
+
220
222
  @property
221
223
  def problem_keys(self):
222
224
  """Return a list of keys that are problematic."""
@@ -267,11 +269,11 @@ class Result(Base, UserDict):
267
269
 
268
270
  def __eq__(self, other) -> bool:
269
271
  """Return True if the Result object is equal to another Result object.
270
-
272
+
271
273
  >>> r = Result.example()
272
274
  >>> r == r
273
275
  True
274
-
276
+
275
277
  """
276
278
  return self.to_dict() == other.to_dict()
277
279
 
edsl/results/Results.py CHANGED
@@ -603,24 +603,26 @@ class Results(UserList, Mixins, Base):
603
603
  values = [d[key] for d in columns]
604
604
  self = self.add_column(key, values)
605
605
  return self
606
-
606
+
607
607
  @staticmethod
608
- def _create_evaluator(result: Result, functions_dict: Optional[dict] = None) -> EvalWithCompoundTypes:
608
+ def _create_evaluator(
609
+ result: Result, functions_dict: Optional[dict] = None
610
+ ) -> EvalWithCompoundTypes:
609
611
  """Create an evaluator for the expression.
610
-
612
+
611
613
  >>> from unittest.mock import Mock
612
614
  >>> result = Mock()
613
- >>> result.combined_dict = {'how_feeling': 'OK'}
615
+ >>> result.combined_dict = {'how_feeling': 'OK'}
614
616
 
615
617
  >>> evaluator = Results._create_evaluator(result = result, functions_dict = {})
616
618
  >>> evaluator.eval("how_feeling == 'OK'")
617
619
  True
618
-
620
+
619
621
  >>> result.combined_dict = {'answer': {'how_feeling': 'OK'}}
620
622
  >>> evaluator = Results._create_evaluator(result = result, functions_dict = {})
621
623
  >>> evaluator.eval("answer.how_feeling== 'OK'")
622
624
  True
623
-
625
+
624
626
  Note that you need to refer to the answer dictionary in the expression.
625
627
 
626
628
  >>> evaluator.eval("how_feeling== 'OK'")
@@ -827,8 +829,9 @@ class Results(UserList, Mixins, Base):
827
829
  # Return the index of this key in the list_of_keys
828
830
  return items_in_order.index(single_key)
829
831
 
830
- #sorted(new_data, key=sort_by_key_order)
832
+ # sorted(new_data, key=sort_by_key_order)
831
833
  from edsl.results.Dataset import Dataset
834
+
832
835
  sorted_new_data = []
833
836
 
834
837
  # WORKS but slow
@@ -958,10 +961,10 @@ class Results(UserList, Mixins, Base):
958
961
  new_data = []
959
962
  for result in self.data:
960
963
  evaluator = self._create_evaluator(result)
961
- result.check_expression(expression) # check expression
964
+ result.check_expression(expression) # check expression
962
965
  if evaluator.eval(expression):
963
966
  new_data.append(result)
964
-
967
+
965
968
  except ValueError as e:
966
969
  raise ResultsFilterError(
967
970
  f"Error in filter. Exception:{e}",
@@ -970,14 +973,14 @@ class Results(UserList, Mixins, Base):
970
973
  )
971
974
  except Exception as e:
972
975
  raise ResultsFilterError(
973
- f"""Error in filter. Exception:{e}.""",
974
- f"""The expression you provided was: {expression}.""",
975
- """Please make sure that the expression is a valid Python expression that evaluates to a boolean.""",
976
- """For example, 'how_feeling == "Great"' is a valid expression, as is 'how_feeling in ["Great", "Terrible"]'., """,
977
- """However, 'how_feeling = "Great"' is not a valid expression.""",
978
- """See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details."""
976
+ f"""Error in filter. Exception:{e}.""",
977
+ f"""The expression you provided was: {expression}.""",
978
+ """Please make sure that the expression is a valid Python expression that evaluates to a boolean.""",
979
+ """For example, 'how_feeling == "Great"' is a valid expression, as is 'how_feeling in ["Great", "Terrible"]'., """,
980
+ """However, 'how_feeling = "Great"' is not a valid expression.""",
981
+ """See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details.""",
979
982
  )
980
-
983
+
981
984
  if len(new_data) == 0:
982
985
  import warnings
983
986
 
@@ -37,12 +37,12 @@ class ResultsToolsMixin:
37
37
  print_exceptions=False,
38
38
  ) -> dict:
39
39
  from edsl import ScenarioList
40
+ from edsl import QuestionCheckBox
40
41
 
41
42
  values = self.select(field).to_list()
42
43
  scenarios = ScenarioList.from_list("field", values).add_value(
43
44
  "context", context
44
45
  )
45
-
46
46
  q = QuestionCheckBox(
47
47
  question_text="""
48
48
  {{ context }}