edsl 0.1.30.dev5__py3-none-any.whl → 0.1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. edsl/__version__.py +1 -1
  2. edsl/agents/Invigilator.py +7 -2
  3. edsl/agents/PromptConstructionMixin.py +18 -1
  4. edsl/config.py +4 -0
  5. edsl/conjure/Conjure.py +6 -0
  6. edsl/coop/coop.py +4 -0
  7. edsl/coop/utils.py +9 -1
  8. edsl/data/CacheHandler.py +3 -4
  9. edsl/enums.py +2 -0
  10. edsl/inference_services/DeepInfraService.py +6 -91
  11. edsl/inference_services/GroqService.py +18 -0
  12. edsl/inference_services/InferenceServicesCollection.py +13 -5
  13. edsl/inference_services/OpenAIService.py +64 -21
  14. edsl/inference_services/registry.py +2 -1
  15. edsl/jobs/Jobs.py +80 -33
  16. edsl/jobs/buckets/TokenBucket.py +15 -7
  17. edsl/jobs/interviews/Interview.py +41 -19
  18. edsl/jobs/interviews/InterviewExceptionEntry.py +101 -0
  19. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +58 -40
  20. edsl/jobs/interviews/interview_exception_tracking.py +68 -10
  21. edsl/jobs/runners/JobsRunnerAsyncio.py +112 -81
  22. edsl/jobs/runners/JobsRunnerStatusData.py +0 -237
  23. edsl/jobs/runners/JobsRunnerStatusMixin.py +291 -35
  24. edsl/jobs/tasks/QuestionTaskCreator.py +2 -3
  25. edsl/jobs/tasks/TaskCreators.py +8 -2
  26. edsl/jobs/tasks/TaskHistory.py +145 -1
  27. edsl/language_models/LanguageModel.py +133 -75
  28. edsl/language_models/ModelList.py +8 -2
  29. edsl/language_models/registry.py +16 -0
  30. edsl/questions/QuestionFunctional.py +8 -7
  31. edsl/questions/QuestionMultipleChoice.py +15 -12
  32. edsl/questions/QuestionNumerical.py +0 -1
  33. edsl/questions/descriptors.py +6 -4
  34. edsl/results/DatasetExportMixin.py +185 -78
  35. edsl/results/Result.py +13 -11
  36. edsl/results/Results.py +19 -16
  37. edsl/results/ResultsToolsMixin.py +1 -1
  38. edsl/scenarios/Scenario.py +14 -0
  39. edsl/scenarios/ScenarioList.py +59 -21
  40. edsl/scenarios/ScenarioListExportMixin.py +16 -5
  41. edsl/scenarios/ScenarioListPdfMixin.py +3 -0
  42. edsl/surveys/Survey.py +11 -8
  43. {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/METADATA +4 -2
  44. {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/RECORD +46 -44
  45. {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/LICENSE +0 -0
  46. {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/WHEEL +0 -0
edsl/questions/descriptors.py CHANGED
@@ -249,6 +249,7 @@ class QuestionOptionsDescriptor(BaseDescriptor):
249
249
 
250
250
  def __init__(self, question_options: List[str]):
251
251
  self.question_options = question_options
252
+
252
253
  return TestQuestion
253
254
 
254
255
  def __init__(
@@ -264,16 +265,16 @@ class QuestionOptionsDescriptor(BaseDescriptor):
264
265
 
265
266
  def validate(self, value: Any, instance) -> None:
266
267
  """Validate the question options.
267
-
268
+
268
269
  >>> q_class = QuestionOptionsDescriptor.example()
269
270
  >>> _ = q_class(["a", "b", "c"])
270
271
  >>> _ = q_class(["a", "b", "c", "d", "d"])
271
272
  Traceback (most recent call last):
272
273
  ...
273
274
  edsl.exceptions.questions.QuestionCreationValidationError: Question options must be unique (got ['a', 'b', 'c', 'd', 'd']).
274
-
275
+
275
276
  We allow dynamic question options, which are strings of the form '{{ question_options }}'.
276
-
277
+
277
278
  >>> _ = q_class("{{dynamic_options}}")
278
279
  >>> _ = q_class("dynamic_options")
279
280
  Traceback (most recent call last):
@@ -373,7 +374,8 @@ class QuestionTextDescriptor(BaseDescriptor):
373
374
  UserWarning,
374
375
  )
375
376
 
377
+
376
378
  if __name__ == "__main__":
377
379
  import doctest
378
380
 
379
- doctest.testmod(optionflags=doctest.ELLIPSIS)
381
+ doctest.testmod(optionflags=doctest.ELLIPSIS)
edsl/results/DatasetExportMixin.py CHANGED
@@ -3,12 +3,13 @@
3
3
  import base64
4
4
  import csv
5
5
  import io
6
+ import html
6
7
 
7
- from typing import Literal, Optional, Union
8
+ from typing import Literal, Optional, Union, List
8
9
 
9
10
 
10
11
  class DatasetExportMixin:
11
- """Mixin class"""
12
+ """Mixin class for exporting Dataset objects."""
12
13
 
13
14
  def relevant_columns(
14
15
  self, data_type: Optional[str] = None, remove_prefix=False
@@ -26,21 +27,70 @@ class DatasetExportMixin:
26
27
  >>> d.relevant_columns(remove_prefix=True)
27
28
  ['b']
28
29
 
30
+ >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
31
+ >>> d.relevant_columns()
32
+ ['a', 'b']
33
+
29
34
  >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
30
35
  ['answer.how_feeling', 'answer.how_feeling_yesterday']
36
+
37
+ >>> from edsl.results import Results
38
+ >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
39
+ ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
40
+
41
+ >>> Results.example().relevant_columns(data_type = "flimflam")
42
+ Traceback (most recent call last):
43
+ ...
44
+ ValueError: No columns found for data type: flimflam. Available data types are: ['agent', 'answer', 'comment', 'model', 'prompt', 'question_options', 'question_text', 'question_type', 'raw_model_response', 'scenario'].
31
45
  """
32
46
  columns = [list(x.keys())[0] for x in self]
33
47
  if remove_prefix:
34
48
  columns = [column.split(".")[-1] for column in columns]
35
49
 
50
+ def get_data_type(column):
51
+ if "." in column:
52
+ return column.split(".")[0]
53
+ else:
54
+ return None
55
+
36
56
  if data_type:
57
+ all_columns = columns[:]
37
58
  columns = [
38
- column for column in columns if column.split(".")[0] == data_type
59
+ column for column in columns if get_data_type(column) == data_type
39
60
  ]
61
+ if len(columns) == 0:
62
+ all_data_types = sorted(
63
+ list(set(get_data_type(column) for column in all_columns))
64
+ )
65
+ raise ValueError(
66
+ f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
67
+ )
40
68
 
41
69
  return columns
42
70
 
43
- def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
71
+ def num_observations(self):
72
+ """Return the number of observations in the dataset.
73
+
74
+ >>> from edsl.results import Results
75
+ >>> Results.example().num_observations()
76
+ 4
77
+ """
78
+ _num_observations = None
79
+ for entry in self:
80
+ key, values = list(entry.items())[0]
81
+ if _num_observations is None:
82
+ _num_observations = len(values)
83
+ else:
84
+ if len(values) != _num_observations:
85
+ raise ValueError(
86
+ "The number of observations is not consistent across columns."
87
+ )
88
+
89
+ return _num_observations
90
+
91
+ def _make_tabular(
92
+ self, remove_prefix: bool, pretty_labels: Optional[dict] = None
93
+ ) -> tuple[list, List[list]]:
44
94
  """Turn the results into a tabular format.
45
95
 
46
96
  :param remove_prefix: Whether to remove the prefix from the column names.
@@ -53,23 +103,29 @@ class DatasetExportMixin:
53
103
  >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
54
104
  (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
55
105
  """
56
- d = {}
57
- full_header = sorted(list(self.relevant_columns()))
58
- for entry in self.data:
59
- key, list_of_values = list(entry.items())[0]
60
- d[key] = list_of_values
106
+
107
+ def create_dict_from_list_of_dicts(list_of_dicts):
108
+ for entry in list_of_dicts:
109
+ key, list_of_values = list(entry.items())[0]
110
+ yield key, list_of_values
111
+
112
+ tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
113
+
114
+ full_header = [list(x.keys())[0] for x in self]
115
+
116
+ rows = []
117
+ for i in range(self.num_observations()):
118
+ row = [tabular_repr[h][i] for h in full_header]
119
+ rows.append(row)
120
+
61
121
  if remove_prefix:
62
122
  header = [h.split(".")[-1] for h in full_header]
63
123
  else:
64
124
  header = full_header
65
- num_observations = len(list(self[0].values())[0])
66
- rows = []
67
- # rows.append(header)
68
- for i in range(num_observations):
69
- row = [d[h][i] for h in full_header]
70
- rows.append(row)
125
+
71
126
  if pretty_labels is not None:
72
127
  header = [pretty_labels.get(h, h) for h in header]
128
+
73
129
  return header, rows
74
130
 
75
131
  def print_long(self):
@@ -91,7 +147,7 @@ class DatasetExportMixin:
91
147
  self,
92
148
  pretty_labels: Optional[dict] = None,
93
149
  filename: Optional[str] = None,
94
- format: Literal["rich", "html", "markdown", "latex"] = None,
150
+ format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
95
151
  interactive: bool = False,
96
152
  split_at_dot: bool = True,
97
153
  max_rows=None,
@@ -108,6 +164,12 @@ class DatasetExportMixin:
108
164
  :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
109
165
  :param interactive: Whether to print the results interactively in a Jupyter notebook.
110
166
  :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
167
+ :param max_rows: The maximum number of rows to print.
168
+ :param tee: Whether to return the dataset.
169
+ :param iframe: Whether to display the table in an iframe.
170
+ :param iframe_height: The height of the iframe.
171
+ :param iframe_width: The width of the iframe.
172
+ :param web: Whether to display the table in a web browser.
111
173
 
112
174
  Example: Print in rich format at the terminal
113
175
 
@@ -188,91 +250,95 @@ class DatasetExportMixin:
188
250
  | Terrible |
189
251
  | OK |
190
252
  ...
253
+
254
+ >>> r.select('how_feeling').print(format='latex')
255
+ \\begin{tabular}{l}
256
+ \\toprule
257
+ ...
191
258
  """
192
259
  from IPython.display import HTML, display
193
260
  from edsl.utilities.utilities import is_notebook
194
261
 
195
- if format is None:
196
- if is_notebook():
197
- format = "html"
198
- else:
199
- format = "rich"
262
+ def _determine_format(format):
263
+ if format is None:
264
+ if is_notebook():
265
+ format = "html"
266
+ else:
267
+ format = "rich"
268
+ if format not in ["rich", "html", "markdown", "latex"]:
269
+ raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
270
+
271
+ return format
272
+
273
+ format = _determine_format(format)
200
274
 
201
275
  if pretty_labels is None:
202
276
  pretty_labels = {}
203
- else:
204
- # if the user passes in pretty_labels, we don't want to split at the dot
277
+
278
+ if pretty_labels != {}: # only split at dot if there are no pretty labels
205
279
  split_at_dot = False
206
280
 
207
- if format not in ["rich", "html", "markdown", "latex"]:
208
- raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
281
+ def _create_data():
282
+ for index, entry in enumerate(self):
283
+ key, list_of_values = list(entry.items())[0]
284
+ yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
285
+
286
+ new_data = list(_create_data())
209
287
 
210
- new_data = []
211
- for index, entry in enumerate(self):
212
- key, list_of_values = list(entry.items())[0]
213
- new_data.append({pretty_labels.get(key, key): list_of_values})
214
-
215
- if max_rows is not None:
216
- for entry in new_data:
217
- for key in entry:
218
- actual_rows = len(entry[key])
219
- entry[key] = entry[key][:max_rows]
220
-
221
288
  if format == "rich":
222
289
  from edsl.utilities.interface import print_dataset_with_rich
223
290
 
224
291
  print_dataset_with_rich(
225
292
  new_data, filename=filename, split_at_dot=split_at_dot
226
293
  )
227
- elif format == "html":
228
- notebook = is_notebook()
294
+ return self if tee else None
295
+
296
+ if format == "markdown":
297
+ from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
298
+
299
+ print_list_of_dicts_as_markdown_table(new_data, filename=filename)
300
+ return self if tee else None
301
+
302
+ if format == "latex":
303
+ df = self.to_pandas()
304
+ df.columns = [col.replace("_", " ") for col in df.columns]
305
+ latex_string = df.to_latex(index=False)
306
+
307
+ if filename is not None:
308
+ with open(filename, "w") as f:
309
+ f.write(latex_string)
310
+ else:
311
+ print(latex_string)
312
+
313
+ return self if tee else None
314
+
315
+ if format == "html":
229
316
  from edsl.utilities.interface import print_list_of_dicts_as_html_table
230
317
 
231
318
  html_source = print_list_of_dicts_as_html_table(
232
319
  new_data, interactive=interactive
233
320
  )
234
- if iframe:
235
- import html
236
321
 
237
- height = iframe_height
238
- width = iframe_width
239
- escaped_output = html.escape(html_source)
240
- # escaped_output = html_source
322
+ # if download_link:
323
+ # from IPython.display import HTML, display
324
+ # csv_file = output.getvalue()
325
+ # b64 = base64.b64encode(csv_file.encode()).decode()
326
+ # download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
327
+ # #display(HTML(download_link))
328
+
329
+ if iframe:
241
330
  iframe = f""""
242
- <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
331
+ <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
243
332
  """
244
333
  display(HTML(iframe))
245
- elif notebook:
334
+ elif is_notebook():
246
335
  display(HTML(html_source))
247
336
  else:
248
337
  from edsl.utilities.interface import view_html
249
338
 
250
339
  view_html(html_source)
251
340
 
252
- elif format == "markdown":
253
- from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
254
-
255
- print_list_of_dicts_as_markdown_table(new_data, filename=filename)
256
- elif format == "latex":
257
- df = self.to_pandas()
258
- df.columns = [col.replace("_", " ") for col in df.columns]
259
- latex_string = df.to_latex()
260
- if filename is not None:
261
- with open(filename, "w") as f:
262
- f.write(latex_string)
263
- else:
264
- return latex_string
265
- # raise NotImplementedError("Latex format not yet implemented.")
266
- # latex_string = create_latex_table_from_data(new_data, filename=filename)
267
- # if filename is None:
268
- # return latex_string
269
- # Not working quite
270
-
271
- else:
272
- raise ValueError("format not recognized.")
273
-
274
- if tee:
275
- return self
341
+ return self if tee else None
276
342
 
277
343
  def to_csv(
278
344
  self,
@@ -293,10 +359,25 @@ class DatasetExportMixin:
293
359
  >>> r = Results.example()
294
360
  >>> r.select('how_feeling').to_csv()
295
361
  'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
296
-
362
+
297
363
  >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
298
364
  'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
299
365
 
366
+ >>> import tempfile
367
+ >>> filename = tempfile.NamedTemporaryFile(delete=False).name
368
+ >>> r.select('how_feeling').to_csv(filename = filename)
369
+ >>> import os
370
+ >>> import csv
371
+ >>> with open(filename, newline='') as f:
372
+ ... reader = csv.reader(f)
373
+ ... for row in reader:
374
+ ... print(row)
375
+ ['answer.how_feeling']
376
+ ['OK']
377
+ ['Great']
378
+ ['Terrible']
379
+ ['OK']
380
+
300
381
  """
301
382
  if pretty_labels is None:
302
383
  pretty_labels = {}
@@ -316,6 +397,8 @@ class DatasetExportMixin:
316
397
  writer.writerows(rows)
317
398
 
318
399
  if download_link:
400
+ from IPython.display import HTML, display
401
+
319
402
  csv_file = output.getvalue()
320
403
  b64 = base64.b64encode(csv_file.encode()).decode()
321
404
  download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
@@ -323,6 +406,22 @@ class DatasetExportMixin:
323
406
  else:
324
407
  return output.getvalue()
325
408
 
409
+ def download_link(self, pretty_labels: Optional[dict] = None) -> str:
410
+ """Return a download link for the results.
411
+
412
+ :param pretty_labels: A dictionary of pretty labels for the columns.
413
+
414
+ >>> from edsl.results import Results
415
+ >>> r = Results.example()
416
+ >>> r.select('how_feeling').download_link()
417
+ '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
418
+ """
419
+ import base64
420
+
421
+ csv_string = self.to_csv(pretty_labels=pretty_labels)
422
+ b64 = base64.b64encode(csv_string.encode()).decode()
423
+ return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
424
+
326
425
  def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
327
426
  """Convert the results to a pandas DataFrame.
328
427
 
@@ -342,8 +441,8 @@ class DatasetExportMixin:
342
441
  csv_string = self.to_csv(remove_prefix=remove_prefix)
343
442
  csv_buffer = io.StringIO(csv_string)
344
443
  df = pd.read_csv(csv_buffer)
345
- df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
346
- return df_sorted
444
+ # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
445
+ return df
347
446
 
348
447
  def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
349
448
  """Convert the results to a list of dictionaries, one per scenario.
@@ -362,7 +461,7 @@ class DatasetExportMixin:
362
461
 
363
462
  def to_agent_list(self, remove_prefix: bool = True):
364
463
  """Convert the results to a list of dictionaries, one per agent.
365
-
464
+
366
465
  :param remove_prefix: Whether to remove the prefix from the column names.
367
466
 
368
467
  >>> from edsl.results import Results
@@ -461,7 +560,10 @@ class DatasetExportMixin:
461
560
  return list_to_return
462
561
 
463
562
  def html(
464
- self, filename: Optional[str] = None, cta: str = "Open in browser", return_link:bool=False
563
+ self,
564
+ filename: Optional[str] = None,
565
+ cta: str = "Open in browser",
566
+ return_link: bool = False,
465
567
  ):
466
568
  import os
467
569
  import tempfile
@@ -495,7 +597,7 @@ class DatasetExportMixin:
495
597
  return filename
496
598
 
497
599
  def tally(
498
- self, *fields: Optional[str], top_n:Optional[int]=None, output="dict"
600
+ self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
499
601
  ) -> Union[dict, "Dataset"]:
500
602
  """Tally the values of a field or perform a cross-tab of multiple fields.
501
603
 
@@ -503,9 +605,11 @@ class DatasetExportMixin:
503
605
 
504
606
  >>> from edsl.results import Results
505
607
  >>> r = Results.example()
506
- >>> r.select('how_feeling').tally('answer.how_feeling')
608
+ >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
507
609
  {'OK': 2, 'Great': 1, 'Terrible': 1}
508
- >>> r.select('how_feeling', 'period').tally('how_feeling', 'period')
610
+ >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
611
+ Dataset([{'value': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
612
+ >>> r.select('how_feeling', 'period').tally('how_feeling', 'period', output = "dict")
509
613
  {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
510
614
  """
511
615
  from collections import Counter
@@ -517,6 +621,8 @@ class DatasetExportMixin:
517
621
  column.split(".")[-1] for column in self.relevant_columns()
518
622
  ]
519
623
 
624
+ # breakpoint()
625
+
520
626
  if not all(
521
627
  f in self.relevant_columns() or f in relevant_columns_without_prefix
522
628
  for f in fields
@@ -543,6 +649,7 @@ class DatasetExportMixin:
543
649
  from edsl.results.Dataset import Dataset
544
650
 
545
651
  if output == "dict":
652
+ # why did I do this?
546
653
  warnings.warn(
547
654
  textwrap.dedent(
548
655
  """\
edsl/results/Result.py CHANGED
@@ -167,28 +167,30 @@ class Result(Base, UserDict):
167
167
  "answer": self.answer,
168
168
  "prompt": self.prompt,
169
169
  "raw_model_response": self.raw_model_response,
170
- # "iteration": {"iteration": self.iteration},
170
+ # "iteration": {"iteration": self.iteration},
171
171
  "question_text": question_text_dict,
172
172
  "question_options": question_options_dict,
173
173
  "question_type": question_type_dict,
174
174
  "comment": comments_dict,
175
175
  }
176
-
176
+
177
177
  def check_expression(self, expression) -> None:
178
178
  for key in self.problem_keys:
179
179
  if key in expression and not key + "." in expression:
180
- raise ValueError(f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead.")
180
+ raise ValueError(
181
+ f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead."
182
+ )
181
183
  return None
182
184
 
183
185
  def code(self):
184
186
  """Return a string of code that can be used to recreate the Result object."""
185
187
  raise NotImplementedError
186
-
188
+
187
189
  @property
188
190
  def problem_keys(self):
189
191
  """Return a list of keys that are problematic."""
190
192
  return self._problem_keys
191
-
193
+
192
194
  def _compute_combined_dict_and_problem_keys(self) -> None:
193
195
  combined = {}
194
196
  problem_keys = []
@@ -198,9 +200,9 @@ class Result(Base, UserDict):
198
200
  if key in combined:
199
201
  # The key is already in the combined dict
200
202
  problem_keys = problem_keys + [key]
201
-
203
+
202
204
  combined.update({key: sub_dict})
203
- # I *think* this allows us to do do things like "answer.how_feelling" i.e., that the evaluator can use
205
+ # I *think* this allows us to do do things like "answer.how_feelling" i.e., that the evaluator can use
204
206
  # dot notation to access the subdicts.
205
207
  self._combined_dict = combined
206
208
  self._problem_keys = problem_keys
@@ -208,7 +210,7 @@ class Result(Base, UserDict):
208
210
  @property
209
211
  def combined_dict(self) -> dict[str, Any]:
210
212
  """Return a dictionary that includes all sub_dicts, but also puts the key-value pairs in each sub_dict as a key_value pair in the combined dictionary.
211
-
213
+
212
214
  >>> r = Result.example()
213
215
  >>> r.combined_dict['how_feeling']
214
216
  'OK'
@@ -216,7 +218,7 @@ class Result(Base, UserDict):
216
218
  if self._combined_dict is None or self._problem_keys is None:
217
219
  self._compute_combined_dict_and_problem_keys()
218
220
  return self._combined_dict
219
-
221
+
220
222
  @property
221
223
  def problem_keys(self):
222
224
  """Return a list of keys that are problematic."""
@@ -267,11 +269,11 @@ class Result(Base, UserDict):
267
269
 
268
270
  def __eq__(self, other) -> bool:
269
271
  """Return True if the Result object is equal to another Result object.
270
-
272
+
271
273
  >>> r = Result.example()
272
274
  >>> r == r
273
275
  True
274
-
276
+
275
277
  """
276
278
  return self.to_dict() == other.to_dict()
277
279
 
edsl/results/Results.py CHANGED
@@ -603,24 +603,26 @@ class Results(UserList, Mixins, Base):
603
603
  values = [d[key] for d in columns]
604
604
  self = self.add_column(key, values)
605
605
  return self
606
-
606
+
607
607
  @staticmethod
608
- def _create_evaluator(result: Result, functions_dict: Optional[dict] = None) -> EvalWithCompoundTypes:
608
+ def _create_evaluator(
609
+ result: Result, functions_dict: Optional[dict] = None
610
+ ) -> EvalWithCompoundTypes:
609
611
  """Create an evaluator for the expression.
610
-
612
+
611
613
  >>> from unittest.mock import Mock
612
614
  >>> result = Mock()
613
- >>> result.combined_dict = {'how_feeling': 'OK'}
615
+ >>> result.combined_dict = {'how_feeling': 'OK'}
614
616
 
615
617
  >>> evaluator = Results._create_evaluator(result = result, functions_dict = {})
616
618
  >>> evaluator.eval("how_feeling == 'OK'")
617
619
  True
618
-
620
+
619
621
  >>> result.combined_dict = {'answer': {'how_feeling': 'OK'}}
620
622
  >>> evaluator = Results._create_evaluator(result = result, functions_dict = {})
621
623
  >>> evaluator.eval("answer.how_feeling== 'OK'")
622
624
  True
623
-
625
+
624
626
  Note that you need to refer to the answer dictionary in the expression.
625
627
 
626
628
  >>> evaluator.eval("how_feeling== 'OK'")
@@ -827,8 +829,9 @@ class Results(UserList, Mixins, Base):
827
829
  # Return the index of this key in the list_of_keys
828
830
  return items_in_order.index(single_key)
829
831
 
830
- #sorted(new_data, key=sort_by_key_order)
832
+ # sorted(new_data, key=sort_by_key_order)
831
833
  from edsl.results.Dataset import Dataset
834
+
832
835
  sorted_new_data = []
833
836
 
834
837
  # WORKS but slow
@@ -958,10 +961,10 @@ class Results(UserList, Mixins, Base):
958
961
  new_data = []
959
962
  for result in self.data:
960
963
  evaluator = self._create_evaluator(result)
961
- result.check_expression(expression) # check expression
964
+ result.check_expression(expression) # check expression
962
965
  if evaluator.eval(expression):
963
966
  new_data.append(result)
964
-
967
+
965
968
  except ValueError as e:
966
969
  raise ResultsFilterError(
967
970
  f"Error in filter. Exception:{e}",
@@ -970,14 +973,14 @@ class Results(UserList, Mixins, Base):
970
973
  )
971
974
  except Exception as e:
972
975
  raise ResultsFilterError(
973
- f"""Error in filter. Exception:{e}.""",
974
- f"""The expression you provided was: {expression}.""",
975
- """Please make sure that the expression is a valid Python expression that evaluates to a boolean.""",
976
- """For example, 'how_feeling == "Great"' is a valid expression, as is 'how_feeling in ["Great", "Terrible"]'., """,
977
- """However, 'how_feeling = "Great"' is not a valid expression.""",
978
- """See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details."""
976
+ f"""Error in filter. Exception:{e}.""",
977
+ f"""The expression you provided was: {expression}.""",
978
+ """Please make sure that the expression is a valid Python expression that evaluates to a boolean.""",
979
+ """For example, 'how_feeling == "Great"' is a valid expression, as is 'how_feeling in ["Great", "Terrible"]'., """,
980
+ """However, 'how_feeling = "Great"' is not a valid expression.""",
981
+ """See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details.""",
979
982
  )
980
-
983
+
981
984
  if len(new_data) == 0:
982
985
  import warnings
983
986
 
edsl/results/ResultsToolsMixin.py CHANGED
@@ -37,12 +37,12 @@ class ResultsToolsMixin:
37
37
  print_exceptions=False,
38
38
  ) -> dict:
39
39
  from edsl import ScenarioList
40
+ from edsl import QuestionCheckBox
40
41
 
41
42
  values = self.select(field).to_list()
42
43
  scenarios = ScenarioList.from_list("field", values).add_value(
43
44
  "context", context
44
45
  )
45
-
46
46
  q = QuestionCheckBox(
47
47
  question_text="""
48
48
  {{ context }}
edsl/scenarios/Scenario.py CHANGED
@@ -182,6 +182,19 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
182
182
  new_scenario[key] = self[key]
183
183
  return new_scenario
184
184
 
185
+ @classmethod
186
+ def from_url(cls, url: str, field_name: Optional[str] = "text") -> "Scenario":
187
+ """Creates a scenario from a URL.
188
+
189
+ :param url: The URL to create the scenario from.
190
+ :param field_name: The field name to use for the text.
191
+
192
+ """
193
+ import requests
194
+
195
+ text = requests.get(url).text
196
+ return cls({"url": url, field_name: text})
197
+
185
198
  @classmethod
186
199
  def from_image(cls, image_path: str) -> str:
187
200
  """Creates a scenario with a base64 encoding of an image.
@@ -207,6 +220,7 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
207
220
  @classmethod
208
221
  def from_pdf(cls, pdf_path):
209
222
  import fitz # PyMuPDF
223
+ from edsl import Scenario
210
224
 
211
225
  # Ensure the file exists
212
226
  if not os.path.exists(pdf_path):