edsl 0.1.30.dev5__py3-none-any.whl → 0.1.31.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/coop/utils.py +9 -1
- edsl/jobs/buckets/TokenBucket.py +3 -3
- edsl/jobs/interviews/Interview.py +10 -10
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +9 -7
- edsl/jobs/tasks/QuestionTaskCreator.py +2 -3
- edsl/language_models/LanguageModel.py +6 -1
- edsl/language_models/ModelList.py +8 -2
- edsl/language_models/registry.py +12 -0
- edsl/questions/QuestionFunctional.py +8 -7
- edsl/questions/QuestionMultipleChoice.py +14 -12
- edsl/questions/descriptors.py +6 -4
- edsl/results/DatasetExportMixin.py +174 -76
- edsl/results/Result.py +13 -11
- edsl/results/Results.py +19 -16
- edsl/results/ResultsToolsMixin.py +1 -1
- edsl/scenarios/ScenarioList.py +44 -19
- edsl/scenarios/ScenarioListExportMixin.py +1 -1
- edsl/surveys/Survey.py +11 -8
- {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dev1.dist-info}/METADATA +2 -1
- {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dev1.dist-info}/RECORD +23 -23
- {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dev1.dist-info}/LICENSE +0 -0
- {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dev1.dist-info}/WHEEL +0 -0
@@ -3,12 +3,13 @@
|
|
3
3
|
import base64
|
4
4
|
import csv
|
5
5
|
import io
|
6
|
+
import html
|
6
7
|
|
7
|
-
from typing import Literal, Optional, Union
|
8
|
+
from typing import Literal, Optional, Union, List
|
8
9
|
|
9
10
|
|
10
11
|
class DatasetExportMixin:
|
11
|
-
"""Mixin class"""
|
12
|
+
"""Mixin class for exporting Dataset objects."""
|
12
13
|
|
13
14
|
def relevant_columns(
|
14
15
|
self, data_type: Optional[str] = None, remove_prefix=False
|
@@ -28,19 +29,64 @@ class DatasetExportMixin:
|
|
28
29
|
|
29
30
|
>>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
|
30
31
|
['answer.how_feeling', 'answer.how_feeling_yesterday']
|
32
|
+
|
33
|
+
>>> from edsl.results import Results
|
34
|
+
>>> sorted(Results.example().select().relevant_columns(data_type = "model"))
|
35
|
+
['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
|
36
|
+
|
37
|
+
>>> Results.example().relevant_columns(data_type = "flimflam")
|
38
|
+
Traceback (most recent call last):
|
39
|
+
...
|
40
|
+
ValueError: No columns found for data type: flimflam. Available data types are: ['agent', 'answer', 'comment', 'model', 'prompt', 'question_options', 'question_text', 'question_type', 'raw_model_response', 'scenario'].
|
31
41
|
"""
|
32
42
|
columns = [list(x.keys())[0] for x in self]
|
33
43
|
if remove_prefix:
|
34
44
|
columns = [column.split(".")[-1] for column in columns]
|
35
45
|
|
46
|
+
def get_data_type(column):
|
47
|
+
if "." in column:
|
48
|
+
return column.split(".")[0]
|
49
|
+
else:
|
50
|
+
return None
|
51
|
+
|
36
52
|
if data_type:
|
53
|
+
all_columns = columns[:]
|
37
54
|
columns = [
|
38
|
-
column for column in columns if column
|
55
|
+
column for column in columns if get_data_type(column) == data_type
|
39
56
|
]
|
57
|
+
if len(columns) == 0:
|
58
|
+
all_data_types = sorted(
|
59
|
+
list(set(get_data_type(column) for column in all_columns))
|
60
|
+
)
|
61
|
+
raise ValueError(
|
62
|
+
f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
|
63
|
+
)
|
40
64
|
|
41
65
|
return columns
|
42
66
|
|
43
|
-
def
|
67
|
+
def num_observations(self):
|
68
|
+
"""Return the number of observations in the dataset.
|
69
|
+
|
70
|
+
>>> from edsl.results import Results
|
71
|
+
>>> Results.example().num_observations()
|
72
|
+
4
|
73
|
+
"""
|
74
|
+
_num_observations = None
|
75
|
+
for entry in self:
|
76
|
+
key, values = list(entry.items())[0]
|
77
|
+
if _num_observations is None:
|
78
|
+
_num_observations = len(values)
|
79
|
+
else:
|
80
|
+
if len(values) != _num_observations:
|
81
|
+
raise ValueError(
|
82
|
+
"The number of observations is not consistent across columns."
|
83
|
+
)
|
84
|
+
|
85
|
+
return _num_observations
|
86
|
+
|
87
|
+
def _make_tabular(
|
88
|
+
self, remove_prefix: bool, pretty_labels: Optional[dict] = None
|
89
|
+
) -> tuple[list, List[list]]:
|
44
90
|
"""Turn the results into a tabular format.
|
45
91
|
|
46
92
|
:param remove_prefix: Whether to remove the prefix from the column names.
|
@@ -53,23 +99,29 @@ class DatasetExportMixin:
|
|
53
99
|
>>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
|
54
100
|
(['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
|
55
101
|
"""
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
102
|
+
|
103
|
+
def create_dict_from_list_of_dicts(list_of_dicts):
|
104
|
+
for entry in list_of_dicts:
|
105
|
+
key, list_of_values = list(entry.items())[0]
|
106
|
+
yield key, list_of_values
|
107
|
+
|
108
|
+
tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
|
109
|
+
|
110
|
+
full_header = [list(x.keys())[0] for x in self]
|
111
|
+
|
112
|
+
rows = []
|
113
|
+
for i in range(self.num_observations()):
|
114
|
+
row = [tabular_repr[h][i] for h in full_header]
|
115
|
+
rows.append(row)
|
116
|
+
|
61
117
|
if remove_prefix:
|
62
118
|
header = [h.split(".")[-1] for h in full_header]
|
63
119
|
else:
|
64
120
|
header = full_header
|
65
|
-
|
66
|
-
rows = []
|
67
|
-
# rows.append(header)
|
68
|
-
for i in range(num_observations):
|
69
|
-
row = [d[h][i] for h in full_header]
|
70
|
-
rows.append(row)
|
121
|
+
|
71
122
|
if pretty_labels is not None:
|
72
123
|
header = [pretty_labels.get(h, h) for h in header]
|
124
|
+
|
73
125
|
return header, rows
|
74
126
|
|
75
127
|
def print_long(self):
|
@@ -91,7 +143,7 @@ class DatasetExportMixin:
|
|
91
143
|
self,
|
92
144
|
pretty_labels: Optional[dict] = None,
|
93
145
|
filename: Optional[str] = None,
|
94
|
-
format: Literal["rich", "html", "markdown", "latex"] = None,
|
146
|
+
format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
|
95
147
|
interactive: bool = False,
|
96
148
|
split_at_dot: bool = True,
|
97
149
|
max_rows=None,
|
@@ -108,6 +160,12 @@ class DatasetExportMixin:
|
|
108
160
|
:param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
|
109
161
|
:param interactive: Whether to print the results interactively in a Jupyter notebook.
|
110
162
|
:param split_at_dot: Whether to split the column names at the last dot w/ a newline.
|
163
|
+
:param max_rows: The maximum number of rows to print.
|
164
|
+
:param tee: Whether to return the dataset.
|
165
|
+
:param iframe: Whether to display the table in an iframe.
|
166
|
+
:param iframe_height: The height of the iframe.
|
167
|
+
:param iframe_width: The width of the iframe.
|
168
|
+
:param web: Whether to display the table in a web browser.
|
111
169
|
|
112
170
|
Example: Print in rich format at the terminal
|
113
171
|
|
@@ -188,91 +246,95 @@ class DatasetExportMixin:
|
|
188
246
|
| Terrible |
|
189
247
|
| OK |
|
190
248
|
...
|
249
|
+
|
250
|
+
>>> r.select('how_feeling').print(format='latex')
|
251
|
+
\\begin{tabular}{l}
|
252
|
+
\\toprule
|
253
|
+
...
|
191
254
|
"""
|
192
255
|
from IPython.display import HTML, display
|
193
256
|
from edsl.utilities.utilities import is_notebook
|
194
257
|
|
195
|
-
|
196
|
-
if
|
197
|
-
|
198
|
-
|
199
|
-
|
258
|
+
def _determine_format(format):
|
259
|
+
if format is None:
|
260
|
+
if is_notebook():
|
261
|
+
format = "html"
|
262
|
+
else:
|
263
|
+
format = "rich"
|
264
|
+
if format not in ["rich", "html", "markdown", "latex"]:
|
265
|
+
raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
|
266
|
+
|
267
|
+
return format
|
268
|
+
|
269
|
+
format = _determine_format(format)
|
200
270
|
|
201
271
|
if pretty_labels is None:
|
202
272
|
pretty_labels = {}
|
203
|
-
|
204
|
-
|
273
|
+
|
274
|
+
if pretty_labels != {}: # only split at dot if there are no pretty labels
|
205
275
|
split_at_dot = False
|
206
276
|
|
207
|
-
|
208
|
-
|
277
|
+
def _create_data():
|
278
|
+
for index, entry in enumerate(self):
|
279
|
+
key, list_of_values = list(entry.items())[0]
|
280
|
+
yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
|
281
|
+
|
282
|
+
new_data = list(_create_data())
|
209
283
|
|
210
|
-
new_data = []
|
211
|
-
for index, entry in enumerate(self):
|
212
|
-
key, list_of_values = list(entry.items())[0]
|
213
|
-
new_data.append({pretty_labels.get(key, key): list_of_values})
|
214
|
-
|
215
|
-
if max_rows is not None:
|
216
|
-
for entry in new_data:
|
217
|
-
for key in entry:
|
218
|
-
actual_rows = len(entry[key])
|
219
|
-
entry[key] = entry[key][:max_rows]
|
220
|
-
|
221
284
|
if format == "rich":
|
222
285
|
from edsl.utilities.interface import print_dataset_with_rich
|
223
286
|
|
224
287
|
print_dataset_with_rich(
|
225
288
|
new_data, filename=filename, split_at_dot=split_at_dot
|
226
289
|
)
|
227
|
-
|
228
|
-
|
290
|
+
return self if tee else None
|
291
|
+
|
292
|
+
if format == "markdown":
|
293
|
+
from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
|
294
|
+
|
295
|
+
print_list_of_dicts_as_markdown_table(new_data, filename=filename)
|
296
|
+
return self if tee else None
|
297
|
+
|
298
|
+
if format == "latex":
|
299
|
+
df = self.to_pandas()
|
300
|
+
df.columns = [col.replace("_", " ") for col in df.columns]
|
301
|
+
latex_string = df.to_latex(index=False)
|
302
|
+
|
303
|
+
if filename is not None:
|
304
|
+
with open(filename, "w") as f:
|
305
|
+
f.write(latex_string)
|
306
|
+
else:
|
307
|
+
print(latex_string)
|
308
|
+
|
309
|
+
return self if tee else None
|
310
|
+
|
311
|
+
if format == "html":
|
229
312
|
from edsl.utilities.interface import print_list_of_dicts_as_html_table
|
230
313
|
|
231
314
|
html_source = print_list_of_dicts_as_html_table(
|
232
315
|
new_data, interactive=interactive
|
233
316
|
)
|
234
|
-
if iframe:
|
235
|
-
import html
|
236
317
|
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
318
|
+
# if download_link:
|
319
|
+
# from IPython.display import HTML, display
|
320
|
+
# csv_file = output.getvalue()
|
321
|
+
# b64 = base64.b64encode(csv_file.encode()).decode()
|
322
|
+
# download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
|
323
|
+
# #display(HTML(download_link))
|
324
|
+
|
325
|
+
if iframe:
|
241
326
|
iframe = f""""
|
242
|
-
<iframe srcdoc="{
|
327
|
+
<iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
|
243
328
|
"""
|
244
329
|
display(HTML(iframe))
|
245
|
-
elif
|
330
|
+
elif is_notebook():
|
246
331
|
display(HTML(html_source))
|
247
332
|
else:
|
248
333
|
from edsl.utilities.interface import view_html
|
249
334
|
|
250
335
|
view_html(html_source)
|
251
336
|
|
252
|
-
|
253
|
-
from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
|
254
|
-
|
255
|
-
print_list_of_dicts_as_markdown_table(new_data, filename=filename)
|
256
|
-
elif format == "latex":
|
257
|
-
df = self.to_pandas()
|
258
|
-
df.columns = [col.replace("_", " ") for col in df.columns]
|
259
|
-
latex_string = df.to_latex()
|
260
|
-
if filename is not None:
|
261
|
-
with open(filename, "w") as f:
|
262
|
-
f.write(latex_string)
|
263
|
-
else:
|
264
|
-
return latex_string
|
265
|
-
# raise NotImplementedError("Latex format not yet implemented.")
|
266
|
-
# latex_string = create_latex_table_from_data(new_data, filename=filename)
|
267
|
-
# if filename is None:
|
268
|
-
# return latex_string
|
269
|
-
# Not working quite
|
270
|
-
|
271
|
-
else:
|
272
|
-
raise ValueError("format not recognized.")
|
273
|
-
|
274
|
-
if tee:
|
275
|
-
return self
|
337
|
+
return self if tee else None
|
276
338
|
|
277
339
|
def to_csv(
|
278
340
|
self,
|
@@ -293,10 +355,25 @@ class DatasetExportMixin:
|
|
293
355
|
>>> r = Results.example()
|
294
356
|
>>> r.select('how_feeling').to_csv()
|
295
357
|
'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
|
296
|
-
|
358
|
+
|
297
359
|
>>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
|
298
360
|
'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
|
299
361
|
|
362
|
+
>>> import tempfile
|
363
|
+
>>> filename = tempfile.NamedTemporaryFile(delete=False).name
|
364
|
+
>>> r.select('how_feeling').to_csv(filename = filename)
|
365
|
+
>>> import os
|
366
|
+
>>> import csv
|
367
|
+
>>> with open(filename, newline='') as f:
|
368
|
+
... reader = csv.reader(f)
|
369
|
+
... for row in reader:
|
370
|
+
... print(row)
|
371
|
+
['answer.how_feeling']
|
372
|
+
['OK']
|
373
|
+
['Great']
|
374
|
+
['Terrible']
|
375
|
+
['OK']
|
376
|
+
|
300
377
|
"""
|
301
378
|
if pretty_labels is None:
|
302
379
|
pretty_labels = {}
|
@@ -316,6 +393,8 @@ class DatasetExportMixin:
|
|
316
393
|
writer.writerows(rows)
|
317
394
|
|
318
395
|
if download_link:
|
396
|
+
from IPython.display import HTML, display
|
397
|
+
|
319
398
|
csv_file = output.getvalue()
|
320
399
|
b64 = base64.b64encode(csv_file.encode()).decode()
|
321
400
|
download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
|
@@ -323,6 +402,22 @@ class DatasetExportMixin:
|
|
323
402
|
else:
|
324
403
|
return output.getvalue()
|
325
404
|
|
405
|
+
def download_link(self, pretty_labels: Optional[dict] = None) -> str:
|
406
|
+
"""Return a download link for the results.
|
407
|
+
|
408
|
+
:param pretty_labels: A dictionary of pretty labels for the columns.
|
409
|
+
|
410
|
+
>>> from edsl.results import Results
|
411
|
+
>>> r = Results.example()
|
412
|
+
>>> r.select('how_feeling').download_link()
|
413
|
+
'<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
|
414
|
+
"""
|
415
|
+
import base64
|
416
|
+
|
417
|
+
csv_string = self.to_csv(pretty_labels=pretty_labels)
|
418
|
+
b64 = base64.b64encode(csv_string.encode()).decode()
|
419
|
+
return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
|
420
|
+
|
326
421
|
def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
|
327
422
|
"""Convert the results to a pandas DataFrame.
|
328
423
|
|
@@ -342,8 +437,8 @@ class DatasetExportMixin:
|
|
342
437
|
csv_string = self.to_csv(remove_prefix=remove_prefix)
|
343
438
|
csv_buffer = io.StringIO(csv_string)
|
344
439
|
df = pd.read_csv(csv_buffer)
|
345
|
-
df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
|
346
|
-
return
|
440
|
+
# df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
|
441
|
+
return df
|
347
442
|
|
348
443
|
def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
|
349
444
|
"""Convert the results to a list of dictionaries, one per scenario.
|
@@ -362,7 +457,7 @@ class DatasetExportMixin:
|
|
362
457
|
|
363
458
|
def to_agent_list(self, remove_prefix: bool = True):
|
364
459
|
"""Convert the results to a list of dictionaries, one per agent.
|
365
|
-
|
460
|
+
|
366
461
|
:param remove_prefix: Whether to remove the prefix from the column names.
|
367
462
|
|
368
463
|
>>> from edsl.results import Results
|
@@ -461,7 +556,10 @@ class DatasetExportMixin:
|
|
461
556
|
return list_to_return
|
462
557
|
|
463
558
|
def html(
|
464
|
-
self,
|
559
|
+
self,
|
560
|
+
filename: Optional[str] = None,
|
561
|
+
cta: str = "Open in browser",
|
562
|
+
return_link: bool = False,
|
465
563
|
):
|
466
564
|
import os
|
467
565
|
import tempfile
|
@@ -495,7 +593,7 @@ class DatasetExportMixin:
|
|
495
593
|
return filename
|
496
594
|
|
497
595
|
def tally(
|
498
|
-
self, *fields: Optional[str], top_n:Optional[int]=None, output="dict"
|
596
|
+
self, *fields: Optional[str], top_n: Optional[int] = None, output="dict"
|
499
597
|
) -> Union[dict, "Dataset"]:
|
500
598
|
"""Tally the values of a field or perform a cross-tab of multiple fields.
|
501
599
|
|
edsl/results/Result.py
CHANGED
@@ -167,28 +167,30 @@ class Result(Base, UserDict):
|
|
167
167
|
"answer": self.answer,
|
168
168
|
"prompt": self.prompt,
|
169
169
|
"raw_model_response": self.raw_model_response,
|
170
|
-
# "iteration": {"iteration": self.iteration},
|
170
|
+
# "iteration": {"iteration": self.iteration},
|
171
171
|
"question_text": question_text_dict,
|
172
172
|
"question_options": question_options_dict,
|
173
173
|
"question_type": question_type_dict,
|
174
174
|
"comment": comments_dict,
|
175
175
|
}
|
176
|
-
|
176
|
+
|
177
177
|
def check_expression(self, expression) -> None:
|
178
178
|
for key in self.problem_keys:
|
179
179
|
if key in expression and not key + "." in expression:
|
180
|
-
raise ValueError(
|
180
|
+
raise ValueError(
|
181
|
+
f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead."
|
182
|
+
)
|
181
183
|
return None
|
182
184
|
|
183
185
|
def code(self):
|
184
186
|
"""Return a string of code that can be used to recreate the Result object."""
|
185
187
|
raise NotImplementedError
|
186
|
-
|
188
|
+
|
187
189
|
@property
|
188
190
|
def problem_keys(self):
|
189
191
|
"""Return a list of keys that are problematic."""
|
190
192
|
return self._problem_keys
|
191
|
-
|
193
|
+
|
192
194
|
def _compute_combined_dict_and_problem_keys(self) -> None:
|
193
195
|
combined = {}
|
194
196
|
problem_keys = []
|
@@ -198,9 +200,9 @@ class Result(Base, UserDict):
|
|
198
200
|
if key in combined:
|
199
201
|
# The key is already in the combined dict
|
200
202
|
problem_keys = problem_keys + [key]
|
201
|
-
|
203
|
+
|
202
204
|
combined.update({key: sub_dict})
|
203
|
-
# I *think* this allows us to do do things like "answer.how_feelling" i.e., that the evaluator can use
|
205
|
+
# I *think* this allows us to do do things like "answer.how_feelling" i.e., that the evaluator can use
|
204
206
|
# dot notation to access the subdicts.
|
205
207
|
self._combined_dict = combined
|
206
208
|
self._problem_keys = problem_keys
|
@@ -208,7 +210,7 @@ class Result(Base, UserDict):
|
|
208
210
|
@property
|
209
211
|
def combined_dict(self) -> dict[str, Any]:
|
210
212
|
"""Return a dictionary that includes all sub_dicts, but also puts the key-value pairs in each sub_dict as a key_value pair in the combined dictionary.
|
211
|
-
|
213
|
+
|
212
214
|
>>> r = Result.example()
|
213
215
|
>>> r.combined_dict['how_feeling']
|
214
216
|
'OK'
|
@@ -216,7 +218,7 @@ class Result(Base, UserDict):
|
|
216
218
|
if self._combined_dict is None or self._problem_keys is None:
|
217
219
|
self._compute_combined_dict_and_problem_keys()
|
218
220
|
return self._combined_dict
|
219
|
-
|
221
|
+
|
220
222
|
@property
|
221
223
|
def problem_keys(self):
|
222
224
|
"""Return a list of keys that are problematic."""
|
@@ -267,11 +269,11 @@ class Result(Base, UserDict):
|
|
267
269
|
|
268
270
|
def __eq__(self, other) -> bool:
|
269
271
|
"""Return True if the Result object is equal to another Result object.
|
270
|
-
|
272
|
+
|
271
273
|
>>> r = Result.example()
|
272
274
|
>>> r == r
|
273
275
|
True
|
274
|
-
|
276
|
+
|
275
277
|
"""
|
276
278
|
return self.to_dict() == other.to_dict()
|
277
279
|
|
edsl/results/Results.py
CHANGED
@@ -603,24 +603,26 @@ class Results(UserList, Mixins, Base):
|
|
603
603
|
values = [d[key] for d in columns]
|
604
604
|
self = self.add_column(key, values)
|
605
605
|
return self
|
606
|
-
|
606
|
+
|
607
607
|
@staticmethod
|
608
|
-
def _create_evaluator(
|
608
|
+
def _create_evaluator(
|
609
|
+
result: Result, functions_dict: Optional[dict] = None
|
610
|
+
) -> EvalWithCompoundTypes:
|
609
611
|
"""Create an evaluator for the expression.
|
610
|
-
|
612
|
+
|
611
613
|
>>> from unittest.mock import Mock
|
612
614
|
>>> result = Mock()
|
613
|
-
>>> result.combined_dict = {'how_feeling': 'OK'}
|
615
|
+
>>> result.combined_dict = {'how_feeling': 'OK'}
|
614
616
|
|
615
617
|
>>> evaluator = Results._create_evaluator(result = result, functions_dict = {})
|
616
618
|
>>> evaluator.eval("how_feeling == 'OK'")
|
617
619
|
True
|
618
|
-
|
620
|
+
|
619
621
|
>>> result.combined_dict = {'answer': {'how_feeling': 'OK'}}
|
620
622
|
>>> evaluator = Results._create_evaluator(result = result, functions_dict = {})
|
621
623
|
>>> evaluator.eval("answer.how_feeling== 'OK'")
|
622
624
|
True
|
623
|
-
|
625
|
+
|
624
626
|
Note that you need to refer to the answer dictionary in the expression.
|
625
627
|
|
626
628
|
>>> evaluator.eval("how_feeling== 'OK'")
|
@@ -827,8 +829,9 @@ class Results(UserList, Mixins, Base):
|
|
827
829
|
# Return the index of this key in the list_of_keys
|
828
830
|
return items_in_order.index(single_key)
|
829
831
|
|
830
|
-
#sorted(new_data, key=sort_by_key_order)
|
832
|
+
# sorted(new_data, key=sort_by_key_order)
|
831
833
|
from edsl.results.Dataset import Dataset
|
834
|
+
|
832
835
|
sorted_new_data = []
|
833
836
|
|
834
837
|
# WORKS but slow
|
@@ -958,10 +961,10 @@ class Results(UserList, Mixins, Base):
|
|
958
961
|
new_data = []
|
959
962
|
for result in self.data:
|
960
963
|
evaluator = self._create_evaluator(result)
|
961
|
-
result.check_expression(expression)
|
964
|
+
result.check_expression(expression) # check expression
|
962
965
|
if evaluator.eval(expression):
|
963
966
|
new_data.append(result)
|
964
|
-
|
967
|
+
|
965
968
|
except ValueError as e:
|
966
969
|
raise ResultsFilterError(
|
967
970
|
f"Error in filter. Exception:{e}",
|
@@ -970,14 +973,14 @@ class Results(UserList, Mixins, Base):
|
|
970
973
|
)
|
971
974
|
except Exception as e:
|
972
975
|
raise ResultsFilterError(
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
976
|
+
f"""Error in filter. Exception:{e}.""",
|
977
|
+
f"""The expression you provided was: {expression}.""",
|
978
|
+
"""Please make sure that the expression is a valid Python expression that evaluates to a boolean.""",
|
979
|
+
"""For example, 'how_feeling == "Great"' is a valid expression, as is 'how_feeling in ["Great", "Terrible"]'., """,
|
980
|
+
"""However, 'how_feeling = "Great"' is not a valid expression.""",
|
981
|
+
"""See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details.""",
|
979
982
|
)
|
980
|
-
|
983
|
+
|
981
984
|
if len(new_data) == 0:
|
982
985
|
import warnings
|
983
986
|
|
@@ -37,12 +37,12 @@ class ResultsToolsMixin:
|
|
37
37
|
print_exceptions=False,
|
38
38
|
) -> dict:
|
39
39
|
from edsl import ScenarioList
|
40
|
+
from edsl import QuestionCheckBox
|
40
41
|
|
41
42
|
values = self.select(field).to_list()
|
42
43
|
scenarios = ScenarioList.from_list("field", values).add_value(
|
43
44
|
"context", context
|
44
45
|
)
|
45
|
-
|
46
46
|
q = QuestionCheckBox(
|
47
47
|
question_text="""
|
48
48
|
{{ context }}
|