edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +107 -30
- edsl/BaseDiff.py +260 -0
- edsl/__init__.py +25 -21
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +103 -46
- edsl/agents/AgentList.py +97 -13
- edsl/agents/Invigilator.py +23 -10
- edsl/agents/InvigilatorBase.py +19 -14
- edsl/agents/PromptConstructionMixin.py +342 -100
- edsl/agents/descriptors.py +5 -2
- edsl/base/Base.py +289 -0
- edsl/config.py +2 -1
- edsl/conjure/AgentConstructionMixin.py +152 -0
- edsl/conjure/Conjure.py +56 -0
- edsl/conjure/InputData.py +659 -0
- edsl/conjure/InputDataCSV.py +48 -0
- edsl/conjure/InputDataMixinQuestionStats.py +182 -0
- edsl/conjure/InputDataPyRead.py +91 -0
- edsl/conjure/InputDataSPSS.py +8 -0
- edsl/conjure/InputDataStata.py +8 -0
- edsl/conjure/QuestionOptionMixin.py +76 -0
- edsl/conjure/QuestionTypeMixin.py +23 -0
- edsl/conjure/RawQuestion.py +65 -0
- edsl/conjure/SurveyResponses.py +7 -0
- edsl/conjure/__init__.py +9 -4
- edsl/conjure/examples/placeholder.txt +0 -0
- edsl/conjure/naming_utilities.py +263 -0
- edsl/conjure/utilities.py +165 -28
- edsl/conversation/Conversation.py +238 -0
- edsl/conversation/car_buying.py +58 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/coop.py +337 -121
- edsl/coop/utils.py +56 -70
- edsl/data/Cache.py +74 -22
- edsl/data/CacheHandler.py +10 -9
- edsl/data/SQLiteDict.py +11 -3
- edsl/inference_services/AnthropicService.py +1 -0
- edsl/inference_services/DeepInfraService.py +20 -13
- edsl/inference_services/GoogleService.py +7 -1
- edsl/inference_services/InferenceServicesCollection.py +33 -7
- edsl/inference_services/OpenAIService.py +17 -10
- edsl/inference_services/models_available_cache.py +69 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/Answers.py +15 -1
- edsl/jobs/Jobs.py +322 -73
- edsl/jobs/buckets/BucketCollection.py +9 -3
- edsl/jobs/buckets/ModelBuckets.py +4 -2
- edsl/jobs/buckets/TokenBucket.py +1 -2
- edsl/jobs/interviews/Interview.py +7 -10
- edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
- edsl/jobs/interviews/retry_management.py +4 -4
- edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
- edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
- edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
- edsl/jobs/tasks/TaskHistory.py +4 -3
- edsl/language_models/LanguageModel.py +42 -55
- edsl/language_models/ModelList.py +96 -0
- edsl/language_models/registry.py +14 -0
- edsl/language_models/repair.py +97 -25
- edsl/notebooks/Notebook.py +157 -32
- edsl/prompts/Prompt.py +31 -19
- edsl/questions/QuestionBase.py +145 -23
- edsl/questions/QuestionBudget.py +5 -6
- edsl/questions/QuestionCheckBox.py +7 -3
- edsl/questions/QuestionExtract.py +5 -3
- edsl/questions/QuestionFreeText.py +3 -3
- edsl/questions/QuestionFunctional.py +0 -3
- edsl/questions/QuestionList.py +3 -4
- edsl/questions/QuestionMultipleChoice.py +16 -8
- edsl/questions/QuestionNumerical.py +4 -3
- edsl/questions/QuestionRank.py +5 -3
- edsl/questions/__init__.py +4 -3
- edsl/questions/descriptors.py +9 -4
- edsl/questions/question_registry.py +27 -31
- edsl/questions/settings.py +1 -1
- edsl/results/Dataset.py +31 -0
- edsl/results/DatasetExportMixin.py +493 -0
- edsl/results/Result.py +42 -82
- edsl/results/Results.py +178 -66
- edsl/results/ResultsDBMixin.py +10 -9
- edsl/results/ResultsExportMixin.py +23 -507
- edsl/results/ResultsGGMixin.py +3 -3
- edsl/results/ResultsToolsMixin.py +9 -9
- edsl/scenarios/FileStore.py +140 -0
- edsl/scenarios/Scenario.py +59 -6
- edsl/scenarios/ScenarioList.py +138 -52
- edsl/scenarios/ScenarioListExportMixin.py +32 -0
- edsl/scenarios/ScenarioListPdfMixin.py +2 -1
- edsl/scenarios/__init__.py +1 -0
- edsl/study/ObjectEntry.py +173 -0
- edsl/study/ProofOfWork.py +113 -0
- edsl/study/SnapShot.py +73 -0
- edsl/study/Study.py +498 -0
- edsl/study/__init__.py +4 -0
- edsl/surveys/MemoryPlan.py +11 -4
- edsl/surveys/Survey.py +124 -37
- edsl/surveys/SurveyExportMixin.py +25 -5
- edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
- edsl/tools/plotting.py +4 -2
- edsl/utilities/__init__.py +21 -20
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/gcp_bucket/simple_example.py +9 -0
- edsl/utilities/interface.py +90 -73
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/utilities.py +59 -6
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
- edsl-0.1.29.dist-info/RECORD +203 -0
- edsl/conjure/RawResponseColumn.py +0 -327
- edsl/conjure/SurveyBuilder.py +0 -308
- edsl/conjure/SurveyBuilderCSV.py +0 -78
- edsl/conjure/SurveyBuilderSPSS.py +0 -118
- edsl/data/RemoteDict.py +0 -103
- edsl-0.1.27.dev2.dist-info/RECORD +0 -172
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
edsl/results/ResultsDBMixin.py
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
"""Mixin for working with SQLite respresentation of a 'Results' object."""
|
2
2
|
|
3
|
-
import pandas as pd
|
4
3
|
import sqlite3
|
5
|
-
from sqlalchemy import create_engine
|
6
4
|
from enum import Enum
|
7
5
|
from typing import Literal, Union, Optional
|
8
6
|
|
@@ -92,6 +90,8 @@ class ResultsDBMixin:
|
|
92
90
|
conn.commit()
|
93
91
|
return conn
|
94
92
|
elif shape == SQLDataShape.WIDE:
|
93
|
+
from sqlalchemy import create_engine
|
94
|
+
|
95
95
|
engine = create_engine("sqlite:///:memory:")
|
96
96
|
df = self.to_pandas(remove_prefix=remove_prefix)
|
97
97
|
df.to_sql("self", engine, index=False, if_exists="replace")
|
@@ -121,7 +121,7 @@ class ResultsDBMixin:
|
|
121
121
|
to_list=False,
|
122
122
|
to_latex=False,
|
123
123
|
filename: Optional[str] = None,
|
124
|
-
) -> Union[pd.DataFrame, str]:
|
124
|
+
) -> Union["pd.DataFrame", str]:
|
125
125
|
"""Execute a SQL query and return the results as a DataFrame.
|
126
126
|
|
127
127
|
:param query: The SQL query to execute
|
@@ -136,12 +136,9 @@ class ResultsDBMixin:
|
|
136
136
|
|
137
137
|
>>> from edsl.results import Results
|
138
138
|
>>> r = Results.example()
|
139
|
-
>>> r.sql("select data_type, key, value from self where data_type = 'answer' limit 3", shape="long")
|
140
|
-
|
141
|
-
|
142
|
-
1 answer how_feeling_comment This is a real survey response from a human.
|
143
|
-
2 answer how_feeling_yesterday Great
|
144
|
-
|
139
|
+
>>> d = r.sql("select data_type, key, value from self where data_type = 'answer' limit 3", shape="long")
|
140
|
+
>>> list(d['value'])
|
141
|
+
['OK', 'This is a real survey response from a human.', 'Great']
|
145
142
|
|
146
143
|
We can also return the data in wide format.
|
147
144
|
Note the use of single quotes to escape the column names, as required by sql.
|
@@ -154,6 +151,8 @@ class ResultsDBMixin:
|
|
154
151
|
2 Terrible
|
155
152
|
3 OK
|
156
153
|
"""
|
154
|
+
import pandas as pd
|
155
|
+
|
157
156
|
shape_enum = self._get_shape_enum(shape)
|
158
157
|
|
159
158
|
conn = self._db(shape=shape_enum, remove_prefix=remove_prefix)
|
@@ -208,6 +207,8 @@ class ResultsDBMixin:
|
|
208
207
|
...
|
209
208
|
<BLANKLINE>
|
210
209
|
"""
|
210
|
+
import pandas as pd
|
211
|
+
|
211
212
|
shape_enum = self._get_shape_enum(shape)
|
212
213
|
conn = self._db(shape=shape_enum, remove_prefix=remove_prefix)
|
213
214
|
|
@@ -1,527 +1,43 @@
|
|
1
1
|
"""Mixin class for exporting results."""
|
2
2
|
|
3
|
-
import base64
|
4
|
-
import csv
|
5
|
-
import io
|
6
|
-
import random
|
7
3
|
from functools import wraps
|
4
|
+
from typing import Literal, Optional, Union
|
8
5
|
|
9
|
-
from
|
6
|
+
from edsl.results.DatasetExportMixin import DatasetExportMixin
|
10
7
|
|
11
|
-
from edsl.utilities.utilities import is_notebook
|
12
8
|
|
13
|
-
|
14
|
-
|
15
|
-
from edsl.utilities.interface import (
|
16
|
-
print_dataset_with_rich,
|
17
|
-
print_list_of_dicts_as_html_table,
|
18
|
-
print_list_of_dicts_as_markdown_table,
|
19
|
-
create_latex_table_from_data,
|
20
|
-
)
|
21
|
-
|
22
|
-
|
23
|
-
class ResultsExportMixin:
|
24
|
-
"""Mixin class for exporting Results objects."""
|
25
|
-
|
26
|
-
def _convert_decorator(func):
|
27
|
-
"""Convert the Results object to a Dataset object before calling the function."""
|
28
|
-
|
29
|
-
@wraps(func)
|
30
|
-
def wrapper(self, *args, **kwargs):
|
31
|
-
"""Return the function with the Results object converted to a Dataset object."""
|
32
|
-
if self.__class__.__name__ == "Results":
|
33
|
-
return func(self.select(), *args, **kwargs)
|
34
|
-
elif self.__class__.__name__ == "Dataset":
|
35
|
-
return func(self, *args, **kwargs)
|
36
|
-
else:
|
37
|
-
raise Exception(
|
38
|
-
f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
|
39
|
-
)
|
40
|
-
|
41
|
-
return wrapper
|
42
|
-
|
43
|
-
@_convert_decorator
|
44
|
-
def relevant_columns(
|
45
|
-
self, data_type: Optional[str] = None, remove_prefix=False
|
46
|
-
) -> list:
|
47
|
-
"""Return the set of keys that are present in the dataset.
|
48
|
-
|
49
|
-
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
50
|
-
>>> d.relevant_columns()
|
51
|
-
['a.b']
|
52
|
-
|
53
|
-
>>> d.relevant_columns(remove_prefix=True)
|
54
|
-
['b']
|
55
|
-
|
56
|
-
>>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
|
57
|
-
['answer.how_feeling', 'answer.how_feeling_yesterday']
|
58
|
-
"""
|
59
|
-
columns = [list(x.keys())[0] for x in self]
|
60
|
-
# columns = set([list(result.keys())[0] for result in self.data])
|
61
|
-
if remove_prefix:
|
62
|
-
columns = [column.split(".")[-1] for column in columns]
|
63
|
-
|
64
|
-
if data_type:
|
65
|
-
columns = [
|
66
|
-
column for column in columns if column.split(".")[0] == data_type
|
67
|
-
]
|
68
|
-
|
69
|
-
return columns
|
70
|
-
|
71
|
-
# @_convert_decorator
|
72
|
-
def sample(self, n: int) -> "Results":
|
73
|
-
"""Return a random sample of the results.
|
74
|
-
|
75
|
-
:param n: The number of samples to return.
|
76
|
-
|
77
|
-
>>> from edsl.results import Results
|
78
|
-
>>> r = Results.example()
|
79
|
-
>>> len(r.sample(2))
|
80
|
-
2
|
81
|
-
"""
|
82
|
-
indices = None
|
83
|
-
|
84
|
-
for entry in self:
|
85
|
-
key, values = list(entry.items())[0]
|
86
|
-
if indices is None: # gets the indices for the first time
|
87
|
-
indices = list(range(len(values)))
|
88
|
-
sampled_indices = random.sample(indices, n)
|
89
|
-
if n > len(indices):
|
90
|
-
raise ValueError(
|
91
|
-
f"Cannot sample {n} items from a list of length {len(indices)}."
|
92
|
-
)
|
93
|
-
entry[key] = [values[i] for i in sampled_indices]
|
94
|
-
|
95
|
-
return self
|
96
|
-
|
97
|
-
@_convert_decorator
|
98
|
-
def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
|
99
|
-
"""Turn the results into a tabular format.
|
100
|
-
|
101
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
102
|
-
|
103
|
-
>>> from edsl.results import Results
|
104
|
-
>>> r = Results.example()
|
105
|
-
>>> r.select('how_feeling')._make_tabular(remove_prefix = True)
|
106
|
-
(['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
|
107
|
-
|
108
|
-
>>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
|
109
|
-
(['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
|
110
|
-
"""
|
111
|
-
d = {}
|
112
|
-
full_header = sorted(list(self.relevant_columns()))
|
113
|
-
for entry in self.data:
|
114
|
-
key, list_of_values = list(entry.items())[0]
|
115
|
-
d[key] = list_of_values
|
116
|
-
if remove_prefix:
|
117
|
-
header = [h.split(".")[-1] for h in full_header]
|
118
|
-
else:
|
119
|
-
header = full_header
|
120
|
-
num_observations = len(list(self[0].values())[0])
|
121
|
-
rows = []
|
122
|
-
# rows.append(header)
|
123
|
-
for i in range(num_observations):
|
124
|
-
row = [d[h][i] for h in full_header]
|
125
|
-
rows.append(row)
|
126
|
-
if pretty_labels is not None:
|
127
|
-
header = [pretty_labels.get(h, h) for h in header]
|
128
|
-
return header, rows
|
129
|
-
|
130
|
-
def print_long(self, max_rows=None) -> None:
|
131
|
-
"""Print the results in long format.
|
132
|
-
|
133
|
-
>>> from edsl.results import Results
|
134
|
-
>>> r = Results.example()
|
135
|
-
>>> r.select('how_feeling').print_long(max_rows = 2)
|
136
|
-
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┓
|
137
|
-
┃ Result index ┃ Key ┃ Value ┃
|
138
|
-
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━┩
|
139
|
-
│ 0 │ how_feeling │ OK │
|
140
|
-
│ 1 │ how_feeling │ Great │
|
141
|
-
└──────────────┴─────────────┴───────┘
|
142
|
-
"""
|
143
|
-
from edsl.utilities.interface import print_results_long
|
144
|
-
|
145
|
-
print_results_long(self, max_rows=max_rows)
|
146
|
-
|
147
|
-
@_convert_decorator
|
148
|
-
def print(
|
149
|
-
self,
|
150
|
-
pretty_labels: Optional[dict] = None,
|
151
|
-
filename: Optional[str] = None,
|
152
|
-
format: Literal["rich", "html", "markdown", "latex"] = None,
|
153
|
-
interactive: bool = False,
|
154
|
-
split_at_dot: bool = True,
|
155
|
-
max_rows=None,
|
156
|
-
tee=False,
|
157
|
-
iframe=False,
|
158
|
-
) -> None:
|
159
|
-
"""Print the results in a pretty format.
|
160
|
-
|
161
|
-
:param pretty_labels: A dictionary of pretty labels for the columns.
|
162
|
-
:param filename: The filename to save the results to.
|
163
|
-
:param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
|
164
|
-
:param interactive: Whether to print the results interactively in a Jupyter notebook.
|
165
|
-
:param split_at_dot: Whether to split the column names at the last dot w/ a newline.
|
166
|
-
|
167
|
-
Example: Print in rich format at the terminal
|
168
|
-
|
169
|
-
>>> from edsl.results import Results
|
170
|
-
>>> r = Results.example()
|
171
|
-
>>> r.select('how_feeling').print(format = "rich")
|
172
|
-
┏━━━━━━━━━━━━━━┓
|
173
|
-
┃ answer ┃
|
174
|
-
┃ .how_feeling ┃
|
175
|
-
┡━━━━━━━━━━━━━━┩
|
176
|
-
│ OK │
|
177
|
-
├──────────────┤
|
178
|
-
│ Great │
|
179
|
-
├──────────────┤
|
180
|
-
│ Terrible │
|
181
|
-
├──────────────┤
|
182
|
-
│ OK │
|
183
|
-
└──────────────┘
|
184
|
-
|
185
|
-
Example: using the pretty_labels parameter
|
186
|
-
|
187
|
-
>>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
|
188
|
-
┏━━━━━━━━━━━━━━━━━━━━━┓
|
189
|
-
┃ How are you feeling ┃
|
190
|
-
┡━━━━━━━━━━━━━━━━━━━━━┩
|
191
|
-
│ OK │
|
192
|
-
├─────────────────────┤
|
193
|
-
│ Great │
|
194
|
-
├─────────────────────┤
|
195
|
-
│ Terrible │
|
196
|
-
├─────────────────────┤
|
197
|
-
│ OK │
|
198
|
-
└─────────────────────┘
|
199
|
-
|
200
|
-
Example: printing in markdown format
|
201
|
-
|
202
|
-
>>> r.select('how_feeling').print(format='markdown')
|
203
|
-
| answer.how_feeling |
|
204
|
-
|--|
|
205
|
-
| OK |
|
206
|
-
| Great |
|
207
|
-
| Terrible |
|
208
|
-
| OK |
|
209
|
-
...
|
210
|
-
"""
|
211
|
-
if format is None:
|
212
|
-
if is_notebook():
|
213
|
-
format = "html"
|
214
|
-
else:
|
215
|
-
format = "rich"
|
216
|
-
|
217
|
-
if pretty_labels is None:
|
218
|
-
pretty_labels = {}
|
219
|
-
|
220
|
-
if format not in ["rich", "html", "markdown", "latex"]:
|
221
|
-
raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
|
222
|
-
|
223
|
-
new_data = []
|
224
|
-
for index, entry in enumerate(self):
|
225
|
-
key, list_of_values = list(entry.items())[0]
|
226
|
-
new_data.append({pretty_labels.get(key, key): list_of_values})
|
227
|
-
|
228
|
-
if max_rows is not None:
|
229
|
-
for entry in new_data:
|
230
|
-
for key in entry:
|
231
|
-
actual_rows = len(entry[key])
|
232
|
-
entry[key] = entry[key][:max_rows]
|
233
|
-
# print(f"Showing only the first {max_rows} rows of {actual_rows} rows.")
|
234
|
-
|
235
|
-
if format == "rich":
|
236
|
-
print_dataset_with_rich(
|
237
|
-
new_data, filename=filename, split_at_dot=split_at_dot
|
238
|
-
)
|
239
|
-
elif format == "html":
|
240
|
-
notebook = is_notebook()
|
241
|
-
html_source = print_list_of_dicts_as_html_table(
|
242
|
-
new_data, filename=None, interactive=interactive, notebook=notebook
|
243
|
-
)
|
244
|
-
if iframe:
|
245
|
-
import html
|
246
|
-
|
247
|
-
height = 200
|
248
|
-
width = 600
|
249
|
-
escaped_output = html.escape(html_source)
|
250
|
-
# escaped_output = html_source
|
251
|
-
iframe = f""""
|
252
|
-
<iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
|
253
|
-
"""
|
254
|
-
display(HTML(iframe))
|
255
|
-
else:
|
256
|
-
display(HTML(html_source))
|
257
|
-
elif format == "markdown":
|
258
|
-
print_list_of_dicts_as_markdown_table(new_data, filename=filename)
|
259
|
-
elif format == "latex":
|
260
|
-
df = self.to_pandas()
|
261
|
-
df.columns = [col.replace("_", " ") for col in df.columns]
|
262
|
-
latex_string = df.to_latex()
|
263
|
-
if filename is not None:
|
264
|
-
with open(filename, "w") as f:
|
265
|
-
f.write(latex_string)
|
266
|
-
else:
|
267
|
-
return latex_string
|
268
|
-
# raise NotImplementedError("Latex format not yet implemented.")
|
269
|
-
# latex_string = create_latex_table_from_data(new_data, filename=filename)
|
270
|
-
# if filename is None:
|
271
|
-
# return latex_string
|
272
|
-
# Not working quite
|
9
|
+
def to_dataset(func):
|
10
|
+
"""Convert the Results object to a Dataset object before calling the function."""
|
273
11
|
|
12
|
+
@wraps(func)
|
13
|
+
def wrapper(self, *args, **kwargs):
|
14
|
+
"""Return the function with the Results object converted to a Dataset object."""
|
15
|
+
if self.__class__.__name__ == "Results":
|
16
|
+
return func(self.select(), *args, **kwargs)
|
274
17
|
else:
|
275
|
-
|
18
|
+
return func(self, *args, **kwargs)
|
276
19
|
|
277
|
-
|
278
|
-
|
20
|
+
wrapper._is_wrapped = True
|
21
|
+
return wrapper
|
279
22
|
|
280
|
-
@_convert_decorator
|
281
|
-
def to_csv(
|
282
|
-
self,
|
283
|
-
filename: Optional[str] = None,
|
284
|
-
remove_prefix: bool = False,
|
285
|
-
download_link: bool = False,
|
286
|
-
pretty_labels: Optional[dict] = None,
|
287
|
-
):
|
288
|
-
"""Export the results to a CSV file.
|
289
|
-
|
290
|
-
:param filename: The filename to save the CSV file to.
|
291
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
292
|
-
:param download_link: Whether to display a download link in a Jupyter notebook.
|
293
|
-
|
294
|
-
Example:
|
295
|
-
|
296
|
-
>>> from edsl.results import Results
|
297
|
-
>>> r = Results.example()
|
298
|
-
>>> r.select('how_feeling').to_csv()
|
299
|
-
'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
|
300
|
-
"""
|
301
|
-
if pretty_labels is None:
|
302
|
-
pretty_labels = {}
|
303
|
-
header, rows = self._make_tabular(
|
304
|
-
remove_prefix=remove_prefix, pretty_labels=pretty_labels
|
305
|
-
)
|
306
|
-
|
307
|
-
if filename is not None:
|
308
|
-
with open(filename, "w") as f:
|
309
|
-
writer = csv.writer(f)
|
310
|
-
writer.writerow(header)
|
311
|
-
writer.writerows(rows)
|
312
|
-
else:
|
313
|
-
output = io.StringIO()
|
314
|
-
writer = csv.writer(output)
|
315
|
-
writer.writerow(header)
|
316
|
-
writer.writerows(rows)
|
317
23
|
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
else:
|
324
|
-
return output.getvalue()
|
24
|
+
def decorate_methods_from_mixin(cls, mixin_cls):
|
25
|
+
for attr_name, attr_value in mixin_cls.__dict__.items():
|
26
|
+
if callable(attr_value) and not attr_name.startswith("__"):
|
27
|
+
setattr(cls, attr_name, to_dataset(attr_value))
|
28
|
+
return cls
|
325
29
|
|
326
|
-
@_convert_decorator
|
327
|
-
def to_pandas(self, remove_prefix: bool = False) -> pd.DataFrame:
|
328
|
-
"""Convert the results to a pandas DataFrame.
|
329
30
|
|
330
|
-
|
331
|
-
|
332
|
-
>>> from edsl.results import Results
|
333
|
-
>>> r = Results.example()
|
334
|
-
>>> r.select('how_feeling').to_pandas()
|
335
|
-
answer.how_feeling
|
336
|
-
0 OK
|
337
|
-
1 Great
|
338
|
-
2 Terrible
|
339
|
-
3 OK
|
340
|
-
"""
|
341
|
-
csv_string = self.to_csv(remove_prefix=remove_prefix)
|
342
|
-
csv_buffer = io.StringIO(csv_string)
|
343
|
-
df = pd.read_csv(csv_buffer)
|
344
|
-
df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
|
345
|
-
return df_sorted
|
346
|
-
|
347
|
-
@_convert_decorator
|
348
|
-
def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
|
349
|
-
"""Convert the results to a list of dictionaries, one per scenario.
|
350
|
-
|
351
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
352
|
-
|
353
|
-
>>> from edsl.results import Results
|
354
|
-
>>> r = Results.example()
|
355
|
-
>>> r.select('how_feeling').to_scenario_list()
|
356
|
-
ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
|
357
|
-
"""
|
358
|
-
from edsl import ScenarioList, Scenario
|
359
|
-
|
360
|
-
list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
|
361
|
-
return ScenarioList([Scenario(d) for d in list_of_dicts])
|
362
|
-
|
363
|
-
def to_agent_list(self, remove_prefix: bool = True):
|
364
|
-
from edsl import AgentList, Agent
|
365
|
-
|
366
|
-
list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
|
367
|
-
return AgentList([Agent(d) for d in list_of_dicts])
|
368
|
-
|
369
|
-
@_convert_decorator
|
370
|
-
def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
|
371
|
-
"""Convert the results to a list of dictionaries.
|
372
|
-
|
373
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
374
|
-
|
375
|
-
>>> from edsl.results import Results
|
376
|
-
>>> r = Results.example()
|
377
|
-
>>> r.select('how_feeling').to_dicts()
|
378
|
-
[{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]
|
379
|
-
|
380
|
-
"""
|
381
|
-
list_of_keys = []
|
382
|
-
list_of_values = []
|
383
|
-
for entry in self:
|
384
|
-
key, values = list(entry.items())[0]
|
385
|
-
list_of_keys.append(key)
|
386
|
-
list_of_values.append(values)
|
387
|
-
|
388
|
-
if remove_prefix:
|
389
|
-
list_of_keys = [key.split(".")[-1] for key in list_of_keys]
|
390
|
-
# else:
|
391
|
-
# list_of_keys = [key.replace(".", "_") for key in list_of_keys]
|
392
|
-
|
393
|
-
list_of_dicts = []
|
394
|
-
for entries in zip(*list_of_values):
|
395
|
-
list_of_dicts.append(dict(zip(list_of_keys, entries)))
|
396
|
-
|
397
|
-
return list_of_dicts
|
398
|
-
|
399
|
-
@_convert_decorator
|
400
|
-
def to_list(self, flatten=False, remove_none=False) -> list[list]:
|
401
|
-
"""Convert the results to a list of lists.
|
402
|
-
|
403
|
-
>>> from edsl.results import Results
|
404
|
-
>>> Results.example().select('how_feeling', 'how_feeling_yesterday')
|
405
|
-
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
|
406
|
-
|
407
|
-
>>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
|
408
|
-
[('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]
|
409
|
-
|
410
|
-
>>> r = Results.example()
|
411
|
-
>>> r.select('how_feeling').to_list()
|
412
|
-
['OK', 'Great', 'Terrible', 'OK']
|
413
|
-
"""
|
414
|
-
if len(self.relevant_columns()) > 1 and flatten:
|
415
|
-
raise ValueError(
|
416
|
-
"Cannot flatten a list of lists when there are multiple columns selected."
|
417
|
-
)
|
418
|
-
|
419
|
-
if len(self.relevant_columns()) == 1:
|
420
|
-
# if only one 'column' is selected (which is typical for this method
|
421
|
-
list_to_return = list(self[0].values())[0]
|
422
|
-
else:
|
423
|
-
keys = self.relevant_columns()
|
424
|
-
data = self.to_dicts(remove_prefix=False)
|
425
|
-
list_to_return = []
|
426
|
-
for d in data:
|
427
|
-
list_to_return.append(tuple([d[key] for key in keys]))
|
428
|
-
|
429
|
-
if remove_none:
|
430
|
-
list_to_return = [item for item in list_to_return if item is not None]
|
431
|
-
|
432
|
-
if flatten:
|
433
|
-
new_list = []
|
434
|
-
for item in list_to_return:
|
435
|
-
if isinstance(item, list):
|
436
|
-
new_list.extend(item)
|
437
|
-
else:
|
438
|
-
new_list.append(item)
|
439
|
-
list_to_return = new_list
|
440
|
-
|
441
|
-
return list_to_return
|
442
|
-
|
443
|
-
@_convert_decorator
|
444
|
-
def html(
|
445
|
-
self, filename: str = None, cta: str = "Open in browser", return_link=False
|
446
|
-
):
|
447
|
-
import os
|
448
|
-
import tempfile
|
449
|
-
|
450
|
-
df = self.to_pandas()
|
451
|
-
|
452
|
-
if filename is None:
|
453
|
-
current_directory = os.getcwd()
|
454
|
-
filename = tempfile.NamedTemporaryFile(
|
455
|
-
"w", delete=False, suffix=".html", dir=current_directory
|
456
|
-
).name
|
457
|
-
|
458
|
-
with open(filename, "w") as f:
|
459
|
-
f.write(df.to_html())
|
460
|
-
|
461
|
-
if is_notebook():
|
462
|
-
html_url = f"/files/{filename}"
|
463
|
-
html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
|
464
|
-
display(HTML(html_link))
|
465
|
-
else:
|
466
|
-
print(f"Saved to {filename}")
|
467
|
-
import webbrowser
|
468
|
-
import os
|
469
|
-
|
470
|
-
webbrowser.open(f"file://{os.path.abspath(filename)}")
|
471
|
-
# webbrowser.open(filename)
|
472
|
-
|
473
|
-
if return_link:
|
474
|
-
return filename
|
475
|
-
|
476
|
-
@_convert_decorator
|
477
|
-
def tally(self, *fields: Optional[str], top_n=None, format=None):
|
478
|
-
"""Tally the values of a field or perform a cross-tab of multiple fields.
|
479
|
-
|
480
|
-
:param fields: The field(s) to tally, multiple fields for cross-tabulation.
|
481
|
-
|
482
|
-
>>> r = Results.example()
|
483
|
-
>>> r.select('how_feeling').tally('answer.how_feeling')
|
484
|
-
{'OK': 2, 'Great': 1, 'Terrible': 1}
|
485
|
-
>>> r.tally('field1', 'field2')
|
486
|
-
{('X', 'A'): 1, ('X', 'B'): 1, ('Y', 'A'): 1}
|
487
|
-
"""
|
488
|
-
from collections import Counter
|
489
|
-
|
490
|
-
if len(fields) == 0:
|
491
|
-
fields = self.relevant_columns()
|
492
|
-
|
493
|
-
relevant_columns_without_prefix = [
|
494
|
-
column.split(".")[-1] for column in self.relevant_columns()
|
495
|
-
]
|
496
|
-
|
497
|
-
if not all(
|
498
|
-
f in self.relevant_columns() or f in relevant_columns_without_prefix
|
499
|
-
for f in fields
|
500
|
-
):
|
501
|
-
raise ValueError("One or more specified fields are not in the dataset.")
|
502
|
-
|
503
|
-
if len(fields) == 1:
|
504
|
-
field = fields[0]
|
505
|
-
values = self._key_to_value(field)
|
506
|
-
else:
|
507
|
-
values = list(zip(*(self._key_to_value(field) for field in fields)))
|
508
|
-
|
509
|
-
tally = dict(Counter(values))
|
510
|
-
sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
|
511
|
-
if top_n is not None:
|
512
|
-
sorted_tally = dict(list(sorted_tally.items())[:top_n])
|
513
|
-
|
514
|
-
if format is not None:
|
515
|
-
if format == "rich":
|
516
|
-
from edsl.utilities.interface import print_tally_with_rich
|
517
|
-
|
518
|
-
print_tally_with_rich(sorted_tally)
|
519
|
-
return None
|
31
|
+
class ResultsExportMixin(DatasetExportMixin):
|
32
|
+
"""Mixin class for exporting Results objects."""
|
520
33
|
|
521
|
-
|
34
|
+
def __init_subclass__(cls, **kwargs):
|
35
|
+
super().__init_subclass__(**kwargs)
|
36
|
+
decorate_methods_from_mixin(cls, DatasetExportMixin)
|
522
37
|
|
523
38
|
|
524
39
|
if __name__ == "__main__":
|
40
|
+
# pass
|
525
41
|
import doctest
|
526
42
|
|
527
43
|
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
edsl/results/ResultsGGMixin.py
CHANGED
@@ -1,11 +1,8 @@
|
|
1
1
|
"""Mixin class for ggplot2 plotting."""
|
2
2
|
|
3
3
|
import subprocess
|
4
|
-
import pandas as pd
|
5
4
|
import tempfile
|
6
5
|
from typing import Optional
|
7
|
-
import matplotlib.pyplot as plt
|
8
|
-
import matplotlib.image as mpimg
|
9
6
|
|
10
7
|
|
11
8
|
class ResultsGGMixin:
|
@@ -105,6 +102,9 @@ class ResultsGGMixin:
|
|
105
102
|
|
106
103
|
def _display_plot(self, filename: str, width: float, height: float):
|
107
104
|
"""Display the plot in the notebook."""
|
105
|
+
import matplotlib.pyplot as plt
|
106
|
+
import matplotlib.image as mpimg
|
107
|
+
|
108
108
|
if filename.endswith(".png"):
|
109
109
|
img = mpimg.imread(filename)
|
110
110
|
plt.figure(
|
@@ -1,7 +1,3 @@
|
|
1
|
-
from edsl import ScenarioList
|
2
|
-
from edsl.questions import QuestionList, QuestionCheckBox
|
3
|
-
|
4
|
-
|
5
1
|
class ResultsToolsMixin:
|
6
2
|
def get_themes(
|
7
3
|
self,
|
@@ -13,7 +9,12 @@ class ResultsToolsMixin:
|
|
13
9
|
progress_bar=False,
|
14
10
|
print_exceptions=False,
|
15
11
|
) -> list:
|
16
|
-
values =
|
12
|
+
values = [
|
13
|
+
str(txt)[:1000]
|
14
|
+
for txt in self.shuffle(seed=seed).select(field).to_list()[:max_values]
|
15
|
+
]
|
16
|
+
from edsl import ScenarioList
|
17
|
+
from edsl.questions import QuestionList, QuestionCheckBox
|
17
18
|
|
18
19
|
q = QuestionList(
|
19
20
|
question_text=f"""
|
@@ -24,10 +25,7 @@ class ResultsToolsMixin:
|
|
24
25
|
""",
|
25
26
|
question_name="themes",
|
26
27
|
)
|
27
|
-
|
28
|
-
results = q.by(s).run(
|
29
|
-
print_exceptions=print_exceptions, progress_bar=progress_bar
|
30
|
-
)
|
28
|
+
results = q.run(print_exceptions=print_exceptions, progress_bar=progress_bar)
|
31
29
|
return results.select("themes").first()
|
32
30
|
|
33
31
|
def answers_to_themes(
|
@@ -38,6 +36,8 @@ class ResultsToolsMixin:
|
|
38
36
|
progress_bar=False,
|
39
37
|
print_exceptions=False,
|
40
38
|
) -> dict:
|
39
|
+
from edsl import ScenarioList
|
40
|
+
|
41
41
|
values = self.select(field).to_list()
|
42
42
|
scenarios = ScenarioList.from_list("field", values).add_value(
|
43
43
|
"context", context
|