edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. edsl/Base.py +107 -30
  2. edsl/BaseDiff.py +260 -0
  3. edsl/__init__.py +25 -21
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +103 -46
  6. edsl/agents/AgentList.py +97 -13
  7. edsl/agents/Invigilator.py +23 -10
  8. edsl/agents/InvigilatorBase.py +19 -14
  9. edsl/agents/PromptConstructionMixin.py +342 -100
  10. edsl/agents/descriptors.py +5 -2
  11. edsl/base/Base.py +289 -0
  12. edsl/config.py +2 -1
  13. edsl/conjure/AgentConstructionMixin.py +152 -0
  14. edsl/conjure/Conjure.py +56 -0
  15. edsl/conjure/InputData.py +659 -0
  16. edsl/conjure/InputDataCSV.py +48 -0
  17. edsl/conjure/InputDataMixinQuestionStats.py +182 -0
  18. edsl/conjure/InputDataPyRead.py +91 -0
  19. edsl/conjure/InputDataSPSS.py +8 -0
  20. edsl/conjure/InputDataStata.py +8 -0
  21. edsl/conjure/QuestionOptionMixin.py +76 -0
  22. edsl/conjure/QuestionTypeMixin.py +23 -0
  23. edsl/conjure/RawQuestion.py +65 -0
  24. edsl/conjure/SurveyResponses.py +7 -0
  25. edsl/conjure/__init__.py +9 -4
  26. edsl/conjure/examples/placeholder.txt +0 -0
  27. edsl/conjure/naming_utilities.py +263 -0
  28. edsl/conjure/utilities.py +165 -28
  29. edsl/conversation/Conversation.py +238 -0
  30. edsl/conversation/car_buying.py +58 -0
  31. edsl/conversation/mug_negotiation.py +81 -0
  32. edsl/conversation/next_speaker_utilities.py +93 -0
  33. edsl/coop/coop.py +337 -121
  34. edsl/coop/utils.py +56 -70
  35. edsl/data/Cache.py +74 -22
  36. edsl/data/CacheHandler.py +10 -9
  37. edsl/data/SQLiteDict.py +11 -3
  38. edsl/inference_services/AnthropicService.py +1 -0
  39. edsl/inference_services/DeepInfraService.py +20 -13
  40. edsl/inference_services/GoogleService.py +7 -1
  41. edsl/inference_services/InferenceServicesCollection.py +33 -7
  42. edsl/inference_services/OpenAIService.py +17 -10
  43. edsl/inference_services/models_available_cache.py +69 -0
  44. edsl/inference_services/rate_limits_cache.py +25 -0
  45. edsl/inference_services/write_available.py +10 -0
  46. edsl/jobs/Answers.py +15 -1
  47. edsl/jobs/Jobs.py +322 -73
  48. edsl/jobs/buckets/BucketCollection.py +9 -3
  49. edsl/jobs/buckets/ModelBuckets.py +4 -2
  50. edsl/jobs/buckets/TokenBucket.py +1 -2
  51. edsl/jobs/interviews/Interview.py +7 -10
  52. edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
  53. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
  54. edsl/jobs/interviews/retry_management.py +4 -4
  55. edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
  56. edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
  57. edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
  58. edsl/jobs/tasks/TaskHistory.py +4 -3
  59. edsl/language_models/LanguageModel.py +42 -55
  60. edsl/language_models/ModelList.py +96 -0
  61. edsl/language_models/registry.py +14 -0
  62. edsl/language_models/repair.py +97 -25
  63. edsl/notebooks/Notebook.py +157 -32
  64. edsl/prompts/Prompt.py +31 -19
  65. edsl/questions/QuestionBase.py +145 -23
  66. edsl/questions/QuestionBudget.py +5 -6
  67. edsl/questions/QuestionCheckBox.py +7 -3
  68. edsl/questions/QuestionExtract.py +5 -3
  69. edsl/questions/QuestionFreeText.py +3 -3
  70. edsl/questions/QuestionFunctional.py +0 -3
  71. edsl/questions/QuestionList.py +3 -4
  72. edsl/questions/QuestionMultipleChoice.py +16 -8
  73. edsl/questions/QuestionNumerical.py +4 -3
  74. edsl/questions/QuestionRank.py +5 -3
  75. edsl/questions/__init__.py +4 -3
  76. edsl/questions/descriptors.py +9 -4
  77. edsl/questions/question_registry.py +27 -31
  78. edsl/questions/settings.py +1 -1
  79. edsl/results/Dataset.py +31 -0
  80. edsl/results/DatasetExportMixin.py +493 -0
  81. edsl/results/Result.py +42 -82
  82. edsl/results/Results.py +178 -66
  83. edsl/results/ResultsDBMixin.py +10 -9
  84. edsl/results/ResultsExportMixin.py +23 -507
  85. edsl/results/ResultsGGMixin.py +3 -3
  86. edsl/results/ResultsToolsMixin.py +9 -9
  87. edsl/scenarios/FileStore.py +140 -0
  88. edsl/scenarios/Scenario.py +59 -6
  89. edsl/scenarios/ScenarioList.py +138 -52
  90. edsl/scenarios/ScenarioListExportMixin.py +32 -0
  91. edsl/scenarios/ScenarioListPdfMixin.py +2 -1
  92. edsl/scenarios/__init__.py +1 -0
  93. edsl/study/ObjectEntry.py +173 -0
  94. edsl/study/ProofOfWork.py +113 -0
  95. edsl/study/SnapShot.py +73 -0
  96. edsl/study/Study.py +498 -0
  97. edsl/study/__init__.py +4 -0
  98. edsl/surveys/MemoryPlan.py +11 -4
  99. edsl/surveys/Survey.py +124 -37
  100. edsl/surveys/SurveyExportMixin.py +25 -5
  101. edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
  102. edsl/tools/plotting.py +4 -2
  103. edsl/utilities/__init__.py +21 -20
  104. edsl/utilities/gcp_bucket/__init__.py +0 -0
  105. edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
  106. edsl/utilities/gcp_bucket/simple_example.py +9 -0
  107. edsl/utilities/interface.py +90 -73
  108. edsl/utilities/repair_functions.py +28 -0
  109. edsl/utilities/utilities.py +59 -6
  110. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
  111. edsl-0.1.29.dist-info/RECORD +203 -0
  112. edsl/conjure/RawResponseColumn.py +0 -327
  113. edsl/conjure/SurveyBuilder.py +0 -308
  114. edsl/conjure/SurveyBuilderCSV.py +0 -78
  115. edsl/conjure/SurveyBuilderSPSS.py +0 -118
  116. edsl/data/RemoteDict.py +0 -103
  117. edsl-0.1.27.dev2.dist-info/RECORD +0 -172
  118. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
  119. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
@@ -1,8 +1,6 @@
1
1
  """Mixin for working with SQLite respresentation of a 'Results' object."""
2
2
 
3
- import pandas as pd
4
3
  import sqlite3
5
- from sqlalchemy import create_engine
6
4
  from enum import Enum
7
5
  from typing import Literal, Union, Optional
8
6
 
@@ -92,6 +90,8 @@ class ResultsDBMixin:
92
90
  conn.commit()
93
91
  return conn
94
92
  elif shape == SQLDataShape.WIDE:
93
+ from sqlalchemy import create_engine
94
+
95
95
  engine = create_engine("sqlite:///:memory:")
96
96
  df = self.to_pandas(remove_prefix=remove_prefix)
97
97
  df.to_sql("self", engine, index=False, if_exists="replace")
@@ -121,7 +121,7 @@ class ResultsDBMixin:
121
121
  to_list=False,
122
122
  to_latex=False,
123
123
  filename: Optional[str] = None,
124
- ) -> Union[pd.DataFrame, str]:
124
+ ) -> Union["pd.DataFrame", str]:
125
125
  """Execute a SQL query and return the results as a DataFrame.
126
126
 
127
127
  :param query: The SQL query to execute
@@ -136,12 +136,9 @@ class ResultsDBMixin:
136
136
 
137
137
  >>> from edsl.results import Results
138
138
  >>> r = Results.example()
139
- >>> r.sql("select data_type, key, value from self where data_type = 'answer' limit 3", shape="long")
140
- data_type key value
141
- 0 answer how_feeling OK
142
- 1 answer how_feeling_comment This is a real survey response from a human.
143
- 2 answer how_feeling_yesterday Great
144
-
139
+ >>> d = r.sql("select data_type, key, value from self where data_type = 'answer' limit 3", shape="long")
140
+ >>> list(d['value'])
141
+ ['OK', 'This is a real survey response from a human.', 'Great']
145
142
 
146
143
  We can also return the data in wide format.
147
144
  Note the use of single quotes to escape the column names, as required by sql.
@@ -154,6 +151,8 @@ class ResultsDBMixin:
154
151
  2 Terrible
155
152
  3 OK
156
153
  """
154
+ import pandas as pd
155
+
157
156
  shape_enum = self._get_shape_enum(shape)
158
157
 
159
158
  conn = self._db(shape=shape_enum, remove_prefix=remove_prefix)
@@ -208,6 +207,8 @@ class ResultsDBMixin:
208
207
  ...
209
208
  <BLANKLINE>
210
209
  """
210
+ import pandas as pd
211
+
211
212
  shape_enum = self._get_shape_enum(shape)
212
213
  conn = self._db(shape=shape_enum, remove_prefix=remove_prefix)
213
214
 
@@ -1,527 +1,43 @@
1
1
  """Mixin class for exporting results."""
2
2
 
3
- import base64
4
- import csv
5
- import io
6
- import random
7
3
  from functools import wraps
4
+ from typing import Literal, Optional, Union
8
5
 
9
- from typing import Literal, Optional
6
+ from edsl.results.DatasetExportMixin import DatasetExportMixin
10
7
 
11
- from edsl.utilities.utilities import is_notebook
12
8
 
13
- from IPython.display import HTML, display
14
- import pandas as pd
15
- from edsl.utilities.interface import (
16
- print_dataset_with_rich,
17
- print_list_of_dicts_as_html_table,
18
- print_list_of_dicts_as_markdown_table,
19
- create_latex_table_from_data,
20
- )
21
-
22
-
23
- class ResultsExportMixin:
24
- """Mixin class for exporting Results objects."""
25
-
26
- def _convert_decorator(func):
27
- """Convert the Results object to a Dataset object before calling the function."""
28
-
29
- @wraps(func)
30
- def wrapper(self, *args, **kwargs):
31
- """Return the function with the Results object converted to a Dataset object."""
32
- if self.__class__.__name__ == "Results":
33
- return func(self.select(), *args, **kwargs)
34
- elif self.__class__.__name__ == "Dataset":
35
- return func(self, *args, **kwargs)
36
- else:
37
- raise Exception(
38
- f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
39
- )
40
-
41
- return wrapper
42
-
43
- @_convert_decorator
44
- def relevant_columns(
45
- self, data_type: Optional[str] = None, remove_prefix=False
46
- ) -> list:
47
- """Return the set of keys that are present in the dataset.
48
-
49
- >>> d = Dataset([{'a.b':[1,2,3,4]}])
50
- >>> d.relevant_columns()
51
- ['a.b']
52
-
53
- >>> d.relevant_columns(remove_prefix=True)
54
- ['b']
55
-
56
- >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
57
- ['answer.how_feeling', 'answer.how_feeling_yesterday']
58
- """
59
- columns = [list(x.keys())[0] for x in self]
60
- # columns = set([list(result.keys())[0] for result in self.data])
61
- if remove_prefix:
62
- columns = [column.split(".")[-1] for column in columns]
63
-
64
- if data_type:
65
- columns = [
66
- column for column in columns if column.split(".")[0] == data_type
67
- ]
68
-
69
- return columns
70
-
71
- # @_convert_decorator
72
- def sample(self, n: int) -> "Results":
73
- """Return a random sample of the results.
74
-
75
- :param n: The number of samples to return.
76
-
77
- >>> from edsl.results import Results
78
- >>> r = Results.example()
79
- >>> len(r.sample(2))
80
- 2
81
- """
82
- indices = None
83
-
84
- for entry in self:
85
- key, values = list(entry.items())[0]
86
- if indices is None: # gets the indices for the first time
87
- indices = list(range(len(values)))
88
- sampled_indices = random.sample(indices, n)
89
- if n > len(indices):
90
- raise ValueError(
91
- f"Cannot sample {n} items from a list of length {len(indices)}."
92
- )
93
- entry[key] = [values[i] for i in sampled_indices]
94
-
95
- return self
96
-
97
- @_convert_decorator
98
- def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
99
- """Turn the results into a tabular format.
100
-
101
- :param remove_prefix: Whether to remove the prefix from the column names.
102
-
103
- >>> from edsl.results import Results
104
- >>> r = Results.example()
105
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
106
- (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
107
-
108
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
109
- (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
110
- """
111
- d = {}
112
- full_header = sorted(list(self.relevant_columns()))
113
- for entry in self.data:
114
- key, list_of_values = list(entry.items())[0]
115
- d[key] = list_of_values
116
- if remove_prefix:
117
- header = [h.split(".")[-1] for h in full_header]
118
- else:
119
- header = full_header
120
- num_observations = len(list(self[0].values())[0])
121
- rows = []
122
- # rows.append(header)
123
- for i in range(num_observations):
124
- row = [d[h][i] for h in full_header]
125
- rows.append(row)
126
- if pretty_labels is not None:
127
- header = [pretty_labels.get(h, h) for h in header]
128
- return header, rows
129
-
130
- def print_long(self, max_rows=None) -> None:
131
- """Print the results in long format.
132
-
133
- >>> from edsl.results import Results
134
- >>> r = Results.example()
135
- >>> r.select('how_feeling').print_long(max_rows = 2)
136
- ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┓
137
- ┃ Result index ┃ Key ┃ Value ┃
138
- ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━┩
139
- │ 0 │ how_feeling │ OK │
140
- │ 1 │ how_feeling │ Great │
141
- └──────────────┴─────────────┴───────┘
142
- """
143
- from edsl.utilities.interface import print_results_long
144
-
145
- print_results_long(self, max_rows=max_rows)
146
-
147
- @_convert_decorator
148
- def print(
149
- self,
150
- pretty_labels: Optional[dict] = None,
151
- filename: Optional[str] = None,
152
- format: Literal["rich", "html", "markdown", "latex"] = None,
153
- interactive: bool = False,
154
- split_at_dot: bool = True,
155
- max_rows=None,
156
- tee=False,
157
- iframe=False,
158
- ) -> None:
159
- """Print the results in a pretty format.
160
-
161
- :param pretty_labels: A dictionary of pretty labels for the columns.
162
- :param filename: The filename to save the results to.
163
- :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
164
- :param interactive: Whether to print the results interactively in a Jupyter notebook.
165
- :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
166
-
167
- Example: Print in rich format at the terminal
168
-
169
- >>> from edsl.results import Results
170
- >>> r = Results.example()
171
- >>> r.select('how_feeling').print(format = "rich")
172
- ┏━━━━━━━━━━━━━━┓
173
- ┃ answer ┃
174
- ┃ .how_feeling ┃
175
- ┡━━━━━━━━━━━━━━┩
176
- │ OK │
177
- ├──────────────┤
178
- │ Great │
179
- ├──────────────┤
180
- │ Terrible │
181
- ├──────────────┤
182
- │ OK │
183
- └──────────────┘
184
-
185
- Example: using the pretty_labels parameter
186
-
187
- >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
188
- ┏━━━━━━━━━━━━━━━━━━━━━┓
189
- ┃ How are you feeling ┃
190
- ┡━━━━━━━━━━━━━━━━━━━━━┩
191
- │ OK │
192
- ├─────────────────────┤
193
- │ Great │
194
- ├─────────────────────┤
195
- │ Terrible │
196
- ├─────────────────────┤
197
- │ OK │
198
- └─────────────────────┘
199
-
200
- Example: printing in markdown format
201
-
202
- >>> r.select('how_feeling').print(format='markdown')
203
- | answer.how_feeling |
204
- |--|
205
- | OK |
206
- | Great |
207
- | Terrible |
208
- | OK |
209
- ...
210
- """
211
- if format is None:
212
- if is_notebook():
213
- format = "html"
214
- else:
215
- format = "rich"
216
-
217
- if pretty_labels is None:
218
- pretty_labels = {}
219
-
220
- if format not in ["rich", "html", "markdown", "latex"]:
221
- raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
222
-
223
- new_data = []
224
- for index, entry in enumerate(self):
225
- key, list_of_values = list(entry.items())[0]
226
- new_data.append({pretty_labels.get(key, key): list_of_values})
227
-
228
- if max_rows is not None:
229
- for entry in new_data:
230
- for key in entry:
231
- actual_rows = len(entry[key])
232
- entry[key] = entry[key][:max_rows]
233
- # print(f"Showing only the first {max_rows} rows of {actual_rows} rows.")
234
-
235
- if format == "rich":
236
- print_dataset_with_rich(
237
- new_data, filename=filename, split_at_dot=split_at_dot
238
- )
239
- elif format == "html":
240
- notebook = is_notebook()
241
- html_source = print_list_of_dicts_as_html_table(
242
- new_data, filename=None, interactive=interactive, notebook=notebook
243
- )
244
- if iframe:
245
- import html
246
-
247
- height = 200
248
- width = 600
249
- escaped_output = html.escape(html_source)
250
- # escaped_output = html_source
251
- iframe = f""""
252
- <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
253
- """
254
- display(HTML(iframe))
255
- else:
256
- display(HTML(html_source))
257
- elif format == "markdown":
258
- print_list_of_dicts_as_markdown_table(new_data, filename=filename)
259
- elif format == "latex":
260
- df = self.to_pandas()
261
- df.columns = [col.replace("_", " ") for col in df.columns]
262
- latex_string = df.to_latex()
263
- if filename is not None:
264
- with open(filename, "w") as f:
265
- f.write(latex_string)
266
- else:
267
- return latex_string
268
- # raise NotImplementedError("Latex format not yet implemented.")
269
- # latex_string = create_latex_table_from_data(new_data, filename=filename)
270
- # if filename is None:
271
- # return latex_string
272
- # Not working quite
9
+ def to_dataset(func):
10
+ """Convert the Results object to a Dataset object before calling the function."""
273
11
 
12
+ @wraps(func)
13
+ def wrapper(self, *args, **kwargs):
14
+ """Return the function with the Results object converted to a Dataset object."""
15
+ if self.__class__.__name__ == "Results":
16
+ return func(self.select(), *args, **kwargs)
274
17
  else:
275
- raise ValueError("format not recognized.")
18
+ return func(self, *args, **kwargs)
276
19
 
277
- if tee:
278
- return self
20
+ wrapper._is_wrapped = True
21
+ return wrapper
279
22
 
280
- @_convert_decorator
281
- def to_csv(
282
- self,
283
- filename: Optional[str] = None,
284
- remove_prefix: bool = False,
285
- download_link: bool = False,
286
- pretty_labels: Optional[dict] = None,
287
- ):
288
- """Export the results to a CSV file.
289
-
290
- :param filename: The filename to save the CSV file to.
291
- :param remove_prefix: Whether to remove the prefix from the column names.
292
- :param download_link: Whether to display a download link in a Jupyter notebook.
293
-
294
- Example:
295
-
296
- >>> from edsl.results import Results
297
- >>> r = Results.example()
298
- >>> r.select('how_feeling').to_csv()
299
- 'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
300
- """
301
- if pretty_labels is None:
302
- pretty_labels = {}
303
- header, rows = self._make_tabular(
304
- remove_prefix=remove_prefix, pretty_labels=pretty_labels
305
- )
306
-
307
- if filename is not None:
308
- with open(filename, "w") as f:
309
- writer = csv.writer(f)
310
- writer.writerow(header)
311
- writer.writerows(rows)
312
- else:
313
- output = io.StringIO()
314
- writer = csv.writer(output)
315
- writer.writerow(header)
316
- writer.writerows(rows)
317
23
 
318
- if download_link:
319
- csv_file = output.getvalue()
320
- b64 = base64.b64encode(csv_file.encode()).decode()
321
- download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
322
- display(HTML(download_link))
323
- else:
324
- return output.getvalue()
24
+ def decorate_methods_from_mixin(cls, mixin_cls):
25
+ for attr_name, attr_value in mixin_cls.__dict__.items():
26
+ if callable(attr_value) and not attr_name.startswith("__"):
27
+ setattr(cls, attr_name, to_dataset(attr_value))
28
+ return cls
325
29
 
326
- @_convert_decorator
327
- def to_pandas(self, remove_prefix: bool = False) -> pd.DataFrame:
328
- """Convert the results to a pandas DataFrame.
329
30
 
330
- :param remove_prefix: Whether to remove the prefix from the column names.
331
-
332
- >>> from edsl.results import Results
333
- >>> r = Results.example()
334
- >>> r.select('how_feeling').to_pandas()
335
- answer.how_feeling
336
- 0 OK
337
- 1 Great
338
- 2 Terrible
339
- 3 OK
340
- """
341
- csv_string = self.to_csv(remove_prefix=remove_prefix)
342
- csv_buffer = io.StringIO(csv_string)
343
- df = pd.read_csv(csv_buffer)
344
- df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
345
- return df_sorted
346
-
347
- @_convert_decorator
348
- def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
349
- """Convert the results to a list of dictionaries, one per scenario.
350
-
351
- :param remove_prefix: Whether to remove the prefix from the column names.
352
-
353
- >>> from edsl.results import Results
354
- >>> r = Results.example()
355
- >>> r.select('how_feeling').to_scenario_list()
356
- ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
357
- """
358
- from edsl import ScenarioList, Scenario
359
-
360
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
361
- return ScenarioList([Scenario(d) for d in list_of_dicts])
362
-
363
- def to_agent_list(self, remove_prefix: bool = True):
364
- from edsl import AgentList, Agent
365
-
366
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
367
- return AgentList([Agent(d) for d in list_of_dicts])
368
-
369
- @_convert_decorator
370
- def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
371
- """Convert the results to a list of dictionaries.
372
-
373
- :param remove_prefix: Whether to remove the prefix from the column names.
374
-
375
- >>> from edsl.results import Results
376
- >>> r = Results.example()
377
- >>> r.select('how_feeling').to_dicts()
378
- [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]
379
-
380
- """
381
- list_of_keys = []
382
- list_of_values = []
383
- for entry in self:
384
- key, values = list(entry.items())[0]
385
- list_of_keys.append(key)
386
- list_of_values.append(values)
387
-
388
- if remove_prefix:
389
- list_of_keys = [key.split(".")[-1] for key in list_of_keys]
390
- # else:
391
- # list_of_keys = [key.replace(".", "_") for key in list_of_keys]
392
-
393
- list_of_dicts = []
394
- for entries in zip(*list_of_values):
395
- list_of_dicts.append(dict(zip(list_of_keys, entries)))
396
-
397
- return list_of_dicts
398
-
399
- @_convert_decorator
400
- def to_list(self, flatten=False, remove_none=False) -> list[list]:
401
- """Convert the results to a list of lists.
402
-
403
- >>> from edsl.results import Results
404
- >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
405
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
406
-
407
- >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
408
- [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]
409
-
410
- >>> r = Results.example()
411
- >>> r.select('how_feeling').to_list()
412
- ['OK', 'Great', 'Terrible', 'OK']
413
- """
414
- if len(self.relevant_columns()) > 1 and flatten:
415
- raise ValueError(
416
- "Cannot flatten a list of lists when there are multiple columns selected."
417
- )
418
-
419
- if len(self.relevant_columns()) == 1:
420
- # if only one 'column' is selected (which is typical for this method
421
- list_to_return = list(self[0].values())[0]
422
- else:
423
- keys = self.relevant_columns()
424
- data = self.to_dicts(remove_prefix=False)
425
- list_to_return = []
426
- for d in data:
427
- list_to_return.append(tuple([d[key] for key in keys]))
428
-
429
- if remove_none:
430
- list_to_return = [item for item in list_to_return if item is not None]
431
-
432
- if flatten:
433
- new_list = []
434
- for item in list_to_return:
435
- if isinstance(item, list):
436
- new_list.extend(item)
437
- else:
438
- new_list.append(item)
439
- list_to_return = new_list
440
-
441
- return list_to_return
442
-
443
- @_convert_decorator
444
- def html(
445
- self, filename: str = None, cta: str = "Open in browser", return_link=False
446
- ):
447
- import os
448
- import tempfile
449
-
450
- df = self.to_pandas()
451
-
452
- if filename is None:
453
- current_directory = os.getcwd()
454
- filename = tempfile.NamedTemporaryFile(
455
- "w", delete=False, suffix=".html", dir=current_directory
456
- ).name
457
-
458
- with open(filename, "w") as f:
459
- f.write(df.to_html())
460
-
461
- if is_notebook():
462
- html_url = f"/files/{filename}"
463
- html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
464
- display(HTML(html_link))
465
- else:
466
- print(f"Saved to {filename}")
467
- import webbrowser
468
- import os
469
-
470
- webbrowser.open(f"file://{os.path.abspath(filename)}")
471
- # webbrowser.open(filename)
472
-
473
- if return_link:
474
- return filename
475
-
476
- @_convert_decorator
477
- def tally(self, *fields: Optional[str], top_n=None, format=None):
478
- """Tally the values of a field or perform a cross-tab of multiple fields.
479
-
480
- :param fields: The field(s) to tally, multiple fields for cross-tabulation.
481
-
482
- >>> r = Results.example()
483
- >>> r.select('how_feeling').tally('answer.how_feeling')
484
- {'OK': 2, 'Great': 1, 'Terrible': 1}
485
- >>> r.tally('field1', 'field2')
486
- {('X', 'A'): 1, ('X', 'B'): 1, ('Y', 'A'): 1}
487
- """
488
- from collections import Counter
489
-
490
- if len(fields) == 0:
491
- fields = self.relevant_columns()
492
-
493
- relevant_columns_without_prefix = [
494
- column.split(".")[-1] for column in self.relevant_columns()
495
- ]
496
-
497
- if not all(
498
- f in self.relevant_columns() or f in relevant_columns_without_prefix
499
- for f in fields
500
- ):
501
- raise ValueError("One or more specified fields are not in the dataset.")
502
-
503
- if len(fields) == 1:
504
- field = fields[0]
505
- values = self._key_to_value(field)
506
- else:
507
- values = list(zip(*(self._key_to_value(field) for field in fields)))
508
-
509
- tally = dict(Counter(values))
510
- sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
511
- if top_n is not None:
512
- sorted_tally = dict(list(sorted_tally.items())[:top_n])
513
-
514
- if format is not None:
515
- if format == "rich":
516
- from edsl.utilities.interface import print_tally_with_rich
517
-
518
- print_tally_with_rich(sorted_tally)
519
- return None
31
+ class ResultsExportMixin(DatasetExportMixin):
32
+ """Mixin class for exporting Results objects."""
520
33
 
521
- return sorted_tally
34
+ def __init_subclass__(cls, **kwargs):
35
+ super().__init_subclass__(**kwargs)
36
+ decorate_methods_from_mixin(cls, DatasetExportMixin)
522
37
 
523
38
 
524
39
  if __name__ == "__main__":
40
+ # pass
525
41
  import doctest
526
42
 
527
43
  doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -1,11 +1,8 @@
1
1
  """Mixin class for ggplot2 plotting."""
2
2
 
3
3
  import subprocess
4
- import pandas as pd
5
4
  import tempfile
6
5
  from typing import Optional
7
- import matplotlib.pyplot as plt
8
- import matplotlib.image as mpimg
9
6
 
10
7
 
11
8
  class ResultsGGMixin:
@@ -105,6 +102,9 @@ class ResultsGGMixin:
105
102
 
106
103
  def _display_plot(self, filename: str, width: float, height: float):
107
104
  """Display the plot in the notebook."""
105
+ import matplotlib.pyplot as plt
106
+ import matplotlib.image as mpimg
107
+
108
108
  if filename.endswith(".png"):
109
109
  img = mpimg.imread(filename)
110
110
  plt.figure(
@@ -1,7 +1,3 @@
1
- from edsl import ScenarioList
2
- from edsl.questions import QuestionList, QuestionCheckBox
3
-
4
-
5
1
  class ResultsToolsMixin:
6
2
  def get_themes(
7
3
  self,
@@ -13,7 +9,12 @@ class ResultsToolsMixin:
13
9
  progress_bar=False,
14
10
  print_exceptions=False,
15
11
  ) -> list:
16
- values = self.shuffle(seed=seed).select(field).to_list()[:max_values]
12
+ values = [
13
+ str(txt)[:1000]
14
+ for txt in self.shuffle(seed=seed).select(field).to_list()[:max_values]
15
+ ]
16
+ from edsl import ScenarioList
17
+ from edsl.questions import QuestionList, QuestionCheckBox
17
18
 
18
19
  q = QuestionList(
19
20
  question_text=f"""
@@ -24,10 +25,7 @@ class ResultsToolsMixin:
24
25
  """,
25
26
  question_name="themes",
26
27
  )
27
- s = ScenarioList.from_list(field, values)
28
- results = q.by(s).run(
29
- print_exceptions=print_exceptions, progress_bar=progress_bar
30
- )
28
+ results = q.run(print_exceptions=print_exceptions, progress_bar=progress_bar)
31
29
  return results.select("themes").first()
32
30
 
33
31
  def answers_to_themes(
@@ -38,6 +36,8 @@ class ResultsToolsMixin:
38
36
  progress_bar=False,
39
37
  print_exceptions=False,
40
38
  ) -> dict:
39
+ from edsl import ScenarioList
40
+
41
41
  values = self.select(field).to_list()
42
42
  scenarios = ScenarioList.from_list("field", values).add_value(
43
43
  "context", context