edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. edsl/Base.py +107 -30
  2. edsl/BaseDiff.py +260 -0
  3. edsl/__init__.py +25 -21
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +103 -46
  6. edsl/agents/AgentList.py +97 -13
  7. edsl/agents/Invigilator.py +23 -10
  8. edsl/agents/InvigilatorBase.py +19 -14
  9. edsl/agents/PromptConstructionMixin.py +342 -100
  10. edsl/agents/descriptors.py +5 -2
  11. edsl/base/Base.py +289 -0
  12. edsl/config.py +2 -1
  13. edsl/conjure/AgentConstructionMixin.py +152 -0
  14. edsl/conjure/Conjure.py +56 -0
  15. edsl/conjure/InputData.py +659 -0
  16. edsl/conjure/InputDataCSV.py +48 -0
  17. edsl/conjure/InputDataMixinQuestionStats.py +182 -0
  18. edsl/conjure/InputDataPyRead.py +91 -0
  19. edsl/conjure/InputDataSPSS.py +8 -0
  20. edsl/conjure/InputDataStata.py +8 -0
  21. edsl/conjure/QuestionOptionMixin.py +76 -0
  22. edsl/conjure/QuestionTypeMixin.py +23 -0
  23. edsl/conjure/RawQuestion.py +65 -0
  24. edsl/conjure/SurveyResponses.py +7 -0
  25. edsl/conjure/__init__.py +9 -4
  26. edsl/conjure/examples/placeholder.txt +0 -0
  27. edsl/conjure/naming_utilities.py +263 -0
  28. edsl/conjure/utilities.py +165 -28
  29. edsl/conversation/Conversation.py +238 -0
  30. edsl/conversation/car_buying.py +58 -0
  31. edsl/conversation/mug_negotiation.py +81 -0
  32. edsl/conversation/next_speaker_utilities.py +93 -0
  33. edsl/coop/coop.py +337 -121
  34. edsl/coop/utils.py +56 -70
  35. edsl/data/Cache.py +74 -22
  36. edsl/data/CacheHandler.py +10 -9
  37. edsl/data/SQLiteDict.py +11 -3
  38. edsl/inference_services/AnthropicService.py +1 -0
  39. edsl/inference_services/DeepInfraService.py +20 -13
  40. edsl/inference_services/GoogleService.py +7 -1
  41. edsl/inference_services/InferenceServicesCollection.py +33 -7
  42. edsl/inference_services/OpenAIService.py +17 -10
  43. edsl/inference_services/models_available_cache.py +69 -0
  44. edsl/inference_services/rate_limits_cache.py +25 -0
  45. edsl/inference_services/write_available.py +10 -0
  46. edsl/jobs/Answers.py +15 -1
  47. edsl/jobs/Jobs.py +322 -73
  48. edsl/jobs/buckets/BucketCollection.py +9 -3
  49. edsl/jobs/buckets/ModelBuckets.py +4 -2
  50. edsl/jobs/buckets/TokenBucket.py +1 -2
  51. edsl/jobs/interviews/Interview.py +7 -10
  52. edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
  53. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
  54. edsl/jobs/interviews/retry_management.py +4 -4
  55. edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
  56. edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
  57. edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
  58. edsl/jobs/tasks/TaskHistory.py +4 -3
  59. edsl/language_models/LanguageModel.py +42 -55
  60. edsl/language_models/ModelList.py +96 -0
  61. edsl/language_models/registry.py +14 -0
  62. edsl/language_models/repair.py +97 -25
  63. edsl/notebooks/Notebook.py +157 -32
  64. edsl/prompts/Prompt.py +31 -19
  65. edsl/questions/QuestionBase.py +145 -23
  66. edsl/questions/QuestionBudget.py +5 -6
  67. edsl/questions/QuestionCheckBox.py +7 -3
  68. edsl/questions/QuestionExtract.py +5 -3
  69. edsl/questions/QuestionFreeText.py +3 -3
  70. edsl/questions/QuestionFunctional.py +0 -3
  71. edsl/questions/QuestionList.py +3 -4
  72. edsl/questions/QuestionMultipleChoice.py +16 -8
  73. edsl/questions/QuestionNumerical.py +4 -3
  74. edsl/questions/QuestionRank.py +5 -3
  75. edsl/questions/__init__.py +4 -3
  76. edsl/questions/descriptors.py +9 -4
  77. edsl/questions/question_registry.py +27 -31
  78. edsl/questions/settings.py +1 -1
  79. edsl/results/Dataset.py +31 -0
  80. edsl/results/DatasetExportMixin.py +493 -0
  81. edsl/results/Result.py +42 -82
  82. edsl/results/Results.py +178 -66
  83. edsl/results/ResultsDBMixin.py +10 -9
  84. edsl/results/ResultsExportMixin.py +23 -507
  85. edsl/results/ResultsGGMixin.py +3 -3
  86. edsl/results/ResultsToolsMixin.py +9 -9
  87. edsl/scenarios/FileStore.py +140 -0
  88. edsl/scenarios/Scenario.py +59 -6
  89. edsl/scenarios/ScenarioList.py +138 -52
  90. edsl/scenarios/ScenarioListExportMixin.py +32 -0
  91. edsl/scenarios/ScenarioListPdfMixin.py +2 -1
  92. edsl/scenarios/__init__.py +1 -0
  93. edsl/study/ObjectEntry.py +173 -0
  94. edsl/study/ProofOfWork.py +113 -0
  95. edsl/study/SnapShot.py +73 -0
  96. edsl/study/Study.py +498 -0
  97. edsl/study/__init__.py +4 -0
  98. edsl/surveys/MemoryPlan.py +11 -4
  99. edsl/surveys/Survey.py +124 -37
  100. edsl/surveys/SurveyExportMixin.py +25 -5
  101. edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
  102. edsl/tools/plotting.py +4 -2
  103. edsl/utilities/__init__.py +21 -20
  104. edsl/utilities/gcp_bucket/__init__.py +0 -0
  105. edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
  106. edsl/utilities/gcp_bucket/simple_example.py +9 -0
  107. edsl/utilities/interface.py +90 -73
  108. edsl/utilities/repair_functions.py +28 -0
  109. edsl/utilities/utilities.py +59 -6
  110. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
  111. edsl-0.1.29.dist-info/RECORD +203 -0
  112. edsl/conjure/RawResponseColumn.py +0 -327
  113. edsl/conjure/SurveyBuilder.py +0 -308
  114. edsl/conjure/SurveyBuilderCSV.py +0 -78
  115. edsl/conjure/SurveyBuilderSPSS.py +0 -118
  116. edsl/data/RemoteDict.py +0 -103
  117. edsl-0.1.27.dev2.dist-info/RECORD +0 -172
  118. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
  119. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
edsl/questions/question_registry.py CHANGED
@@ -1,10 +1,10 @@
1
1
  """This module provides a factory class for creating question objects."""
2
2
 
3
3
  import textwrap
4
- from typing import Union
4
+ from uuid import UUID
5
+ from typing import Any, Optional, Union
6
+
5
7
 
6
- from edsl.exceptions import QuestionSerializationError
7
- from edsl.exceptions import QuestionCreationValidationError
8
8
  from edsl.questions.QuestionBase import RegisterQuestionsMeta
9
9
 
10
10
 
@@ -53,46 +53,42 @@ class Question(metaclass=Meta):
53
53
  return instance
54
54
 
55
55
  @classmethod
56
- def pull(cls, id_or_url: str):
56
+ def example(cls, question_type: str):
57
+ """Return an example question of the given type."""
58
+ get_question_classes = RegisterQuestionsMeta.question_types_to_classes()
59
+ q = get_question_classes.get(question_type, None)
60
+ return q.example()
61
+
62
+ @classmethod
63
+ def pull(cls, uuid: Optional[Union[str, UUID]] = None, url: Optional[str] = None):
57
64
  """Pull the object from coop."""
58
65
  from edsl.coop import Coop
59
66
 
60
- c = Coop()
61
- if c.url in id_or_url:
62
- id = id_or_url.split("/")[-1]
63
- else:
64
- id = id_or_url
65
- from edsl.questions.QuestionBase import QuestionBase
66
-
67
- return c._get_base(QuestionBase, id)
67
+ coop = Coop()
68
+ return coop.get(uuid, url, "question")
68
69
 
69
70
  @classmethod
70
- def delete(cls, id_or_url: str):
71
+ def delete(cls, uuid: Optional[Union[str, UUID]] = None, url: Optional[str] = None):
71
72
  """Delete the object from coop."""
72
73
  from edsl.coop import Coop
73
74
 
74
- c = Coop()
75
- if c.url in id_or_url:
76
- id = id_or_url.split("/")[-1]
77
- else:
78
- id = id_or_url
79
- from edsl.questions.QuestionBase import QuestionBase
80
-
81
- return c._delete_base(QuestionBase, id)
75
+ coop = Coop()
76
+ return coop.delete(uuid, url)
82
77
 
83
78
  @classmethod
84
- def update(cls, id_or_url: str, visibility: str):
85
- """Update the object on coop."""
79
+ def patch(
80
+ cls,
81
+ uuid: Optional[Union[str, UUID]] = None,
82
+ url: Optional[str] = None,
83
+ description: Optional[str] = None,
84
+ value: Optional[Any] = None,
85
+ visibility: Optional[str] = None,
86
+ ):
87
+ """Patch the object on coop."""
86
88
  from edsl.coop import Coop
87
89
 
88
- c = Coop()
89
- if c.url in id_or_url:
90
- id = id_or_url.split("/")[-1]
91
- else:
92
- id = id_or_url
93
- from edsl.questions.QuestionBase import QuestionBase
94
-
95
- return c._update_base(QuestionBase, id, visibility)
90
+ coop = Coop()
91
+ return coop.patch(uuid, url, description, value, visibility)
96
92
 
97
93
  @classmethod
98
94
  def available(cls, show_class_names: bool = False) -> Union[list, dict]:
edsl/questions/settings.py CHANGED
@@ -8,5 +8,5 @@ class Settings:
8
8
  MAX_EXPRESSION_CONSTRAINT_LENGTH = 1000
9
9
  MAX_NUM_OPTIONS = 200
10
10
  MIN_NUM_OPTIONS = 2
11
- MAX_OPTION_LENGTH = 1000
11
+ MAX_OPTION_LENGTH = 10000
12
12
  MAX_QUESTION_LENGTH = 100000
edsl/results/Dataset.py CHANGED
@@ -78,6 +78,28 @@ class Dataset(UserList, ResultsExportMixin):
78
78
 
79
79
  return get_values(self.data[0])[0]
80
80
 
81
+ def select(self, *keys):
82
+ """Return a new dataset with only the selected keys.
83
+
84
+ :param keys: The keys to select.
85
+
86
+ >>> d = Dataset([{'a.b':[1,2,3,4]}, {'c.d':[5,6,7,8]}])
87
+ >>> d.select('a.b')
88
+ Dataset([{'a.b': [1, 2, 3, 4]}])
89
+
90
+ >>> d.select('a.b', 'c.d')
91
+ Dataset([{'a.b': [1, 2, 3, 4]}, {'c.d': [5, 6, 7, 8]}])
92
+ """
93
+ if isinstance(keys, str):
94
+ keys = [keys]
95
+
96
+ new_data = []
97
+ for observation in self.data:
98
+ observation_key = list(observation.keys())[0]
99
+ if observation_key in keys:
100
+ new_data.append(observation)
101
+ return Dataset(new_data)
102
+
81
103
  def _repr_html_(self) -> str:
82
104
  """Return an HTML representation of the dataset."""
83
105
  from edsl.utilities.utilities import data_to_html
@@ -223,6 +245,15 @@ class Dataset(UserList, ResultsExportMixin):
223
245
 
224
246
  return Dataset(new_data)
225
247
 
248
+ @classmethod
249
+ def example(self):
250
+ """Return an example dataset.
251
+
252
+ >>> Dataset.example()
253
+ Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
254
+ """
255
+ return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
256
+
226
257
 
227
258
  if __name__ == "__main__":
228
259
  import doctest
edsl/results/DatasetExportMixin.py ADDED
@@ -0,0 +1,493 @@
1
+ """Mixin class for exporting results."""
2
+
3
+ import base64
4
+ import csv
5
+ import io
6
+
7
+ from typing import Literal, Optional, Union
8
+
9
+
10
+ class DatasetExportMixin:
11
+ """Mixin class"""
12
+
13
+ def relevant_columns(
14
+ self, data_type: Optional[str] = None, remove_prefix=False
15
+ ) -> list:
16
+ """Return the set of keys that are present in the dataset.
17
+
18
+ >>> from edsl.results.Dataset import Dataset
19
+ >>> d = Dataset([{'a.b':[1,2,3,4]}])
20
+ >>> d.relevant_columns()
21
+ ['a.b']
22
+
23
+ >>> d.relevant_columns(remove_prefix=True)
24
+ ['b']
25
+
26
+ >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
27
+ ['answer.how_feeling', 'answer.how_feeling_yesterday']
28
+ """
29
+ columns = [list(x.keys())[0] for x in self]
30
+ # columns = set([list(result.keys())[0] for result in self.data])
31
+ if remove_prefix:
32
+ columns = [column.split(".")[-1] for column in columns]
33
+
34
+ if data_type:
35
+ columns = [
36
+ column for column in columns if column.split(".")[0] == data_type
37
+ ]
38
+
39
+ return columns
40
+
41
+ def _make_tabular(self, remove_prefix: bool, pretty_labels: Optional[dict] = None):
42
+ """Turn the results into a tabular format.
43
+
44
+ :param remove_prefix: Whether to remove the prefix from the column names.
45
+
46
+ >>> from edsl.results import Results
47
+ >>> r = Results.example()
48
+ >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
49
+ (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
50
+
51
+ >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
52
+ (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
53
+ """
54
+ d = {}
55
+ full_header = sorted(list(self.relevant_columns()))
56
+ for entry in self.data:
57
+ key, list_of_values = list(entry.items())[0]
58
+ d[key] = list_of_values
59
+ if remove_prefix:
60
+ header = [h.split(".")[-1] for h in full_header]
61
+ else:
62
+ header = full_header
63
+ num_observations = len(list(self[0].values())[0])
64
+ rows = []
65
+ # rows.append(header)
66
+ for i in range(num_observations):
67
+ row = [d[h][i] for h in full_header]
68
+ rows.append(row)
69
+ if pretty_labels is not None:
70
+ header = [pretty_labels.get(h, h) for h in header]
71
+ return header, rows
72
+
73
+ def print_long(self):
74
+ """Print the results in a long format."""
75
+ for entry in self:
76
+ key, list_of_values = list(entry.items())[0]
77
+ for value in list_of_values:
78
+ print(f"{key}: {value}")
79
+
80
+ def print(
81
+ self,
82
+ pretty_labels: Optional[dict] = None,
83
+ filename: Optional[str] = None,
84
+ format: Literal["rich", "html", "markdown", "latex"] = None,
85
+ interactive: bool = False,
86
+ split_at_dot: bool = True,
87
+ max_rows=None,
88
+ tee=False,
89
+ iframe=False,
90
+ iframe_height: int = 200,
91
+ iframe_width: int = 600,
92
+ web=False,
93
+ ) -> None:
94
+ """Print the results in a pretty format.
95
+
96
+ :param pretty_labels: A dictionary of pretty labels for the columns.
97
+ :param filename: The filename to save the results to.
98
+ :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
99
+ :param interactive: Whether to print the results interactively in a Jupyter notebook.
100
+ :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
101
+
102
+ Example: Print in rich format at the terminal
103
+
104
+ >>> from edsl.results import Results
105
+ >>> r = Results.example()
106
+ >>> r.select('how_feeling').print(format = "rich")
107
+ ┏━━━━━━━━━━━━━━┓
108
+ ┃ answer ┃
109
+ ┃ .how_feeling ┃
110
+ ┡━━━━━━━━━━━━━━┩
111
+ │ OK │
112
+ ├──────────────┤
113
+ │ Great │
114
+ ├──────────────┤
115
+ │ Terrible │
116
+ ├──────────────┤
117
+ │ OK │
118
+ └──────────────┘
119
+
120
+ Example: using the pretty_labels parameter
121
+
122
+ >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
123
+ ┏━━━━━━━━━━━━━━━━━━━━━┓
124
+ ┃ How are you feeling ┃
125
+ ┡━━━━━━━━━━━━━━━━━━━━━┩
126
+ │ OK │
127
+ ├─────────────────────┤
128
+ │ Great │
129
+ ├─────────────────────┤
130
+ │ Terrible │
131
+ ├─────────────────────┤
132
+ │ OK │
133
+ └─────────────────────┘
134
+
135
+ Example: printing in markdown format
136
+
137
+ >>> r.select('how_feeling').print(format='markdown')
138
+ | answer.how_feeling |
139
+ |--|
140
+ | OK |
141
+ | Great |
142
+ | Terrible |
143
+ | OK |
144
+ ...
145
+ """
146
+ from IPython.display import HTML, display
147
+ from edsl.utilities.utilities import is_notebook
148
+
149
+ if format is None:
150
+
151
+ if is_notebook():
152
+ format = "html"
153
+ else:
154
+ format = "rich"
155
+
156
+ if pretty_labels is None:
157
+ pretty_labels = {}
158
+
159
+ if format not in ["rich", "html", "markdown", "latex"]:
160
+ raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
161
+
162
+ new_data = []
163
+ for index, entry in enumerate(self):
164
+ key, list_of_values = list(entry.items())[0]
165
+ new_data.append({pretty_labels.get(key, key): list_of_values})
166
+
167
+ if max_rows is not None:
168
+ for entry in new_data:
169
+ for key in entry:
170
+ actual_rows = len(entry[key])
171
+ entry[key] = entry[key][:max_rows]
172
+ # print(f"Showing only the first {max_rows} rows of {actual_rows} rows.")
173
+
174
+ if format == "rich":
175
+ from edsl.utilities.interface import print_dataset_with_rich
176
+
177
+ print_dataset_with_rich(
178
+ new_data, filename=filename, split_at_dot=split_at_dot
179
+ )
180
+ elif format == "html":
181
+ notebook = is_notebook()
182
+ from edsl.utilities.interface import print_list_of_dicts_as_html_table
183
+
184
+ html_source = print_list_of_dicts_as_html_table(
185
+ new_data, interactive=interactive
186
+ )
187
+ if iframe:
188
+ import html
189
+
190
+ height = iframe_height
191
+ width = iframe_width
192
+ escaped_output = html.escape(html_source)
193
+ # escaped_output = html_source
194
+ iframe = f""""
195
+ <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
196
+ """
197
+ display(HTML(iframe))
198
+ elif notebook:
199
+ display(HTML(html_source))
200
+ else:
201
+ from edsl.utilities.interface import view_html
202
+
203
+ view_html(html_source)
204
+
205
+ elif format == "markdown":
206
+ from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
207
+
208
+ print_list_of_dicts_as_markdown_table(new_data, filename=filename)
209
+ elif format == "latex":
210
+ df = self.to_pandas()
211
+ df.columns = [col.replace("_", " ") for col in df.columns]
212
+ latex_string = df.to_latex()
213
+ if filename is not None:
214
+ with open(filename, "w") as f:
215
+ f.write(latex_string)
216
+ else:
217
+ return latex_string
218
+ # raise NotImplementedError("Latex format not yet implemented.")
219
+ # latex_string = create_latex_table_from_data(new_data, filename=filename)
220
+ # if filename is None:
221
+ # return latex_string
222
+ # Not working quite
223
+
224
+ else:
225
+ raise ValueError("format not recognized.")
226
+
227
+ if tee:
228
+ return self
229
+
230
+ def to_csv(
231
+ self,
232
+ filename: Optional[str] = None,
233
+ remove_prefix: bool = False,
234
+ download_link: bool = False,
235
+ pretty_labels: Optional[dict] = None,
236
+ ):
237
+ """Export the results to a CSV file.
238
+
239
+ :param filename: The filename to save the CSV file to.
240
+ :param remove_prefix: Whether to remove the prefix from the column names.
241
+ :param download_link: Whether to display a download link in a Jupyter notebook.
242
+
243
+ Example:
244
+
245
+ >>> from edsl.results import Results
246
+ >>> r = Results.example()
247
+ >>> r.select('how_feeling').to_csv()
248
+ 'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
249
+ """
250
+ if pretty_labels is None:
251
+ pretty_labels = {}
252
+ header, rows = self._make_tabular(
253
+ remove_prefix=remove_prefix, pretty_labels=pretty_labels
254
+ )
255
+
256
+ if filename is not None:
257
+ with open(filename, "w") as f:
258
+ writer = csv.writer(f)
259
+ writer.writerow(header)
260
+ writer.writerows(rows)
261
+ else:
262
+ output = io.StringIO()
263
+ writer = csv.writer(output)
264
+ writer.writerow(header)
265
+ writer.writerows(rows)
266
+
267
+ if download_link:
268
+ csv_file = output.getvalue()
269
+ b64 = base64.b64encode(csv_file.encode()).decode()
270
+ download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
271
+ display(HTML(download_link))
272
+ else:
273
+ return output.getvalue()
274
+
275
+ def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
276
+ """Convert the results to a pandas DataFrame.
277
+
278
+ :param remove_prefix: Whether to remove the prefix from the column names.
279
+
280
+ >>> from edsl.results import Results
281
+ >>> r = Results.example()
282
+ >>> r.select('how_feeling').to_pandas()
283
+ answer.how_feeling
284
+ 0 OK
285
+ 1 Great
286
+ 2 Terrible
287
+ 3 OK
288
+ """
289
+ import pandas as pd
290
+
291
+ csv_string = self.to_csv(remove_prefix=remove_prefix)
292
+ csv_buffer = io.StringIO(csv_string)
293
+ df = pd.read_csv(csv_buffer)
294
+ df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
295
+ return df_sorted
296
+
297
+ def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
298
+ """Convert the results to a list of dictionaries, one per scenario.
299
+
300
+ :param remove_prefix: Whether to remove the prefix from the column names.
301
+
302
+ >>> from edsl.results import Results
303
+ >>> r = Results.example()
304
+ >>> r.select('how_feeling').to_scenario_list()
305
+ ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
306
+ """
307
+ from edsl import ScenarioList, Scenario
308
+
309
+ list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
310
+ return ScenarioList([Scenario(d) for d in list_of_dicts])
311
+
312
+ def to_agent_list(self, remove_prefix: bool = True):
313
+ from edsl import AgentList, Agent
314
+
315
+ list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
316
+ return AgentList([Agent(d) for d in list_of_dicts])
317
+
318
+ def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
319
+ """Convert the results to a list of dictionaries.
320
+
321
+ :param remove_prefix: Whether to remove the prefix from the column names.
322
+
323
+ >>> from edsl.results import Results
324
+ >>> r = Results.example()
325
+ >>> r.select('how_feeling').to_dicts()
326
+ [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]
327
+
328
+ """
329
+ list_of_keys = []
330
+ list_of_values = []
331
+ for entry in self:
332
+ key, values = list(entry.items())[0]
333
+ list_of_keys.append(key)
334
+ list_of_values.append(values)
335
+
336
+ if remove_prefix:
337
+ list_of_keys = [key.split(".")[-1] for key in list_of_keys]
338
+
339
+ list_of_dicts = []
340
+ for entries in zip(*list_of_values):
341
+ list_of_dicts.append(dict(zip(list_of_keys, entries)))
342
+
343
+ return list_of_dicts
344
+
345
+ def to_list(self, flatten=False, remove_none=False) -> list[list]:
346
+ """Convert the results to a list of lists.
347
+
348
+ >>> from edsl.results import Results
349
+ >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
350
+ Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
351
+
352
+ >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
353
+ [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]
354
+
355
+ >>> r = Results.example()
356
+ >>> r.select('how_feeling').to_list()
357
+ ['OK', 'Great', 'Terrible', 'OK']
358
+ """
359
+ if len(self.relevant_columns()) > 1 and flatten:
360
+ raise ValueError(
361
+ "Cannot flatten a list of lists when there are multiple columns selected."
362
+ )
363
+
364
+ if len(self.relevant_columns()) == 1:
365
+ # if only one 'column' is selected (which is typical for this method
366
+ list_to_return = list(self[0].values())[0]
367
+ else:
368
+ keys = self.relevant_columns()
369
+ data = self.to_dicts(remove_prefix=False)
370
+ list_to_return = []
371
+ for d in data:
372
+ list_to_return.append(tuple([d[key] for key in keys]))
373
+
374
+ if remove_none:
375
+ list_to_return = [item for item in list_to_return if item is not None]
376
+
377
+ if flatten:
378
+ new_list = []
379
+ for item in list_to_return:
380
+ if isinstance(item, list):
381
+ new_list.extend(item)
382
+ else:
383
+ new_list.append(item)
384
+ list_to_return = new_list
385
+
386
+ return list_to_return
387
+
388
+ def html(
389
+ self, filename: str = None, cta: str = "Open in browser", return_link=False
390
+ ):
391
+ import os
392
+ import tempfile
393
+ from edsl.utilities.utilities import is_notebook
394
+ from IPython.display import HTML, display
395
+ from edsl.utilities.utilities import is_notebook
396
+
397
+ df = self.to_pandas()
398
+
399
+ if filename is None:
400
+ current_directory = os.getcwd()
401
+ filename = tempfile.NamedTemporaryFile(
402
+ "w", delete=False, suffix=".html", dir=current_directory
403
+ ).name
404
+
405
+ with open(filename, "w") as f:
406
+ f.write(df.to_html())
407
+
408
+ if is_notebook():
409
+
410
+ html_url = f"/files/{filename}"
411
+ html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
412
+ display(HTML(html_link))
413
+ else:
414
+ print(f"Saved to {filename}")
415
+ import webbrowser
416
+ import os
417
+
418
+ webbrowser.open(f"file://{os.path.abspath(filename)}")
419
+
420
+ if return_link:
421
+ return filename
422
+
423
+ def tally(
424
+ self, *fields: Optional[str], top_n=None, output="dict"
425
+ ) -> Union[dict, "Dataset"]:
426
+ """Tally the values of a field or perform a cross-tab of multiple fields.
427
+
428
+ :param fields: The field(s) to tally, multiple fields for cross-tabulation.
429
+
430
+ >>> from edsl.results import Results
431
+ >>> r = Results.example()
432
+ >>> r.select('how_feeling').tally('answer.how_feeling')
433
+ {'OK': 2, 'Great': 1, 'Terrible': 1}
434
+ >>> r.select('how_feeling', 'period').tally('how_feeling', 'period')
435
+ {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
436
+ """
437
+ from collections import Counter
438
+
439
+ if len(fields) == 0:
440
+ fields = self.relevant_columns()
441
+
442
+ relevant_columns_without_prefix = [
443
+ column.split(".")[-1] for column in self.relevant_columns()
444
+ ]
445
+
446
+ if not all(
447
+ f in self.relevant_columns() or f in relevant_columns_without_prefix
448
+ for f in fields
449
+ ):
450
+ raise ValueError("One or more specified fields are not in the dataset.")
451
+
452
+ if len(fields) == 1:
453
+ field = fields[0]
454
+ values = self._key_to_value(field)
455
+ else:
456
+ values = list(zip(*(self._key_to_value(field) for field in fields)))
457
+
458
+ for value in values:
459
+ if isinstance(value, list):
460
+ value = tuple(value)
461
+
462
+ tally = dict(Counter(values))
463
+ sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
464
+ if top_n is not None:
465
+ sorted_tally = dict(list(sorted_tally.items())[:top_n])
466
+
467
+ import warnings
468
+ import textwrap
469
+ from edsl.results.Dataset import Dataset
470
+
471
+ if output == "dict":
472
+ warnings.warn(
473
+ textwrap.dedent(
474
+ """\
475
+ The default output from tally will change to Dataset in the future.
476
+ Use output='Dataset' to get the Dataset object for now.
477
+ """
478
+ )
479
+ )
480
+ return sorted_tally
481
+ elif output == "Dataset":
482
+ return Dataset(
483
+ [
484
+ {"value": list(sorted_tally.keys())},
485
+ {"count": list(sorted_tally.values())},
486
+ ]
487
+ )
488
+
489
+
490
+ if __name__ == "__main__":
491
+ import doctest
492
+
493
+ doctest.testmod(optionflags=doctest.ELLIPSIS)