edsl 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. edsl/Base.py +303 -303
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +49 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +858 -858
  7. edsl/agents/AgentList.py +362 -362
  8. edsl/agents/Invigilator.py +222 -222
  9. edsl/agents/InvigilatorBase.py +284 -284
  10. edsl/agents/PromptConstructor.py +353 -353
  11. edsl/agents/__init__.py +3 -3
  12. edsl/agents/descriptors.py +99 -99
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +279 -279
  26. edsl/config.py +149 -149
  27. edsl/conversation/Conversation.py +290 -290
  28. edsl/conversation/car_buying.py +58 -58
  29. edsl/conversation/chips.py +95 -95
  30. edsl/conversation/mug_negotiation.py +81 -81
  31. edsl/conversation/next_speaker_utilities.py +93 -93
  32. edsl/coop/PriceFetcher.py +54 -54
  33. edsl/coop/__init__.py +2 -2
  34. edsl/coop/coop.py +961 -961
  35. edsl/coop/utils.py +131 -131
  36. edsl/data/Cache.py +530 -530
  37. edsl/data/CacheEntry.py +228 -228
  38. edsl/data/CacheHandler.py +149 -149
  39. edsl/data/RemoteCacheSync.py +97 -97
  40. edsl/data/SQLiteDict.py +292 -292
  41. edsl/data/__init__.py +4 -4
  42. edsl/data/orm.py +10 -10
  43. edsl/data_transfer_models.py +73 -73
  44. edsl/enums.py +173 -173
  45. edsl/exceptions/BaseException.py +21 -21
  46. edsl/exceptions/__init__.py +54 -54
  47. edsl/exceptions/agents.py +42 -42
  48. edsl/exceptions/cache.py +5 -5
  49. edsl/exceptions/configuration.py +16 -16
  50. edsl/exceptions/coop.py +10 -10
  51. edsl/exceptions/data.py +14 -14
  52. edsl/exceptions/general.py +34 -34
  53. edsl/exceptions/jobs.py +33 -33
  54. edsl/exceptions/language_models.py +63 -63
  55. edsl/exceptions/prompts.py +15 -15
  56. edsl/exceptions/questions.py +91 -91
  57. edsl/exceptions/results.py +29 -29
  58. edsl/exceptions/scenarios.py +22 -22
  59. edsl/exceptions/surveys.py +37 -37
  60. edsl/inference_services/AnthropicService.py +87 -87
  61. edsl/inference_services/AwsBedrock.py +120 -120
  62. edsl/inference_services/AzureAI.py +217 -217
  63. edsl/inference_services/DeepInfraService.py +18 -18
  64. edsl/inference_services/GoogleService.py +156 -156
  65. edsl/inference_services/GroqService.py +20 -20
  66. edsl/inference_services/InferenceServiceABC.py +147 -147
  67. edsl/inference_services/InferenceServicesCollection.py +97 -97
  68. edsl/inference_services/MistralAIService.py +123 -123
  69. edsl/inference_services/OllamaService.py +18 -18
  70. edsl/inference_services/OpenAIService.py +224 -224
  71. edsl/inference_services/TestService.py +89 -89
  72. edsl/inference_services/TogetherAIService.py +170 -170
  73. edsl/inference_services/models_available_cache.py +118 -118
  74. edsl/inference_services/rate_limits_cache.py +25 -25
  75. edsl/inference_services/registry.py +39 -39
  76. edsl/inference_services/write_available.py +10 -10
  77. edsl/jobs/Answers.py +56 -56
  78. edsl/jobs/Jobs.py +1358 -1358
  79. edsl/jobs/__init__.py +1 -1
  80. edsl/jobs/buckets/BucketCollection.py +63 -63
  81. edsl/jobs/buckets/ModelBuckets.py +65 -65
  82. edsl/jobs/buckets/TokenBucket.py +251 -251
  83. edsl/jobs/interviews/Interview.py +661 -661
  84. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  85. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  86. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  87. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  88. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  89. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  90. edsl/jobs/interviews/ReportErrors.py +66 -66
  91. edsl/jobs/interviews/interview_status_enum.py +9 -9
  92. edsl/jobs/runners/JobsRunnerAsyncio.py +361 -361
  93. edsl/jobs/runners/JobsRunnerStatus.py +332 -332
  94. edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
  95. edsl/jobs/tasks/TaskCreators.py +64 -64
  96. edsl/jobs/tasks/TaskHistory.py +451 -451
  97. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  98. edsl/jobs/tasks/task_status_enum.py +163 -163
  99. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  100. edsl/jobs/tokens/TokenUsage.py +34 -34
  101. edsl/language_models/KeyLookup.py +30 -30
  102. edsl/language_models/LanguageModel.py +708 -708
  103. edsl/language_models/ModelList.py +109 -109
  104. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  105. edsl/language_models/__init__.py +3 -3
  106. edsl/language_models/fake_openai_call.py +15 -15
  107. edsl/language_models/fake_openai_service.py +61 -61
  108. edsl/language_models/registry.py +137 -137
  109. edsl/language_models/repair.py +156 -156
  110. edsl/language_models/unused/ReplicateBase.py +83 -83
  111. edsl/language_models/utilities.py +64 -64
  112. edsl/notebooks/Notebook.py +258 -258
  113. edsl/notebooks/__init__.py +1 -1
  114. edsl/prompts/Prompt.py +357 -357
  115. edsl/prompts/__init__.py +2 -2
  116. edsl/questions/AnswerValidatorMixin.py +289 -289
  117. edsl/questions/QuestionBase.py +660 -660
  118. edsl/questions/QuestionBaseGenMixin.py +161 -161
  119. edsl/questions/QuestionBasePromptsMixin.py +217 -217
  120. edsl/questions/QuestionBudget.py +227 -227
  121. edsl/questions/QuestionCheckBox.py +359 -359
  122. edsl/questions/QuestionExtract.py +183 -183
  123. edsl/questions/QuestionFreeText.py +114 -114
  124. edsl/questions/QuestionFunctional.py +166 -166
  125. edsl/questions/QuestionList.py +231 -231
  126. edsl/questions/QuestionMultipleChoice.py +286 -286
  127. edsl/questions/QuestionNumerical.py +153 -153
  128. edsl/questions/QuestionRank.py +324 -324
  129. edsl/questions/Quick.py +41 -41
  130. edsl/questions/RegisterQuestionsMeta.py +71 -71
  131. edsl/questions/ResponseValidatorABC.py +174 -174
  132. edsl/questions/SimpleAskMixin.py +73 -73
  133. edsl/questions/__init__.py +26 -26
  134. edsl/questions/compose_questions.py +98 -98
  135. edsl/questions/decorators.py +21 -21
  136. edsl/questions/derived/QuestionLikertFive.py +76 -76
  137. edsl/questions/derived/QuestionLinearScale.py +87 -87
  138. edsl/questions/derived/QuestionTopK.py +93 -93
  139. edsl/questions/derived/QuestionYesNo.py +82 -82
  140. edsl/questions/descriptors.py +413 -413
  141. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  142. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  143. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  144. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  145. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  146. edsl/questions/prompt_templates/question_list.jinja +17 -17
  147. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  148. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  149. edsl/questions/question_registry.py +147 -147
  150. edsl/questions/settings.py +12 -12
  151. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  152. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  153. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  154. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  155. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  156. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  157. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  158. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  159. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  160. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  161. edsl/questions/templates/list/question_presentation.jinja +5 -5
  162. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  163. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  164. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  165. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  166. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  167. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  168. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  169. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  170. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  171. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  172. edsl/results/Dataset.py +293 -293
  173. edsl/results/DatasetExportMixin.py +717 -717
  174. edsl/results/DatasetTree.py +145 -145
  175. edsl/results/Result.py +456 -456
  176. edsl/results/Results.py +1071 -1071
  177. edsl/results/ResultsDBMixin.py +238 -238
  178. edsl/results/ResultsExportMixin.py +43 -43
  179. edsl/results/ResultsFetchMixin.py +33 -33
  180. edsl/results/ResultsGGMixin.py +121 -121
  181. edsl/results/ResultsToolsMixin.py +98 -98
  182. edsl/results/Selector.py +135 -135
  183. edsl/results/__init__.py +2 -2
  184. edsl/results/tree_explore.py +115 -115
  185. edsl/scenarios/FileStore.py +458 -458
  186. edsl/scenarios/Scenario.py +544 -544
  187. edsl/scenarios/ScenarioHtmlMixin.py +64 -64
  188. edsl/scenarios/ScenarioList.py +1112 -1112
  189. edsl/scenarios/ScenarioListExportMixin.py +52 -52
  190. edsl/scenarios/ScenarioListPdfMixin.py +261 -261
  191. edsl/scenarios/__init__.py +4 -4
  192. edsl/shared.py +1 -1
  193. edsl/study/ObjectEntry.py +173 -173
  194. edsl/study/ProofOfWork.py +113 -113
  195. edsl/study/SnapShot.py +80 -80
  196. edsl/study/Study.py +528 -528
  197. edsl/study/__init__.py +4 -4
  198. edsl/surveys/DAG.py +148 -148
  199. edsl/surveys/Memory.py +31 -31
  200. edsl/surveys/MemoryPlan.py +244 -244
  201. edsl/surveys/Rule.py +326 -326
  202. edsl/surveys/RuleCollection.py +387 -387
  203. edsl/surveys/Survey.py +1787 -1787
  204. edsl/surveys/SurveyCSS.py +261 -261
  205. edsl/surveys/SurveyExportMixin.py +259 -259
  206. edsl/surveys/SurveyFlowVisualizationMixin.py +121 -121
  207. edsl/surveys/SurveyQualtricsImport.py +284 -284
  208. edsl/surveys/__init__.py +3 -3
  209. edsl/surveys/base.py +53 -53
  210. edsl/surveys/descriptors.py +56 -56
  211. edsl/surveys/instructions/ChangeInstruction.py +49 -49
  212. edsl/surveys/instructions/Instruction.py +53 -53
  213. edsl/surveys/instructions/InstructionCollection.py +77 -77
  214. edsl/templates/error_reporting/base.html +23 -23
  215. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  216. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  217. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  218. edsl/templates/error_reporting/interview_details.html +115 -115
  219. edsl/templates/error_reporting/interviews.html +9 -9
  220. edsl/templates/error_reporting/overview.html +4 -4
  221. edsl/templates/error_reporting/performance_plot.html +1 -1
  222. edsl/templates/error_reporting/report.css +73 -73
  223. edsl/templates/error_reporting/report.html +117 -117
  224. edsl/templates/error_reporting/report.js +25 -25
  225. edsl/tools/__init__.py +1 -1
  226. edsl/tools/clusters.py +192 -192
  227. edsl/tools/embeddings.py +27 -27
  228. edsl/tools/embeddings_plotting.py +118 -118
  229. edsl/tools/plotting.py +112 -112
  230. edsl/tools/summarize.py +18 -18
  231. edsl/utilities/SystemInfo.py +28 -28
  232. edsl/utilities/__init__.py +22 -22
  233. edsl/utilities/ast_utilities.py +25 -25
  234. edsl/utilities/data/Registry.py +6 -6
  235. edsl/utilities/data/__init__.py +1 -1
  236. edsl/utilities/data/scooter_results.json +1 -1
  237. edsl/utilities/decorators.py +77 -77
  238. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  239. edsl/utilities/interface.py +627 -627
  240. edsl/utilities/naming_utilities.py +263 -263
  241. edsl/utilities/repair_functions.py +28 -28
  242. edsl/utilities/restricted_python.py +70 -70
  243. edsl/utilities/utilities.py +409 -409
  244. {edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev3.dist-info}/LICENSE +21 -21
  245. {edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev3.dist-info}/METADATA +1 -1
  246. edsl-0.1.38.dev3.dist-info/RECORD +269 -0
  247. edsl-0.1.38.dev2.dist-info/RECORD +0 -269
  248. {edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev3.dist-info}/WHEEL +0 -0
@@ -1,717 +1,717 @@
1
- """Mixin class for exporting results."""
2
-
3
- import base64
4
- import csv
5
- import io
6
- import html
7
- from typing import Optional
8
-
9
- from typing import Literal, Optional, Union, List
10
-
11
-
12
- class DatasetExportMixin:
13
- """Mixin class for exporting Dataset objects."""
14
-
15
- def relevant_columns(
16
- self, data_type: Optional[str] = None, remove_prefix=False
17
- ) -> list:
18
- """Return the set of keys that are present in the dataset.
19
-
20
- :param data_type: The data type to filter by.
21
- :param remove_prefix: Whether to remove the prefix from the column names.
22
-
23
- >>> from edsl.results.Dataset import Dataset
24
- >>> d = Dataset([{'a.b':[1,2,3,4]}])
25
- >>> d.relevant_columns()
26
- ['a.b']
27
-
28
- >>> d.relevant_columns(remove_prefix=True)
29
- ['b']
30
-
31
- >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
32
- >>> d.relevant_columns()
33
- ['a', 'b']
34
-
35
- >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
36
- ['answer.how_feeling', 'answer.how_feeling_yesterday']
37
-
38
- >>> from edsl.results import Results
39
- >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
40
- ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
41
-
42
- >>> Results.example().relevant_columns(data_type = "flimflam")
43
- Traceback (most recent call last):
44
- ...
45
- ValueError: No columns found for data type: flimflam. Available data types are: ...
46
- """
47
- columns = [list(x.keys())[0] for x in self]
48
- if remove_prefix:
49
- columns = [column.split(".")[-1] for column in columns]
50
-
51
- def get_data_type(column):
52
- if "." in column:
53
- return column.split(".")[0]
54
- else:
55
- return None
56
-
57
- if data_type:
58
- all_columns = columns[:]
59
- columns = [
60
- column for column in columns if get_data_type(column) == data_type
61
- ]
62
- if len(columns) == 0:
63
- all_data_types = sorted(
64
- list(set(get_data_type(column) for column in all_columns))
65
- )
66
- raise ValueError(
67
- f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
68
- )
69
-
70
- return columns
71
-
72
- def num_observations(self):
73
- """Return the number of observations in the dataset.
74
-
75
- >>> from edsl.results import Results
76
- >>> Results.example().num_observations()
77
- 4
78
- """
79
- _num_observations = None
80
- for entry in self:
81
- key, values = list(entry.items())[0]
82
- if _num_observations is None:
83
- _num_observations = len(values)
84
- else:
85
- if len(values) != _num_observations:
86
- raise ValueError(
87
- "The number of observations is not consistent across columns."
88
- )
89
-
90
- return _num_observations
91
-
92
- def _make_tabular(
93
- self, remove_prefix: bool, pretty_labels: Optional[dict] = None
94
- ) -> tuple[list, List[list]]:
95
- """Turn the results into a tabular format.
96
-
97
- :param remove_prefix: Whether to remove the prefix from the column names.
98
-
99
- >>> from edsl.results import Results
100
- >>> r = Results.example()
101
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
102
- (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
103
-
104
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
105
- (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
106
- """
107
-
108
- def create_dict_from_list_of_dicts(list_of_dicts):
109
- for entry in list_of_dicts:
110
- key, list_of_values = list(entry.items())[0]
111
- yield key, list_of_values
112
-
113
- tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
114
-
115
- full_header = [list(x.keys())[0] for x in self]
116
-
117
- rows = []
118
- for i in range(self.num_observations()):
119
- row = [tabular_repr[h][i] for h in full_header]
120
- rows.append(row)
121
-
122
- if remove_prefix:
123
- header = [h.split(".")[-1] for h in full_header]
124
- else:
125
- header = full_header
126
-
127
- if pretty_labels is not None:
128
- header = [pretty_labels.get(h, h) for h in header]
129
-
130
- return header, rows
131
-
132
- def print_long(self):
133
- """Print the results in a long format.
134
- >>> from edsl.results import Results
135
- >>> r = Results.example()
136
- >>> r.select('how_feeling').print_long()
137
- answer.how_feeling: OK
138
- answer.how_feeling: Great
139
- answer.how_feeling: Terrible
140
- answer.how_feeling: OK
141
- """
142
- for entry in self:
143
- key, list_of_values = list(entry.items())[0]
144
- for value in list_of_values:
145
- print(f"{key}: {value}")
146
-
147
- def print(
148
- self,
149
- pretty_labels: Optional[dict] = None,
150
- filename: Optional[str] = None,
151
- format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
152
- interactive: bool = False,
153
- split_at_dot: bool = True,
154
- max_rows=None,
155
- tee=False,
156
- iframe=False,
157
- iframe_height: int = 200,
158
- iframe_width: int = 600,
159
- web=False,
160
- return_string: bool = False,
161
- ) -> Union[None, str, "Results"]:
162
- """Print the results in a pretty format.
163
-
164
- :param pretty_labels: A dictionary of pretty labels for the columns.
165
- :param filename: The filename to save the results to.
166
- :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
167
- :param interactive: Whether to print the results interactively in a Jupyter notebook.
168
- :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
169
- :param max_rows: The maximum number of rows to print.
170
- :param tee: Whether to return the dataset.
171
- :param iframe: Whether to display the table in an iframe.
172
- :param iframe_height: The height of the iframe.
173
- :param iframe_width: The width of the iframe.
174
- :param web: Whether to display the table in a web browser.
175
- :param return_string: Whether to return the output as a string instead of printing.
176
-
177
- :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
178
-
179
- Example: Print in rich format at the terminal
180
-
181
- >>> from edsl.results import Results
182
- >>> r = Results.example()
183
- >>> r.select('how_feeling').print(format = "rich")
184
- ┏━━━━━━━━━━━━━━┓
185
- ┃ answer ┃
186
- ┃ .how_feeling ┃
187
- ┡━━━━━━━━━━━━━━┩
188
- │ OK │
189
- ├──────────────┤
190
- │ Great │
191
- ├──────────────┤
192
- │ Terrible │
193
- ├──────────────┤
194
- │ OK │
195
- └──────────────┘
196
-
197
- >>> r = Results.example()
198
- >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
199
- ┏━━━━━━━━━━━━━━┓
200
- ┃ answer ┃
201
- ┃ .how_feeling ┃
202
- ┡━━━━━━━━━━━━━━┩
203
- │ OK │
204
- ├──────────────┤
205
- │ Great │
206
- └──────────────┘
207
- >>> r2
208
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
209
-
210
- >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
211
- ┏━━━━━━━━━━━━━━┓
212
- ┃ answer ┃
213
- ┃ .how_feeling ┃
214
- ┡━━━━━━━━━━━━━━┩
215
- │ OK │
216
- ├──────────────┤
217
- │ Great │
218
- └──────────────┘
219
-
220
- >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
221
- ┏━━━━━━━━━━━━━━━━━━━━┓
222
- ┃ answer.how_feeling ┃
223
- ┡━━━━━━━━━━━━━━━━━━━━┩
224
- │ OK │
225
- ├────────────────────┤
226
- │ Great │
227
- ├────────────────────┤
228
- │ Terrible │
229
- ├────────────────────┤
230
- │ OK │
231
- └────────────────────┘
232
-
233
- Example: using the pretty_labels parameter
234
-
235
- >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
236
- ┏━━━━━━━━━━━━━━━━━━━━━┓
237
- ┃ How are you feeling ┃
238
- ┡━━━━━━━━━━━━━━━━━━━━━┩
239
- │ OK │
240
- ├─────────────────────┤
241
- │ Great │
242
- ├─────────────────────┤
243
- │ Terrible │
244
- ├─────────────────────┤
245
- │ OK │
246
- └─────────────────────┘
247
-
248
- Example: printing in markdown format
249
-
250
- >>> r.select('how_feeling').print(format='markdown')
251
- | answer.how_feeling |
252
- |--|
253
- | OK |
254
- | Great |
255
- | Terrible |
256
- | OK |
257
- ...
258
-
259
- >>> r.select('how_feeling').print(format='latex')
260
- \\begin{tabular}{l}
261
- ...
262
- \\end{tabular}
263
- <BLANKLINE>
264
- """
265
- from IPython.display import HTML, display
266
- from edsl.utilities.utilities import is_notebook
267
- import io
268
- import sys
269
-
270
- def _determine_format(format):
271
- if format is None:
272
- if is_notebook():
273
- format = "html"
274
- else:
275
- format = "rich"
276
- if format not in ["rich", "html", "markdown", "latex"]:
277
- raise ValueError(
278
- "format must be one of 'rich', 'html', 'markdown', or 'latex'."
279
- )
280
-
281
- return format
282
-
283
- format = _determine_format(format)
284
-
285
- if pretty_labels is None:
286
- pretty_labels = {}
287
-
288
- if pretty_labels != {}: # only split at dot if there are no pretty labels
289
- split_at_dot = False
290
-
291
- def _create_data():
292
- for index, entry in enumerate(self):
293
- key, list_of_values = list(entry.items())[0]
294
- yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
295
-
296
- new_data = list(_create_data())
297
-
298
- # Capture output if return_string is True
299
- if return_string:
300
- old_stdout = sys.stdout
301
- sys.stdout = io.StringIO()
302
-
303
- output = None
304
-
305
- if format == "rich":
306
- from edsl.utilities.interface import print_dataset_with_rich
307
-
308
- output = print_dataset_with_rich(
309
- new_data, filename=filename, split_at_dot=split_at_dot
310
- )
311
- elif format == "markdown":
312
- from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
313
-
314
- output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
315
- elif format == "latex":
316
- df = self.to_pandas()
317
- df.columns = [col.replace("_", " ") for col in df.columns]
318
- latex_string = df.to_latex(index=False)
319
-
320
- if filename is not None:
321
- with open(filename, "w") as f:
322
- f.write(latex_string)
323
- else:
324
- print(latex_string)
325
- output = latex_string
326
- elif format == "html":
327
- from edsl.utilities.interface import print_list_of_dicts_as_html_table
328
-
329
- html_source = print_list_of_dicts_as_html_table(
330
- new_data, interactive=interactive
331
- )
332
-
333
- if iframe:
334
- iframe = f""""
335
- <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
336
- """
337
- display(HTML(iframe))
338
- elif is_notebook():
339
- display(HTML(html_source))
340
- else:
341
- from edsl.utilities.interface import view_html
342
-
343
- view_html(html_source)
344
-
345
- output = html_source
346
-
347
- # Restore stdout and get captured output if return_string is True
348
- if return_string:
349
- captured_output = sys.stdout.getvalue()
350
- sys.stdout = old_stdout
351
- return captured_output or output
352
-
353
- if tee:
354
- return self
355
-
356
- return None
357
-
358
- def to_csv(
359
- self,
360
- filename: Optional[str] = None,
361
- remove_prefix: bool = False,
362
- download_link: bool = False,
363
- pretty_labels: Optional[dict] = None,
364
- ):
365
- """Export the results to a CSV file.
366
-
367
- :param filename: The filename to save the CSV file to.
368
- :param remove_prefix: Whether to remove the prefix from the column names.
369
- :param download_link: Whether to display a download link in a Jupyter notebook.
370
-
371
- Example:
372
-
373
- >>> from edsl.results import Results
374
- >>> r = Results.example()
375
- >>> r.select('how_feeling').to_csv()
376
- 'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
377
-
378
- >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
379
- 'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
380
-
381
- >>> import tempfile
382
- >>> filename = tempfile.NamedTemporaryFile(delete=False).name
383
- >>> r.select('how_feeling').to_csv(filename = filename)
384
- >>> import os
385
- >>> import csv
386
- >>> with open(filename, newline='') as f:
387
- ... reader = csv.reader(f)
388
- ... for row in reader:
389
- ... print(row)
390
- ['answer.how_feeling']
391
- ['OK']
392
- ['Great']
393
- ['Terrible']
394
- ['OK']
395
-
396
- """
397
- if pretty_labels is None:
398
- pretty_labels = {}
399
- header, rows = self._make_tabular(
400
- remove_prefix=remove_prefix, pretty_labels=pretty_labels
401
- )
402
-
403
- if filename is not None:
404
- with open(filename, "w") as f:
405
- writer = csv.writer(f)
406
- writer.writerow(header)
407
- writer.writerows(rows)
408
- else:
409
- output = io.StringIO()
410
- writer = csv.writer(output)
411
- writer.writerow(header)
412
- writer.writerows(rows)
413
-
414
- if download_link:
415
- from IPython.display import HTML, display
416
-
417
- csv_file = output.getvalue()
418
- b64 = base64.b64encode(csv_file.encode()).decode()
419
- download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
420
- display(HTML(download_link))
421
- else:
422
- return output.getvalue()
423
-
424
- def download_link(self, pretty_labels: Optional[dict] = None) -> str:
425
- """Return a download link for the results.
426
-
427
- :param pretty_labels: A dictionary of pretty labels for the columns.
428
-
429
- >>> from edsl.results import Results
430
- >>> r = Results.example()
431
- >>> r.select('how_feeling').download_link()
432
- '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
433
- """
434
- import base64
435
-
436
- csv_string = self.to_csv(pretty_labels=pretty_labels)
437
- b64 = base64.b64encode(csv_string.encode()).decode()
438
- return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
439
-
440
- def to_pandas(
441
- self, remove_prefix: bool = False, lists_as_strings=False
442
- ) -> "DataFrame":
443
- """Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
444
-
445
- :param remove_prefix: Whether to remove the prefix from the column names.
446
-
447
- """
448
- return self._to_pandas_strings(remove_prefix)
449
- # if lists_as_strings:
450
- # return self._to_pandas_strings(remove_prefix=remove_prefix)
451
-
452
- # import pandas as pd
453
-
454
- # df = pd.DataFrame(self.data)
455
-
456
- # if remove_prefix:
457
- # # Optionally remove prefixes from column names
458
- # df.columns = [col.split(".")[-1] for col in df.columns]
459
-
460
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
461
- # return df_sorted
462
-
463
- def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
464
- """Convert the results to a pandas DataFrame.
465
-
466
- :param remove_prefix: Whether to remove the prefix from the column names.
467
-
468
- >>> from edsl.results import Results
469
- >>> r = Results.example()
470
- >>> r.select('how_feeling').to_pandas()
471
- answer.how_feeling
472
- 0 OK
473
- 1 Great
474
- 2 Terrible
475
- 3 OK
476
- """
477
-
478
- import pandas as pd
479
-
480
- csv_string = self.to_csv(remove_prefix=remove_prefix)
481
- csv_buffer = io.StringIO(csv_string)
482
- df = pd.read_csv(csv_buffer)
483
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
484
- return df
485
-
486
- def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
487
- """Convert the results to a list of dictionaries, one per scenario.
488
-
489
- :param remove_prefix: Whether to remove the prefix from the column names.
490
-
491
- >>> from edsl.results import Results
492
- >>> r = Results.example()
493
- >>> r.select('how_feeling').to_scenario_list()
494
- ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
495
- """
496
- from edsl import ScenarioList, Scenario
497
-
498
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
499
- scenarios = []
500
- for d in list_of_dicts:
501
- scenarios.append(Scenario(d))
502
- return ScenarioList(scenarios)
503
- # return ScenarioList([Scenario(d) for d in list_of_dicts])
504
-
505
- def to_agent_list(self, remove_prefix: bool = True):
506
- """Convert the results to a list of dictionaries, one per agent.
507
-
508
- :param remove_prefix: Whether to remove the prefix from the column names.
509
-
510
- >>> from edsl.results import Results
511
- >>> r = Results.example()
512
- >>> r.select('how_feeling').to_agent_list()
513
- AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
514
- """
515
- from edsl import AgentList, Agent
516
-
517
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
518
- return AgentList([Agent(d) for d in list_of_dicts])
519
-
520
- def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
521
- """Convert the results to a list of dictionaries.
522
-
523
- :param remove_prefix: Whether to remove the prefix from the column names.
524
-
525
- >>> from edsl.results import Results
526
- >>> r = Results.example()
527
- >>> r.select('how_feeling').to_dicts()
528
- [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]
529
-
530
- """
531
- list_of_keys = []
532
- list_of_values = []
533
- for entry in self:
534
- key, values = list(entry.items())[0]
535
- list_of_keys.append(key)
536
- list_of_values.append(values)
537
-
538
- if remove_prefix:
539
- list_of_keys = [key.split(".")[-1] for key in list_of_keys]
540
-
541
- list_of_dicts = []
542
- for entries in zip(*list_of_values):
543
- list_of_dicts.append(dict(zip(list_of_keys, entries)))
544
-
545
- return list_of_dicts
546
-
547
- def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
548
- """Convert the results to a list of lists.
549
-
550
- :param flatten: Whether to flatten the list of lists.
551
- :param remove_none: Whether to remove None values from the list.
552
-
553
- >>> from edsl.results import Results
554
- >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
555
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
556
-
557
- >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
558
- [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]
559
-
560
- >>> r = Results.example()
561
- >>> r.select('how_feeling').to_list()
562
- ['OK', 'Great', 'Terrible', 'OK']
563
-
564
- >>> from edsl.results.Dataset import Dataset
565
- >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
566
- [1, 9, 2, 3, 4]
567
-
568
- >>> from edsl.results.Dataset import Dataset
569
- >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
570
- Traceback (most recent call last):
571
- ...
572
- ValueError: Cannot flatten a list of lists when there are multiple columns selected.
573
-
574
-
575
- """
576
- if len(self.relevant_columns()) > 1 and flatten:
577
- raise ValueError(
578
- "Cannot flatten a list of lists when there are multiple columns selected."
579
- )
580
-
581
- if len(self.relevant_columns()) == 1:
582
- # if only one 'column' is selected (which is typical for this method
583
- list_to_return = list(self[0].values())[0]
584
- else:
585
- keys = self.relevant_columns()
586
- data = self.to_dicts(remove_prefix=False)
587
- list_to_return = []
588
- for d in data:
589
- list_to_return.append(tuple([d[key] for key in keys]))
590
-
591
- if remove_none:
592
- list_to_return = [item for item in list_to_return if item is not None]
593
-
594
- if flatten:
595
- new_list = []
596
- for item in list_to_return:
597
- if isinstance(item, list):
598
- new_list.extend(item)
599
- else:
600
- new_list.append(item)
601
- list_to_return = new_list
602
-
603
- return list_to_return
604
-
605
- def html(
606
- self,
607
- filename: Optional[str] = None,
608
- cta: str = "Open in browser",
609
- return_link: bool = False,
610
- ):
611
- import os
612
- import tempfile
613
- from edsl.utilities.utilities import is_notebook
614
- from IPython.display import HTML, display
615
- from edsl.utilities.utilities import is_notebook
616
-
617
- df = self.to_pandas()
618
-
619
- if filename is None:
620
- current_directory = os.getcwd()
621
- filename = tempfile.NamedTemporaryFile(
622
- "w", delete=False, suffix=".html", dir=current_directory
623
- ).name
624
-
625
- with open(filename, "w") as f:
626
- f.write(df.to_html())
627
-
628
- if is_notebook():
629
- html_url = f"/files/{filename}"
630
- html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
631
- display(HTML(html_link))
632
- else:
633
- print(f"Saved to {filename}")
634
- import webbrowser
635
- import os
636
-
637
- webbrowser.open(f"file://{os.path.abspath(filename)}")
638
-
639
- if return_link:
640
- return filename
641
-
642
- def tally(
643
- self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
644
- ) -> Union[dict, "Dataset"]:
645
- """Tally the values of a field or perform a cross-tab of multiple fields.
646
-
647
- :param fields: The field(s) to tally, multiple fields for cross-tabulation.
648
-
649
- >>> from edsl.results import Results
650
- >>> r = Results.example()
651
- >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
652
- {'OK': 2, 'Great': 1, 'Terrible': 1}
653
- >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
654
- Dataset([{'value': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
655
- >>> r.select('how_feeling', 'period').tally('how_feeling', 'period', output = "dict")
656
- {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
657
- """
658
- from collections import Counter
659
-
660
- if len(fields) == 0:
661
- fields = self.relevant_columns()
662
-
663
- relevant_columns_without_prefix = [
664
- column.split(".")[-1] for column in self.relevant_columns()
665
- ]
666
-
667
- # breakpoint()
668
-
669
- if not all(
670
- f in self.relevant_columns() or f in relevant_columns_without_prefix
671
- for f in fields
672
- ):
673
- raise ValueError("One or more specified fields are not in the dataset.")
674
-
675
- if len(fields) == 1:
676
- field = fields[0]
677
- values = self._key_to_value(field)
678
- else:
679
- values = list(zip(*(self._key_to_value(field) for field in fields)))
680
-
681
- for value in values:
682
- if isinstance(value, list):
683
- value = tuple(value)
684
-
685
- tally = dict(Counter(values))
686
- sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
687
- if top_n is not None:
688
- sorted_tally = dict(list(sorted_tally.items())[:top_n])
689
-
690
- import warnings
691
- import textwrap
692
- from edsl.results.Dataset import Dataset
693
-
694
- if output == "dict":
695
- # why did I do this?
696
- warnings.warn(
697
- textwrap.dedent(
698
- """\
699
- The default output from tally will change to Dataset in the future.
700
- Use output='Dataset' to get the Dataset object for now.
701
- """
702
- )
703
- )
704
- return sorted_tally
705
- elif output == "Dataset":
706
- return Dataset(
707
- [
708
- {"value": list(sorted_tally.keys())},
709
- {"count": list(sorted_tally.values())},
710
- ]
711
- )
712
-
713
-
714
- if __name__ == "__main__":
715
- import doctest
716
-
717
- doctest.testmod(optionflags=doctest.ELLIPSIS)
1
+ """Mixin class for exporting results."""
2
+
3
+ import base64
4
+ import csv
5
+ import io
6
+ import html
7
+ from typing import Optional
8
+
9
+ from typing import Literal, Optional, Union, List
10
+
11
+
12
class DatasetExportMixin:
    """Mixin providing export, conversion and display helpers for Dataset objects.

    The host class is expected to behave like a sequence of single-item
    dictionaries, each mapping one column name (typically ``"<type>.<name>"``)
    to the list of values of that column.  It must expose the same entries as
    ``self.data`` (used by :meth:`_make_tabular`) and provide
    ``_key_to_value`` (used by :meth:`tally`).
    """

    @staticmethod
    def _csv_download_anchor(csv_string: str) -> str:
        """Return an HTML anchor embedding ``csv_string`` as a base64 data URI."""
        b64 = base64.b64encode(csv_string.encode()).decode()
        return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'

    @staticmethod
    def _hashable(value):
        """Return ``value`` with lists (also nested ones) converted to tuples."""
        if isinstance(value, (list, tuple)):
            return tuple(DatasetExportMixin._hashable(item) for item in value)
        return value

    def relevant_columns(
        self, data_type: Optional[str] = None, remove_prefix=False
    ) -> list:
        """Return the column names that are present in the dataset.

        :param data_type: Keep only the columns whose prefix (the text before the first dot) equals this value.
        :param remove_prefix: Whether to remove the prefix from the column names.
        :raises ValueError: If ``data_type`` is given and no column has that prefix.

        >>> from edsl.results.Dataset import Dataset
        >>> d = Dataset([{'a.b':[1,2,3,4]}])
        >>> d.relevant_columns()
        ['a.b']

        >>> d.relevant_columns(remove_prefix=True)
        ['b']

        >>> d.relevant_columns(data_type='a', remove_prefix=True)
        ['b']

        >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
        >>> d.relevant_columns()
        ['a', 'b']

        >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
        ['answer.how_feeling', 'answer.how_feeling_yesterday']

        >>> from edsl.results import Results
        >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
        ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']

        >>> Results.example().relevant_columns(data_type = "flimflam")
        Traceback (most recent call last):
        ...
        ValueError: No columns found for data type: flimflam. Available data types are: ...
        """
        full_names = [list(entry.keys())[0] for entry in self]

        def get_data_type(column):
            return column.split(".")[0] if "." in column else None

        if data_type:
            # Filter on the full names: once the prefix has been stripped the
            # data type of a column can no longer be recovered.
            selected = [
                column for column in full_names if get_data_type(column) == data_type
            ]
            if not selected:
                # Columns without a prefix report None, which cannot be
                # compared with str, so sort with an explicit key.
                all_data_types = sorted(
                    {get_data_type(column) for column in full_names},
                    key=lambda name: "" if name is None else name,
                )
                raise ValueError(
                    f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
                )
        else:
            selected = full_names

        if remove_prefix:
            return [column.split(".")[-1] for column in selected]
        return selected

    def num_observations(self):
        """Return the number of observations in the dataset.

        Returns ``None`` when the dataset has no columns.

        :raises ValueError: If the columns do not all have the same length.

        >>> from edsl.results import Results
        >>> Results.example().num_observations()
        4
        """
        count = None
        for entry in self:
            _, values = list(entry.items())[0]
            if count is None:
                count = len(values)
            elif len(values) != count:
                raise ValueError(
                    "The number of observations is not consistent across columns."
                )
        return count

    def _make_tabular(
        self, remove_prefix: bool, pretty_labels: Optional[dict] = None
    ) -> tuple[list, List[list]]:
        """Turn the results into a tabular format: a header and a list of rows.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param pretty_labels: Optional replacement labels, looked up with the header as it stands after the optional prefix removal.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
        (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])

        >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
        (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
        """
        values_by_column = {}
        for entry in self.data:
            key, list_of_values = list(entry.items())[0]
            values_by_column[key] = list_of_values

        full_header = [list(entry.keys())[0] for entry in self]

        # num_observations() is None for a dataset without columns; treat
        # that as zero rows instead of failing inside range().
        row_count = self.num_observations() or 0
        rows = [
            [values_by_column[name][i] for name in full_header]
            for i in range(row_count)
        ]

        if remove_prefix:
            header = [name.split(".")[-1] for name in full_header]
        else:
            header = full_header

        if pretty_labels is not None:
            header = [pretty_labels.get(name, name) for name in header]

        return header, rows

    def print_long(self):
        """Print the results in a long format, one ``column: value`` line per value.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print_long()
        answer.how_feeling: OK
        answer.how_feeling: Great
        answer.how_feeling: Terrible
        answer.how_feeling: OK
        """
        for entry in self:
            key, list_of_values = list(entry.items())[0]
            for value in list_of_values:
                print(f"{key}: {value}")

    def print(
        self,
        pretty_labels: Optional[dict] = None,
        filename: Optional[str] = None,
        format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
        interactive: bool = False,
        split_at_dot: bool = True,
        max_rows=None,
        tee=False,
        iframe=False,
        iframe_height: int = 200,
        iframe_width: int = 600,
        web=False,
        return_string: bool = False,
    ) -> Union[None, str, "Results"]:
        """Print the results in a pretty format.

        :param pretty_labels: A dictionary of pretty labels for the columns.
        :param filename: The filename to save the results to.
        :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'. Defaults to 'html' in a notebook and 'rich' otherwise.
        :param interactive: Whether to print the results interactively in a Jupyter notebook.
        :param split_at_dot: Whether to split the column names at the last dot w/ a newline. Ignored when pretty labels are given.
        :param max_rows: The maximum number of rows to print.
        :param tee: Whether to return the dataset.
        :param iframe: Whether to display the table in an iframe.
        :param iframe_height: The height of the iframe.
        :param iframe_width: The width of the iframe.
        :param web: Whether to display the table in a web browser (currently not used by this method).
        :param return_string: Whether to return the output as a string instead of printing.

        :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
        :raises ValueError: If ``format`` is not one of the supported formats.

        Example: Print in rich format at the terminal

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print(format = "rich")
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        ├──────────────┤
        │ Terrible     │
        ├──────────────┤
        │ OK           │
        └──────────────┘

        >>> r = Results.example()
        >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        └──────────────┘
        >>> r2
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])

        >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        └──────────────┘

        >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
        ┏━━━━━━━━━━━━━━━━━━━━┓
        ┃ answer.how_feeling ┃
        ┡━━━━━━━━━━━━━━━━━━━━┩
        │ OK                 │
        ├────────────────────┤
        │ Great              │
        ├────────────────────┤
        │ Terrible           │
        ├────────────────────┤
        │ OK                 │
        └────────────────────┘

        Example: using the pretty_labels parameter

        >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
        ┏━━━━━━━━━━━━━━━━━━━━━┓
        ┃ How are you feeling ┃
        ┡━━━━━━━━━━━━━━━━━━━━━┩
        │ OK                  │
        ├─────────────────────┤
        │ Great               │
        ├─────────────────────┤
        │ Terrible            │
        ├─────────────────────┤
        │ OK                  │
        └─────────────────────┘

        Example: printing in markdown format

        >>> r.select('how_feeling').print(format='markdown')
        | answer.how_feeling |
        |--|
        | OK |
        | Great |
        | Terrible |
        | OK |
        ...

        >>> r.select('how_feeling').print(format='latex')
        \\begin{tabular}{l}
        ...
        \\end{tabular}
        <BLANKLINE>
        """
        import contextlib

        if format is None:
            # Only consult the environment when the caller did not choose.
            from edsl.utilities.utilities import is_notebook

            format = "html" if is_notebook() else "rich"
        if format not in ("rich", "html", "markdown", "latex"):
            raise ValueError(
                "format must be one of 'rich', 'html', 'markdown', or 'latex'."
            )

        if pretty_labels is None:
            pretty_labels = {}
        if pretty_labels:  # only split at dot if there are no pretty labels
            split_at_dot = False

        new_data = []
        for entry in self:
            key, list_of_values = list(entry.items())[0]
            new_data.append({pretty_labels.get(key, key): list_of_values[:max_rows]})

        # When a string is requested, capture everything the renderers print.
        # redirect_stdout restores sys.stdout even if a renderer raises.
        capture = io.StringIO() if return_string else None
        if return_string:
            redirect = contextlib.redirect_stdout(capture)
        else:
            redirect = contextlib.nullcontext()

        output = None
        with redirect:
            if format == "rich":
                from edsl.utilities.interface import print_dataset_with_rich

                output = print_dataset_with_rich(
                    new_data, filename=filename, split_at_dot=split_at_dot
                )
            elif format == "markdown":
                from edsl.utilities.interface import (
                    print_list_of_dicts_as_markdown_table,
                )

                output = print_list_of_dicts_as_markdown_table(
                    new_data, filename=filename
                )
            elif format == "latex":
                # NOTE: this branch renders the full dataset via pandas;
                # pretty_labels and max_rows are not applied here.
                df = self.to_pandas()
                df.columns = [col.replace("_", " ") for col in df.columns]
                latex_string = df.to_latex(index=False)

                if filename is not None:
                    with open(filename, "w") as f:
                        f.write(latex_string)
                else:
                    print(latex_string)
                output = latex_string
            elif format == "html":
                from IPython.display import HTML, display
                from edsl.utilities.interface import print_list_of_dicts_as_html_table
                from edsl.utilities.utilities import is_notebook

                html_source = print_list_of_dicts_as_html_table(
                    new_data, interactive=interactive
                )

                if iframe:
                    # ``html`` here is the stdlib module, not the method below.
                    iframe_markup = (
                        f'<iframe srcdoc="{html.escape(html_source)}" '
                        f'style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>'
                    )
                    display(HTML(iframe_markup))
                elif is_notebook():
                    display(HTML(html_source))
                else:
                    from edsl.utilities.interface import view_html

                    view_html(html_source)

                output = html_source

        if return_string:
            # Prefer what was actually printed; fall back to the renderer's
            # return value when nothing reached stdout.
            return capture.getvalue() or output

        if tee:
            return self

        return None

    def to_csv(
        self,
        filename: Optional[str] = None,
        remove_prefix: bool = False,
        download_link: bool = False,
        pretty_labels: Optional[dict] = None,
    ):
        """Export the results to a CSV file or string.

        :param filename: The filename to save the CSV file to. When given, nothing is returned.
        :param remove_prefix: Whether to remove the prefix from the column names.
        :param download_link: Whether to display a download link in a Jupyter notebook (only used when no filename is given).
        :param pretty_labels: A dictionary of pretty labels for the columns.
        :return: The CSV text when neither ``filename`` nor ``download_link`` is used, otherwise None.

        Example:

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_csv()
        'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

        >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
        'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

        >>> import tempfile
        >>> filename = tempfile.NamedTemporaryFile(delete=False).name
        >>> r.select('how_feeling').to_csv(filename = filename)
        >>> import os
        >>> import csv
        >>> with open(filename, newline='') as f:
        ...     reader = csv.reader(f)
        ...     for row in reader:
        ...         print(row)
        ['answer.how_feeling']
        ['OK']
        ['Great']
        ['Terrible']
        ['OK']

        """
        if pretty_labels is None:
            pretty_labels = {}
        header, rows = self._make_tabular(
            remove_prefix=remove_prefix, pretty_labels=pretty_labels
        )

        if filename is not None:
            # newline="" lets the csv module control line endings; without it
            # text mode would turn "\r\n" into "\r\r\n" on Windows.
            with open(filename, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(header)
                writer.writerows(rows)
            return None

        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(header)
        writer.writerows(rows)
        csv_string = output.getvalue()

        if download_link:
            from IPython.display import HTML, display

            display(HTML(self._csv_download_anchor(csv_string)))
            return None

        return csv_string

    def download_link(self, pretty_labels: Optional[dict] = None) -> str:
        """Return a download link for the results.

        :param pretty_labels: A dictionary of pretty labels for the columns.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').download_link()
        '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
        """
        csv_string = self.to_csv(pretty_labels=pretty_labels)
        return self._csv_download_anchor(csv_string)

    def to_pandas(
        self, remove_prefix: bool = False, lists_as_strings=False
    ) -> "DataFrame":
        """Convert the results to a pandas DataFrame.

        The frame is built by round-tripping through CSV (see
        :meth:`_to_pandas_strings`), so list-valued cells come back as their
        string representation.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param lists_as_strings: Accepted for backward compatibility; currently has no effect.

        """
        return self._to_pandas_strings(remove_prefix)

    def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
        """Convert the results to a pandas DataFrame by parsing the CSV export.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_pandas()
          answer.how_feeling
        0                 OK
        1              Great
        2           Terrible
        3                 OK
        """
        import pandas as pd

        csv_string = self.to_csv(remove_prefix=remove_prefix)
        return pd.read_csv(io.StringIO(csv_string))

    def to_scenario_list(self, remove_prefix: bool = True) -> "ScenarioList":
        """Convert the results to a ScenarioList, one Scenario per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_scenario_list()
        ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
        """
        from edsl import ScenarioList, Scenario

        list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
        return ScenarioList([Scenario(d) for d in list_of_dicts])

    def to_agent_list(self, remove_prefix: bool = True):
        """Convert the results to an AgentList, one Agent per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_agent_list()
        AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
        """
        from edsl import AgentList, Agent

        list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
        return AgentList([Agent(d) for d in list_of_dicts])

    def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
        """Convert the results to a list of dictionaries, one per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_dicts()
        [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

        """
        list_of_keys = []
        list_of_values = []
        for entry in self:
            key, values = list(entry.items())[0]
            list_of_keys.append(key)
            list_of_values.append(values)

        if remove_prefix:
            list_of_keys = [key.split(".")[-1] for key in list_of_keys]

        # Transpose the columns into rows and label each row's values.
        return [dict(zip(list_of_keys, entries)) for entries in zip(*list_of_values)]

    def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list:
        """Convert the results to a list of values (one column) or tuples (several columns).

        :param flatten: Whether to flatten the list of lists. Only allowed when a single column is selected.
        :param remove_none: Whether to remove None values from the list.
        :param unzipped: Accepted for backward compatibility; currently has no effect.
        :raises ValueError: If ``flatten`` is requested while several columns are selected.

        >>> from edsl.results import Results
        >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

        >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
        [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

        >>> r = Results.example()
        >>> r.select('how_feeling').to_list()
        ['OK', 'Great', 'Terrible', 'OK']

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
        [1, 9, 2, 3, 4]

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
        Traceback (most recent call last):
        ...
        ValueError: Cannot flatten a list of lists when there are multiple columns selected.


        """
        keys = self.relevant_columns()

        if flatten and len(keys) > 1:
            raise ValueError(
                "Cannot flatten a list of lists when there are multiple columns selected."
            )

        if len(keys) == 1:
            # if only one 'column' is selected (which is typical for this method)
            list_to_return = list(self[0].values())[0]
        else:
            data = self.to_dicts(remove_prefix=False)
            list_to_return = [tuple(d[key] for key in keys) for d in data]

        if remove_none:
            list_to_return = [item for item in list_to_return if item is not None]

        if flatten:
            new_list = []
            for item in list_to_return:
                if isinstance(item, list):
                    new_list.extend(item)
                else:
                    new_list.append(item)
            list_to_return = new_list

        return list_to_return

    def html(
        self,
        filename: Optional[str] = None,
        cta: str = "Open in browser",
        return_link: bool = False,
    ):
        """Write the dataset as an HTML table and offer it for viewing.

        In a notebook a link to the file is displayed; otherwise the file is
        opened in the default web browser.

        :param filename: Where to write the HTML. Defaults to a new ``.html`` temporary file in the current directory.
        :param cta: The text of the link shown in a notebook.
        :param return_link: Whether to return the filename that was written.
        :return: The filename if ``return_link`` is true, otherwise None.
        """
        import os
        import tempfile
        from edsl.utilities.utilities import is_notebook

        df = self.to_pandas()

        if filename is None:
            # Only the name is needed; close the handle right away so it is
            # not leaked (the file itself survives because delete=False).
            with tempfile.NamedTemporaryFile(
                "w", delete=False, suffix=".html", dir=os.getcwd()
            ) as tmp:
                filename = tmp.name

        with open(filename, "w") as f:
            f.write(df.to_html())

        if is_notebook():
            from IPython.display import HTML, display

            html_url = f"/files/{filename}"
            html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
            display(HTML(html_link))
        else:
            import webbrowser

            print(f"Saved to {filename}")
            webbrowser.open(f"file://{os.path.abspath(filename)}")

        if return_link:
            return filename

    def tally(
        self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
    ) -> Union[dict, "Dataset"]:
        """Tally the values of a field or perform a cross-tab of multiple fields.

        List values are counted as tuples so that they can be used as keys.

        :param fields: The field(s) to tally, multiple fields for cross-tabulation. Defaults to all columns.
        :param top_n: If given, keep only the ``top_n`` most frequent values.
        :param output: Either "Dataset" (default) or "dict".
        :raises ValueError: If a field is not in the dataset or ``output`` is not supported.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
        {'OK': 2, 'Great': 1, 'Terrible': 1}
        >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
        Dataset([{'value': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
        >>> r.select('how_feeling', 'period').tally('how_feeling', 'period', output = "dict")
        {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
        """
        from collections import Counter

        if output not in ("dict", "Dataset"):
            raise ValueError("output must be either 'dict' or 'Dataset'.")

        columns = self.relevant_columns()
        if len(fields) == 0:
            fields = columns

        # A field may be given with or without its prefix.
        known_names = columns + [column.split(".")[-1] for column in columns]
        if not all(f in known_names for f in fields):
            raise ValueError("One or more specified fields are not in the dataset.")

        if len(fields) == 1:
            values = self._key_to_value(fields[0])
        else:
            values = list(zip(*(self._key_to_value(field) for field in fields)))

        # Counter keys must be hashable, so lists are counted as tuples.
        counts = Counter(self._hashable(value) for value in values)
        # most_common() sorts by descending count and keeps first-seen order
        # among ties.
        ranked = counts.most_common()
        if top_n is not None:
            ranked = ranked[:top_n]
        sorted_tally = dict(ranked)

        if output == "dict":
            import textwrap
            import warnings

            # NOTE(review): this message predates "Dataset" becoming the
            # default; kept unchanged for callers that filter on it.
            warnings.warn(
                textwrap.dedent(
                    """\
                        The default output from tally will change to Dataset in the future.
                        Use output='Dataset' to get the Dataset object for now.
                        """
                )
            )
            return sorted_tally

        from edsl.results.Dataset import Dataset

        return Dataset(
            [
                {"value": list(sorted_tally.keys())},
                {"count": list(sorted_tally.values())},
            ]
        )
712
+
713
+
714
# Running this module directly executes its doctests; ELLIPSIS lets "..."
# in expected output match arbitrary text.
if __name__ == "__main__":
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)