edsl 0.1.38.dev3__py3-none-any.whl → 0.1.38.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. edsl/Base.py +332 -303
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +49 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +867 -858
  7. edsl/agents/AgentList.py +413 -362
  8. edsl/agents/Invigilator.py +233 -222
  9. edsl/agents/InvigilatorBase.py +265 -284
  10. edsl/agents/PromptConstructor.py +354 -353
  11. edsl/agents/__init__.py +3 -3
  12. edsl/agents/descriptors.py +99 -99
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +279 -279
  26. edsl/config.py +157 -149
  27. edsl/conversation/Conversation.py +290 -290
  28. edsl/conversation/car_buying.py +58 -58
  29. edsl/conversation/chips.py +95 -95
  30. edsl/conversation/mug_negotiation.py +81 -81
  31. edsl/conversation/next_speaker_utilities.py +93 -93
  32. edsl/coop/PriceFetcher.py +54 -54
  33. edsl/coop/__init__.py +2 -2
  34. edsl/coop/coop.py +1028 -961
  35. edsl/coop/utils.py +131 -131
  36. edsl/data/Cache.py +555 -530
  37. edsl/data/CacheEntry.py +233 -228
  38. edsl/data/CacheHandler.py +149 -149
  39. edsl/data/RemoteCacheSync.py +78 -97
  40. edsl/data/SQLiteDict.py +292 -292
  41. edsl/data/__init__.py +4 -4
  42. edsl/data/orm.py +10 -10
  43. edsl/data_transfer_models.py +73 -73
  44. edsl/enums.py +175 -173
  45. edsl/exceptions/BaseException.py +21 -21
  46. edsl/exceptions/__init__.py +54 -54
  47. edsl/exceptions/agents.py +42 -42
  48. edsl/exceptions/cache.py +5 -5
  49. edsl/exceptions/configuration.py +16 -16
  50. edsl/exceptions/coop.py +10 -10
  51. edsl/exceptions/data.py +14 -14
  52. edsl/exceptions/general.py +34 -34
  53. edsl/exceptions/jobs.py +33 -33
  54. edsl/exceptions/language_models.py +63 -63
  55. edsl/exceptions/prompts.py +15 -15
  56. edsl/exceptions/questions.py +91 -91
  57. edsl/exceptions/results.py +29 -29
  58. edsl/exceptions/scenarios.py +22 -22
  59. edsl/exceptions/surveys.py +37 -37
  60. edsl/inference_services/AnthropicService.py +87 -87
  61. edsl/inference_services/AwsBedrock.py +120 -120
  62. edsl/inference_services/AzureAI.py +217 -217
  63. edsl/inference_services/DeepInfraService.py +18 -18
  64. edsl/inference_services/GoogleService.py +148 -156
  65. edsl/inference_services/GroqService.py +20 -20
  66. edsl/inference_services/InferenceServiceABC.py +147 -147
  67. edsl/inference_services/InferenceServicesCollection.py +97 -97
  68. edsl/inference_services/MistralAIService.py +123 -123
  69. edsl/inference_services/OllamaService.py +18 -18
  70. edsl/inference_services/OpenAIService.py +224 -224
  71. edsl/inference_services/PerplexityService.py +163 -0
  72. edsl/inference_services/TestService.py +89 -89
  73. edsl/inference_services/TogetherAIService.py +170 -170
  74. edsl/inference_services/models_available_cache.py +118 -118
  75. edsl/inference_services/rate_limits_cache.py +25 -25
  76. edsl/inference_services/registry.py +41 -39
  77. edsl/inference_services/write_available.py +10 -10
  78. edsl/jobs/Answers.py +56 -56
  79. edsl/jobs/Jobs.py +898 -1358
  80. edsl/jobs/JobsChecks.py +147 -0
  81. edsl/jobs/JobsPrompts.py +268 -0
  82. edsl/jobs/JobsRemoteInferenceHandler.py +239 -0
  83. edsl/jobs/__init__.py +1 -1
  84. edsl/jobs/buckets/BucketCollection.py +63 -63
  85. edsl/jobs/buckets/ModelBuckets.py +65 -65
  86. edsl/jobs/buckets/TokenBucket.py +251 -251
  87. edsl/jobs/interviews/Interview.py +661 -661
  88. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  89. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  90. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  91. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  92. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  93. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  94. edsl/jobs/interviews/ReportErrors.py +66 -66
  95. edsl/jobs/interviews/interview_status_enum.py +9 -9
  96. edsl/jobs/runners/JobsRunnerAsyncio.py +466 -361
  97. edsl/jobs/runners/JobsRunnerStatus.py +330 -332
  98. edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
  99. edsl/jobs/tasks/TaskCreators.py +64 -64
  100. edsl/jobs/tasks/TaskHistory.py +450 -451
  101. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  102. edsl/jobs/tasks/task_status_enum.py +163 -163
  103. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  104. edsl/jobs/tokens/TokenUsage.py +34 -34
  105. edsl/language_models/KeyLookup.py +30 -30
  106. edsl/language_models/LanguageModel.py +668 -708
  107. edsl/language_models/ModelList.py +155 -109
  108. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  109. edsl/language_models/__init__.py +3 -3
  110. edsl/language_models/fake_openai_call.py +15 -15
  111. edsl/language_models/fake_openai_service.py +61 -61
  112. edsl/language_models/registry.py +190 -137
  113. edsl/language_models/repair.py +156 -156
  114. edsl/language_models/unused/ReplicateBase.py +83 -83
  115. edsl/language_models/utilities.py +64 -64
  116. edsl/notebooks/Notebook.py +258 -258
  117. edsl/notebooks/__init__.py +1 -1
  118. edsl/prompts/Prompt.py +362 -357
  119. edsl/prompts/__init__.py +2 -2
  120. edsl/questions/AnswerValidatorMixin.py +289 -289
  121. edsl/questions/QuestionBase.py +664 -660
  122. edsl/questions/QuestionBaseGenMixin.py +161 -161
  123. edsl/questions/QuestionBasePromptsMixin.py +217 -217
  124. edsl/questions/QuestionBudget.py +227 -227
  125. edsl/questions/QuestionCheckBox.py +359 -359
  126. edsl/questions/QuestionExtract.py +182 -183
  127. edsl/questions/QuestionFreeText.py +114 -114
  128. edsl/questions/QuestionFunctional.py +166 -166
  129. edsl/questions/QuestionList.py +231 -231
  130. edsl/questions/QuestionMultipleChoice.py +286 -286
  131. edsl/questions/QuestionNumerical.py +153 -153
  132. edsl/questions/QuestionRank.py +324 -324
  133. edsl/questions/Quick.py +41 -41
  134. edsl/questions/RegisterQuestionsMeta.py +71 -71
  135. edsl/questions/ResponseValidatorABC.py +174 -174
  136. edsl/questions/SimpleAskMixin.py +73 -73
  137. edsl/questions/__init__.py +26 -26
  138. edsl/questions/compose_questions.py +98 -98
  139. edsl/questions/decorators.py +21 -21
  140. edsl/questions/derived/QuestionLikertFive.py +76 -76
  141. edsl/questions/derived/QuestionLinearScale.py +87 -87
  142. edsl/questions/derived/QuestionTopK.py +93 -93
  143. edsl/questions/derived/QuestionYesNo.py +82 -82
  144. edsl/questions/descriptors.py +413 -413
  145. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  146. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  147. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  148. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  149. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  150. edsl/questions/prompt_templates/question_list.jinja +17 -17
  151. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  152. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  153. edsl/questions/question_registry.py +177 -147
  154. edsl/questions/settings.py +12 -12
  155. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  156. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  157. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  158. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  159. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  160. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  161. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  162. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  163. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  164. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  165. edsl/questions/templates/list/question_presentation.jinja +5 -5
  166. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  167. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  168. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  169. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  170. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  171. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  172. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  173. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  174. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  175. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  176. edsl/results/CSSParameterizer.py +108 -0
  177. edsl/results/Dataset.py +424 -293
  178. edsl/results/DatasetExportMixin.py +731 -717
  179. edsl/results/DatasetTree.py +275 -145
  180. edsl/results/Result.py +465 -456
  181. edsl/results/Results.py +1165 -1071
  182. edsl/results/ResultsDBMixin.py +238 -238
  183. edsl/results/ResultsExportMixin.py +43 -43
  184. edsl/results/ResultsFetchMixin.py +33 -33
  185. edsl/results/ResultsGGMixin.py +121 -121
  186. edsl/results/ResultsToolsMixin.py +98 -98
  187. edsl/results/Selector.py +135 -135
  188. edsl/results/TableDisplay.py +198 -0
  189. edsl/results/__init__.py +2 -2
  190. edsl/results/table_display.css +78 -0
  191. edsl/results/tree_explore.py +115 -115
  192. edsl/scenarios/FileStore.py +632 -458
  193. edsl/scenarios/Scenario.py +601 -544
  194. edsl/scenarios/ScenarioHtmlMixin.py +64 -64
  195. edsl/scenarios/ScenarioJoin.py +127 -0
  196. edsl/scenarios/ScenarioList.py +1287 -1112
  197. edsl/scenarios/ScenarioListExportMixin.py +52 -52
  198. edsl/scenarios/ScenarioListPdfMixin.py +261 -261
  199. edsl/scenarios/__init__.py +4 -4
  200. edsl/shared.py +1 -1
  201. edsl/study/ObjectEntry.py +173 -173
  202. edsl/study/ProofOfWork.py +113 -113
  203. edsl/study/SnapShot.py +80 -80
  204. edsl/study/Study.py +528 -528
  205. edsl/study/__init__.py +4 -4
  206. edsl/surveys/DAG.py +148 -148
  207. edsl/surveys/Memory.py +31 -31
  208. edsl/surveys/MemoryPlan.py +244 -244
  209. edsl/surveys/Rule.py +326 -326
  210. edsl/surveys/RuleCollection.py +387 -387
  211. edsl/surveys/Survey.py +1801 -1787
  212. edsl/surveys/SurveyCSS.py +261 -261
  213. edsl/surveys/SurveyExportMixin.py +259 -259
  214. edsl/surveys/SurveyFlowVisualizationMixin.py +179 -121
  215. edsl/surveys/SurveyQualtricsImport.py +284 -284
  216. edsl/surveys/__init__.py +3 -3
  217. edsl/surveys/base.py +53 -53
  218. edsl/surveys/descriptors.py +56 -56
  219. edsl/surveys/instructions/ChangeInstruction.py +49 -49
  220. edsl/surveys/instructions/Instruction.py +65 -53
  221. edsl/surveys/instructions/InstructionCollection.py +77 -77
  222. edsl/templates/error_reporting/base.html +23 -23
  223. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  224. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  225. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  226. edsl/templates/error_reporting/interview_details.html +115 -115
  227. edsl/templates/error_reporting/interviews.html +19 -10
  228. edsl/templates/error_reporting/overview.html +4 -4
  229. edsl/templates/error_reporting/performance_plot.html +1 -1
  230. edsl/templates/error_reporting/report.css +73 -73
  231. edsl/templates/error_reporting/report.html +117 -117
  232. edsl/templates/error_reporting/report.js +25 -25
  233. edsl/tools/__init__.py +1 -1
  234. edsl/tools/clusters.py +192 -192
  235. edsl/tools/embeddings.py +27 -27
  236. edsl/tools/embeddings_plotting.py +118 -118
  237. edsl/tools/plotting.py +112 -112
  238. edsl/tools/summarize.py +18 -18
  239. edsl/utilities/SystemInfo.py +28 -28
  240. edsl/utilities/__init__.py +22 -22
  241. edsl/utilities/ast_utilities.py +25 -25
  242. edsl/utilities/data/Registry.py +6 -6
  243. edsl/utilities/data/__init__.py +1 -1
  244. edsl/utilities/data/scooter_results.json +1 -1
  245. edsl/utilities/decorators.py +77 -77
  246. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  247. edsl/utilities/interface.py +627 -627
  248. edsl/utilities/naming_utilities.py +263 -263
  249. edsl/utilities/repair_functions.py +28 -28
  250. edsl/utilities/restricted_python.py +70 -70
  251. edsl/utilities/utilities.py +424 -409
  252. {edsl-0.1.38.dev3.dist-info → edsl-0.1.38.dev4.dist-info}/LICENSE +21 -21
  253. {edsl-0.1.38.dev3.dist-info → edsl-0.1.38.dev4.dist-info}/METADATA +2 -1
  254. edsl-0.1.38.dev4.dist-info/RECORD +277 -0
  255. edsl-0.1.38.dev3.dist-info/RECORD +0 -269
  256. {edsl-0.1.38.dev3.dist-info → edsl-0.1.38.dev4.dist-info}/WHEEL +0 -0
@@ -1,717 +1,731 @@
1
- """Mixin class for exporting results."""
2
-
3
- import base64
4
- import csv
5
- import io
6
- import html
7
- from typing import Optional
8
-
9
- from typing import Literal, Optional, Union, List
10
-
11
-
12
- class DatasetExportMixin:
13
- """Mixin class for exporting Dataset objects."""
14
-
15
- def relevant_columns(
16
- self, data_type: Optional[str] = None, remove_prefix=False
17
- ) -> list:
18
- """Return the set of keys that are present in the dataset.
19
-
20
- :param data_type: The data type to filter by.
21
- :param remove_prefix: Whether to remove the prefix from the column names.
22
-
23
- >>> from edsl.results.Dataset import Dataset
24
- >>> d = Dataset([{'a.b':[1,2,3,4]}])
25
- >>> d.relevant_columns()
26
- ['a.b']
27
-
28
- >>> d.relevant_columns(remove_prefix=True)
29
- ['b']
30
-
31
- >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
32
- >>> d.relevant_columns()
33
- ['a', 'b']
34
-
35
- >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
36
- ['answer.how_feeling', 'answer.how_feeling_yesterday']
37
-
38
- >>> from edsl.results import Results
39
- >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
40
- ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
41
-
42
- >>> Results.example().relevant_columns(data_type = "flimflam")
43
- Traceback (most recent call last):
44
- ...
45
- ValueError: No columns found for data type: flimflam. Available data types are: ...
46
- """
47
- columns = [list(x.keys())[0] for x in self]
48
- if remove_prefix:
49
- columns = [column.split(".")[-1] for column in columns]
50
-
51
- def get_data_type(column):
52
- if "." in column:
53
- return column.split(".")[0]
54
- else:
55
- return None
56
-
57
- if data_type:
58
- all_columns = columns[:]
59
- columns = [
60
- column for column in columns if get_data_type(column) == data_type
61
- ]
62
- if len(columns) == 0:
63
- all_data_types = sorted(
64
- list(set(get_data_type(column) for column in all_columns))
65
- )
66
- raise ValueError(
67
- f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
68
- )
69
-
70
- return columns
71
-
72
- def num_observations(self):
73
- """Return the number of observations in the dataset.
74
-
75
- >>> from edsl.results import Results
76
- >>> Results.example().num_observations()
77
- 4
78
- """
79
- _num_observations = None
80
- for entry in self:
81
- key, values = list(entry.items())[0]
82
- if _num_observations is None:
83
- _num_observations = len(values)
84
- else:
85
- if len(values) != _num_observations:
86
- raise ValueError(
87
- "The number of observations is not consistent across columns."
88
- )
89
-
90
- return _num_observations
91
-
92
- def _make_tabular(
93
- self, remove_prefix: bool, pretty_labels: Optional[dict] = None
94
- ) -> tuple[list, List[list]]:
95
- """Turn the results into a tabular format.
96
-
97
- :param remove_prefix: Whether to remove the prefix from the column names.
98
-
99
- >>> from edsl.results import Results
100
- >>> r = Results.example()
101
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
102
- (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
103
-
104
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
105
- (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
106
- """
107
-
108
- def create_dict_from_list_of_dicts(list_of_dicts):
109
- for entry in list_of_dicts:
110
- key, list_of_values = list(entry.items())[0]
111
- yield key, list_of_values
112
-
113
- tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
114
-
115
- full_header = [list(x.keys())[0] for x in self]
116
-
117
- rows = []
118
- for i in range(self.num_observations()):
119
- row = [tabular_repr[h][i] for h in full_header]
120
- rows.append(row)
121
-
122
- if remove_prefix:
123
- header = [h.split(".")[-1] for h in full_header]
124
- else:
125
- header = full_header
126
-
127
- if pretty_labels is not None:
128
- header = [pretty_labels.get(h, h) for h in header]
129
-
130
- return header, rows
131
-
132
- def print_long(self):
133
- """Print the results in a long format.
134
- >>> from edsl.results import Results
135
- >>> r = Results.example()
136
- >>> r.select('how_feeling').print_long()
137
- answer.how_feeling: OK
138
- answer.how_feeling: Great
139
- answer.how_feeling: Terrible
140
- answer.how_feeling: OK
141
- """
142
- for entry in self:
143
- key, list_of_values = list(entry.items())[0]
144
- for value in list_of_values:
145
- print(f"{key}: {value}")
146
-
147
- def print(
148
- self,
149
- pretty_labels: Optional[dict] = None,
150
- filename: Optional[str] = None,
151
- format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
152
- interactive: bool = False,
153
- split_at_dot: bool = True,
154
- max_rows=None,
155
- tee=False,
156
- iframe=False,
157
- iframe_height: int = 200,
158
- iframe_width: int = 600,
159
- web=False,
160
- return_string: bool = False,
161
- ) -> Union[None, str, "Results"]:
162
- """Print the results in a pretty format.
163
-
164
- :param pretty_labels: A dictionary of pretty labels for the columns.
165
- :param filename: The filename to save the results to.
166
- :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
167
- :param interactive: Whether to print the results interactively in a Jupyter notebook.
168
- :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
169
- :param max_rows: The maximum number of rows to print.
170
- :param tee: Whether to return the dataset.
171
- :param iframe: Whether to display the table in an iframe.
172
- :param iframe_height: The height of the iframe.
173
- :param iframe_width: The width of the iframe.
174
- :param web: Whether to display the table in a web browser.
175
- :param return_string: Whether to return the output as a string instead of printing.
176
-
177
- :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
178
-
179
- Example: Print in rich format at the terminal
180
-
181
- >>> from edsl.results import Results
182
- >>> r = Results.example()
183
- >>> r.select('how_feeling').print(format = "rich")
184
- ┏━━━━━━━━━━━━━━┓
185
- ┃ answer ┃
186
- ┃ .how_feeling ┃
187
- ┡━━━━━━━━━━━━━━┩
188
- │ OK │
189
- ├──────────────┤
190
- │ Great │
191
- ├──────────────┤
192
- │ Terrible │
193
- ├──────────────┤
194
- │ OK │
195
- └──────────────┘
196
-
197
- >>> r = Results.example()
198
- >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
199
- ┏━━━━━━━━━━━━━━┓
200
- ┃ answer ┃
201
- ┃ .how_feeling ┃
202
- ┡━━━━━━━━━━━━━━┩
203
- │ OK │
204
- ├──────────────┤
205
- │ Great │
206
- └──────────────┘
207
- >>> r2
208
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
209
-
210
- >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
211
- ┏━━━━━━━━━━━━━━┓
212
- ┃ answer ┃
213
- ┃ .how_feeling ┃
214
- ┡━━━━━━━━━━━━━━┩
215
- │ OK │
216
- ├──────────────┤
217
- │ Great │
218
- └──────────────┘
219
-
220
- >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
221
- ┏━━━━━━━━━━━━━━━━━━━━┓
222
- ┃ answer.how_feeling ┃
223
- ┡━━━━━━━━━━━━━━━━━━━━┩
224
- │ OK │
225
- ├────────────────────┤
226
- │ Great │
227
- ├────────────────────┤
228
- │ Terrible │
229
- ├────────────────────┤
230
- │ OK │
231
- └────────────────────┘
232
-
233
- Example: using the pretty_labels parameter
234
-
235
- >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
236
- ┏━━━━━━━━━━━━━━━━━━━━━┓
237
- ┃ How are you feeling ┃
238
- ┡━━━━━━━━━━━━━━━━━━━━━┩
239
- │ OK │
240
- ├─────────────────────┤
241
- │ Great │
242
- ├─────────────────────┤
243
- │ Terrible │
244
- ├─────────────────────┤
245
- │ OK │
246
- └─────────────────────┘
247
-
248
- Example: printing in markdown format
249
-
250
- >>> r.select('how_feeling').print(format='markdown')
251
- | answer.how_feeling |
252
- |--|
253
- | OK |
254
- | Great |
255
- | Terrible |
256
- | OK |
257
- ...
258
-
259
- >>> r.select('how_feeling').print(format='latex')
260
- \\begin{tabular}{l}
261
- ...
262
- \\end{tabular}
263
- <BLANKLINE>
264
- """
265
- from IPython.display import HTML, display
266
- from edsl.utilities.utilities import is_notebook
267
- import io
268
- import sys
269
-
270
- def _determine_format(format):
271
- if format is None:
272
- if is_notebook():
273
- format = "html"
274
- else:
275
- format = "rich"
276
- if format not in ["rich", "html", "markdown", "latex"]:
277
- raise ValueError(
278
- "format must be one of 'rich', 'html', 'markdown', or 'latex'."
279
- )
280
-
281
- return format
282
-
283
- format = _determine_format(format)
284
-
285
- if pretty_labels is None:
286
- pretty_labels = {}
287
-
288
- if pretty_labels != {}: # only split at dot if there are no pretty labels
289
- split_at_dot = False
290
-
291
- def _create_data():
292
- for index, entry in enumerate(self):
293
- key, list_of_values = list(entry.items())[0]
294
- yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
295
-
296
- new_data = list(_create_data())
297
-
298
- # Capture output if return_string is True
299
- if return_string:
300
- old_stdout = sys.stdout
301
- sys.stdout = io.StringIO()
302
-
303
- output = None
304
-
305
- if format == "rich":
306
- from edsl.utilities.interface import print_dataset_with_rich
307
-
308
- output = print_dataset_with_rich(
309
- new_data, filename=filename, split_at_dot=split_at_dot
310
- )
311
- elif format == "markdown":
312
- from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
313
-
314
- output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
315
- elif format == "latex":
316
- df = self.to_pandas()
317
- df.columns = [col.replace("_", " ") for col in df.columns]
318
- latex_string = df.to_latex(index=False)
319
-
320
- if filename is not None:
321
- with open(filename, "w") as f:
322
- f.write(latex_string)
323
- else:
324
- print(latex_string)
325
- output = latex_string
326
- elif format == "html":
327
- from edsl.utilities.interface import print_list_of_dicts_as_html_table
328
-
329
- html_source = print_list_of_dicts_as_html_table(
330
- new_data, interactive=interactive
331
- )
332
-
333
- if iframe:
334
- iframe = f""""
335
- <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
336
- """
337
- display(HTML(iframe))
338
- elif is_notebook():
339
- display(HTML(html_source))
340
- else:
341
- from edsl.utilities.interface import view_html
342
-
343
- view_html(html_source)
344
-
345
- output = html_source
346
-
347
- # Restore stdout and get captured output if return_string is True
348
- if return_string:
349
- captured_output = sys.stdout.getvalue()
350
- sys.stdout = old_stdout
351
- return captured_output or output
352
-
353
- if tee:
354
- return self
355
-
356
- return None
357
-
358
- def to_csv(
359
- self,
360
- filename: Optional[str] = None,
361
- remove_prefix: bool = False,
362
- download_link: bool = False,
363
- pretty_labels: Optional[dict] = None,
364
- ):
365
- """Export the results to a CSV file.
366
-
367
- :param filename: The filename to save the CSV file to.
368
- :param remove_prefix: Whether to remove the prefix from the column names.
369
- :param download_link: Whether to display a download link in a Jupyter notebook.
370
-
371
- Example:
372
-
373
- >>> from edsl.results import Results
374
- >>> r = Results.example()
375
- >>> r.select('how_feeling').to_csv()
376
- 'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
377
-
378
- >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
379
- 'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
380
-
381
- >>> import tempfile
382
- >>> filename = tempfile.NamedTemporaryFile(delete=False).name
383
- >>> r.select('how_feeling').to_csv(filename = filename)
384
- >>> import os
385
- >>> import csv
386
- >>> with open(filename, newline='') as f:
387
- ... reader = csv.reader(f)
388
- ... for row in reader:
389
- ... print(row)
390
- ['answer.how_feeling']
391
- ['OK']
392
- ['Great']
393
- ['Terrible']
394
- ['OK']
395
-
396
- """
397
- if pretty_labels is None:
398
- pretty_labels = {}
399
- header, rows = self._make_tabular(
400
- remove_prefix=remove_prefix, pretty_labels=pretty_labels
401
- )
402
-
403
- if filename is not None:
404
- with open(filename, "w") as f:
405
- writer = csv.writer(f)
406
- writer.writerow(header)
407
- writer.writerows(rows)
408
- else:
409
- output = io.StringIO()
410
- writer = csv.writer(output)
411
- writer.writerow(header)
412
- writer.writerows(rows)
413
-
414
- if download_link:
415
- from IPython.display import HTML, display
416
-
417
- csv_file = output.getvalue()
418
- b64 = base64.b64encode(csv_file.encode()).decode()
419
- download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
420
- display(HTML(download_link))
421
- else:
422
- return output.getvalue()
423
-
424
- def download_link(self, pretty_labels: Optional[dict] = None) -> str:
425
- """Return a download link for the results.
426
-
427
- :param pretty_labels: A dictionary of pretty labels for the columns.
428
-
429
- >>> from edsl.results import Results
430
- >>> r = Results.example()
431
- >>> r.select('how_feeling').download_link()
432
- '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
433
- """
434
- import base64
435
-
436
- csv_string = self.to_csv(pretty_labels=pretty_labels)
437
- b64 = base64.b64encode(csv_string.encode()).decode()
438
- return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
439
-
440
- def to_pandas(
441
- self, remove_prefix: bool = False, lists_as_strings=False
442
- ) -> "DataFrame":
443
- """Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
444
-
445
- :param remove_prefix: Whether to remove the prefix from the column names.
446
-
447
- """
448
- return self._to_pandas_strings(remove_prefix)
449
- # if lists_as_strings:
450
- # return self._to_pandas_strings(remove_prefix=remove_prefix)
451
-
452
- # import pandas as pd
453
-
454
- # df = pd.DataFrame(self.data)
455
-
456
- # if remove_prefix:
457
- # # Optionally remove prefixes from column names
458
- # df.columns = [col.split(".")[-1] for col in df.columns]
459
-
460
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
461
- # return df_sorted
462
-
463
- def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
464
- """Convert the results to a pandas DataFrame.
465
-
466
- :param remove_prefix: Whether to remove the prefix from the column names.
467
-
468
- >>> from edsl.results import Results
469
- >>> r = Results.example()
470
- >>> r.select('how_feeling').to_pandas()
471
- answer.how_feeling
472
- 0 OK
473
- 1 Great
474
- 2 Terrible
475
- 3 OK
476
- """
477
-
478
- import pandas as pd
479
-
480
- csv_string = self.to_csv(remove_prefix=remove_prefix)
481
- csv_buffer = io.StringIO(csv_string)
482
- df = pd.read_csv(csv_buffer)
483
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
484
- return df
485
-
486
- def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
487
- """Convert the results to a list of dictionaries, one per scenario.
488
-
489
- :param remove_prefix: Whether to remove the prefix from the column names.
490
-
491
- >>> from edsl.results import Results
492
- >>> r = Results.example()
493
- >>> r.select('how_feeling').to_scenario_list()
494
- ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
495
- """
496
- from edsl import ScenarioList, Scenario
497
-
498
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
499
- scenarios = []
500
- for d in list_of_dicts:
501
- scenarios.append(Scenario(d))
502
- return ScenarioList(scenarios)
503
- # return ScenarioList([Scenario(d) for d in list_of_dicts])
504
-
505
- def to_agent_list(self, remove_prefix: bool = True):
506
- """Convert the results to a list of dictionaries, one per agent.
507
-
508
- :param remove_prefix: Whether to remove the prefix from the column names.
509
-
510
- >>> from edsl.results import Results
511
- >>> r = Results.example()
512
- >>> r.select('how_feeling').to_agent_list()
513
- AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
514
- """
515
- from edsl import AgentList, Agent
516
-
517
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
518
- return AgentList([Agent(d) for d in list_of_dicts])
519
-
520
- def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
521
- """Convert the results to a list of dictionaries.
522
-
523
- :param remove_prefix: Whether to remove the prefix from the column names.
524
-
525
- >>> from edsl.results import Results
526
- >>> r = Results.example()
527
- >>> r.select('how_feeling').to_dicts()
528
- [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]
529
-
530
- """
531
- list_of_keys = []
532
- list_of_values = []
533
- for entry in self:
534
- key, values = list(entry.items())[0]
535
- list_of_keys.append(key)
536
- list_of_values.append(values)
537
-
538
- if remove_prefix:
539
- list_of_keys = [key.split(".")[-1] for key in list_of_keys]
540
-
541
- list_of_dicts = []
542
- for entries in zip(*list_of_values):
543
- list_of_dicts.append(dict(zip(list_of_keys, entries)))
544
-
545
- return list_of_dicts
546
-
547
- def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
548
- """Convert the results to a list of lists.
549
-
550
- :param flatten: Whether to flatten the list of lists.
551
- :param remove_none: Whether to remove None values from the list.
552
-
553
- >>> from edsl.results import Results
554
- >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
555
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
556
-
557
- >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
558
- [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]
559
-
560
- >>> r = Results.example()
561
- >>> r.select('how_feeling').to_list()
562
- ['OK', 'Great', 'Terrible', 'OK']
563
-
564
- >>> from edsl.results.Dataset import Dataset
565
- >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
566
- [1, 9, 2, 3, 4]
567
-
568
- >>> from edsl.results.Dataset import Dataset
569
- >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
570
- Traceback (most recent call last):
571
- ...
572
- ValueError: Cannot flatten a list of lists when there are multiple columns selected.
573
-
574
-
575
- """
576
- if len(self.relevant_columns()) > 1 and flatten:
577
- raise ValueError(
578
- "Cannot flatten a list of lists when there are multiple columns selected."
579
- )
580
-
581
- if len(self.relevant_columns()) == 1:
582
- # if only one 'column' is selected (which is typical for this method
583
- list_to_return = list(self[0].values())[0]
584
- else:
585
- keys = self.relevant_columns()
586
- data = self.to_dicts(remove_prefix=False)
587
- list_to_return = []
588
- for d in data:
589
- list_to_return.append(tuple([d[key] for key in keys]))
590
-
591
- if remove_none:
592
- list_to_return = [item for item in list_to_return if item is not None]
593
-
594
- if flatten:
595
- new_list = []
596
- for item in list_to_return:
597
- if isinstance(item, list):
598
- new_list.extend(item)
599
- else:
600
- new_list.append(item)
601
- list_to_return = new_list
602
-
603
- return list_to_return
604
-
605
- def html(
606
- self,
607
- filename: Optional[str] = None,
608
- cta: str = "Open in browser",
609
- return_link: bool = False,
610
- ):
611
- import os
612
- import tempfile
613
- from edsl.utilities.utilities import is_notebook
614
- from IPython.display import HTML, display
615
- from edsl.utilities.utilities import is_notebook
616
-
617
- df = self.to_pandas()
618
-
619
- if filename is None:
620
- current_directory = os.getcwd()
621
- filename = tempfile.NamedTemporaryFile(
622
- "w", delete=False, suffix=".html", dir=current_directory
623
- ).name
624
-
625
- with open(filename, "w") as f:
626
- f.write(df.to_html())
627
-
628
- if is_notebook():
629
- html_url = f"/files/{filename}"
630
- html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
631
- display(HTML(html_link))
632
- else:
633
- print(f"Saved to {filename}")
634
- import webbrowser
635
- import os
636
-
637
- webbrowser.open(f"file://{os.path.abspath(filename)}")
638
-
639
- if return_link:
640
- return filename
641
-
642
- def tally(
643
- self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
644
- ) -> Union[dict, "Dataset"]:
645
- """Tally the values of a field or perform a cross-tab of multiple fields.
646
-
647
- :param fields: The field(s) to tally, multiple fields for cross-tabulation.
648
-
649
- >>> from edsl.results import Results
650
- >>> r = Results.example()
651
- >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
652
- {'OK': 2, 'Great': 1, 'Terrible': 1}
653
- >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
654
- Dataset([{'value': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
655
- >>> r.select('how_feeling', 'period').tally('how_feeling', 'period', output = "dict")
656
- {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
657
- """
658
- from collections import Counter
659
-
660
- if len(fields) == 0:
661
- fields = self.relevant_columns()
662
-
663
- relevant_columns_without_prefix = [
664
- column.split(".")[-1] for column in self.relevant_columns()
665
- ]
666
-
667
- # breakpoint()
668
-
669
- if not all(
670
- f in self.relevant_columns() or f in relevant_columns_without_prefix
671
- for f in fields
672
- ):
673
- raise ValueError("One or more specified fields are not in the dataset.")
674
-
675
- if len(fields) == 1:
676
- field = fields[0]
677
- values = self._key_to_value(field)
678
- else:
679
- values = list(zip(*(self._key_to_value(field) for field in fields)))
680
-
681
- for value in values:
682
- if isinstance(value, list):
683
- value = tuple(value)
684
-
685
- tally = dict(Counter(values))
686
- sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
687
- if top_n is not None:
688
- sorted_tally = dict(list(sorted_tally.items())[:top_n])
689
-
690
- import warnings
691
- import textwrap
692
- from edsl.results.Dataset import Dataset
693
-
694
- if output == "dict":
695
- # why did I do this?
696
- warnings.warn(
697
- textwrap.dedent(
698
- """\
699
- The default output from tally will change to Dataset in the future.
700
- Use output='Dataset' to get the Dataset object for now.
701
- """
702
- )
703
- )
704
- return sorted_tally
705
- elif output == "Dataset":
706
- return Dataset(
707
- [
708
- {"value": list(sorted_tally.keys())},
709
- {"count": list(sorted_tally.values())},
710
- ]
711
- )
712
-
713
-
714
- if __name__ == "__main__":
715
- import doctest
716
-
717
- doctest.testmod(optionflags=doctest.ELLIPSIS)
1
+ """Mixin class for exporting results."""
2
+
3
+ import base64
4
+ import csv
5
+ import io
6
+ import html
7
+ from typing import Optional
8
+
9
+ from typing import Literal, Optional, Union, List
10
+
11
+
12
class DatasetExportMixin:
    """Mixin class for exporting Dataset objects.

    The host object is treated as a sequence of single-key dictionaries,
    each mapping a column name (optionally prefixed with a data type, as in
    ``answer.how_feeling``) to the list of values of that column.  The host
    must also provide ``_key_to_value`` (used by :meth:`tally`).
    """

    @staticmethod
    def _write_csv_rows(stream, header: list, rows: List[list]) -> None:
        """Write ``header`` followed by ``rows`` to ``stream`` as CSV."""
        writer = csv.writer(stream)
        writer.writerow(header)
        writer.writerows(rows)

    @staticmethod
    def _csv_download_anchor(csv_text: str) -> str:
        """Return an HTML anchor embedding ``csv_text`` as a base64 data URI."""
        b64 = base64.b64encode(csv_text.encode()).decode()
        return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'

    def relevant_columns(
        self, data_type: Optional[str] = None, remove_prefix=False
    ) -> list:
        """Return the set of keys that are present in the dataset.

        :param data_type: The data type to filter by.
        :param remove_prefix: Whether to remove the prefix from the column names.

        The ``data_type`` filter is applied to the full (prefixed) column
        names; prefixes are stripped afterwards, so both options can be
        combined.

        :raises ValueError: If ``data_type`` is given and no column matches it.

        >>> from edsl.results.Dataset import Dataset
        >>> d = Dataset([{'a.b':[1,2,3,4]}])
        >>> d.relevant_columns()
        ['a.b']

        >>> d.relevant_columns(remove_prefix=True)
        ['b']

        >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
        >>> d.relevant_columns()
        ['a', 'b']

        >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
        ['answer.how_feeling', 'answer.how_feeling_yesterday']

        >>> from edsl.results import Results
        >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
        ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']

        >>> Results.example().relevant_columns(data_type = "flimflam")
        Traceback (most recent call last):
        ...
        ValueError: No columns found for data type: flimflam. Available data types are: ...
        """
        full_names = [list(entry.keys())[0] for entry in self]

        def get_data_type(column):
            return column.split(".")[0] if "." in column else None

        if data_type:
            selected = [
                column for column in full_names if get_data_type(column) == data_type
            ]
            if not selected:
                # Columns without a prefix have no data type; drop the None
                # marker so the remaining names can be sorted.
                all_data_types = sorted(
                    {get_data_type(column) for column in full_names} - {None}
                )
                raise ValueError(
                    f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
                )
        else:
            selected = full_names

        if remove_prefix:
            selected = [column.split(".")[-1] for column in selected]

        return selected

    def num_observations(self):
        """Return the number of observations in the dataset.

        Returns ``None`` when the dataset has no columns.

        :raises ValueError: If the columns do not all have the same length.

        >>> from edsl.results import Results
        >>> Results.example().num_observations()
        4
        """
        expected = None
        for entry in self:
            _, values = list(entry.items())[0]
            if expected is None:
                expected = len(values)
            elif len(values) != expected:
                raise ValueError(
                    "The number of observations is not consistent across columns."
                )

        return expected

    def _make_tabular(
        self, remove_prefix: bool, pretty_labels: Optional[dict] = None
    ) -> tuple[list, List[list]]:
        """Turn the results into a tabular format.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param pretty_labels: Optional mapping from column name (after any
            prefix removal) to the label to use in the header.
        :return: A ``(header, rows)`` pair; ``rows`` holds one list per observation.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
        (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])

        >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
        (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
        """
        # Keep (name, values) pairs in order so columns that share a name
        # are not collapsed into one.
        columns = [list(entry.items())[0] for entry in self]
        header = [name for name, _ in columns]

        # num_observations() validates column lengths; it returns None for
        # an empty dataset, which simply yields no rows.
        row_count = self.num_observations() or 0
        rows = [[values[i] for _, values in columns] for i in range(row_count)]

        if remove_prefix:
            header = [name.split(".")[-1] for name in header]

        if pretty_labels is not None:
            header = [pretty_labels.get(name, name) for name in header]

        return header, rows

    def print_long(self):
        """Print the results in a long format, one ``key: value`` line per cell.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print_long()
        answer.how_feeling: OK
        answer.how_feeling: Great
        answer.how_feeling: Terrible
        answer.how_feeling: OK
        """
        for entry in self:
            key, list_of_values = list(entry.items())[0]
            for value in list_of_values:
                print(f"{key}: {value}")

    # NOTE: a legacy ``print`` method (rich / html / markdown / latex output)
    # used to be kept here as commented-out code; it was dead code and has
    # been dropped from this module.

    def to_csv(
        self,
        filename: Optional[str] = None,
        remove_prefix: bool = False,
        download_link: bool = False,
        pretty_labels: Optional[dict] = None,
    ):
        """Export the results to a CSV file.

        :param filename: The filename to save the CSV file to.
        :param remove_prefix: Whether to remove the prefix from the column names.
        :param download_link: Whether to display a download link in a Jupyter notebook.
        :param pretty_labels: Optional mapping from column name to header label.
        :return: The CSV text when neither ``filename`` nor ``download_link``
            is used; otherwise ``None``.

        Example:

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_csv()
        'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

        >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
        'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

        >>> import tempfile
        >>> filename = tempfile.NamedTemporaryFile(delete=False).name
        >>> r.select('how_feeling').to_csv(filename = filename)
        >>> import os
        >>> import csv
        >>> with open(filename, newline='') as f:
        ...     reader = csv.reader(f)
        ...     for row in reader:
        ...         print(row)
        ['answer.how_feeling']
        ['OK']
        ['Great']
        ['Terrible']
        ['OK']

        """
        header, rows = self._make_tabular(
            remove_prefix=remove_prefix, pretty_labels=pretty_labels
        )

        if filename is not None:
            # newline="" hands line-ending control to the csv module, which
            # avoids blank rows on platforms that translate "\n".
            with open(filename, "w", newline="") as f:
                self._write_csv_rows(f, header, rows)
            return None

        buffer = io.StringIO()
        self._write_csv_rows(buffer, header, rows)
        csv_text = buffer.getvalue()

        if download_link:
            from IPython.display import HTML, display

            display(HTML(self._csv_download_anchor(csv_text)))
            return None

        return csv_text

    def download_link(self, pretty_labels: Optional[dict] = None) -> str:
        """Return a download link for the results.

        :param pretty_labels: A dictionary of pretty labels for the columns.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').download_link()
        '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
        """
        return self._csv_download_anchor(self.to_csv(pretty_labels=pretty_labels))

    def to_pandas(
        self, remove_prefix: bool = False, lists_as_strings=False
    ) -> "DataFrame":
        """Convert the results to a pandas DataFrame.

        The frame is built by round-tripping through CSV text (see
        ``_to_pandas_strings``), so cell types are whatever
        ``pandas.read_csv`` infers from that text.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param lists_as_strings: Currently unused; kept for backward compatibility.

        """
        return self._to_pandas_strings(remove_prefix)

    def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
        """Convert the results to a pandas DataFrame via their CSV representation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_pandas()
          answer.how_feeling
        0                 OK
        1              Great
        2           Terrible
        3                 OK
        """
        import pandas as pd

        csv_string = self.to_csv(remove_prefix=remove_prefix)
        return pd.read_csv(io.StringIO(csv_string))

    def to_scenario_list(self, remove_prefix: bool = True) -> "ScenarioList":
        """Convert the results to a ScenarioList, one Scenario per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_scenario_list()
        ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
        """
        from edsl import ScenarioList, Scenario

        list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
        return ScenarioList([Scenario(d) for d in list_of_dicts])

    def to_agent_list(self, remove_prefix: bool = True):
        """Convert the results to an AgentList, one Agent per observation.

        A ``name`` column is moved to the ``agent_name`` key and its value is
        also passed as the Agent's ``name``.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_agent_list()
        AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
        """
        from edsl import AgentList, Agent

        agents = []
        for d in self.to_dicts(remove_prefix=remove_prefix):
            if "name" in d:
                d["agent_name"] = d.pop("name")
                agents.append(Agent(d, name=d["agent_name"]))
            else:
                agents.append(Agent(d))
        return AgentList(agents)

    def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
        """Convert the results to a list of dictionaries, one per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_dicts()
        [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

        """
        list_of_keys = []
        list_of_values = []
        for entry in self:
            key, values = list(entry.items())[0]
            list_of_keys.append(key)
            list_of_values.append(values)

        if remove_prefix:
            list_of_keys = [key.split(".")[-1] for key in list_of_keys]

        return [dict(zip(list_of_keys, entries)) for entries in zip(*list_of_values)]

    def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list:
        """Convert the results to a list of values (one column) or of tuples (several).

        :param flatten: Whether to flatten the list of lists.
        :param remove_none: Whether to remove None values from the list.
        :param unzipped: Currently unused; kept for backward compatibility.
        :raises ValueError: If ``flatten`` is requested with more than one column.

        >>> from edsl.results import Results
        >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

        >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
        [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

        >>> r = Results.example()
        >>> r.select('how_feeling').to_list()
        ['OK', 'Great', 'Terrible', 'OK']

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
        [1, 9, 2, 3, 4]

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
        Traceback (most recent call last):
        ...
        ValueError: Cannot flatten a list of lists when there are multiple columns selected.


        """
        columns = self.relevant_columns()

        if len(columns) > 1 and flatten:
            raise ValueError(
                "Cannot flatten a list of lists when there are multiple columns selected."
            )

        if len(columns) == 1:
            # Only one column is selected (the typical case for this method).
            list_to_return = list(self[0].values())[0]
        else:
            data = self.to_dicts(remove_prefix=False)
            list_to_return = [tuple(d[key] for key in columns) for d in data]

        if remove_none:
            list_to_return = [item for item in list_to_return if item is not None]

        if flatten:
            new_list = []
            for item in list_to_return:
                if isinstance(item, list):
                    new_list.extend(item)
                else:
                    new_list.append(item)
            list_to_return = new_list

        return list_to_return

    def html(
        self,
        filename: Optional[str] = None,
        cta: str = "Open in browser",
        return_link: bool = False,
    ):
        """Write the dataset as an HTML table and open or link to it.

        :param filename: Destination file; when omitted a ``.html`` temporary
            file is created in the current working directory.
        :param cta: Link text shown when running in a notebook.
        :param return_link: Whether to return the filename that was written.
        """
        import os
        import tempfile
        from edsl.utilities.utilities import is_notebook

        df = self.to_pandas()

        if filename is None:
            # Only the name is needed; close the handle right away so it is
            # not left open while the file is rewritten below.
            with tempfile.NamedTemporaryFile(
                "w", delete=False, suffix=".html", dir=os.getcwd()
            ) as tmp:
                filename = tmp.name

        with open(filename, "w") as f:
            f.write(df.to_html())

        if is_notebook():
            # IPython is only needed for the notebook branch.
            from IPython.display import HTML, display

            html_url = f"/files/{filename}"
            html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
            display(HTML(html_link))
        else:
            import webbrowser

            print(f"Saved to {filename}")
            webbrowser.open(f"file://{os.path.abspath(filename)}")

        if return_link:
            return filename

    def tally(
        self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
    ) -> Union[dict, "Dataset"]:
        """Tally the values of a field or perform a cross-tab of multiple fields.

        :param fields: The field(s) to tally, multiple fields for cross-tabulation.
            When omitted, all columns of the dataset are used.
        :param top_n: If given, keep only the ``top_n`` most frequent entries.
        :param output: ``"Dataset"`` (default) or ``"dict"``.
        :raises ValueError: If a field is not in the dataset or ``output`` is
            not one of the supported values.

        List-valued cells are converted to tuples so they can be counted.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
        {'OK': 2, 'Great': 1, 'Terrible': 1}
        >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
        """
        from collections import Counter

        all_columns = self.relevant_columns()
        if len(fields) == 0:
            fields = all_columns

        # Fields may be given with or without their prefix.
        known_names = set(all_columns) | {
            column.split(".")[-1] for column in all_columns
        }
        if not all(f in known_names for f in fields):
            raise ValueError("One or more specified fields are not in the dataset.")

        def hashable(value):
            # Lists cannot be Counter keys; turn them (recursively) into tuples.
            if isinstance(value, list):
                return tuple(hashable(item) for item in value)
            return value

        if len(fields) == 1:
            values = [hashable(value) for value in self._key_to_value(fields[0])]
        else:
            values = [
                tuple(hashable(value) for value in combo)
                for combo in zip(*(self._key_to_value(field) for field in fields))
            ]

        # most_common() orders by descending count, ties in first-seen order.
        ranked = Counter(values).most_common()
        if top_n is not None:
            ranked = ranked[:top_n]
        sorted_tally = dict(ranked)

        if output == "dict":
            import textwrap
            import warnings

            # NOTE(review): the default is already "Dataset"; this message
            # looks stale but is kept so existing warning filters still match.
            warnings.warn(
                textwrap.dedent(
                    """\
                        The default output from tally will change to Dataset in the future.
                        Use output='Dataset' to get the Dataset object for now.
                        """
                )
            )
            return sorted_tally

        if output == "Dataset":
            from edsl.results.Dataset import Dataset

            dataset = Dataset(
                [
                    {"value": list(sorted_tally.keys())},
                    {"count": list(sorted_tally.values())},
                ]
            )
            # Split the tallied "value" column back into one column per field.
            sl = dataset.to_scenario_list().unpack(
                "value",
                new_names=fields,
                keep_original=False,
            )
            # Move "count" to the end so the field columns come first.
            keys = list(sl[0].keys())
            keys.remove("count")
            keys.append("count")
            return sl.reorder_keys(keys).to_dataset()

        raise ValueError(f"output must be 'dict' or 'Dataset', got {output!r}.")
726
+
727
+
728
+ if __name__ == "__main__":
729
+ import doctest
730
+
731
+ doctest.testmod(optionflags=doctest.ELLIPSIS)