edsl 0.1.36.dev7__py3-none-any.whl → 0.1.37.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257)
  1. edsl/Base.py +303 -303
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +48 -48
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +804 -804
  7. edsl/agents/AgentList.py +345 -337
  8. edsl/agents/Invigilator.py +222 -222
  9. edsl/agents/InvigilatorBase.py +305 -298
  10. edsl/agents/PromptConstructor.py +310 -320
  11. edsl/agents/__init__.py +3 -3
  12. edsl/agents/descriptors.py +86 -86
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +289 -289
  26. edsl/config.py +149 -149
  27. edsl/conjure/AgentConstructionMixin.py +152 -152
  28. edsl/conjure/Conjure.py +62 -62
  29. edsl/conjure/InputData.py +659 -659
  30. edsl/conjure/InputDataCSV.py +48 -48
  31. edsl/conjure/InputDataMixinQuestionStats.py +182 -182
  32. edsl/conjure/InputDataPyRead.py +91 -91
  33. edsl/conjure/InputDataSPSS.py +8 -8
  34. edsl/conjure/InputDataStata.py +8 -8
  35. edsl/conjure/QuestionOptionMixin.py +76 -76
  36. edsl/conjure/QuestionTypeMixin.py +23 -23
  37. edsl/conjure/RawQuestion.py +65 -65
  38. edsl/conjure/SurveyResponses.py +7 -7
  39. edsl/conjure/__init__.py +9 -9
  40. edsl/conjure/naming_utilities.py +263 -263
  41. edsl/conjure/utilities.py +201 -201
  42. edsl/conversation/Conversation.py +238 -238
  43. edsl/conversation/car_buying.py +58 -58
  44. edsl/conversation/mug_negotiation.py +81 -81
  45. edsl/conversation/next_speaker_utilities.py +93 -93
  46. edsl/coop/PriceFetcher.py +54 -54
  47. edsl/coop/__init__.py +2 -2
  48. edsl/coop/coop.py +824 -849
  49. edsl/coop/utils.py +131 -131
  50. edsl/data/Cache.py +527 -527
  51. edsl/data/CacheEntry.py +228 -228
  52. edsl/data/CacheHandler.py +149 -149
  53. edsl/data/RemoteCacheSync.py +97 -84
  54. edsl/data/SQLiteDict.py +292 -292
  55. edsl/data/__init__.py +4 -4
  56. edsl/data/orm.py +10 -10
  57. edsl/data_transfer_models.py +73 -73
  58. edsl/enums.py +173 -173
  59. edsl/exceptions/__init__.py +50 -50
  60. edsl/exceptions/agents.py +40 -40
  61. edsl/exceptions/configuration.py +16 -16
  62. edsl/exceptions/coop.py +10 -10
  63. edsl/exceptions/data.py +14 -14
  64. edsl/exceptions/general.py +34 -34
  65. edsl/exceptions/jobs.py +33 -33
  66. edsl/exceptions/language_models.py +63 -63
  67. edsl/exceptions/prompts.py +15 -15
  68. edsl/exceptions/questions.py +91 -91
  69. edsl/exceptions/results.py +26 -26
  70. edsl/exceptions/surveys.py +34 -34
  71. edsl/inference_services/AnthropicService.py +87 -87
  72. edsl/inference_services/AwsBedrock.py +115 -115
  73. edsl/inference_services/AzureAI.py +217 -217
  74. edsl/inference_services/DeepInfraService.py +18 -18
  75. edsl/inference_services/GoogleService.py +156 -156
  76. edsl/inference_services/GroqService.py +20 -20
  77. edsl/inference_services/InferenceServiceABC.py +147 -147
  78. edsl/inference_services/InferenceServicesCollection.py +74 -74
  79. edsl/inference_services/MistralAIService.py +123 -123
  80. edsl/inference_services/OllamaService.py +18 -18
  81. edsl/inference_services/OpenAIService.py +224 -224
  82. edsl/inference_services/TestService.py +89 -89
  83. edsl/inference_services/TogetherAIService.py +170 -170
  84. edsl/inference_services/models_available_cache.py +118 -118
  85. edsl/inference_services/rate_limits_cache.py +25 -25
  86. edsl/inference_services/registry.py +39 -39
  87. edsl/inference_services/write_available.py +10 -10
  88. edsl/jobs/Answers.py +56 -56
  89. edsl/jobs/Jobs.py +1112 -1112
  90. edsl/jobs/__init__.py +1 -1
  91. edsl/jobs/buckets/BucketCollection.py +63 -63
  92. edsl/jobs/buckets/ModelBuckets.py +65 -65
  93. edsl/jobs/buckets/TokenBucket.py +248 -248
  94. edsl/jobs/interviews/Interview.py +661 -661
  95. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  96. edsl/jobs/interviews/InterviewExceptionEntry.py +182 -189
  97. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  98. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  99. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  100. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  101. edsl/jobs/interviews/ReportErrors.py +66 -66
  102. edsl/jobs/interviews/interview_status_enum.py +9 -9
  103. edsl/jobs/runners/JobsRunnerAsyncio.py +338 -337
  104. edsl/jobs/runners/JobsRunnerStatus.py +332 -332
  105. edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
  106. edsl/jobs/tasks/TaskCreators.py +64 -64
  107. edsl/jobs/tasks/TaskHistory.py +441 -441
  108. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  109. edsl/jobs/tasks/task_status_enum.py +163 -163
  110. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  111. edsl/jobs/tokens/TokenUsage.py +34 -34
  112. edsl/language_models/LanguageModel.py +718 -718
  113. edsl/language_models/ModelList.py +102 -102
  114. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  115. edsl/language_models/__init__.py +2 -2
  116. edsl/language_models/fake_openai_call.py +15 -15
  117. edsl/language_models/fake_openai_service.py +61 -61
  118. edsl/language_models/registry.py +137 -137
  119. edsl/language_models/repair.py +156 -156
  120. edsl/language_models/unused/ReplicateBase.py +83 -83
  121. edsl/language_models/utilities.py +64 -64
  122. edsl/notebooks/Notebook.py +259 -259
  123. edsl/notebooks/__init__.py +1 -1
  124. edsl/prompts/Prompt.py +350 -358
  125. edsl/prompts/__init__.py +2 -2
  126. edsl/questions/AnswerValidatorMixin.py +289 -289
  127. edsl/questions/QuestionBase.py +616 -616
  128. edsl/questions/QuestionBaseGenMixin.py +161 -161
  129. edsl/questions/QuestionBasePromptsMixin.py +266 -266
  130. edsl/questions/QuestionBudget.py +227 -227
  131. edsl/questions/QuestionCheckBox.py +359 -359
  132. edsl/questions/QuestionExtract.py +183 -183
  133. edsl/questions/QuestionFreeText.py +113 -113
  134. edsl/questions/QuestionFunctional.py +159 -159
  135. edsl/questions/QuestionList.py +231 -231
  136. edsl/questions/QuestionMultipleChoice.py +286 -286
  137. edsl/questions/QuestionNumerical.py +153 -153
  138. edsl/questions/QuestionRank.py +324 -324
  139. edsl/questions/Quick.py +41 -41
  140. edsl/questions/RegisterQuestionsMeta.py +71 -71
  141. edsl/questions/ResponseValidatorABC.py +174 -174
  142. edsl/questions/SimpleAskMixin.py +73 -73
  143. edsl/questions/__init__.py +26 -26
  144. edsl/questions/compose_questions.py +98 -98
  145. edsl/questions/decorators.py +21 -21
  146. edsl/questions/derived/QuestionLikertFive.py +76 -76
  147. edsl/questions/derived/QuestionLinearScale.py +87 -87
  148. edsl/questions/derived/QuestionTopK.py +91 -91
  149. edsl/questions/derived/QuestionYesNo.py +82 -82
  150. edsl/questions/descriptors.py +418 -418
  151. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  152. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  153. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  154. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  155. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  156. edsl/questions/prompt_templates/question_list.jinja +17 -17
  157. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  158. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  159. edsl/questions/question_registry.py +147 -147
  160. edsl/questions/settings.py +12 -12
  161. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  162. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  163. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  164. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  165. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  166. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  167. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  168. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  169. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  170. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  171. edsl/questions/templates/list/question_presentation.jinja +5 -5
  172. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  173. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  174. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  175. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  176. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  177. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  178. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  179. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  180. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  181. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  182. edsl/results/Dataset.py +293 -293
  183. edsl/results/DatasetExportMixin.py +693 -693
  184. edsl/results/DatasetTree.py +145 -145
  185. edsl/results/Result.py +435 -433
  186. edsl/results/Results.py +1160 -1158
  187. edsl/results/ResultsDBMixin.py +238 -238
  188. edsl/results/ResultsExportMixin.py +43 -43
  189. edsl/results/ResultsFetchMixin.py +33 -33
  190. edsl/results/ResultsGGMixin.py +121 -121
  191. edsl/results/ResultsToolsMixin.py +98 -98
  192. edsl/results/Selector.py +118 -118
  193. edsl/results/__init__.py +2 -2
  194. edsl/results/tree_explore.py +115 -115
  195. edsl/scenarios/FileStore.py +458 -458
  196. edsl/scenarios/Scenario.py +510 -510
  197. edsl/scenarios/ScenarioHtmlMixin.py +59 -59
  198. edsl/scenarios/ScenarioList.py +1101 -1101
  199. edsl/scenarios/ScenarioListExportMixin.py +52 -52
  200. edsl/scenarios/ScenarioListPdfMixin.py +261 -261
  201. edsl/scenarios/__init__.py +4 -4
  202. edsl/shared.py +1 -1
  203. edsl/study/ObjectEntry.py +173 -173
  204. edsl/study/ProofOfWork.py +113 -113
  205. edsl/study/SnapShot.py +80 -80
  206. edsl/study/Study.py +528 -528
  207. edsl/study/__init__.py +4 -4
  208. edsl/surveys/DAG.py +148 -148
  209. edsl/surveys/Memory.py +31 -31
  210. edsl/surveys/MemoryPlan.py +244 -244
  211. edsl/surveys/Rule.py +324 -324
  212. edsl/surveys/RuleCollection.py +387 -387
  213. edsl/surveys/Survey.py +1772 -1772
  214. edsl/surveys/SurveyCSS.py +261 -261
  215. edsl/surveys/SurveyExportMixin.py +259 -259
  216. edsl/surveys/SurveyFlowVisualizationMixin.py +121 -121
  217. edsl/surveys/SurveyQualtricsImport.py +284 -284
  218. edsl/surveys/__init__.py +3 -3
  219. edsl/surveys/base.py +53 -53
  220. edsl/surveys/descriptors.py +56 -56
  221. edsl/surveys/instructions/ChangeInstruction.py +47 -47
  222. edsl/surveys/instructions/Instruction.py +51 -51
  223. edsl/surveys/instructions/InstructionCollection.py +77 -77
  224. edsl/templates/error_reporting/base.html +23 -23
  225. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  226. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  227. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  228. edsl/templates/error_reporting/interview_details.html +115 -115
  229. edsl/templates/error_reporting/interviews.html +9 -9
  230. edsl/templates/error_reporting/overview.html +4 -4
  231. edsl/templates/error_reporting/performance_plot.html +1 -1
  232. edsl/templates/error_reporting/report.css +73 -73
  233. edsl/templates/error_reporting/report.html +117 -117
  234. edsl/templates/error_reporting/report.js +25 -25
  235. edsl/tools/__init__.py +1 -1
  236. edsl/tools/clusters.py +192 -192
  237. edsl/tools/embeddings.py +27 -27
  238. edsl/tools/embeddings_plotting.py +118 -118
  239. edsl/tools/plotting.py +112 -112
  240. edsl/tools/summarize.py +18 -18
  241. edsl/utilities/SystemInfo.py +28 -28
  242. edsl/utilities/__init__.py +22 -22
  243. edsl/utilities/ast_utilities.py +25 -25
  244. edsl/utilities/data/Registry.py +6 -6
  245. edsl/utilities/data/__init__.py +1 -1
  246. edsl/utilities/data/scooter_results.json +1 -1
  247. edsl/utilities/decorators.py +77 -77
  248. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  249. edsl/utilities/interface.py +627 -627
  250. edsl/utilities/repair_functions.py +28 -28
  251. edsl/utilities/restricted_python.py +70 -70
  252. edsl/utilities/utilities.py +391 -391
  253. {edsl-0.1.36.dev7.dist-info → edsl-0.1.37.dev1.dist-info}/LICENSE +21 -21
  254. {edsl-0.1.36.dev7.dist-info → edsl-0.1.37.dev1.dist-info}/METADATA +1 -1
  255. edsl-0.1.37.dev1.dist-info/RECORD +279 -0
  256. edsl-0.1.36.dev7.dist-info/RECORD +0 -279
  257. {edsl-0.1.36.dev7.dist-info → edsl-0.1.37.dev1.dist-info}/WHEEL +0 -0
@@ -1,693 +1,693 @@
1
- """Mixin class for exporting results."""
2
-
3
- import base64
4
- import csv
5
- import io
6
- import html
7
- from typing import Optional
8
-
9
- from typing import Literal, Optional, Union, List
10
-
11
-
12
- class DatasetExportMixin:
13
- """Mixin class for exporting Dataset objects."""
14
-
15
def relevant_columns(
    self, data_type: Optional[str] = None, remove_prefix=False
) -> list:
    """Return the column names (keys) that are present in the dataset.

    Each entry of the dataset is treated as a single-key mapping whose key is
    the column name, typically of the form ``"<data_type>.<name>"``.

    :param data_type: The data type to filter by. Only columns whose text
        before the first dot equals this value are kept.
    :param remove_prefix: Whether to remove the prefix (everything up to the
        last dot) from the returned column names.
    :raises ValueError: If ``data_type`` is given and no column matches it.

    The ``data_type`` filter is evaluated on the full column names and the
    prefix is stripped afterwards, so the two options can be combined.

    >>> from edsl.results.Dataset import Dataset
    >>> d = Dataset([{'a.b':[1,2,3,4]}])
    >>> d.relevant_columns()
    ['a.b']

    >>> d.relevant_columns(remove_prefix=True)
    ['b']

    >>> d.relevant_columns(data_type="a", remove_prefix=True)
    ['b']

    >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
    >>> d.relevant_columns()
    ['a', 'b']

    >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
    ['answer.how_feeling', 'answer.how_feeling_yesterday']

    >>> from edsl.results import Results
    >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
    ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']

    >>> Results.example().relevant_columns(data_type = "flimflam")
    Traceback (most recent call last):
    ...
    ValueError: No columns found for data type: flimflam. Available data types are: ...
    """

    def get_data_type(column):
        # Text before the first dot, or None for a column without a prefix.
        prefix, dot, _ = column.partition(".")
        return prefix if dot else None

    columns = [list(entry.keys())[0] for entry in self]

    if data_type:
        matching = [column for column in columns if get_data_type(column) == data_type]
        if not matching:
            # None (un-prefixed columns) cannot be ordered against str, so it
            # is sorted ahead of the named data types explicitly.
            all_data_types = sorted(
                {get_data_type(column) for column in columns},
                key=lambda dtype: (dtype is not None, dtype or ""),
            )
            raise ValueError(
                f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
            )
        columns = matching

    # Strip prefixes last: the filter above needs the full names.
    if remove_prefix:
        columns = [column.split(".")[-1] for column in columns]

    return columns
71
-
72
def num_observations(self):
    """Return the number of observations in the dataset.

    Every column must hold the same number of values.

    :return: The common column length, or ``None`` if the dataset has no columns.
    :raises ValueError: If two columns differ in length.

    >>> from edsl.results import Results
    >>> Results.example().num_observations()
    4
    """
    expected = None
    for column in self:
        # Each entry is a single-key mapping; only its values matter here.
        observations = list(column.items())[0][1]
        count = len(observations)
        if expected is None:
            expected = count
        elif count != expected:
            raise ValueError(
                "The number of observations is not consistent across columns."
            )

    return expected
91
-
92
- def _make_tabular(
93
- self, remove_prefix: bool, pretty_labels: Optional[dict] = None
94
- ) -> tuple[list, List[list]]:
95
- """Turn the results into a tabular format.
96
-
97
- :param remove_prefix: Whether to remove the prefix from the column names.
98
-
99
- >>> from edsl.results import Results
100
- >>> r = Results.example()
101
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
102
- (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
103
-
104
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
105
- (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
106
- """
107
-
108
- def create_dict_from_list_of_dicts(list_of_dicts):
109
- for entry in list_of_dicts:
110
- key, list_of_values = list(entry.items())[0]
111
- yield key, list_of_values
112
-
113
- tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
114
-
115
- full_header = [list(x.keys())[0] for x in self]
116
-
117
- rows = []
118
- for i in range(self.num_observations()):
119
- row = [tabular_repr[h][i] for h in full_header]
120
- rows.append(row)
121
-
122
- if remove_prefix:
123
- header = [h.split(".")[-1] for h in full_header]
124
- else:
125
- header = full_header
126
-
127
- if pretty_labels is not None:
128
- header = [pretty_labels.get(h, h) for h in header]
129
-
130
- return header, rows
131
-
132
def print_long(self):
    """Print the results in a long format: one ``column: value`` line per value.

    Columns are emitted in dataset order, and all values of a column are
    printed before moving on to the next column.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').print_long()
    answer.how_feeling: OK
    answer.how_feeling: Great
    answer.how_feeling: Terrible
    answer.how_feeling: OK
    """
    for column in self:
        first_item = list(column.items())[0]
        label, observations = first_item
        for observation in observations:
            print(f"{label}: {observation}")
146
-
147
def print(
    self,
    pretty_labels: Optional[dict] = None,
    filename: Optional[str] = None,
    format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
    interactive: bool = False,
    split_at_dot: bool = True,
    max_rows=None,
    tee=False,
    iframe=False,
    iframe_height: int = 200,
    iframe_width: int = 600,
    web=False,
    return_string: bool = False,
) -> Union[None, str, "Results"]:
    """Print the results in a pretty format.

    :param pretty_labels: A dictionary of pretty labels for the columns. When non-empty, ``split_at_dot`` is disabled.
    :param filename: The filename to save the results to (passed to the rich and markdown printers; the latex output is written to it instead of being printed).
    :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'. When omitted, 'html' is used inside a notebook and 'rich' otherwise.
    :param interactive: Whether to print the results interactively in a Jupyter notebook.
    :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
    :param max_rows: The maximum number of rows to print.
    :param tee: Whether to return the dataset.
    :param iframe: Whether to display the table in an iframe.
    :param iframe_height: The height of the iframe.
    :param iframe_width: The width of the iframe.
    :param web: Whether to display the table in a web browser (not referenced by this method's body).
    :param return_string: Whether to return the output as a string instead of printing.

    :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
    :raises ValueError: If ``format`` is not one of the supported values.

    While ``return_string`` is in effect, standard output is redirected into a
    buffer; it is restored even if rendering raises.

    Example: Print in rich format at the terminal

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').print(format = "rich")
    ┏━━━━━━━━━━━━━━┓
    ┃ answer       ┃
    ┃ .how_feeling ┃
    ┡━━━━━━━━━━━━━━┩
    │ OK           │
    ├──────────────┤
    │ Great        │
    ├──────────────┤
    │ Terrible     │
    ├──────────────┤
    │ OK           │
    └──────────────┘

    >>> r = Results.example()
    >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
    ┏━━━━━━━━━━━━━━┓
    ┃ answer       ┃
    ┃ .how_feeling ┃
    ┡━━━━━━━━━━━━━━┩
    │ OK           │
    ├──────────────┤
    │ Great        │
    └──────────────┘
    >>> r2
    Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])

    >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
    ┏━━━━━━━━━━━━━━┓
    ┃ answer       ┃
    ┃ .how_feeling ┃
    ┡━━━━━━━━━━━━━━┩
    │ OK           │
    ├──────────────┤
    │ Great        │
    └──────────────┘

    >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
    ┏━━━━━━━━━━━━━━━━━━━━┓
    ┃ answer.how_feeling ┃
    ┡━━━━━━━━━━━━━━━━━━━━┩
    │ OK                 │
    ├────────────────────┤
    │ Great              │
    ├────────────────────┤
    │ Terrible           │
    ├────────────────────┤
    │ OK                 │
    └────────────────────┘

    Example: using the pretty_labels parameter

    >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
    ┏━━━━━━━━━━━━━━━━━━━━━┓
    ┃ How are you feeling ┃
    ┡━━━━━━━━━━━━━━━━━━━━━┩
    │ OK                  │
    ├─────────────────────┤
    │ Great               │
    ├─────────────────────┤
    │ Terrible            │
    ├─────────────────────┤
    │ OK                  │
    └─────────────────────┘

    Example: printing in markdown format

    >>> r.select('how_feeling').print(format='markdown')
    | answer.how_feeling |
    |--|
    | OK |
    | Great |
    | Terrible |
    | OK |
    ...

    >>> r.select('how_feeling').print(format='latex')
    \\begin{tabular}{l}
    ...
    \\end{tabular}
    <BLANKLINE>
    """
    import contextlib
    import io

    # Resolve the default format; the notebook check is only needed here and
    # in the html branch, so its import is deferred to those places.
    if format is None:
        from edsl.utilities.utilities import is_notebook

        format = "html" if is_notebook() else "rich"
    if format not in ("rich", "html", "markdown", "latex"):
        raise ValueError(
            "format must be one of 'rich', 'html', 'markdown', or 'latex'."
        )

    if pretty_labels is None:
        pretty_labels = {}

    if pretty_labels != {}:  # only split at dot if there are no pretty labels
        split_at_dot = False

    # Relabel the columns and truncate each one to max_rows values.
    new_data = []
    for entry in self:
        key, list_of_values = list(entry.items())[0]
        new_data.append({pretty_labels.get(key, key): list_of_values[:max_rows]})

    # Capture stdout only when a string is requested. The context manager
    # guarantees sys.stdout is restored if a renderer raises.
    captured = io.StringIO() if return_string else None
    stdout_redirect = (
        contextlib.redirect_stdout(captured)
        if return_string
        else contextlib.nullcontext()
    )

    output = None
    with stdout_redirect:
        if format == "rich":
            from edsl.utilities.interface import print_dataset_with_rich

            output = print_dataset_with_rich(
                new_data, filename=filename, split_at_dot=split_at_dot
            )
        elif format == "markdown":
            from edsl.utilities.interface import print_list_of_dicts_as_markdown_table

            output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
        elif format == "latex":
            # NOTE(review): this branch renders the full dataset via
            # to_pandas(); pretty_labels and max_rows are not applied here.
            df = self.to_pandas()
            df.columns = [col.replace("_", " ") for col in df.columns]
            latex_string = df.to_latex(index=False)

            if filename is not None:
                with open(filename, "w") as f:
                    f.write(latex_string)
            else:
                print(latex_string)
            output = latex_string
        else:  # format == "html"
            from IPython.display import HTML, display
            from edsl.utilities.interface import print_list_of_dicts_as_html_table
            from edsl.utilities.utilities import is_notebook

            html_source = print_list_of_dicts_as_html_table(
                new_data, interactive=interactive
            )

            if iframe:
                iframe_source = (
                    f'<iframe srcdoc="{html.escape(html_source)}" '
                    f'style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>'
                )
                display(HTML(iframe_source))
            elif is_notebook():
                display(HTML(html_source))
            else:
                from edsl.utilities.interface import view_html

                view_html(html_source)

            output = html_source

    if return_string:
        # Prefer what was actually printed; fall back to the renderer's value.
        return captured.getvalue() or output

    if tee:
        return self

    return None
357
-
358
def to_csv(
    self,
    filename: Optional[str] = None,
    remove_prefix: bool = False,
    download_link: bool = False,
    pretty_labels: Optional[dict] = None,
):
    """Export the results to a CSV file.

    :param filename: The filename to save the CSV file to. When given, the
        CSV is written there and ``download_link`` is not consulted.
    :param remove_prefix: Whether to remove the prefix from the column names.
    :param download_link: Whether to display a download link in a Jupyter notebook.
    :param pretty_labels: A dictionary of pretty labels for the columns.
    :return: The CSV text when neither ``filename`` nor ``download_link`` is
        set; otherwise ``None``.

    Example:

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_csv()
    'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

    >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
    'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

    >>> import tempfile
    >>> filename = tempfile.NamedTemporaryFile(delete=False).name
    >>> r.select('how_feeling').to_csv(filename = filename)
    >>> import os
    >>> import csv
    >>> with open(filename, newline='') as f:
    ...     reader = csv.reader(f)
    ...     for row in reader:
    ...         print(row)
    ['answer.how_feeling']
    ['OK']
    ['Great']
    ['Terrible']
    ['OK']

    """
    if pretty_labels is None:
        pretty_labels = {}
    header, rows = self._make_tabular(
        remove_prefix=remove_prefix, pretty_labels=pretty_labels
    )

    def write_table(stream):
        # Single place that serializes the header followed by the rows.
        writer = csv.writer(stream)
        writer.writerow(header)
        writer.writerows(rows)

    if filename is not None:
        # newline="" lets the csv module control line endings; without it,
        # platforms that translate "\n" would emit "\r\r\n" row terminators.
        with open(filename, "w", newline="") as f:
            write_table(f)
        return None

    output = io.StringIO()
    write_table(output)
    csv_string = output.getvalue()

    if download_link:
        from IPython.display import HTML, display

        b64 = base64.b64encode(csv_string.encode()).decode()
        link_html = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
        display(HTML(link_html))
        return None

    return csv_string
423
-
424
def download_link(self, pretty_labels: Optional[dict] = None) -> str:
    """Return an HTML anchor that downloads the results as a CSV file.

    The CSV text produced by ``to_csv`` is base64-encoded and embedded in a
    ``data:`` URL.

    :param pretty_labels: A dictionary of pretty labels for the columns.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').download_link()
    '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
    """
    import base64

    csv_bytes = self.to_csv(pretty_labels=pretty_labels).encode()
    encoded = base64.b64encode(csv_bytes).decode()
    return f'<a href="data:file/csv;base64,{encoded}" download="my_data.csv">Download CSV file</a>'
439
-
440
def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
    """Convert the results to a pandas DataFrame.

    The dataset is serialized with ``to_csv`` and parsed back with
    ``pandas.read_csv``, so column dtypes are whatever pandas infers from the
    CSV text.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_pandas()
      answer.how_feeling
    0                 OK
    1              Great
    2           Terrible
    3                 OK
    """
    import pandas as pd

    buffer = io.StringIO(self.to_csv(remove_prefix=remove_prefix))
    return pd.read_csv(buffer)
461
-
462
def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
    """Convert the results to a ScenarioList, one Scenario per row of ``to_dicts``.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_scenario_list()
    ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
    """
    from edsl import ScenarioList, Scenario

    rows = self.to_dicts(remove_prefix=remove_prefix)
    return ScenarioList([Scenario(row) for row in rows])
480
-
481
def to_agent_list(self, remove_prefix: bool = True):
    """Convert the results to an AgentList, one Agent per row of ``to_dicts``.

    Each row dictionary is passed to ``Agent`` as its first positional
    argument (shown as ``traits`` in the example below).

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_agent_list()
    AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
    """
    from edsl import AgentList, Agent

    agents = []
    for row in self.to_dicts(remove_prefix=remove_prefix):
        agents.append(Agent(row))
    return AgentList(agents)
495
-
496
def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
    """Convert the results to a list of dictionaries, one per observation.

    Columns are zipped together row-wise, so the result is as long as the
    shortest column.

    :param remove_prefix: Whether to remove the prefix (everything up to the
        last dot) from the column names used as dictionary keys.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_dicts()
    [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

    """
    # Each entry is a single-key mapping: (column name, list of values).
    pairs = [list(entry.items())[0] for entry in self]
    names = [name for name, _ in pairs]
    columns = [values for _, values in pairs]

    if remove_prefix:
        names = [name.split(".")[-1] for name in names]

    return [dict(zip(names, row)) for row in zip(*columns)]
522
-
523
def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
    """Convert the results to a list of lists.

    With a single column selected, the column's values are returned as a new
    list (the dataset's own list is not handed out). With several columns,
    one tuple per observation is returned.

    :param flatten: Whether to flatten the list of lists (one level; only
        items that are lists are expanded).
    :param remove_none: Whether to remove None values from the list.
    :param unzipped: Not referenced by this method's body; kept in the signature for compatibility.
    :raises ValueError: If ``flatten`` is requested with more than one column.

    >>> from edsl.results import Results
    >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
    Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

    >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
    [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

    >>> r = Results.example()
    >>> r.select('how_feeling').to_list()
    ['OK', 'Great', 'Terrible', 'OK']

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
    [1, 9, 2, 3, 4]

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
    Traceback (most recent call last):
    ...
    ValueError: Cannot flatten a list of lists when there are multiple columns selected.


    """
    # Computed once; the column set does not change during this call.
    columns = self.relevant_columns()

    if len(columns) > 1 and flatten:
        raise ValueError(
            "Cannot flatten a list of lists when there are multiple columns selected."
        )

    if len(columns) == 1:
        # Single column (the typical use): copy so that callers mutating the
        # result do not alter the dataset's stored values.
        list_to_return = list(list(self[0].values())[0])
    else:
        list_to_return = [
            tuple(row[key] for key in columns)
            for row in self.to_dicts(remove_prefix=False)
        ]

    if remove_none:
        list_to_return = [item for item in list_to_return if item is not None]

    if flatten:
        flattened = []
        for item in list_to_return:
            if isinstance(item, list):
                flattened.extend(item)
            else:
                flattened.append(item)
        list_to_return = flattened

    return list_to_return
580
-
581
def html(
    self,
    filename: Optional[str] = None,
    cta: str = "Open in browser",
    return_link: bool = False,
):
    """Write the dataset as an HTML table and open (or link to) the file.

    The table is produced by ``self.to_pandas().to_html()``.

    :param filename: Where to write the HTML. If None, a new ``.html`` file is created in the current working directory.
    :param cta: The link text shown when running in a notebook.
    :param return_link: Whether to return the filename that was written.
    :return: The filename if ``return_link`` is True, otherwise None.
    """
    import os
    import tempfile
    from edsl.utilities.utilities import is_notebook

    df = self.to_pandas()

    if filename is None:
        # Reserve a unique name, and close the handle before the file is
        # reopened below (an open handle blocks reopening on Windows and
        # would otherwise be leaked).
        with tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".html", dir=os.getcwd()
        ) as handle:
            filename = handle.name

    with open(filename, "w") as f:
        f.write(df.to_html())

    if is_notebook():
        # IPython is only needed on the notebook path.
        from IPython.display import HTML, display

        html_url = f"/files/{filename}"
        html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
        display(HTML(html_link))
    else:
        import webbrowser

        print(f"Saved to {filename}")
        webbrowser.open(f"file://{os.path.abspath(filename)}")

    if return_link:
        return filename
617
-
618
def tally(
    self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
) -> Union[dict, "Dataset"]:
    """Tally the values of a field or perform a cross-tab of multiple fields.

    List-valued cells are converted to tuples so they can be counted.

    :param fields: The field(s) to tally, multiple fields for cross-tabulation. With no fields, all columns are used.
    :param top_n: If given, keep only the ``top_n`` most frequent values.
    :param output: ``"Dataset"`` for a Dataset with 'value' and 'count' columns, or ``"dict"`` for a value -> count dictionary.
    :raises ValueError: If a requested field is not a column of the dataset.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
    {'OK': 2, 'Great': 1, 'Terrible': 1}
    >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
    Dataset([{'value': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
    >>> r.select('how_feeling', 'period').tally('how_feeling', 'period', output = "dict")
    {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
    """
    import textwrap
    import warnings
    from collections import Counter

    available = self.relevant_columns()
    if len(fields) == 0:
        fields = available

    # Fields may be given with or without their data-type prefix.
    unprefixed = [column.split(".")[-1] for column in available]
    if not all(f in available or f in unprefixed for f in fields):
        raise ValueError("One or more specified fields are not in the dataset.")

    if len(fields) == 1:
        values = self._key_to_value(fields[0])
    else:
        values = list(zip(*(self._key_to_value(field) for field in fields)))

    def _freeze(value):
        # Lists are unhashable and cannot be Counter keys; turn them (and any
        # lists nested inside cross-tab tuples) into tuples.
        if isinstance(value, (list, tuple)):
            return tuple(_freeze(item) for item in value)
        return value

    # most_common() orders by descending count, ties in first-seen order.
    ranked = Counter(_freeze(value) for value in values).most_common()
    if top_n is not None:
        ranked = ranked[:top_n]
    sorted_tally = dict(ranked)

    if output == "dict":
        warnings.warn(
            textwrap.dedent(
                """\
                The default output from tally will change to Dataset in the future.
                Use output='Dataset' to get the Dataset object for now.
                """
            )
        )
        return sorted_tally
    elif output == "Dataset":
        # Imported lazily: only this branch needs the Dataset class.
        from edsl.results.Dataset import Dataset

        return Dataset(
            [
                {"value": list(sorted_tally.keys())},
                {"count": list(sorted_tally.values())},
            ]
        )
    # NOTE(review): any other ``output`` value falls through and returns None.
688
-
689
-
690
if __name__ == "__main__":
    # Run this module's doctests; ELLIPSIS lets "..." in expected output match any text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
1
+ """Mixin class for exporting results."""
2
+
3
+ import base64
4
+ import csv
5
+ import io
6
+ import html
7
+ from typing import Optional
8
+
9
+ from typing import Literal, Optional, Union, List
10
+
11
+
12
class DatasetExportMixin:
    """Mixin class for exporting Dataset objects.

    The methods treat ``self`` as a sequence of single-key dictionaries, each
    mapping a column name (optionally prefixed with a data type, as in
    ``'answer.how_feeling'``) to the list of that column's values. Some
    methods also rely on ``self.data`` and ``self._key_to_value`` being
    provided by the host class.
    """

    def relevant_columns(
        self, data_type: Optional[str] = None, remove_prefix=False
    ) -> list:
        """Return the column names that are present in the dataset.

        :param data_type: The data type (the part of a column name before the first dot) to filter by.
        :param remove_prefix: Whether to remove the prefix from the column names.
        :raises ValueError: If ``data_type`` is given and no column has that data type.

        >>> from edsl.results.Dataset import Dataset
        >>> d = Dataset([{'a.b':[1,2,3,4]}])
        >>> d.relevant_columns()
        ['a.b']

        >>> d.relevant_columns(remove_prefix=True)
        ['b']

        >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
        >>> d.relevant_columns()
        ['a', 'b']

        >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
        ['answer.how_feeling', 'answer.how_feeling_yesterday']

        >>> from edsl.results import Results
        >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
        ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']

        >>> Results.example().relevant_columns(data_type = "flimflam")
        Traceback (most recent call last):
        ...
        ValueError: No columns found for data type: flimflam. Available data types are: ...
        """
        full_names = [list(x.keys())[0] for x in self]

        def get_data_type(column):
            return column.split(".")[0] if "." in column else None

        columns = full_names
        if data_type:
            # Filter on the full names: the data type lives in the prefix, so
            # it must be inspected before any prefix is stripped.
            columns = [
                column for column in full_names if get_data_type(column) == data_type
            ]
            if len(columns) == 0:
                # Un-prefixed columns have data type None, which cannot be
                # ordered against strings; sort it as the empty string.
                all_data_types = sorted(
                    {get_data_type(column) for column in full_names},
                    key=lambda t: "" if t is None else t,
                )
                raise ValueError(
                    f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
                )

        if remove_prefix:
            columns = [column.split(".")[-1] for column in columns]

        return columns

    def num_observations(self):
        """Return the number of observations in the dataset.

        Returns None when the dataset has no columns.

        :raises ValueError: If the columns do not all have the same length.

        >>> from edsl.results import Results
        >>> Results.example().num_observations()
        4
        """
        expected = None
        for entry in self:
            _, values = list(entry.items())[0]
            if expected is None:
                expected = len(values)
            elif len(values) != expected:
                raise ValueError(
                    "The number of observations is not consistent across columns."
                )

        return expected

    def _make_tabular(
        self, remove_prefix: bool, pretty_labels: Optional[dict] = None
    ) -> tuple[list, List[list]]:
        """Turn the results into a tabular format: a header and a list of rows.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param pretty_labels: Optional mapping from (possibly prefix-stripped) column name to display label.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
        (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])

        >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
        (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
        """
        tabular_repr = dict(list(entry.items())[0] for entry in self.data)
        full_header = [list(x.keys())[0] for x in self]

        # num_observations() is None for a dataset with no columns; treat
        # that as zero rows instead of failing in range().
        row_count = self.num_observations() or 0
        rows = [[tabular_repr[h][i] for h in full_header] for i in range(row_count)]

        if remove_prefix:
            header = [h.split(".")[-1] for h in full_header]
        else:
            header = full_header

        if pretty_labels is not None:
            header = [pretty_labels.get(h, h) for h in header]

        return header, rows

    def print_long(self):
        """Print the results in a long format, one ``column: value`` line per cell.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print_long()
        answer.how_feeling: OK
        answer.how_feeling: Great
        answer.how_feeling: Terrible
        answer.how_feeling: OK
        """
        for entry in self:
            key, list_of_values = list(entry.items())[0]
            for value in list_of_values:
                print(f"{key}: {value}")

    def print(
        self,
        pretty_labels: Optional[dict] = None,
        filename: Optional[str] = None,
        format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
        interactive: bool = False,
        split_at_dot: bool = True,
        max_rows=None,
        tee=False,
        iframe=False,
        iframe_height: int = 200,
        iframe_width: int = 600,
        web=False,
        return_string: bool = False,
    ) -> Union[None, str, "Results"]:
        """Print the results in a pretty format.

        :param pretty_labels: A dictionary of pretty labels for the columns.
        :param filename: The filename to save the results to.
        :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
        :param interactive: Whether to print the results interactively in a Jupyter notebook.
        :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
        :param max_rows: The maximum number of rows to print.
        :param tee: Whether to return the dataset.
        :param iframe: Whether to display the table in an iframe.
        :param iframe_height: The height of the iframe.
        :param iframe_width: The width of the iframe.
        :param web: Whether to display the table in a web browser (currently not used by this method).
        :param return_string: Whether to return the output as a string instead of printing.

        :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
        :raises ValueError: If ``format`` is not one of the supported formats.

        Example: Print in rich format at the terminal

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print(format = "rich")
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        ├──────────────┤
        │ Terrible     │
        ├──────────────┤
        │ OK           │
        └──────────────┘

        >>> r = Results.example()
        >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        └──────────────┘
        >>> r2
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])

        >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
        ┏━━━━━━━━━━━━━━┓
        ┃ answer       ┃
        ┃ .how_feeling ┃
        ┡━━━━━━━━━━━━━━┩
        │ OK           │
        ├──────────────┤
        │ Great        │
        └──────────────┘

        >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
        ┏━━━━━━━━━━━━━━━━━━━━┓
        ┃ answer.how_feeling ┃
        ┡━━━━━━━━━━━━━━━━━━━━┩
        │ OK                 │
        ├────────────────────┤
        │ Great              │
        ├────────────────────┤
        │ Terrible           │
        ├────────────────────┤
        │ OK                 │
        └────────────────────┘

        Example: using the pretty_labels parameter

        >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
        ┏━━━━━━━━━━━━━━━━━━━━━┓
        ┃ How are you feeling ┃
        ┡━━━━━━━━━━━━━━━━━━━━━┩
        │ OK                  │
        ├─────────────────────┤
        │ Great               │
        ├─────────────────────┤
        │ Terrible            │
        ├─────────────────────┤
        │ OK                  │
        └─────────────────────┘

        Example: printing in markdown format

        >>> r.select('how_feeling').print(format='markdown')
        | answer.how_feeling |
        |--|
        | OK |
        | Great |
        | Terrible |
        | OK |
        ...

        >>> r.select('how_feeling').print(format='latex')
        \\begin{tabular}{l}
        ...
        \\end{tabular}
        <BLANKLINE>
        """
        import contextlib

        if format is None:
            # Default to HTML inside a notebook and to rich at a terminal.
            from edsl.utilities.utilities import is_notebook

            format = "html" if is_notebook() else "rich"
        if format not in ("rich", "html", "markdown", "latex"):
            raise ValueError(
                "format must be one of 'rich', 'html', 'markdown', or 'latex'."
            )

        if pretty_labels is None:
            pretty_labels = {}

        if pretty_labels:  # only split at dot if there are no pretty labels
            split_at_dot = False

        new_data = []
        for entry in self:
            key, list_of_values = list(entry.items())[0]
            new_data.append({pretty_labels.get(key, key): list_of_values[:max_rows]})

        def _render():
            """Emit the table in the chosen format and return the renderer's result."""
            if format == "rich":
                from edsl.utilities.interface import print_dataset_with_rich

                return print_dataset_with_rich(
                    new_data, filename=filename, split_at_dot=split_at_dot
                )

            if format == "markdown":
                from edsl.utilities.interface import (
                    print_list_of_dicts_as_markdown_table,
                )

                return print_list_of_dicts_as_markdown_table(
                    new_data, filename=filename
                )

            if format == "latex":
                # NOTE: this path is built from to_pandas(), so pretty_labels
                # and max_rows are not applied to it.
                df = self.to_pandas()
                df.columns = [col.replace("_", " ") for col in df.columns]
                latex_string = df.to_latex(index=False)

                if filename is not None:
                    with open(filename, "w") as f:
                        f.write(latex_string)
                else:
                    print(latex_string)
                return latex_string

            # format == "html"
            from edsl.utilities.interface import print_list_of_dicts_as_html_table

            html_source = print_list_of_dicts_as_html_table(
                new_data, interactive=interactive
            )

            if iframe:
                from IPython.display import HTML, display

                # ``html`` here is the standard-library module; escaping keeps
                # the table markup intact inside the srcdoc attribute.
                iframe_source = (
                    f'<iframe srcdoc="{html.escape(html_source)}" '
                    f'style="width: {iframe_width}px; height: {iframe_height}px;">'
                    "</iframe>"
                )
                display(HTML(iframe_source))
            else:
                from edsl.utilities.utilities import is_notebook

                if is_notebook():
                    from IPython.display import HTML, display

                    display(HTML(html_source))
                else:
                    from edsl.utilities.interface import view_html

                    view_html(html_source)

            return html_source

        if return_string:
            # redirect_stdout restores sys.stdout even if rendering raises.
            captured = io.StringIO()
            with contextlib.redirect_stdout(captured):
                output = _render()
            return captured.getvalue() or output

        _render()

        if tee:
            return self

        return None

    def to_csv(
        self,
        filename: Optional[str] = None,
        remove_prefix: bool = False,
        download_link: bool = False,
        pretty_labels: Optional[dict] = None,
    ):
        """Export the results to a CSV file or a CSV string.

        :param filename: The filename to save the CSV file to. If given, nothing is returned.
        :param remove_prefix: Whether to remove the prefix from the column names.
        :param download_link: Whether to display a download link in a Jupyter notebook instead of returning the CSV text.
        :param pretty_labels: A dictionary of pretty labels for the columns.
        :return: The CSV text when neither ``filename`` nor ``download_link`` is used, otherwise None.

        Example:

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_csv()
        'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

        >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
        'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

        >>> import tempfile
        >>> filename = tempfile.NamedTemporaryFile(delete=False).name
        >>> r.select('how_feeling').to_csv(filename = filename)
        >>> import os
        >>> import csv
        >>> with open(filename, newline='') as f:
        ...     reader = csv.reader(f)
        ...     for row in reader:
        ...         print(row)
        ['answer.how_feeling']
        ['OK']
        ['Great']
        ['Terrible']
        ['OK']

        """
        if pretty_labels is None:
            pretty_labels = {}
        header, rows = self._make_tabular(
            remove_prefix=remove_prefix, pretty_labels=pretty_labels
        )

        if filename is not None:
            # newline="" lets the csv module control line endings; without it
            # Windows text mode turns the writer's "\r\n" into "\r\r\n".
            with open(filename, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(header)
                writer.writerows(rows)
            return None

        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(header)
        writer.writerows(rows)
        csv_text = output.getvalue()

        if not download_link:
            return csv_text

        from IPython.display import HTML, display

        b64 = base64.b64encode(csv_text.encode()).decode()
        link_markup = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
        display(HTML(link_markup))
        return None

    def download_link(self, pretty_labels: Optional[dict] = None) -> str:
        """Return a download link for the results.

        The link is an HTML anchor whose ``href`` embeds the CSV text as base64.

        :param pretty_labels: A dictionary of pretty labels for the columns.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').download_link()
        '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
        """
        csv_string = self.to_csv(pretty_labels=pretty_labels)
        b64 = base64.b64encode(csv_string.encode()).decode()
        return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'

    def to_pandas(self, remove_prefix: bool = False) -> "pd.DataFrame":
        """Convert the results to a pandas DataFrame.

        The frame is built by parsing the CSV text produced by ``to_csv``.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_pandas()
          answer.how_feeling
        0                 OK
        1              Great
        2           Terrible
        3                 OK
        """
        import pandas as pd

        csv_string = self.to_csv(remove_prefix=remove_prefix)
        return pd.read_csv(io.StringIO(csv_string))

    def to_scenario_list(self, remove_prefix: bool = True) -> "ScenarioList":
        """Convert the results to a ScenarioList, one Scenario per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_scenario_list()
        ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
        """
        from edsl import ScenarioList, Scenario

        list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
        return ScenarioList([Scenario(d) for d in list_of_dicts])

    def to_agent_list(self, remove_prefix: bool = True):
        """Convert the results to an AgentList, one Agent per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_agent_list()
        AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
        """
        from edsl import AgentList, Agent

        list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
        return AgentList([Agent(d) for d in list_of_dicts])

    def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
        """Convert the results to a list of dictionaries, one per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_dicts()
        [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

        """
        list_of_keys = []
        list_of_values = []
        for entry in self:
            key, values = list(entry.items())[0]
            list_of_keys.append(key)
            list_of_values.append(values)

        if remove_prefix:
            list_of_keys = [key.split(".")[-1] for key in list_of_keys]

        # zip(*columns) walks the columns in lockstep, yielding one row at a time.
        return [dict(zip(list_of_keys, entries)) for entries in zip(*list_of_values)]

    def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
        """Convert the results to a list (one column) or a list of tuples (several columns).

        :param flatten: Whether to flatten the list of lists (single column only).
        :param remove_none: Whether to remove None values from the list.
        :param unzipped: Accepted for compatibility; not used by this method.
        :raises ValueError: If ``flatten`` is requested with more than one column.

        >>> from edsl.results import Results
        >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

        >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
        [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

        >>> r = Results.example()
        >>> r.select('how_feeling').to_list()
        ['OK', 'Great', 'Terrible', 'OK']

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
        [1, 9, 2, 3, 4]

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
        Traceback (most recent call last):
        ...
        ValueError: Cannot flatten a list of lists when there are multiple columns selected.


        """
        keys = self.relevant_columns()

        if flatten and len(keys) > 1:
            raise ValueError(
                "Cannot flatten a list of lists when there are multiple columns selected."
            )

        if len(keys) == 1:
            # Only one column is selected (the typical case for this method).
            list_to_return = list(self[0].values())[0]
        else:
            data = self.to_dicts(remove_prefix=False)
            list_to_return = [tuple(d[key] for key in keys) for d in data]

        if remove_none:
            list_to_return = [item for item in list_to_return if item is not None]

        if flatten:
            new_list = []
            for item in list_to_return:
                if isinstance(item, list):
                    new_list.extend(item)
                else:
                    new_list.append(item)
            list_to_return = new_list

        return list_to_return

    def html(
        self,
        filename: Optional[str] = None,
        cta: str = "Open in browser",
        return_link: bool = False,
    ):
        """Write the dataset as an HTML table and open (or link to) the file.

        The table is produced by ``self.to_pandas().to_html()``.

        :param filename: Where to write the HTML. If None, a new ``.html`` file is created in the current working directory.
        :param cta: The link text shown when running in a notebook.
        :param return_link: Whether to return the filename that was written.
        :return: The filename if ``return_link`` is True, otherwise None.
        """
        import os
        import tempfile
        from edsl.utilities.utilities import is_notebook

        df = self.to_pandas()

        if filename is None:
            # Reserve a unique name, and close the handle before the file is
            # reopened below (an open handle blocks reopening on Windows and
            # would otherwise be leaked).
            with tempfile.NamedTemporaryFile(
                "w", delete=False, suffix=".html", dir=os.getcwd()
            ) as handle:
                filename = handle.name

        with open(filename, "w") as f:
            f.write(df.to_html())

        if is_notebook():
            # IPython is only needed on the notebook path.
            from IPython.display import HTML, display

            html_url = f"/files/{filename}"
            html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
            display(HTML(html_link))
        else:
            import webbrowser

            print(f"Saved to {filename}")
            webbrowser.open(f"file://{os.path.abspath(filename)}")

        if return_link:
            return filename

    def tally(
        self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
    ) -> Union[dict, "Dataset"]:
        """Tally the values of a field or perform a cross-tab of multiple fields.

        List-valued cells are converted to tuples so they can be counted.

        :param fields: The field(s) to tally, multiple fields for cross-tabulation. With no fields, all columns are used.
        :param top_n: If given, keep only the ``top_n`` most frequent values.
        :param output: ``"Dataset"`` for a Dataset with 'value' and 'count' columns, or ``"dict"`` for a value -> count dictionary.
        :raises ValueError: If a requested field is not a column of the dataset.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
        {'OK': 2, 'Great': 1, 'Terrible': 1}
        >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
        Dataset([{'value': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
        >>> r.select('how_feeling', 'period').tally('how_feeling', 'period', output = "dict")
        {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
        """
        import textwrap
        import warnings
        from collections import Counter

        available = self.relevant_columns()
        if len(fields) == 0:
            fields = available

        # Fields may be given with or without their data-type prefix.
        unprefixed = [column.split(".")[-1] for column in available]
        if not all(f in available or f in unprefixed for f in fields):
            raise ValueError("One or more specified fields are not in the dataset.")

        if len(fields) == 1:
            values = self._key_to_value(fields[0])
        else:
            values = list(zip(*(self._key_to_value(field) for field in fields)))

        def _freeze(value):
            # Lists are unhashable and cannot be Counter keys; turn them (and
            # any lists nested inside cross-tab tuples) into tuples.
            if isinstance(value, (list, tuple)):
                return tuple(_freeze(item) for item in value)
            return value

        # most_common() orders by descending count, ties in first-seen order.
        ranked = Counter(_freeze(value) for value in values).most_common()
        if top_n is not None:
            ranked = ranked[:top_n]
        sorted_tally = dict(ranked)

        if output == "dict":
            warnings.warn(
                textwrap.dedent(
                    """\
                    The default output from tally will change to Dataset in the future.
                    Use output='Dataset' to get the Dataset object for now.
                    """
                )
            )
            return sorted_tally
        elif output == "Dataset":
            # Imported lazily: only this branch needs the Dataset class.
            from edsl.results.Dataset import Dataset

            return Dataset(
                [
                    {"value": list(sorted_tally.keys())},
                    {"count": list(sorted_tally.values())},
                ]
            )
        # NOTE(review): any other ``output`` value falls through and returns None.
688
+
689
+
690
if __name__ == "__main__":
    # Run this module's doctests; ELLIPSIS lets "..." in expected output match any text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)