edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334) hide show
  1. edsl/Base.py +332 -385
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +49 -57
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +867 -1079
  7. edsl/agents/AgentList.py +413 -551
  8. edsl/agents/Invigilator.py +233 -285
  9. edsl/agents/InvigilatorBase.py +270 -254
  10. edsl/agents/PromptConstructor.py +354 -252
  11. edsl/agents/__init__.py +3 -2
  12. edsl/agents/descriptors.py +99 -99
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +279 -279
  26. edsl/config.py +157 -177
  27. edsl/conversation/Conversation.py +290 -290
  28. edsl/conversation/car_buying.py +58 -59
  29. edsl/conversation/chips.py +95 -95
  30. edsl/conversation/mug_negotiation.py +81 -81
  31. edsl/conversation/next_speaker_utilities.py +93 -93
  32. edsl/coop/PriceFetcher.py +54 -54
  33. edsl/coop/__init__.py +2 -2
  34. edsl/coop/coop.py +1028 -1090
  35. edsl/coop/utils.py +131 -131
  36. edsl/data/Cache.py +555 -562
  37. edsl/data/CacheEntry.py +233 -230
  38. edsl/data/CacheHandler.py +149 -170
  39. edsl/data/RemoteCacheSync.py +78 -78
  40. edsl/data/SQLiteDict.py +292 -292
  41. edsl/data/__init__.py +4 -5
  42. edsl/data/orm.py +10 -10
  43. edsl/data_transfer_models.py +73 -74
  44. edsl/enums.py +175 -195
  45. edsl/exceptions/BaseException.py +21 -21
  46. edsl/exceptions/__init__.py +54 -54
  47. edsl/exceptions/agents.py +42 -54
  48. edsl/exceptions/cache.py +5 -5
  49. edsl/exceptions/configuration.py +16 -16
  50. edsl/exceptions/coop.py +10 -10
  51. edsl/exceptions/data.py +14 -14
  52. edsl/exceptions/general.py +34 -34
  53. edsl/exceptions/jobs.py +33 -33
  54. edsl/exceptions/language_models.py +63 -63
  55. edsl/exceptions/prompts.py +15 -15
  56. edsl/exceptions/questions.py +91 -109
  57. edsl/exceptions/results.py +29 -29
  58. edsl/exceptions/scenarios.py +22 -29
  59. edsl/exceptions/surveys.py +37 -37
  60. edsl/inference_services/AnthropicService.py +87 -84
  61. edsl/inference_services/AwsBedrock.py +120 -118
  62. edsl/inference_services/AzureAI.py +217 -215
  63. edsl/inference_services/DeepInfraService.py +18 -18
  64. edsl/inference_services/GoogleService.py +148 -139
  65. edsl/inference_services/GroqService.py +20 -20
  66. edsl/inference_services/InferenceServiceABC.py +147 -80
  67. edsl/inference_services/InferenceServicesCollection.py +97 -122
  68. edsl/inference_services/MistralAIService.py +123 -120
  69. edsl/inference_services/OllamaService.py +18 -18
  70. edsl/inference_services/OpenAIService.py +224 -221
  71. edsl/inference_services/PerplexityService.py +163 -160
  72. edsl/inference_services/TestService.py +89 -92
  73. edsl/inference_services/TogetherAIService.py +170 -170
  74. edsl/inference_services/models_available_cache.py +118 -118
  75. edsl/inference_services/rate_limits_cache.py +25 -25
  76. edsl/inference_services/registry.py +41 -41
  77. edsl/inference_services/write_available.py +10 -10
  78. edsl/jobs/Answers.py +56 -43
  79. edsl/jobs/Jobs.py +898 -757
  80. edsl/jobs/JobsChecks.py +147 -172
  81. edsl/jobs/JobsPrompts.py +268 -270
  82. edsl/jobs/JobsRemoteInferenceHandler.py +239 -287
  83. edsl/jobs/__init__.py +1 -1
  84. edsl/jobs/buckets/BucketCollection.py +63 -104
  85. edsl/jobs/buckets/ModelBuckets.py +65 -65
  86. edsl/jobs/buckets/TokenBucket.py +251 -283
  87. edsl/jobs/interviews/Interview.py +661 -358
  88. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  89. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  90. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  91. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  92. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  93. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  94. edsl/jobs/interviews/ReportErrors.py +66 -66
  95. edsl/jobs/interviews/interview_status_enum.py +9 -9
  96. edsl/jobs/runners/JobsRunnerAsyncio.py +466 -421
  97. edsl/jobs/runners/JobsRunnerStatus.py +330 -330
  98. edsl/jobs/tasks/QuestionTaskCreator.py +242 -244
  99. edsl/jobs/tasks/TaskCreators.py +64 -64
  100. edsl/jobs/tasks/TaskHistory.py +450 -449
  101. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  102. edsl/jobs/tasks/task_status_enum.py +163 -161
  103. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  104. edsl/jobs/tokens/TokenUsage.py +34 -34
  105. edsl/language_models/KeyLookup.py +30 -0
  106. edsl/language_models/LanguageModel.py +668 -571
  107. edsl/language_models/ModelList.py +155 -153
  108. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  109. edsl/language_models/__init__.py +3 -2
  110. edsl/language_models/fake_openai_call.py +15 -15
  111. edsl/language_models/fake_openai_service.py +61 -61
  112. edsl/language_models/registry.py +190 -180
  113. edsl/language_models/repair.py +156 -156
  114. edsl/language_models/unused/ReplicateBase.py +83 -0
  115. edsl/language_models/utilities.py +64 -65
  116. edsl/notebooks/Notebook.py +258 -263
  117. edsl/notebooks/__init__.py +1 -1
  118. edsl/prompts/Prompt.py +362 -352
  119. edsl/prompts/__init__.py +2 -2
  120. edsl/questions/AnswerValidatorMixin.py +289 -334
  121. edsl/questions/QuestionBase.py +664 -509
  122. edsl/questions/QuestionBaseGenMixin.py +161 -165
  123. edsl/questions/QuestionBasePromptsMixin.py +217 -221
  124. edsl/questions/QuestionBudget.py +227 -227
  125. edsl/questions/QuestionCheckBox.py +359 -359
  126. edsl/questions/QuestionExtract.py +182 -182
  127. edsl/questions/QuestionFreeText.py +114 -113
  128. edsl/questions/QuestionFunctional.py +166 -166
  129. edsl/questions/QuestionList.py +231 -229
  130. edsl/questions/QuestionMultipleChoice.py +286 -330
  131. edsl/questions/QuestionNumerical.py +153 -151
  132. edsl/questions/QuestionRank.py +324 -314
  133. edsl/questions/Quick.py +41 -41
  134. edsl/questions/RegisterQuestionsMeta.py +71 -71
  135. edsl/questions/ResponseValidatorABC.py +174 -200
  136. edsl/questions/SimpleAskMixin.py +73 -74
  137. edsl/questions/__init__.py +26 -27
  138. edsl/questions/compose_questions.py +98 -98
  139. edsl/questions/decorators.py +21 -21
  140. edsl/questions/derived/QuestionLikertFive.py +76 -76
  141. edsl/questions/derived/QuestionLinearScale.py +87 -90
  142. edsl/questions/derived/QuestionTopK.py +93 -93
  143. edsl/questions/derived/QuestionYesNo.py +82 -82
  144. edsl/questions/descriptors.py +413 -427
  145. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  146. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  147. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  148. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  149. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  150. edsl/questions/prompt_templates/question_list.jinja +17 -17
  151. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  152. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  153. edsl/questions/question_registry.py +177 -177
  154. edsl/questions/settings.py +12 -12
  155. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  156. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  157. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  158. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  159. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  160. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  161. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  162. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  163. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  164. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  165. edsl/questions/templates/list/question_presentation.jinja +5 -5
  166. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  167. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  168. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  169. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  170. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  171. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  172. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  173. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  174. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  175. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  176. edsl/results/CSSParameterizer.py +108 -108
  177. edsl/results/Dataset.py +424 -587
  178. edsl/results/DatasetExportMixin.py +731 -653
  179. edsl/results/DatasetTree.py +275 -295
  180. edsl/results/Result.py +465 -451
  181. edsl/results/Results.py +1165 -1172
  182. edsl/results/ResultsDBMixin.py +238 -0
  183. edsl/results/ResultsExportMixin.py +43 -45
  184. edsl/results/ResultsFetchMixin.py +33 -33
  185. edsl/results/ResultsGGMixin.py +121 -121
  186. edsl/results/ResultsToolsMixin.py +98 -98
  187. edsl/results/Selector.py +135 -145
  188. edsl/results/TableDisplay.py +198 -125
  189. edsl/results/__init__.py +2 -2
  190. edsl/results/table_display.css +77 -77
  191. edsl/results/tree_explore.py +115 -115
  192. edsl/scenarios/FileStore.py +632 -511
  193. edsl/scenarios/Scenario.py +601 -498
  194. edsl/scenarios/ScenarioHtmlMixin.py +64 -65
  195. edsl/scenarios/ScenarioJoin.py +127 -131
  196. edsl/scenarios/ScenarioList.py +1287 -1430
  197. edsl/scenarios/ScenarioListExportMixin.py +52 -45
  198. edsl/scenarios/ScenarioListPdfMixin.py +261 -239
  199. edsl/scenarios/__init__.py +4 -3
  200. edsl/shared.py +1 -1
  201. edsl/study/ObjectEntry.py +173 -173
  202. edsl/study/ProofOfWork.py +113 -113
  203. edsl/study/SnapShot.py +80 -80
  204. edsl/study/Study.py +528 -521
  205. edsl/study/__init__.py +4 -4
  206. edsl/surveys/DAG.py +148 -148
  207. edsl/surveys/Memory.py +31 -31
  208. edsl/surveys/MemoryPlan.py +244 -244
  209. edsl/surveys/Rule.py +326 -327
  210. edsl/surveys/RuleCollection.py +387 -385
  211. edsl/surveys/Survey.py +1801 -1229
  212. edsl/surveys/SurveyCSS.py +261 -273
  213. edsl/surveys/SurveyExportMixin.py +259 -259
  214. edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +179 -181
  215. edsl/surveys/SurveyQualtricsImport.py +284 -284
  216. edsl/surveys/__init__.py +3 -5
  217. edsl/surveys/base.py +53 -53
  218. edsl/surveys/descriptors.py +56 -60
  219. edsl/surveys/instructions/ChangeInstruction.py +49 -48
  220. edsl/surveys/instructions/Instruction.py +65 -56
  221. edsl/surveys/instructions/InstructionCollection.py +77 -82
  222. edsl/templates/error_reporting/base.html +23 -23
  223. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  224. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  225. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  226. edsl/templates/error_reporting/interview_details.html +115 -115
  227. edsl/templates/error_reporting/interviews.html +19 -19
  228. edsl/templates/error_reporting/overview.html +4 -4
  229. edsl/templates/error_reporting/performance_plot.html +1 -1
  230. edsl/templates/error_reporting/report.css +73 -73
  231. edsl/templates/error_reporting/report.html +117 -117
  232. edsl/templates/error_reporting/report.js +25 -25
  233. edsl/tools/__init__.py +1 -1
  234. edsl/tools/clusters.py +192 -192
  235. edsl/tools/embeddings.py +27 -27
  236. edsl/tools/embeddings_plotting.py +118 -118
  237. edsl/tools/plotting.py +112 -112
  238. edsl/tools/summarize.py +18 -18
  239. edsl/utilities/SystemInfo.py +28 -28
  240. edsl/utilities/__init__.py +22 -22
  241. edsl/utilities/ast_utilities.py +25 -25
  242. edsl/utilities/data/Registry.py +6 -6
  243. edsl/utilities/data/__init__.py +1 -1
  244. edsl/utilities/data/scooter_results.json +1 -1
  245. edsl/utilities/decorators.py +77 -77
  246. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  247. edsl/utilities/interface.py +627 -627
  248. edsl/utilities/naming_utilities.py +263 -263
  249. edsl/utilities/repair_functions.py +28 -28
  250. edsl/utilities/restricted_python.py +70 -70
  251. edsl/utilities/utilities.py +424 -436
  252. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/LICENSE +21 -21
  253. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/METADATA +10 -12
  254. edsl-0.1.39.dev3.dist-info/RECORD +277 -0
  255. edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
  256. edsl/agents/QuestionOptionProcessor.py +0 -172
  257. edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
  258. edsl/coop/CoopFunctionsMixin.py +0 -15
  259. edsl/coop/ExpectedParrotKeyHandler.py +0 -125
  260. edsl/exceptions/inference_services.py +0 -5
  261. edsl/inference_services/AvailableModelCacheHandler.py +0 -184
  262. edsl/inference_services/AvailableModelFetcher.py +0 -209
  263. edsl/inference_services/ServiceAvailability.py +0 -135
  264. edsl/inference_services/data_structures.py +0 -62
  265. edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -188
  266. edsl/jobs/FetchInvigilator.py +0 -40
  267. edsl/jobs/InterviewTaskManager.py +0 -98
  268. edsl/jobs/InterviewsConstructor.py +0 -48
  269. edsl/jobs/JobsComponentConstructor.py +0 -189
  270. edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
  271. edsl/jobs/RequestTokenEstimator.py +0 -30
  272. edsl/jobs/buckets/TokenBucketAPI.py +0 -211
  273. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  274. edsl/jobs/decorators.py +0 -35
  275. edsl/jobs/jobs_status_enums.py +0 -9
  276. edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
  277. edsl/language_models/ComputeCost.py +0 -63
  278. edsl/language_models/PriceManager.py +0 -127
  279. edsl/language_models/RawResponseHandler.py +0 -106
  280. edsl/language_models/ServiceDataSources.py +0 -0
  281. edsl/language_models/key_management/KeyLookup.py +0 -63
  282. edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
  283. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  284. edsl/language_models/key_management/__init__.py +0 -0
  285. edsl/language_models/key_management/models.py +0 -131
  286. edsl/notebooks/NotebookToLaTeX.py +0 -142
  287. edsl/questions/ExceptionExplainer.py +0 -77
  288. edsl/questions/HTMLQuestion.py +0 -103
  289. edsl/questions/LoopProcessor.py +0 -149
  290. edsl/questions/QuestionMatrix.py +0 -265
  291. edsl/questions/ResponseValidatorFactory.py +0 -28
  292. edsl/questions/templates/matrix/__init__.py +0 -1
  293. edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
  294. edsl/questions/templates/matrix/question_presentation.jinja +0 -20
  295. edsl/results/MarkdownToDocx.py +0 -122
  296. edsl/results/MarkdownToPDF.py +0 -111
  297. edsl/results/TextEditor.py +0 -50
  298. edsl/results/smart_objects.py +0 -96
  299. edsl/results/table_data_class.py +0 -12
  300. edsl/results/table_renderers.py +0 -118
  301. edsl/scenarios/ConstructDownloadLink.py +0 -109
  302. edsl/scenarios/DirectoryScanner.py +0 -96
  303. edsl/scenarios/DocumentChunker.py +0 -102
  304. edsl/scenarios/DocxScenario.py +0 -16
  305. edsl/scenarios/PdfExtractor.py +0 -40
  306. edsl/scenarios/ScenarioSelector.py +0 -156
  307. edsl/scenarios/file_methods.py +0 -85
  308. edsl/scenarios/handlers/__init__.py +0 -13
  309. edsl/scenarios/handlers/csv.py +0 -38
  310. edsl/scenarios/handlers/docx.py +0 -76
  311. edsl/scenarios/handlers/html.py +0 -37
  312. edsl/scenarios/handlers/json.py +0 -111
  313. edsl/scenarios/handlers/latex.py +0 -5
  314. edsl/scenarios/handlers/md.py +0 -51
  315. edsl/scenarios/handlers/pdf.py +0 -68
  316. edsl/scenarios/handlers/png.py +0 -39
  317. edsl/scenarios/handlers/pptx.py +0 -105
  318. edsl/scenarios/handlers/py.py +0 -294
  319. edsl/scenarios/handlers/sql.py +0 -313
  320. edsl/scenarios/handlers/sqlite.py +0 -149
  321. edsl/scenarios/handlers/txt.py +0 -33
  322. edsl/surveys/ConstructDAG.py +0 -92
  323. edsl/surveys/EditSurvey.py +0 -221
  324. edsl/surveys/InstructionHandler.py +0 -100
  325. edsl/surveys/MemoryManagement.py +0 -72
  326. edsl/surveys/RuleManager.py +0 -172
  327. edsl/surveys/Simulator.py +0 -75
  328. edsl/surveys/SurveyToApp.py +0 -141
  329. edsl/utilities/PrettyList.py +0 -56
  330. edsl/utilities/is_notebook.py +0 -18
  331. edsl/utilities/is_valid_variable_name.py +0 -11
  332. edsl/utilities/remove_edsl_version.py +0 -24
  333. edsl-0.1.39.dev2.dist-info/RECORD +0 -352
  334. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/WHEEL +0 -0
@@ -1,653 +1,731 @@
1
- """Mixin class for exporting results."""
2
-
3
- import base64
4
- import csv
5
- import io
6
- import warnings
7
- import textwrap
8
- from typing import Optional, Tuple, Union, List
9
-
10
-
11
- class DatasetExportMixin:
12
- """Mixin class for exporting Dataset objects."""
13
-
14
- def relevant_columns(
15
- self, data_type: Optional[str] = None, remove_prefix=False
16
- ) -> list:
17
- """Return the set of keys that are present in the dataset.
18
-
19
- :param data_type: The data type to filter by.
20
- :param remove_prefix: Whether to remove the prefix from the column names.
21
-
22
- >>> from edsl.results.Dataset import Dataset
23
- >>> d = Dataset([{'a.b':[1,2,3,4]}])
24
- >>> d.relevant_columns()
25
- ['a.b']
26
-
27
- >>> d.relevant_columns(remove_prefix=True)
28
- ['b']
29
-
30
- >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
31
- >>> d.relevant_columns()
32
- ['a', 'b']
33
-
34
- >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
35
- ['answer.how_feeling', 'answer.how_feeling_yesterday']
36
-
37
- >>> from edsl.results import Results
38
- >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
39
- ['model.frequency_penalty', ...]
40
-
41
- >>> Results.example().relevant_columns(data_type = "flimflam")
42
- Traceback (most recent call last):
43
- ...
44
- ValueError: No columns found for data type: flimflam. Available data types are: ...
45
- """
46
- columns = [list(x.keys())[0] for x in self]
47
- if remove_prefix:
48
- columns = [column.split(".")[-1] for column in columns]
49
-
50
- def get_data_type(column):
51
- if "." in column:
52
- return column.split(".")[0]
53
- else:
54
- return None
55
-
56
- if data_type:
57
- all_columns = columns[:]
58
- columns = [
59
- column for column in columns if get_data_type(column) == data_type
60
- ]
61
- if len(columns) == 0:
62
- all_data_types = sorted(
63
- list(set(get_data_type(column) for column in all_columns))
64
- )
65
- raise ValueError(
66
- f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
67
- )
68
-
69
- return columns
70
-
71
- def num_observations(self):
72
- """Return the number of observations in the dataset.
73
-
74
- >>> from edsl.results.Results import Results
75
- >>> Results.example().num_observations()
76
- 4
77
- """
78
- _num_observations = None
79
- for entry in self:
80
- key, values = list(entry.items())[0]
81
- if _num_observations is None:
82
- _num_observations = len(values)
83
- else:
84
- if len(values) != _num_observations:
85
- raise ValueError(
86
- "The number of observations is not consistent across columns."
87
- )
88
-
89
- return _num_observations
90
-
91
- def _make_tabular(
92
- self, remove_prefix: bool, pretty_labels: Optional[dict] = None
93
- ) -> tuple[list, List[list]]:
94
- """Turn the results into a tabular format.
95
-
96
- :param remove_prefix: Whether to remove the prefix from the column names.
97
-
98
- >>> from edsl.results import Results
99
- >>> r = Results.example()
100
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
101
- (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
102
-
103
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
104
- (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
105
- """
106
-
107
- def create_dict_from_list_of_dicts(list_of_dicts):
108
- for entry in list_of_dicts:
109
- key, list_of_values = list(entry.items())[0]
110
- yield key, list_of_values
111
-
112
- tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
113
-
114
- full_header = [list(x.keys())[0] for x in self]
115
-
116
- rows = []
117
- for i in range(self.num_observations()):
118
- row = [tabular_repr[h][i] for h in full_header]
119
- rows.append(row)
120
-
121
- if remove_prefix:
122
- header = [h.split(".")[-1] for h in full_header]
123
- else:
124
- header = full_header
125
-
126
- if pretty_labels is not None:
127
- header = [pretty_labels.get(h, h) for h in header]
128
-
129
- return header, rows
130
-
131
- def print_long(self):
132
- """Print the results in a long format.
133
- >>> from edsl.results import Results
134
- >>> r = Results.example()
135
- >>> r.select('how_feeling').print_long()
136
- answer.how_feeling: OK
137
- answer.how_feeling: Great
138
- answer.how_feeling: Terrible
139
- answer.how_feeling: OK
140
- """
141
- for entry in self:
142
- key, list_of_values = list(entry.items())[0]
143
- for value in list_of_values:
144
- print(f"{key}: {value}")
145
-
146
- def _get_tabular_data(
147
- self,
148
- remove_prefix: bool = False,
149
- pretty_labels: Optional[dict] = None,
150
- ) -> Tuple[List[str], List[List]]:
151
- """Internal method to get tabular data in a standard format.
152
-
153
- Args:
154
- remove_prefix: Whether to remove the prefix from column names
155
- pretty_labels: Dictionary mapping original column names to pretty labels
156
-
157
- Returns:
158
- Tuple containing (header_row, data_rows)
159
- """
160
- if pretty_labels is None:
161
- pretty_labels = {}
162
-
163
- return self._make_tabular(
164
- remove_prefix=remove_prefix, pretty_labels=pretty_labels
165
- )
166
-
167
- def to_jsonl(self, filename: Optional[str] = None) -> "FileStore":
168
- """Export the results to a FileStore instance containing JSONL data.
169
-
170
- Args:
171
- filename: Optional filename for the JSONL file (defaults to "results.jsonl")
172
-
173
- Returns:
174
- FileStore: Instance containing the JSONL data
175
- """
176
- if filename is None:
177
- filename = "results.jsonl"
178
-
179
- # Write to string buffer
180
- output = io.StringIO()
181
- for entry in self:
182
- key, values = list(entry.items())[0]
183
- output.write(f'{{"{key}": {values}}}\n')
184
-
185
- # Get the CSV string and encode to base64
186
- jsonl_string = output.getvalue()
187
- base64_string = base64.b64encode(jsonl_string.encode()).decode()
188
- from edsl.scenarios.FileStore import FileStore
189
-
190
- return FileStore(
191
- path=filename,
192
- mime_type="application/jsonl",
193
- binary=False,
194
- suffix="jsonl",
195
- base64_string=base64_string,
196
- )
197
-
198
- def to_csv(
199
- self,
200
- filename: Optional[str] = None,
201
- remove_prefix: bool = False,
202
- pretty_labels: Optional[dict] = None,
203
- ) -> "FileStore":
204
- """Export the results to a FileStore instance containing CSV data.
205
-
206
- Args:
207
- filename: Optional filename for the CSV (defaults to "results.csv")
208
- remove_prefix: Whether to remove the prefix from column names
209
- pretty_labels: Dictionary mapping original column names to pretty labels
210
-
211
- Returns:
212
- FileStore: Instance containing the CSV data
213
- """
214
- if filename is None:
215
- filename = "results.csv"
216
-
217
- # Get the tabular data
218
- header, rows = self._get_tabular_data(
219
- remove_prefix=remove_prefix, pretty_labels=pretty_labels
220
- )
221
-
222
- # Write to string buffer
223
- output = io.StringIO()
224
- writer = csv.writer(output)
225
- writer.writerow(header)
226
- writer.writerows(rows)
227
-
228
- # Get the CSV string and encode to base64
229
- csv_string = output.getvalue()
230
- base64_string = base64.b64encode(csv_string.encode()).decode()
231
- from edsl.scenarios.FileStore import FileStore
232
-
233
- return FileStore(
234
- path=filename,
235
- mime_type="text/csv",
236
- binary=False,
237
- suffix="csv",
238
- base64_string=base64_string,
239
- )
240
-
241
- def to_excel(
242
- self,
243
- filename: Optional[str] = None,
244
- remove_prefix: bool = False,
245
- pretty_labels: Optional[dict] = None,
246
- sheet_name: Optional[str] = None,
247
- ) -> "FileStore":
248
- """Export the results to a FileStore instance containing Excel data.
249
-
250
- Args:
251
- filename: Optional filename for the Excel file (defaults to "results.xlsx")
252
- remove_prefix: Whether to remove the prefix from column names
253
- pretty_labels: Dictionary mapping original column names to pretty labels
254
- sheet_name: Name of the worksheet (defaults to "Results")
255
-
256
- Returns:
257
- FileStore: Instance containing the Excel data
258
- """
259
- from openpyxl import Workbook
260
-
261
- if filename is None:
262
- filename = "results.xlsx"
263
- if sheet_name is None:
264
- sheet_name = "Results"
265
-
266
- # Get the tabular data
267
- header, rows = self._get_tabular_data(
268
- remove_prefix=remove_prefix, pretty_labels=pretty_labels
269
- )
270
-
271
- # Create Excel workbook in memory
272
- wb = Workbook()
273
- ws = wb.active
274
- ws.title = sheet_name
275
-
276
- # Write header
277
- for col, value in enumerate(header, 1):
278
- ws.cell(row=1, column=col, value=value)
279
-
280
- # Write data rows
281
- for row_idx, row_data in enumerate(rows, 2):
282
- for col, value in enumerate(row_data, 1):
283
- ws.cell(row=row_idx, column=col, value=value)
284
-
285
- # Save to bytes buffer
286
- buffer = io.BytesIO()
287
- wb.save(buffer)
288
- buffer.seek(0)
289
-
290
- # Convert to base64
291
- base64_string = base64.b64encode(buffer.getvalue()).decode()
292
- from edsl.scenarios.FileStore import FileStore
293
-
294
- return FileStore(
295
- path=filename,
296
- mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
297
- binary=True,
298
- suffix="xlsx",
299
- base64_string=base64_string,
300
- )
301
-
302
- def _db(self, remove_prefix: bool = True):
303
- """Create a SQLite database in memory and return the connection.
304
-
305
- Args:
306
- shape: The shape of the data in the database (wide or long)
307
- remove_prefix: Whether to remove the prefix from the column names
308
-
309
- Returns:
310
- A database connection
311
- """
312
- from sqlalchemy import create_engine
313
-
314
- engine = create_engine("sqlite:///:memory:")
315
- if remove_prefix:
316
- df = self.remove_prefix().to_pandas(lists_as_strings=True)
317
- else:
318
- df = self.to_pandas(lists_as_strings=True)
319
- df.to_sql(
320
- "self",
321
- engine,
322
- index=False,
323
- if_exists="replace",
324
- )
325
- return engine.connect()
326
-
327
- def sql(
328
- self,
329
- query: str,
330
- transpose: bool = None,
331
- transpose_by: str = None,
332
- remove_prefix: bool = True,
333
- ) -> Union["pd.DataFrame", str]:
334
- """Execute a SQL query and return the results as a DataFrame.
335
-
336
- Args:
337
- query: The SQL query to execute
338
- shape: The shape of the data in the database (wide or long)
339
- remove_prefix: Whether to remove the prefix from the column names
340
- transpose: Whether to transpose the DataFrame
341
- transpose_by: The column to use as the index when transposing
342
- csv: Whether to return the DataFrame as a CSV string
343
- to_list: Whether to return the results as a list
344
- to_latex: Whether to return the results as LaTeX
345
- filename: Optional filename to save the results to
346
-
347
- Returns:
348
- DataFrame, CSV string, list, or LaTeX string depending on parameters
349
-
350
- """
351
- import pandas as pd
352
-
353
- conn = self._db(remove_prefix=remove_prefix)
354
- df = pd.read_sql_query(query, conn)
355
-
356
- # Transpose the DataFrame if transpose is True
357
- if transpose or transpose_by:
358
- df = pd.DataFrame(df)
359
- if transpose_by:
360
- df = df.set_index(transpose_by)
361
- else:
362
- df = df.set_index(df.columns[0])
363
- df = df.transpose()
364
- from edsl.results.Dataset import Dataset
365
-
366
- return Dataset.from_pandas_dataframe(df)
367
-
368
- def to_pandas(
369
- self, remove_prefix: bool = False, lists_as_strings=False
370
- ) -> "DataFrame":
371
- """Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
372
-
373
- :param remove_prefix: Whether to remove the prefix from the column names.
374
-
375
- """
376
- return self._to_pandas_strings(remove_prefix)
377
-
378
- def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
379
- """Convert the results to a pandas DataFrame.
380
-
381
- :param remove_prefix: Whether to remove the prefix from the column names.
382
-
383
- >>> from edsl.results import Results
384
- >>> r = Results.example()
385
- >>> r.select('how_feeling').to_pandas()
386
- answer.how_feeling
387
- 0 OK
388
- 1 Great
389
- 2 Terrible
390
- 3 OK
391
- """
392
-
393
- import pandas as pd
394
-
395
- csv_string = self.to_csv(remove_prefix=remove_prefix).text
396
- csv_buffer = io.StringIO(csv_string)
397
- df = pd.read_csv(csv_buffer)
398
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
399
- return df
400
-
401
- def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
402
- """Convert the results to a list of dictionaries, one per scenario.
403
-
404
- :param remove_prefix: Whether to remove the prefix from the column names.
405
-
406
- >>> from edsl.results import Results
407
- >>> r = Results.example()
408
- >>> r.select('how_feeling').to_scenario_list()
409
- ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
410
- """
411
- from edsl.scenarios.ScenarioList import ScenarioList
412
- from edsl.scenarios.Scenario import Scenario
413
-
414
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
415
- scenarios = []
416
- for d in list_of_dicts:
417
- scenarios.append(Scenario(d))
418
- return ScenarioList(scenarios)
419
-
420
def to_agent_list(self, remove_prefix: bool = True):
    """Convert the results to an AgentList with exactly one Agent per row.

    A ``name`` column is moved to the ``agent_name`` trait and used as the
    agent's name. An ``agent_parameters`` column is removed from the traits
    and its ``name`` / ``instruction`` entries are passed to the Agent.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_agent_list()
    AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
    """
    from edsl.agents import Agent
    from edsl.agents.AgentList import AgentList

    agents = []
    for traits in self.to_dicts(remove_prefix=remove_prefix):
        has_name = "name" in traits
        if has_name:
            traits["agent_name"] = traits.pop("name")

        if "agent_parameters" in traits:
            agent_parameters = traits.pop("agent_parameters")
            agent_name = agent_parameters.get("name", None)
            if agent_name is None and has_name:
                # Fall back to the row's own name column.
                agent_name = traits["agent_name"]
            instruction = agent_parameters.get("instruction", None)
            agents.append(Agent(traits, name=agent_name, instruction=instruction))
        elif has_name:
            # Previously this case appended the agent here and then again
            # in the trailing ``else``, duplicating the row.
            agents.append(Agent(traits, name=traits["agent_name"]))
        else:
            agents.append(Agent(traits))
    return AgentList(agents)
447
-
448
def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
    """Return the dataset row by row, as one dictionary per observation.

    Every entry of the dataset contributes one column: its first key is the
    column name and the associated list holds that column's values.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_dicts()
    [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

    """
    name_column_pairs = [list(entry.items())[0] for entry in self]
    names = [name for name, _ in name_column_pairs]
    columns = [column for _, column in name_column_pairs]

    if remove_prefix:
        # Keep only the part after the last dot ("answer.x" -> "x").
        names = [name.split(".")[-1] for name in names]

    # zip(*columns) transposes the column lists into row tuples.
    return [dict(zip(names, row)) for row in zip(*columns)]
474
-
475
def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
    """Return the selected column(s) as a PrettyList.

    With a single column the column's values are returned; with several
    columns each element is a tuple holding one row. ``unzipped`` is accepted
    but not used by this method.

    :param flatten: Whether to flatten the list of lists.
    :param remove_none: Whether to remove None values from the list.

    >>> from edsl.results import Results
    >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
    Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

    >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
    [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

    >>> r = Results.example()
    >>> r.select('how_feeling').to_list()
    ['OK', 'Great', 'Terrible', 'OK']

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
    [1, 9, 2, 3, 4]

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
    Traceback (most recent call last):
    ...
    ValueError: Cannot flatten a list of lists when there are multiple columns selected.


    """
    column_names = self.relevant_columns()
    if flatten and len(column_names) > 1:
        raise ValueError(
            "Cannot flatten a list of lists when there are multiple columns selected."
        )

    if len(column_names) == 1:
        # The typical case: a single selected column, returned as-is.
        result = list(self[0].values())[0]
    else:
        result = [
            tuple([row[name] for name in column_names])
            for row in self.to_dicts(remove_prefix=False)
        ]

    if remove_none:
        result = [element for element in result if element is not None]

    if flatten:
        flattened = []
        for element in result:
            # Only one level of nesting is expanded.
            flattened.extend(element if isinstance(element, list) else [element])
        result = flattened

    from edsl.utilities.PrettyList import PrettyList

    return PrettyList(result)
534
-
535
def html(
    self,
    filename: Optional[str] = None,
    cta: str = "Open in browser",
    return_link: bool = False,
):
    """Write the dataset as an HTML table and show a way to open it.

    The table comes from ``self.to_pandas().to_html()``. In a notebook a
    link to the file is displayed; otherwise the file is opened with
    ``webbrowser``.

    :param filename: Where to write the HTML. When None, a ``.html`` temporary file is created in the current working directory and kept.
    :param cta: The link text shown in a notebook.
    :param return_link: Whether to return the filename that was written.
    :return: The filename if ``return_link`` is true, otherwise None.
    """
    import os
    import tempfile

    from edsl.utilities.utilities import is_notebook

    df = self.to_pandas()

    if filename is None:
        # Only the generated name is needed; close the handle right away so
        # it is not leaked and the file can be reopened on every platform.
        with tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".html", dir=os.getcwd()
        ) as tmp:
            filename = tmp.name

    with open(filename, "w") as f:
        f.write(df.to_html())

    if is_notebook():
        # IPython is only required on the notebook path.
        from IPython.display import HTML, display

        html_url = f"/files/{filename}"
        html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
        display(HTML(html_link))
    else:
        print(f"Saved to {filename}")
        import webbrowser

        webbrowser.open(f"file://{os.path.abspath(filename)}")

    if return_link:
        return filename
571
-
572
def tally(
    self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
) -> Union[dict, "Dataset"]:
    """Tally the values of a field or perform a cross-tab of multiple fields.

    Counts are ordered from most to least frequent. List-valued cells are
    converted to tuples so that they can be counted.

    :param fields: The field(s) to tally, multiple fields for cross-tabulation. When omitted, all columns of the dataset are used.
    :param top_n: If given, keep only the ``top_n`` most frequent values.
    :param output: ``"dict"`` for a plain value-to-count dictionary, ``"Dataset"`` for a Dataset with one column per field plus ``count``.
    :raises ValueError: If a field is not a column of the dataset (with or without its prefix).

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
    {'OK': 2, 'Great': 1, 'Terrible': 1}
    >>> from edsl.results.Dataset import Dataset
    >>> expected = Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
    >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset") == expected
    True
    """
    import textwrap
    import warnings
    from collections import Counter

    if len(fields) == 0:
        fields = self.relevant_columns()

    known_columns = self.relevant_columns()
    known_suffixes = [column.split(".")[-1] for column in known_columns]
    if not all(f in known_columns or f in known_suffixes for f in fields):
        raise ValueError("One or more specified fields are not in the dataset.")

    if len(fields) == 1:
        values = self._key_to_value(fields[0])
    else:
        values = list(zip(*(self._key_to_value(field) for field in fields)))

    # Lists are unhashable and cannot be counted; the previous loop only
    # rebound its loop variable, so the conversion never took effect.
    values = [tuple(v) if isinstance(v, list) else v for v in values]

    # most_common() sorts by descending count and keeps first-seen order
    # among ties.
    sorted_tally = dict(Counter(values).most_common())
    if top_n is not None:
        sorted_tally = dict(list(sorted_tally.items())[:top_n])

    if output == "dict":
        warnings.warn(
            textwrap.dedent(
                """\
                The default output from tally will change to Dataset in the future.
                Use output='Dataset' to get the Dataset object for now.
                """
            )
        )
        return sorted_tally
    elif output == "Dataset":
        from edsl.results.Dataset import Dataset

        dataset = Dataset(
            [
                {"value": list(sorted_tally.keys())},
                {"count": list(sorted_tally.values())},
            ]
        )
        sl = dataset.to_scenario_list().unpack(
            "value",
            new_names=[fields] if isinstance(fields, str) else fields,
            keep_original=False,
        )
        # Move "count" to the end so the field columns come first.
        keys = list(sl[0].keys())
        keys.remove("count")
        keys.append("count")
        return sl.reorder_keys(keys).to_dataset()
648
-
649
-
650
if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings. ELLIPSIS lets
    # "..." in an expected output match arbitrary text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
1
+ """Mixin class for exporting results."""
2
+
3
+ import base64
4
+ import csv
5
+ import io
6
+ import html
7
+ from typing import Optional
8
+
9
+ from typing import Literal, Optional, Union, List
10
+
11
+
12
+ class DatasetExportMixin:
13
+ """Mixin class for exporting Dataset objects."""
14
+
15
def relevant_columns(
    self, data_type: Optional[str] = None, remove_prefix=False
) -> list:
    """Return the column names present in the dataset, in dataset order.

    The data-type filter is applied to the full column names first; the
    prefix is stripped afterwards, so both options can be combined.

    :param data_type: The data type to filter by (the part of the column name before the first dot).
    :param remove_prefix: Whether to remove the prefix from the column names.
    :raises ValueError: If ``data_type`` is given and no column has that data type.

    >>> from edsl.results.Dataset import Dataset
    >>> d = Dataset([{'a.b':[1,2,3,4]}])
    >>> d.relevant_columns()
    ['a.b']

    >>> d.relevant_columns(remove_prefix=True)
    ['b']

    >>> Dataset([{'a.b':[1,2,3,4]}]).relevant_columns(data_type='a', remove_prefix=True)
    ['b']

    >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
    >>> d.relevant_columns()
    ['a', 'b']

    >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
    ['answer.how_feeling', 'answer.how_feeling_yesterday']

    >>> from edsl.results import Results
    >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
    ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']

    >>> Results.example().relevant_columns(data_type = "flimflam")
    Traceback (most recent call last):
    ...
    ValueError: No columns found for data type: flimflam. Available data types are: ...
    """

    def get_data_type(column):
        """Return the text before the first dot, or None without a dot."""
        prefix, dot, _ = column.partition(".")
        return prefix if dot else None

    all_columns = [list(entry.keys())[0] for entry in self]
    columns = all_columns

    if data_type:
        columns = [
            column for column in all_columns if get_data_type(column) == data_type
        ]
        if len(columns) == 0:
            # Columns without a prefix have no data type; leaving None out
            # keeps sorted() from failing on a mix of None and str.
            all_data_types = sorted(
                {
                    prefix
                    for prefix in map(get_data_type, all_columns)
                    if prefix is not None
                }
            )
            raise ValueError(
                f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
            )

    if remove_prefix:
        columns = [column.split(".")[-1] for column in columns]

    return columns
71
+
72
def num_observations(self):
    """Return how many observations (rows) each column of the dataset holds.

    Returns None when the dataset has no columns.

    :raises ValueError: If the columns do not all have the same length.

    >>> from edsl.results import Results
    >>> Results.example().num_observations()
    4
    """
    expected_length = None
    for entry in self:
        column_values = list(entry.items())[0][1]
        current_length = len(column_values)
        if expected_length is None:
            # The first column fixes the length all others must match.
            expected_length = current_length
            continue
        if current_length != expected_length:
            raise ValueError(
                "The number of observations is not consistent across columns."
            )
    return expected_length
91
+
92
def _make_tabular(
    self, remove_prefix: bool, pretty_labels: Optional[dict] = None
) -> tuple[list, List[list]]:
    """Turn the results into a tabular format: a header and a list of rows.

    An empty dataset yields an empty header and no rows.

    :param remove_prefix: Whether to remove the prefix from the column names.
    :param pretty_labels: Optional mapping from column name to display label, applied after any prefix removal.
    :return: A ``(header, rows)`` tuple; each row is a list with one value per column, in dataset order.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
    (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])

    >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
    (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
    """
    # Map each column name to its list of values.
    columns = dict(list(entry.items())[0] for entry in self.data)
    full_header = [list(entry.keys())[0] for entry in self]

    # num_observations() returns None for a dataset without columns, which
    # range() would reject; treat that as zero rows.
    num_rows = self.num_observations() or 0
    rows = [[columns[name][i] for name in full_header] for i in range(num_rows)]

    if remove_prefix:
        header = [name.split(".")[-1] for name in full_header]
    else:
        header = full_header

    if pretty_labels is not None:
        header = [pretty_labels.get(name, name) for name in header]

    return header, rows
131
+
132
def print_long(self):
    """Print every value on its own line, prefixed by its column name.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').print_long()
    answer.how_feeling: OK
    answer.how_feeling: Great
    answer.how_feeling: Terrible
    answer.how_feeling: OK
    """
    for entry in self:
        column_name, column_values = list(entry.items())[0]
        for item in column_values:
            print(f"{column_name}: {item}")
146
+
147
+ # def print(
148
+ # self,
149
+ # pretty_labels: Optional[dict] = None,
150
+ # filename: Optional[str] = None,
151
+ # format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
152
+ # interactive: bool = False,
153
+ # split_at_dot: bool = True,
154
+ # max_rows=None,
155
+ # tee=False,
156
+ # iframe=False,
157
+ # iframe_height: int = 200,
158
+ # iframe_width: int = 600,
159
+ # web=False,
160
+ # return_string: bool = False,
161
+ # ) -> Union[None, str, "Results"]:
162
+ # """Print the results in a pretty format.
163
+
164
+ # :param pretty_labels: A dictionary of pretty labels for the columns.
165
+ # :param filename: The filename to save the results to.
166
+ # :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
167
+ # :param interactive: Whether to print the results interactively in a Jupyter notebook.
168
+ # :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
169
+ # :param max_rows: The maximum number of rows to print.
170
+ # :param tee: Whether to return the dataset.
171
+ # :param iframe: Whether to display the table in an iframe.
172
+ # :param iframe_height: The height of the iframe.
173
+ # :param iframe_width: The width of the iframe.
174
+ # :param web: Whether to display the table in a web browser.
175
+ # :param return_string: Whether to return the output as a string instead of printing.
176
+
177
+ # :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
178
+
179
+ # Example: Print in rich format at the terminal
180
+
181
+ # >>> from edsl.results import Results
182
+ # >>> r = Results.example()
183
+ # >>> r.select('how_feeling').print(format = "rich")
184
+ # ┏━━━━━━━━━━━━━━┓
185
+ # answer ┃
186
+ # ┃ .how_feeling
187
+ # ┡━━━━━━━━━━━━━━┩
188
+ # │ OK │
189
+ # ├──────────────┤
190
+ # │ Great
191
+ # ├──────────────┤
192
+ # │ Terrible │
193
+ # ├──────────────┤
194
+ # │ OK │
195
+ # └──────────────┘
196
+
197
+ # >>> r = Results.example()
198
+ # >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
199
+ # ┏━━━━━━━━━━━━━━┓
200
+ # ┃ answer ┃
201
+ # ┃ .how_feeling
202
+ # ┡━━━━━━━━━━━━━━┩
203
+ # │ OK │
204
+ # ├──────────────┤
205
+ # │ Great │
206
+ # └──────────────┘
207
+ # >>> r2
208
+ # Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
209
+
210
+ # >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
211
+ # ┏━━━━━━━━━━━━━━┓
212
+ # ┃ answer ┃
213
+ # ┃ .how_feeling ┃
214
+ # ┡━━━━━━━━━━━━━━┩
215
+ # │ OK │
216
+ # ├──────────────┤
217
+ # Great │
218
+ # └──────────────┘
219
+
220
+ # >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
221
+ # ┏━━━━━━━━━━━━━━━━━━━━┓
222
+ # answer.how_feeling
223
+ # ┡━━━━━━━━━━━━━━━━━━━━┩
224
+ # │ OK │
225
+ # ├────────────────────┤
226
+ # │ Great │
227
+ # ├────────────────────┤
228
+ # Terrible │
229
+ # ├────────────────────┤
230
+ # │ OK │
231
+ # └────────────────────┘
232
+
233
+ # Example: using the pretty_labels parameter
234
+
235
+ # >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
236
+ # ┏━━━━━━━━━━━━━━━━━━━━━┓
237
+ # ┃ How are you feeling ┃
238
+ # ┡━━━━━━━━━━━━━━━━━━━━━┩
239
+ # │ OK │
240
+ # ├─────────────────────┤
241
+ # │ Great │
242
+ # ├─────────────────────┤
243
+ # │ Terrible │
244
+ # ├─────────────────────┤
245
+ # │ OK │
246
+ # └─────────────────────┘
247
+
248
+ # Example: printing in markdown format
249
+
250
+ # >>> r.select('how_feeling').print(format='markdown')
251
+ # | answer.how_feeling |
252
+ # |--|
253
+ # | OK |
254
+ # | Great |
255
+ # | Terrible |
256
+ # | OK |
257
+ # ...
258
+
259
+ # >>> r.select('how_feeling').print(format='latex')
260
+ # \\begin{tabular}{l}
261
+ # ...
262
+ # \\end{tabular}
263
+ # <BLANKLINE>
264
+ # """
265
+ # from IPython.display import HTML, display
266
+ # from edsl.utilities.utilities import is_notebook
267
+ # import io
268
+ # import sys
269
+
270
+ # def _determine_format(format):
271
+ # if format is None:
272
+ # if is_notebook():
273
+ # format = "html"
274
+ # else:
275
+ # format = "rich"
276
+ # if format not in ["rich", "html", "markdown", "latex"]:
277
+ # raise ValueError(
278
+ # "format must be one of 'rich', 'html', 'markdown', or 'latex'."
279
+ # )
280
+
281
+ # return format
282
+
283
+ # format = _determine_format(format)
284
+
285
+ # if pretty_labels is None:
286
+ # pretty_labels = {}
287
+
288
+ # if pretty_labels != {}: # only split at dot if there are no pretty labels
289
+ # split_at_dot = False
290
+
291
+ # def _create_data():
292
+ # for index, entry in enumerate(self):
293
+ # key, list_of_values = list(entry.items())[0]
294
+ # yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
295
+
296
+ # new_data = list(_create_data())
297
+
298
+ # # Capture output if return_string is True
299
+ # if return_string:
300
+ # old_stdout = sys.stdout
301
+ # sys.stdout = io.StringIO()
302
+
303
+ # output = None
304
+
305
+ # if format == "rich":
306
+ # from edsl.utilities.interface import print_dataset_with_rich
307
+
308
+ # output = print_dataset_with_rich(
309
+ # new_data, filename=filename, split_at_dot=split_at_dot
310
+ # )
311
+ # elif format == "markdown":
312
+ # from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
313
+
314
+ # output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
315
+ # elif format == "latex":
316
+ # df = self.to_pandas()
317
+ # df.columns = [col.replace("_", " ") for col in df.columns]
318
+ # latex_string = df.to_latex(index=False)
319
+
320
+ # if filename is not None:
321
+ # with open(filename, "w") as f:
322
+ # f.write(latex_string)
323
+ # else:
324
+ # print(latex_string)
325
+ # output = latex_string
326
+ # elif format == "html":
327
+ # from edsl.utilities.interface import print_list_of_dicts_as_html_table
328
+
329
+ # html_source = print_list_of_dicts_as_html_table(
330
+ # new_data, interactive=interactive
331
+ # )
332
+
333
+ # if iframe:
334
+ # iframe = f""""
335
+ # <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
336
+ # """
337
+ # display(HTML(iframe))
338
+ # elif is_notebook():
339
+ # display(HTML(html_source))
340
+ # else:
341
+ # from edsl.utilities.interface import view_html
342
+
343
+ # view_html(html_source)
344
+
345
+ # output = html_source
346
+
347
+ # # Restore stdout and get captured output if return_string is True
348
+ # if return_string:
349
+ # captured_output = sys.stdout.getvalue()
350
+ # sys.stdout = old_stdout
351
+ # return captured_output or output
352
+
353
+ # if tee:
354
+ # return self
355
+
356
+ # return None
357
+
358
def to_csv(
    self,
    filename: Optional[str] = None,
    remove_prefix: bool = False,
    download_link: bool = False,
    pretty_labels: Optional[dict] = None,
):
    """Export the results to a CSV file or a CSV string.

    :param filename: The filename to save the CSV file to. When given, nothing is returned.
    :param remove_prefix: Whether to remove the prefix from the column names.
    :param download_link: Whether to display a download link in a Jupyter notebook (only used when no filename is given).
    :param pretty_labels: Optional mapping from column name to display label.
    :return: The CSV text when neither ``filename`` nor ``download_link`` is set, otherwise None.

    Example:

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_csv()
    'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

    >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
    'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'

    >>> import tempfile
    >>> filename = tempfile.NamedTemporaryFile(delete=False).name
    >>> r.select('how_feeling').to_csv(filename = filename)
    >>> import os
    >>> import csv
    >>> with open(filename, newline='') as f:
    ...     reader = csv.reader(f)
    ...     for row in reader:
    ...         print(row)
    ['answer.how_feeling']
    ['OK']
    ['Great']
    ['Terrible']
    ['OK']

    """
    if pretty_labels is None:
        pretty_labels = {}
    header, rows = self._make_tabular(
        remove_prefix=remove_prefix, pretty_labels=pretty_labels
    )

    if filename is not None:
        # The csv module emits its own line terminators; newline="" stops
        # the text layer from translating them again (which would write
        # "\r\r\n" on Windows).
        with open(filename, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(rows)
        return None

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(header)
    writer.writerows(rows)
    csv_text = output.getvalue()

    if not download_link:
        return csv_text

    from IPython.display import HTML, display

    b64 = base64.b64encode(csv_text.encode()).decode()
    link_html = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
    display(HTML(link_html))
424
+
425
def download_link(self, pretty_labels: Optional[dict] = None) -> str:
    """Return an HTML anchor that embeds the CSV export as a base64 data URI.

    :param pretty_labels: A dictionary of pretty labels for the columns.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').download_link()
    '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
    """
    from base64 import b64encode

    csv_text = self.to_csv(pretty_labels=pretty_labels)
    encoded = b64encode(csv_text.encode()).decode()
    return f'<a href="data:file/csv;base64,{encoded}" download="my_data.csv">Download CSV file</a>'
440
+
441
def to_pandas(
    self, remove_prefix: bool = False, lists_as_strings=False
) -> "DataFrame":
    """Convert the results to a pandas DataFrame.

    This delegates to ``_to_pandas_strings``. ``lists_as_strings`` is
    accepted but currently has no effect.

    :param remove_prefix: Whether to remove the prefix from the column names.

    """
    frame = self._to_pandas_strings(remove_prefix)
    return frame
463
+
464
def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
    """Build a pandas DataFrame by parsing the dataset's CSV export.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_pandas()
      answer.how_feeling
    0                 OK
    1              Great
    2           Terrible
    3                 OK
    """
    import pandas as pd

    # Round-trip through CSV text and let pandas parse it.
    csv_text = self.to_csv(remove_prefix=remove_prefix)
    return pd.read_csv(io.StringIO(csv_text))
486
+
487
def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
    """Build a ScenarioList containing one Scenario per observation (row).

    Each row dictionary produced by ``to_dicts`` is wrapped in a Scenario.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_scenario_list()
    ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
    """
    from edsl import ScenarioList, Scenario

    rows = self.to_dicts(remove_prefix=remove_prefix)
    return ScenarioList([Scenario(row) for row in rows])
505
+
506
def to_agent_list(self, remove_prefix: bool = True):
    """Convert the results to an AgentList with one Agent per row.

    A ``name`` column is moved to the ``agent_name`` trait and also used as
    the agent's name.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_agent_list()
    AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
    """
    from edsl import AgentList, Agent

    def build_agent(traits):
        """Create the Agent for one row of traits."""
        if "name" not in traits:
            return Agent(traits)
        traits["agent_name"] = traits.pop("name")
        return Agent(traits, name=traits["agent_name"])

    rows = self.to_dicts(remove_prefix=remove_prefix)
    return AgentList([build_agent(row) for row in rows])
527
+
528
def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
    """Return the dataset row by row, as one dictionary per observation.

    Every entry of the dataset contributes one column: its first key is the
    column name and the associated list holds that column's values.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_dicts()
    [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

    """
    name_column_pairs = [list(entry.items())[0] for entry in self]
    names = [name for name, _ in name_column_pairs]
    columns = [column for _, column in name_column_pairs]

    if remove_prefix:
        # Keep only the part after the last dot ("answer.x" -> "x").
        names = [name.split(".")[-1] for name in names]

    # zip(*columns) transposes the column lists into row tuples.
    return [dict(zip(names, row)) for row in zip(*columns)]
554
+
555
def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
    """Return the selected column(s) as a plain list.

    With a single column the column's values are returned; with several
    columns each element is a tuple holding one row. ``unzipped`` is accepted
    but not used by this method.

    :param flatten: Whether to flatten the list of lists.
    :param remove_none: Whether to remove None values from the list.

    >>> from edsl.results import Results
    >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
    Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

    >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
    [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

    >>> r = Results.example()
    >>> r.select('how_feeling').to_list()
    ['OK', 'Great', 'Terrible', 'OK']

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
    [1, 9, 2, 3, 4]

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
    Traceback (most recent call last):
    ...
    ValueError: Cannot flatten a list of lists when there are multiple columns selected.


    """
    column_names = self.relevant_columns()
    if flatten and len(column_names) > 1:
        raise ValueError(
            "Cannot flatten a list of lists when there are multiple columns selected."
        )

    if len(column_names) == 1:
        # The typical case: a single selected column, returned as-is.
        result = list(self[0].values())[0]
    else:
        result = [
            tuple([row[name] for name in column_names])
            for row in self.to_dicts(remove_prefix=False)
        ]

    if remove_none:
        result = [element for element in result if element is not None]

    if flatten:
        flattened = []
        for element in result:
            # Only one level of nesting is expanded.
            flattened.extend(element if isinstance(element, list) else [element])
        result = flattened

    return result
612
+
613
def html(
    self,
    filename: Optional[str] = None,
    cta: str = "Open in browser",
    return_link: bool = False,
):
    """Write the dataset as an HTML table and show a way to open it.

    The table comes from ``self.to_pandas().to_html()``. In a notebook a
    link to the file is displayed; otherwise the file is opened with
    ``webbrowser``.

    :param filename: Where to write the HTML. When None, a ``.html`` temporary file is created in the current working directory and kept.
    :param cta: The link text shown in a notebook.
    :param return_link: Whether to return the filename that was written.
    :return: The filename if ``return_link`` is true, otherwise None.
    """
    import os
    import tempfile

    from edsl.utilities.utilities import is_notebook

    df = self.to_pandas()

    if filename is None:
        # Only the generated name is needed; close the handle right away so
        # it is not leaked and the file can be reopened on every platform.
        with tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".html", dir=os.getcwd()
        ) as tmp:
            filename = tmp.name

    with open(filename, "w") as f:
        f.write(df.to_html())

    if is_notebook():
        # IPython is only required on the notebook path.
        from IPython.display import HTML, display

        html_url = f"/files/{filename}"
        html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
        display(HTML(html_link))
    else:
        print(f"Saved to {filename}")
        import webbrowser

        webbrowser.open(f"file://{os.path.abspath(filename)}")

    if return_link:
        return filename
649
+
650
def tally(
    self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
) -> Union[dict, "Dataset"]:
    """Tally the values of a field or perform a cross-tab of multiple fields.

    Counts are ordered from most to least frequent. List-valued cells are
    converted to tuples so that they can be counted.

    :param fields: The field(s) to tally, multiple fields for cross-tabulation. When omitted, all columns of the dataset are used.
    :param top_n: If given, keep only the ``top_n`` most frequent values.
    :param output: ``"dict"`` for a plain value-to-count dictionary, ``"Dataset"`` for a Dataset with one column per field plus ``count``.
    :raises ValueError: If a field is not a column of the dataset (with or without its prefix).

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
    {'OK': 2, 'Great': 1, 'Terrible': 1}
    >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
    Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
    """
    import textwrap
    import warnings
    from collections import Counter

    if len(fields) == 0:
        fields = self.relevant_columns()

    known_columns = self.relevant_columns()
    known_suffixes = [column.split(".")[-1] for column in known_columns]
    if not all(f in known_columns or f in known_suffixes for f in fields):
        raise ValueError("One or more specified fields are not in the dataset.")

    if len(fields) == 1:
        values = self._key_to_value(fields[0])
    else:
        values = list(zip(*(self._key_to_value(field) for field in fields)))

    # Lists are unhashable and cannot be counted; the previous loop only
    # rebound its loop variable, so the conversion never took effect.
    values = [tuple(v) if isinstance(v, list) else v for v in values]

    # most_common() sorts by descending count and keeps first-seen order
    # among ties.
    sorted_tally = dict(Counter(values).most_common())
    if top_n is not None:
        sorted_tally = dict(list(sorted_tally.items())[:top_n])

    if output == "dict":
        warnings.warn(
            textwrap.dedent(
                """\
                The default output from tally will change to Dataset in the future.
                Use output='Dataset' to get the Dataset object for now.
                """
            )
        )
        return sorted_tally
    elif output == "Dataset":
        from edsl.results.Dataset import Dataset

        dataset = Dataset(
            [
                {"value": list(sorted_tally.keys())},
                {"count": list(sorted_tally.values())},
            ]
        )
        sl = dataset.to_scenario_list().unpack(
            "value",
            new_names=[fields] if isinstance(fields, str) else fields,
            keep_original=False,
        )
        # Move "count" to the end so the field columns come first.
        keys = list(sl[0].keys())
        keys.remove("count")
        keys.append("count")
        return sl.reorder_keys(keys).to_dataset()
726
+
727
+
728
if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings. ELLIPSIS lets
    # "..." in an expected output match arbitrary text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
731
+ doctest.testmod(optionflags=doctest.ELLIPSIS)