edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (341) hide show
  1. edsl/Base.py +413 -332
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +57 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +1071 -867
  7. edsl/agents/AgentList.py +551 -413
  8. edsl/agents/Invigilator.py +284 -233
  9. edsl/agents/InvigilatorBase.py +257 -270
  10. edsl/agents/PromptConstructor.py +272 -354
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/__init__.py +2 -3
  14. edsl/agents/descriptors.py +99 -99
  15. edsl/agents/prompt_helpers.py +129 -129
  16. edsl/agents/question_option_processor.py +172 -0
  17. edsl/auto/AutoStudy.py +130 -117
  18. edsl/auto/StageBase.py +243 -230
  19. edsl/auto/StageGenerateSurvey.py +178 -178
  20. edsl/auto/StageLabelQuestions.py +125 -125
  21. edsl/auto/StagePersona.py +61 -61
  22. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  23. edsl/auto/StagePersonaDimensionValues.py +74 -74
  24. edsl/auto/StagePersonaDimensions.py +69 -69
  25. edsl/auto/StageQuestions.py +74 -73
  26. edsl/auto/SurveyCreatorPipeline.py +21 -21
  27. edsl/auto/utilities.py +218 -224
  28. edsl/base/Base.py +279 -279
  29. edsl/config.py +177 -157
  30. edsl/conversation/Conversation.py +290 -290
  31. edsl/conversation/car_buying.py +59 -58
  32. edsl/conversation/chips.py +95 -95
  33. edsl/conversation/mug_negotiation.py +81 -81
  34. edsl/conversation/next_speaker_utilities.py +93 -93
  35. edsl/coop/CoopFunctionsMixin.py +15 -0
  36. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  37. edsl/coop/PriceFetcher.py +54 -54
  38. edsl/coop/__init__.py +2 -2
  39. edsl/coop/coop.py +1106 -1028
  40. edsl/coop/utils.py +131 -131
  41. edsl/data/Cache.py +573 -555
  42. edsl/data/CacheEntry.py +230 -233
  43. edsl/data/CacheHandler.py +168 -149
  44. edsl/data/RemoteCacheSync.py +186 -78
  45. edsl/data/SQLiteDict.py +292 -292
  46. edsl/data/__init__.py +5 -4
  47. edsl/data/orm.py +10 -10
  48. edsl/data_transfer_models.py +74 -73
  49. edsl/enums.py +202 -175
  50. edsl/exceptions/BaseException.py +21 -21
  51. edsl/exceptions/__init__.py +54 -54
  52. edsl/exceptions/agents.py +54 -42
  53. edsl/exceptions/cache.py +5 -5
  54. edsl/exceptions/configuration.py +16 -16
  55. edsl/exceptions/coop.py +10 -10
  56. edsl/exceptions/data.py +14 -14
  57. edsl/exceptions/general.py +34 -34
  58. edsl/exceptions/inference_services.py +5 -0
  59. edsl/exceptions/jobs.py +33 -33
  60. edsl/exceptions/language_models.py +63 -63
  61. edsl/exceptions/prompts.py +15 -15
  62. edsl/exceptions/questions.py +109 -91
  63. edsl/exceptions/results.py +29 -29
  64. edsl/exceptions/scenarios.py +29 -22
  65. edsl/exceptions/surveys.py +37 -37
  66. edsl/inference_services/AnthropicService.py +106 -87
  67. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  68. edsl/inference_services/AvailableModelFetcher.py +215 -0
  69. edsl/inference_services/AwsBedrock.py +118 -120
  70. edsl/inference_services/AzureAI.py +215 -217
  71. edsl/inference_services/DeepInfraService.py +18 -18
  72. edsl/inference_services/GoogleService.py +143 -148
  73. edsl/inference_services/GroqService.py +20 -20
  74. edsl/inference_services/InferenceServiceABC.py +80 -147
  75. edsl/inference_services/InferenceServicesCollection.py +138 -97
  76. edsl/inference_services/MistralAIService.py +120 -123
  77. edsl/inference_services/OllamaService.py +18 -18
  78. edsl/inference_services/OpenAIService.py +236 -224
  79. edsl/inference_services/PerplexityService.py +160 -163
  80. edsl/inference_services/ServiceAvailability.py +135 -0
  81. edsl/inference_services/TestService.py +90 -89
  82. edsl/inference_services/TogetherAIService.py +172 -170
  83. edsl/inference_services/data_structures.py +134 -0
  84. edsl/inference_services/models_available_cache.py +118 -118
  85. edsl/inference_services/rate_limits_cache.py +25 -25
  86. edsl/inference_services/registry.py +41 -41
  87. edsl/inference_services/write_available.py +10 -10
  88. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  89. edsl/jobs/Answers.py +43 -56
  90. edsl/jobs/FetchInvigilator.py +47 -0
  91. edsl/jobs/InterviewTaskManager.py +98 -0
  92. edsl/jobs/InterviewsConstructor.py +50 -0
  93. edsl/jobs/Jobs.py +823 -898
  94. edsl/jobs/JobsChecks.py +172 -147
  95. edsl/jobs/JobsComponentConstructor.py +189 -0
  96. edsl/jobs/JobsPrompts.py +270 -268
  97. edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
  98. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  99. edsl/jobs/RequestTokenEstimator.py +30 -0
  100. edsl/jobs/__init__.py +1 -1
  101. edsl/jobs/async_interview_runner.py +138 -0
  102. edsl/jobs/buckets/BucketCollection.py +104 -63
  103. edsl/jobs/buckets/ModelBuckets.py +65 -65
  104. edsl/jobs/buckets/TokenBucket.py +283 -251
  105. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  106. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  107. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  108. edsl/jobs/data_structures.py +120 -0
  109. edsl/jobs/decorators.py +35 -0
  110. edsl/jobs/interviews/Interview.py +396 -661
  111. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  112. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  113. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  114. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  115. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  116. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  117. edsl/jobs/interviews/ReportErrors.py +66 -66
  118. edsl/jobs/interviews/interview_status_enum.py +9 -9
  119. edsl/jobs/jobs_status_enums.py +9 -0
  120. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  121. edsl/jobs/results_exceptions_handler.py +98 -0
  122. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
  123. edsl/jobs/runners/JobsRunnerStatus.py +297 -330
  124. edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
  125. edsl/jobs/tasks/TaskCreators.py +64 -64
  126. edsl/jobs/tasks/TaskHistory.py +470 -450
  127. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  128. edsl/jobs/tasks/task_status_enum.py +161 -163
  129. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  130. edsl/jobs/tokens/TokenUsage.py +34 -34
  131. edsl/language_models/ComputeCost.py +63 -0
  132. edsl/language_models/LanguageModel.py +626 -668
  133. edsl/language_models/ModelList.py +164 -155
  134. edsl/language_models/PriceManager.py +127 -0
  135. edsl/language_models/RawResponseHandler.py +106 -0
  136. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  137. edsl/language_models/ServiceDataSources.py +0 -0
  138. edsl/language_models/__init__.py +2 -3
  139. edsl/language_models/fake_openai_call.py +15 -15
  140. edsl/language_models/fake_openai_service.py +61 -61
  141. edsl/language_models/key_management/KeyLookup.py +63 -0
  142. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  143. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  144. edsl/language_models/key_management/__init__.py +0 -0
  145. edsl/language_models/key_management/models.py +131 -0
  146. edsl/language_models/model.py +256 -0
  147. edsl/language_models/repair.py +156 -156
  148. edsl/language_models/utilities.py +65 -64
  149. edsl/notebooks/Notebook.py +263 -258
  150. edsl/notebooks/NotebookToLaTeX.py +142 -0
  151. edsl/notebooks/__init__.py +1 -1
  152. edsl/prompts/Prompt.py +352 -362
  153. edsl/prompts/__init__.py +2 -2
  154. edsl/questions/ExceptionExplainer.py +77 -0
  155. edsl/questions/HTMLQuestion.py +103 -0
  156. edsl/questions/QuestionBase.py +518 -664
  157. edsl/questions/QuestionBasePromptsMixin.py +221 -217
  158. edsl/questions/QuestionBudget.py +227 -227
  159. edsl/questions/QuestionCheckBox.py +359 -359
  160. edsl/questions/QuestionExtract.py +180 -182
  161. edsl/questions/QuestionFreeText.py +113 -114
  162. edsl/questions/QuestionFunctional.py +166 -166
  163. edsl/questions/QuestionList.py +223 -231
  164. edsl/questions/QuestionMatrix.py +265 -0
  165. edsl/questions/QuestionMultipleChoice.py +330 -286
  166. edsl/questions/QuestionNumerical.py +151 -153
  167. edsl/questions/QuestionRank.py +314 -324
  168. edsl/questions/Quick.py +41 -41
  169. edsl/questions/SimpleAskMixin.py +74 -73
  170. edsl/questions/__init__.py +27 -26
  171. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
  172. edsl/questions/compose_questions.py +98 -98
  173. edsl/questions/data_structures.py +20 -0
  174. edsl/questions/decorators.py +21 -21
  175. edsl/questions/derived/QuestionLikertFive.py +76 -76
  176. edsl/questions/derived/QuestionLinearScale.py +90 -87
  177. edsl/questions/derived/QuestionTopK.py +93 -93
  178. edsl/questions/derived/QuestionYesNo.py +82 -82
  179. edsl/questions/descriptors.py +427 -413
  180. edsl/questions/loop_processor.py +149 -0
  181. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  182. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  183. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  184. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  185. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  186. edsl/questions/prompt_templates/question_list.jinja +17 -17
  187. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  188. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  189. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
  190. edsl/questions/question_registry.py +177 -177
  191. edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
  192. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
  193. edsl/questions/response_validator_factory.py +34 -0
  194. edsl/questions/settings.py +12 -12
  195. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  196. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  197. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  198. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  199. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  200. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  201. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  202. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  203. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  204. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  205. edsl/questions/templates/list/question_presentation.jinja +5 -5
  206. edsl/questions/templates/matrix/__init__.py +1 -0
  207. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  208. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  209. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  210. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  211. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  212. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  213. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  214. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  215. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  216. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  217. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  218. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  219. edsl/results/CSSParameterizer.py +108 -108
  220. edsl/results/Dataset.py +587 -424
  221. edsl/results/DatasetExportMixin.py +594 -731
  222. edsl/results/DatasetTree.py +295 -275
  223. edsl/results/MarkdownToDocx.py +122 -0
  224. edsl/results/MarkdownToPDF.py +111 -0
  225. edsl/results/Result.py +557 -465
  226. edsl/results/Results.py +1183 -1165
  227. edsl/results/ResultsExportMixin.py +45 -43
  228. edsl/results/ResultsGGMixin.py +121 -121
  229. edsl/results/TableDisplay.py +125 -198
  230. edsl/results/TextEditor.py +50 -0
  231. edsl/results/__init__.py +2 -2
  232. edsl/results/file_exports.py +252 -0
  233. edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
  234. edsl/results/{Selector.py → results_selector.py} +145 -135
  235. edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
  236. edsl/results/smart_objects.py +96 -0
  237. edsl/results/table_data_class.py +12 -0
  238. edsl/results/table_display.css +77 -77
  239. edsl/results/table_renderers.py +118 -0
  240. edsl/results/tree_explore.py +115 -115
  241. edsl/scenarios/ConstructDownloadLink.py +109 -0
  242. edsl/scenarios/DocumentChunker.py +102 -0
  243. edsl/scenarios/DocxScenario.py +16 -0
  244. edsl/scenarios/FileStore.py +511 -632
  245. edsl/scenarios/PdfExtractor.py +40 -0
  246. edsl/scenarios/Scenario.py +498 -601
  247. edsl/scenarios/ScenarioHtmlMixin.py +65 -64
  248. edsl/scenarios/ScenarioList.py +1458 -1287
  249. edsl/scenarios/ScenarioListExportMixin.py +45 -52
  250. edsl/scenarios/ScenarioListPdfMixin.py +239 -261
  251. edsl/scenarios/__init__.py +3 -4
  252. edsl/scenarios/directory_scanner.py +96 -0
  253. edsl/scenarios/file_methods.py +85 -0
  254. edsl/scenarios/handlers/__init__.py +13 -0
  255. edsl/scenarios/handlers/csv.py +38 -0
  256. edsl/scenarios/handlers/docx.py +76 -0
  257. edsl/scenarios/handlers/html.py +37 -0
  258. edsl/scenarios/handlers/json.py +111 -0
  259. edsl/scenarios/handlers/latex.py +5 -0
  260. edsl/scenarios/handlers/md.py +51 -0
  261. edsl/scenarios/handlers/pdf.py +68 -0
  262. edsl/scenarios/handlers/png.py +39 -0
  263. edsl/scenarios/handlers/pptx.py +105 -0
  264. edsl/scenarios/handlers/py.py +294 -0
  265. edsl/scenarios/handlers/sql.py +313 -0
  266. edsl/scenarios/handlers/sqlite.py +149 -0
  267. edsl/scenarios/handlers/txt.py +33 -0
  268. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
  269. edsl/scenarios/scenario_selector.py +156 -0
  270. edsl/shared.py +1 -1
  271. edsl/study/ObjectEntry.py +173 -173
  272. edsl/study/ProofOfWork.py +113 -113
  273. edsl/study/SnapShot.py +80 -80
  274. edsl/study/Study.py +521 -528
  275. edsl/study/__init__.py +4 -4
  276. edsl/surveys/ConstructDAG.py +92 -0
  277. edsl/surveys/DAG.py +148 -148
  278. edsl/surveys/EditSurvey.py +221 -0
  279. edsl/surveys/InstructionHandler.py +100 -0
  280. edsl/surveys/Memory.py +31 -31
  281. edsl/surveys/MemoryManagement.py +72 -0
  282. edsl/surveys/MemoryPlan.py +244 -244
  283. edsl/surveys/Rule.py +327 -326
  284. edsl/surveys/RuleCollection.py +385 -387
  285. edsl/surveys/RuleManager.py +172 -0
  286. edsl/surveys/Simulator.py +75 -0
  287. edsl/surveys/Survey.py +1280 -1801
  288. edsl/surveys/SurveyCSS.py +273 -261
  289. edsl/surveys/SurveyExportMixin.py +259 -259
  290. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
  291. edsl/surveys/SurveyQualtricsImport.py +284 -284
  292. edsl/surveys/SurveyToApp.py +141 -0
  293. edsl/surveys/__init__.py +5 -3
  294. edsl/surveys/base.py +53 -53
  295. edsl/surveys/descriptors.py +60 -56
  296. edsl/surveys/instructions/ChangeInstruction.py +48 -49
  297. edsl/surveys/instructions/Instruction.py +56 -65
  298. edsl/surveys/instructions/InstructionCollection.py +82 -77
  299. edsl/templates/error_reporting/base.html +23 -23
  300. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  301. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  302. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  303. edsl/templates/error_reporting/interview_details.html +115 -115
  304. edsl/templates/error_reporting/interviews.html +19 -19
  305. edsl/templates/error_reporting/overview.html +4 -4
  306. edsl/templates/error_reporting/performance_plot.html +1 -1
  307. edsl/templates/error_reporting/report.css +73 -73
  308. edsl/templates/error_reporting/report.html +117 -117
  309. edsl/templates/error_reporting/report.js +25 -25
  310. edsl/tools/__init__.py +1 -1
  311. edsl/tools/clusters.py +192 -192
  312. edsl/tools/embeddings.py +27 -27
  313. edsl/tools/embeddings_plotting.py +118 -118
  314. edsl/tools/plotting.py +112 -112
  315. edsl/tools/summarize.py +18 -18
  316. edsl/utilities/PrettyList.py +56 -0
  317. edsl/utilities/SystemInfo.py +28 -28
  318. edsl/utilities/__init__.py +22 -22
  319. edsl/utilities/ast_utilities.py +25 -25
  320. edsl/utilities/data/Registry.py +6 -6
  321. edsl/utilities/data/__init__.py +1 -1
  322. edsl/utilities/data/scooter_results.json +1 -1
  323. edsl/utilities/decorators.py +77 -77
  324. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  325. edsl/utilities/interface.py +627 -627
  326. edsl/utilities/is_notebook.py +18 -0
  327. edsl/utilities/is_valid_variable_name.py +11 -0
  328. edsl/utilities/naming_utilities.py +263 -263
  329. edsl/utilities/remove_edsl_version.py +24 -0
  330. edsl/utilities/repair_functions.py +28 -28
  331. edsl/utilities/restricted_python.py +70 -70
  332. edsl/utilities/utilities.py +436 -424
  333. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/LICENSE +21 -21
  334. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/METADATA +13 -11
  335. edsl-0.1.39.dev5.dist-info/RECORD +358 -0
  336. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/WHEEL +1 -1
  337. edsl/language_models/KeyLookup.py +0 -30
  338. edsl/language_models/registry.py +0 -190
  339. edsl/language_models/unused/ReplicateBase.py +0 -83
  340. edsl/results/ResultsDBMixin.py +0 -238
  341. edsl-0.1.39.dev3.dist-info/RECORD +0 -277
@@ -1,731 +1,594 @@
1
- """Mixin class for exporting results."""
2
-
3
- import base64
4
- import csv
5
- import io
6
- import html
7
- from typing import Optional
8
-
9
- from typing import Literal, Optional, Union, List
10
-
11
-
12
- class DatasetExportMixin:
13
- """Mixin class for exporting Dataset objects."""
14
-
15
- def relevant_columns(
16
- self, data_type: Optional[str] = None, remove_prefix=False
17
- ) -> list:
18
- """Return the set of keys that are present in the dataset.
19
-
20
- :param data_type: The data type to filter by.
21
- :param remove_prefix: Whether to remove the prefix from the column names.
22
-
23
- >>> from edsl.results.Dataset import Dataset
24
- >>> d = Dataset([{'a.b':[1,2,3,4]}])
25
- >>> d.relevant_columns()
26
- ['a.b']
27
-
28
- >>> d.relevant_columns(remove_prefix=True)
29
- ['b']
30
-
31
- >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
32
- >>> d.relevant_columns()
33
- ['a', 'b']
34
-
35
- >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
36
- ['answer.how_feeling', 'answer.how_feeling_yesterday']
37
-
38
- >>> from edsl.results import Results
39
- >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
40
- ['model.frequency_penalty', 'model.logprobs', 'model.max_tokens', 'model.model', 'model.presence_penalty', 'model.temperature', 'model.top_logprobs', 'model.top_p']
41
-
42
- >>> Results.example().relevant_columns(data_type = "flimflam")
43
- Traceback (most recent call last):
44
- ...
45
- ValueError: No columns found for data type: flimflam. Available data types are: ...
46
- """
47
- columns = [list(x.keys())[0] for x in self]
48
- if remove_prefix:
49
- columns = [column.split(".")[-1] for column in columns]
50
-
51
- def get_data_type(column):
52
- if "." in column:
53
- return column.split(".")[0]
54
- else:
55
- return None
56
-
57
- if data_type:
58
- all_columns = columns[:]
59
- columns = [
60
- column for column in columns if get_data_type(column) == data_type
61
- ]
62
- if len(columns) == 0:
63
- all_data_types = sorted(
64
- list(set(get_data_type(column) for column in all_columns))
65
- )
66
- raise ValueError(
67
- f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
68
- )
69
-
70
- return columns
71
-
72
- def num_observations(self):
73
- """Return the number of observations in the dataset.
74
-
75
- >>> from edsl.results import Results
76
- >>> Results.example().num_observations()
77
- 4
78
- """
79
- _num_observations = None
80
- for entry in self:
81
- key, values = list(entry.items())[0]
82
- if _num_observations is None:
83
- _num_observations = len(values)
84
- else:
85
- if len(values) != _num_observations:
86
- raise ValueError(
87
- "The number of observations is not consistent across columns."
88
- )
89
-
90
- return _num_observations
91
-
92
- def _make_tabular(
93
- self, remove_prefix: bool, pretty_labels: Optional[dict] = None
94
- ) -> tuple[list, List[list]]:
95
- """Turn the results into a tabular format.
96
-
97
- :param remove_prefix: Whether to remove the prefix from the column names.
98
-
99
- >>> from edsl.results import Results
100
- >>> r = Results.example()
101
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
102
- (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
103
-
104
- >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
105
- (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
106
- """
107
-
108
- def create_dict_from_list_of_dicts(list_of_dicts):
109
- for entry in list_of_dicts:
110
- key, list_of_values = list(entry.items())[0]
111
- yield key, list_of_values
112
-
113
- tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
114
-
115
- full_header = [list(x.keys())[0] for x in self]
116
-
117
- rows = []
118
- for i in range(self.num_observations()):
119
- row = [tabular_repr[h][i] for h in full_header]
120
- rows.append(row)
121
-
122
- if remove_prefix:
123
- header = [h.split(".")[-1] for h in full_header]
124
- else:
125
- header = full_header
126
-
127
- if pretty_labels is not None:
128
- header = [pretty_labels.get(h, h) for h in header]
129
-
130
- return header, rows
131
-
132
- def print_long(self):
133
- """Print the results in a long format.
134
- >>> from edsl.results import Results
135
- >>> r = Results.example()
136
- >>> r.select('how_feeling').print_long()
137
- answer.how_feeling: OK
138
- answer.how_feeling: Great
139
- answer.how_feeling: Terrible
140
- answer.how_feeling: OK
141
- """
142
- for entry in self:
143
- key, list_of_values = list(entry.items())[0]
144
- for value in list_of_values:
145
- print(f"{key}: {value}")
146
-
147
- # def print(
148
- # self,
149
- # pretty_labels: Optional[dict] = None,
150
- # filename: Optional[str] = None,
151
- # format: Optional[Literal["rich", "html", "markdown", "latex"]] = None,
152
- # interactive: bool = False,
153
- # split_at_dot: bool = True,
154
- # max_rows=None,
155
- # tee=False,
156
- # iframe=False,
157
- # iframe_height: int = 200,
158
- # iframe_width: int = 600,
159
- # web=False,
160
- # return_string: bool = False,
161
- # ) -> Union[None, str, "Results"]:
162
- # """Print the results in a pretty format.
163
-
164
- # :param pretty_labels: A dictionary of pretty labels for the columns.
165
- # :param filename: The filename to save the results to.
166
- # :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
167
- # :param interactive: Whether to print the results interactively in a Jupyter notebook.
168
- # :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
169
- # :param max_rows: The maximum number of rows to print.
170
- # :param tee: Whether to return the dataset.
171
- # :param iframe: Whether to display the table in an iframe.
172
- # :param iframe_height: The height of the iframe.
173
- # :param iframe_width: The width of the iframe.
174
- # :param web: Whether to display the table in a web browser.
175
- # :param return_string: Whether to return the output as a string instead of printing.
176
-
177
- # :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
178
-
179
- # Example: Print in rich format at the terminal
180
-
181
- # >>> from edsl.results import Results
182
- # >>> r = Results.example()
183
- # >>> r.select('how_feeling').print(format = "rich")
184
- # ┏━━━━━━━━━━━━━━┓
185
- # ┃ answer ┃
186
- # ┃ .how_feeling ┃
187
- # ┡━━━━━━━━━━━━━━┩
188
- # │ OK │
189
- # ├──────────────┤
190
- # │ Great │
191
- # ├──────────────┤
192
- # │ Terrible │
193
- # ├──────────────┤
194
- # │ OK │
195
- # └──────────────┘
196
-
197
- # >>> r = Results.example()
198
- # >>> r2 = r.select("how_feeling").print(format = "rich", tee = True, max_rows = 2)
199
- # ┏━━━━━━━━━━━━━━┓
200
- # ┃ answer ┃
201
- # ┃ .how_feeling ┃
202
- # ┡━━━━━━━━━━━━━━┩
203
- # │ OK │
204
- # ├──────────────┤
205
- # │ Great │
206
- # └──────────────┘
207
- # >>> r2
208
- # Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
209
-
210
- # >>> r.select('how_feeling').print(format = "rich", max_rows = 2)
211
- # ┏━━━━━━━━━━━━━━┓
212
- # ┃ answer ┃
213
- # ┃ .how_feeling ┃
214
- # ┡━━━━━━━━━━━━━━┩
215
- # │ OK │
216
- # ├──────────────┤
217
- # │ Great │
218
- # └──────────────┘
219
-
220
- # >>> r.select('how_feeling').print(format = "rich", split_at_dot = False)
221
- # ┏━━━━━━━━━━━━━━━━━━━━┓
222
- # ┃ answer.how_feeling ┃
223
- # ┡━━━━━━━━━━━━━━━━━━━━┩
224
- # │ OK │
225
- # ├────────────────────┤
226
- # │ Great │
227
- # ├────────────────────┤
228
- # │ Terrible │
229
- # ├────────────────────┤
230
- # │ OK │
231
- # └────────────────────┘
232
-
233
- # Example: using the pretty_labels parameter
234
-
235
- # >>> r.select('how_feeling').print(format="rich", pretty_labels = {'answer.how_feeling': "How are you feeling"})
236
- # ┏━━━━━━━━━━━━━━━━━━━━━┓
237
- # ┃ How are you feeling ┃
238
- # ┡━━━━━━━━━━━━━━━━━━━━━┩
239
- # │ OK │
240
- # ├─────────────────────┤
241
- # │ Great │
242
- # ├─────────────────────┤
243
- # │ Terrible
244
- # ├─────────────────────┤
245
- # │ OK │
246
- # └─────────────────────┘
247
-
248
- # Example: printing in markdown format
249
-
250
- # >>> r.select('how_feeling').print(format='markdown')
251
- # | answer.how_feeling |
252
- # |--|
253
- # | OK |
254
- # | Great |
255
- # | Terrible |
256
- # | OK |
257
- # ...
258
-
259
- # >>> r.select('how_feeling').print(format='latex')
260
- # \\begin{tabular}{l}
261
- # ...
262
- # \\end{tabular}
263
- # <BLANKLINE>
264
- # """
265
- # from IPython.display import HTML, display
266
- # from edsl.utilities.utilities import is_notebook
267
- # import io
268
- # import sys
269
-
270
- # def _determine_format(format):
271
- # if format is None:
272
- # if is_notebook():
273
- # format = "html"
274
- # else:
275
- # format = "rich"
276
- # if format not in ["rich", "html", "markdown", "latex"]:
277
- # raise ValueError(
278
- # "format must be one of 'rich', 'html', 'markdown', or 'latex'."
279
- # )
280
-
281
- # return format
282
-
283
- # format = _determine_format(format)
284
-
285
- # if pretty_labels is None:
286
- # pretty_labels = {}
287
-
288
- # if pretty_labels != {}: # only split at dot if there are no pretty labels
289
- # split_at_dot = False
290
-
291
- # def _create_data():
292
- # for index, entry in enumerate(self):
293
- # key, list_of_values = list(entry.items())[0]
294
- # yield {pretty_labels.get(key, key): list_of_values[:max_rows]}
295
-
296
- # new_data = list(_create_data())
297
-
298
- # # Capture output if return_string is True
299
- # if return_string:
300
- # old_stdout = sys.stdout
301
- # sys.stdout = io.StringIO()
302
-
303
- # output = None
304
-
305
- # if format == "rich":
306
- # from edsl.utilities.interface import print_dataset_with_rich
307
-
308
- # output = print_dataset_with_rich(
309
- # new_data, filename=filename, split_at_dot=split_at_dot
310
- # )
311
- # elif format == "markdown":
312
- # from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
313
-
314
- # output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
315
- # elif format == "latex":
316
- # df = self.to_pandas()
317
- # df.columns = [col.replace("_", " ") for col in df.columns]
318
- # latex_string = df.to_latex(index=False)
319
-
320
- # if filename is not None:
321
- # with open(filename, "w") as f:
322
- # f.write(latex_string)
323
- # else:
324
- # print(latex_string)
325
- # output = latex_string
326
- # elif format == "html":
327
- # from edsl.utilities.interface import print_list_of_dicts_as_html_table
328
-
329
- # html_source = print_list_of_dicts_as_html_table(
330
- # new_data, interactive=interactive
331
- # )
332
-
333
- # if iframe:
334
- # iframe = f""""
335
- # <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
336
- # """
337
- # display(HTML(iframe))
338
- # elif is_notebook():
339
- # display(HTML(html_source))
340
- # else:
341
- # from edsl.utilities.interface import view_html
342
-
343
- # view_html(html_source)
344
-
345
- # output = html_source
346
-
347
- # # Restore stdout and get captured output if return_string is True
348
- # if return_string:
349
- # captured_output = sys.stdout.getvalue()
350
- # sys.stdout = old_stdout
351
- # return captured_output or output
352
-
353
- # if tee:
354
- # return self
355
-
356
- # return None
357
-
358
- def to_csv(
359
- self,
360
- filename: Optional[str] = None,
361
- remove_prefix: bool = False,
362
- download_link: bool = False,
363
- pretty_labels: Optional[dict] = None,
364
- ):
365
- """Export the results to a CSV file.
366
-
367
- :param filename: The filename to save the CSV file to.
368
- :param remove_prefix: Whether to remove the prefix from the column names.
369
- :param download_link: Whether to display a download link in a Jupyter notebook.
370
-
371
- Example:
372
-
373
- >>> from edsl.results import Results
374
- >>> r = Results.example()
375
- >>> r.select('how_feeling').to_csv()
376
- 'answer.how_feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
377
-
378
- >>> r.select('how_feeling').to_csv(pretty_labels = {'answer.how_feeling': "How are you feeling"})
379
- 'How are you feeling\\r\\nOK\\r\\nGreat\\r\\nTerrible\\r\\nOK\\r\\n'
380
-
381
- >>> import tempfile
382
- >>> filename = tempfile.NamedTemporaryFile(delete=False).name
383
- >>> r.select('how_feeling').to_csv(filename = filename)
384
- >>> import os
385
- >>> import csv
386
- >>> with open(filename, newline='') as f:
387
- ... reader = csv.reader(f)
388
- ... for row in reader:
389
- ... print(row)
390
- ['answer.how_feeling']
391
- ['OK']
392
- ['Great']
393
- ['Terrible']
394
- ['OK']
395
-
396
- """
397
- if pretty_labels is None:
398
- pretty_labels = {}
399
- header, rows = self._make_tabular(
400
- remove_prefix=remove_prefix, pretty_labels=pretty_labels
401
- )
402
-
403
- if filename is not None:
404
- with open(filename, "w") as f:
405
- writer = csv.writer(f)
406
- writer.writerow(header)
407
- writer.writerows(rows)
408
- # print(f"Saved to {filename}")
409
- else:
410
- output = io.StringIO()
411
- writer = csv.writer(output)
412
- writer.writerow(header)
413
- writer.writerows(rows)
414
-
415
- if download_link:
416
- from IPython.display import HTML, display
417
-
418
- csv_file = output.getvalue()
419
- b64 = base64.b64encode(csv_file.encode()).decode()
420
- download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
421
- display(HTML(download_link))
422
- else:
423
- return output.getvalue()
424
-
425
- def download_link(self, pretty_labels: Optional[dict] = None) -> str:
426
- """Return a download link for the results.
427
-
428
- :param pretty_labels: A dictionary of pretty labels for the columns.
429
-
430
- >>> from edsl.results import Results
431
- >>> r = Results.example()
432
- >>> r.select('how_feeling').download_link()
433
- '<a href="data:file/csv;base64,YW5zd2VyLmhvd19mZWVsaW5nDQpPSw0KR3JlYXQNClRlcnJpYmxlDQpPSw0K" download="my_data.csv">Download CSV file</a>'
434
- """
435
- import base64
436
-
437
- csv_string = self.to_csv(pretty_labels=pretty_labels)
438
- b64 = base64.b64encode(csv_string.encode()).decode()
439
- return f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
440
-
441
- def to_pandas(
442
- self, remove_prefix: bool = False, lists_as_strings=False
443
- ) -> "DataFrame":
444
- """Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
445
-
446
- :param remove_prefix: Whether to remove the prefix from the column names.
447
-
448
- """
449
- return self._to_pandas_strings(remove_prefix)
450
- # if lists_as_strings:
451
- # return self._to_pandas_strings(remove_prefix=remove_prefix)
452
-
453
- # import pandas as pd
454
-
455
- # df = pd.DataFrame(self.data)
456
-
457
- # if remove_prefix:
458
- # # Optionally remove prefixes from column names
459
- # df.columns = [col.split(".")[-1] for col in df.columns]
460
-
461
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
462
- # return df_sorted
463
-
464
- def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
465
- """Convert the results to a pandas DataFrame.
466
-
467
- :param remove_prefix: Whether to remove the prefix from the column names.
468
-
469
- >>> from edsl.results import Results
470
- >>> r = Results.example()
471
- >>> r.select('how_feeling').to_pandas()
472
- answer.how_feeling
473
- 0 OK
474
- 1 Great
475
- 2 Terrible
476
- 3 OK
477
- """
478
-
479
- import pandas as pd
480
-
481
- csv_string = self.to_csv(remove_prefix=remove_prefix)
482
- csv_buffer = io.StringIO(csv_string)
483
- df = pd.read_csv(csv_buffer)
484
- # df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
485
- return df
486
-
487
- def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
488
- """Convert the results to a list of dictionaries, one per scenario.
489
-
490
- :param remove_prefix: Whether to remove the prefix from the column names.
491
-
492
- >>> from edsl.results import Results
493
- >>> r = Results.example()
494
- >>> r.select('how_feeling').to_scenario_list()
495
- ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
496
- """
497
- from edsl import ScenarioList, Scenario
498
-
499
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
500
- scenarios = []
501
- for d in list_of_dicts:
502
- scenarios.append(Scenario(d))
503
- return ScenarioList(scenarios)
504
- # return ScenarioList([Scenario(d) for d in list_of_dicts])
505
-
506
- def to_agent_list(self, remove_prefix: bool = True):
507
- """Convert the results to a list of dictionaries, one per agent.
508
-
509
- :param remove_prefix: Whether to remove the prefix from the column names.
510
-
511
- >>> from edsl.results import Results
512
- >>> r = Results.example()
513
- >>> r.select('how_feeling').to_agent_list()
514
- AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
515
- """
516
- from edsl import AgentList, Agent
517
-
518
- list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
519
- agents = []
520
- for d in list_of_dicts:
521
- if "name" in d:
522
- d["agent_name"] = d.pop("name")
523
- agents.append(Agent(d, name=d["agent_name"]))
524
- else:
525
- agents.append(Agent(d))
526
- return AgentList(agents)
527
-
528
- def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
529
- """Convert the results to a list of dictionaries.
530
-
531
- :param remove_prefix: Whether to remove the prefix from the column names.
532
-
533
- >>> from edsl.results import Results
534
- >>> r = Results.example()
535
- >>> r.select('how_feeling').to_dicts()
536
- [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]
537
-
538
- """
539
- list_of_keys = []
540
- list_of_values = []
541
- for entry in self:
542
- key, values = list(entry.items())[0]
543
- list_of_keys.append(key)
544
- list_of_values.append(values)
545
-
546
- if remove_prefix:
547
- list_of_keys = [key.split(".")[-1] for key in list_of_keys]
548
-
549
- list_of_dicts = []
550
- for entries in zip(*list_of_values):
551
- list_of_dicts.append(dict(zip(list_of_keys, entries)))
552
-
553
- return list_of_dicts
554
-
555
- def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
556
- """Convert the results to a list of lists.
557
-
558
- :param flatten: Whether to flatten the list of lists.
559
- :param remove_none: Whether to remove None values from the list.
560
-
561
- >>> from edsl.results import Results
562
- >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
563
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
564
-
565
- >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
566
- [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]
567
-
568
- >>> r = Results.example()
569
- >>> r.select('how_feeling').to_list()
570
- ['OK', 'Great', 'Terrible', 'OK']
571
-
572
- >>> from edsl.results.Dataset import Dataset
573
- >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
574
- [1, 9, 2, 3, 4]
575
-
576
- >>> from edsl.results.Dataset import Dataset
577
- >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
578
- Traceback (most recent call last):
579
- ...
580
- ValueError: Cannot flatten a list of lists when there are multiple columns selected.
581
-
582
-
583
- """
584
- if len(self.relevant_columns()) > 1 and flatten:
585
- raise ValueError(
586
- "Cannot flatten a list of lists when there are multiple columns selected."
587
- )
588
-
589
- if len(self.relevant_columns()) == 1:
590
- # if only one 'column' is selected (which is typical for this method
591
- list_to_return = list(self[0].values())[0]
592
- else:
593
- keys = self.relevant_columns()
594
- data = self.to_dicts(remove_prefix=False)
595
- list_to_return = []
596
- for d in data:
597
- list_to_return.append(tuple([d[key] for key in keys]))
598
-
599
- if remove_none:
600
- list_to_return = [item for item in list_to_return if item is not None]
601
-
602
- if flatten:
603
- new_list = []
604
- for item in list_to_return:
605
- if isinstance(item, list):
606
- new_list.extend(item)
607
- else:
608
- new_list.append(item)
609
- list_to_return = new_list
610
-
611
- return list_to_return
612
-
613
- def html(
614
- self,
615
- filename: Optional[str] = None,
616
- cta: str = "Open in browser",
617
- return_link: bool = False,
618
- ):
619
- import os
620
- import tempfile
621
- from edsl.utilities.utilities import is_notebook
622
- from IPython.display import HTML, display
623
- from edsl.utilities.utilities import is_notebook
624
-
625
- df = self.to_pandas()
626
-
627
- if filename is None:
628
- current_directory = os.getcwd()
629
- filename = tempfile.NamedTemporaryFile(
630
- "w", delete=False, suffix=".html", dir=current_directory
631
- ).name
632
-
633
- with open(filename, "w") as f:
634
- f.write(df.to_html())
635
-
636
- if is_notebook():
637
- html_url = f"/files/{filename}"
638
- html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
639
- display(HTML(html_link))
640
- else:
641
- print(f"Saved to {filename}")
642
- import webbrowser
643
- import os
644
-
645
- webbrowser.open(f"file://{os.path.abspath(filename)}")
646
-
647
- if return_link:
648
- return filename
649
-
650
- def tally(
651
- self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
652
- ) -> Union[dict, "Dataset"]:
653
- """Tally the values of a field or perform a cross-tab of multiple fields.
654
-
655
- :param fields: The field(s) to tally, multiple fields for cross-tabulation.
656
-
657
- >>> from edsl.results import Results
658
- >>> r = Results.example()
659
- >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
660
- {'OK': 2, 'Great': 1, 'Terrible': 1}
661
- >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
662
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
663
- """
664
- from collections import Counter
665
-
666
- if len(fields) == 0:
667
- fields = self.relevant_columns()
668
-
669
- relevant_columns_without_prefix = [
670
- column.split(".")[-1] for column in self.relevant_columns()
671
- ]
672
-
673
- if not all(
674
- f in self.relevant_columns() or f in relevant_columns_without_prefix
675
- for f in fields
676
- ):
677
- raise ValueError("One or more specified fields are not in the dataset.")
678
-
679
- if len(fields) == 1:
680
- field = fields[0]
681
- values = self._key_to_value(field)
682
- else:
683
- values = list(zip(*(self._key_to_value(field) for field in fields)))
684
-
685
- for value in values:
686
- if isinstance(value, list):
687
- value = tuple(value)
688
-
689
- tally = dict(Counter(values))
690
- sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
691
- if top_n is not None:
692
- sorted_tally = dict(list(sorted_tally.items())[:top_n])
693
-
694
- import warnings
695
- import textwrap
696
- from edsl.results.Dataset import Dataset
697
-
698
- if output == "dict":
699
- # why did I do this?
700
- warnings.warn(
701
- textwrap.dedent(
702
- """\
703
- The default output from tally will change to Dataset in the future.
704
- Use output='Dataset' to get the Dataset object for now.
705
- """
706
- )
707
- )
708
- return sorted_tally
709
- elif output == "Dataset":
710
- dataset = Dataset(
711
- [
712
- {"value": list(sorted_tally.keys())},
713
- {"count": list(sorted_tally.values())},
714
- ]
715
- )
716
- # return dataset
717
- sl = dataset.to_scenario_list().unpack(
718
- "value",
719
- new_names=[fields] if isinstance(fields, str) else fields,
720
- keep_original=False,
721
- )
722
- keys = list(sl[0].keys())
723
- keys.remove("count")
724
- keys.append("count")
725
- return sl.reorder_keys(keys).to_dataset()
726
-
727
-
728
- if __name__ == "__main__":
729
- import doctest
730
-
731
- doctest.testmod(optionflags=doctest.ELLIPSIS)
1
+ """Mixin class for exporting results."""
2
+
3
+ import io
4
+ import warnings
5
+ import textwrap
6
+ from typing import Optional, Tuple, Union, List
7
+
8
+ from edsl.results.file_exports import CSVExport, ExcelExport, JSONLExport, SQLiteExport
9
+
10
+
11
class DatasetExportMixin:
    """Mixin that adds inspection, conversion and export helpers to a Dataset.

    The host object is iterated as a sequence of single-entry dictionaries,
    each mapping one column name to the list of that column's values, and is
    also read through ``self.data``.  Some methods call members that are not
    defined here (``remove_prefix()``, ``_key_to_value()``) -- presumably
    provided by ``Dataset``; verify against the host class.
    """

    def relevant_columns(
        self, data_type: Optional[str] = None, remove_prefix=False
    ) -> list:
        """Return the column names present in the dataset, in dataset order.

        :param data_type: If given, keep only columns whose prefix (the text before the first ".") equals this value.
        :param remove_prefix: Whether to remove the prefix from the column names.
        :raises ValueError: If ``data_type`` is given and no column has that prefix.

        >>> from edsl.results.Dataset import Dataset
        >>> d = Dataset([{'a.b':[1,2,3,4]}])
        >>> d.relevant_columns()
        ['a.b']

        >>> d.relevant_columns(remove_prefix=True)
        ['b']

        >>> d.relevant_columns(data_type='a', remove_prefix=True)
        ['b']

        >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
        >>> d.relevant_columns()
        ['a', 'b']

        >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
        ['answer.how_feeling', 'answer.how_feeling_yesterday']

        >>> from edsl.results import Results
        >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
        ['model.frequency_penalty', ...]

        >>> Results.example().relevant_columns(data_type = "flimflam")
        Traceback (most recent call last):
        ...
        ValueError: No columns found for data type: flimflam. Available data types are: ...
        """
        columns = [list(entry.keys())[0] for entry in self]

        def get_data_type(column):
            # The data type is whatever precedes the first "."; columns
            # without a "." have no data type.
            return column.split(".")[0] if "." in column else None

        if data_type:
            # Filter on the full names: once the prefix has been stripped
            # there is nothing left for the data type to match against.
            matching = [c for c in columns if get_data_type(c) == data_type]
            if not matching:
                # None (un-prefixed columns) cannot be ordered against str,
                # so sort it last explicitly.
                all_data_types = sorted(
                    {get_data_type(c) for c in columns},
                    key=lambda t: (t is None, t or ""),
                )
                raise ValueError(
                    f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
                )
            columns = matching

        if remove_prefix:
            columns = [c.split(".")[-1] for c in columns]

        return columns

    def num_observations(self):
        """Return the number of observations in the dataset.

        Returns ``None`` when the dataset has no columns.

        :raises ValueError: If the columns do not all have the same length.

        >>> from edsl.results.Results import Results
        >>> Results.example().num_observations()
        4
        """
        expected = None
        for entry in self:
            _, values = list(entry.items())[0]
            if expected is None:
                expected = len(values)
            elif len(values) != expected:
                raise ValueError(
                    "The number of observations is not consistent across columns."
                )
        return expected

    def _make_tabular(
        self, remove_prefix: bool, pretty_labels: Optional[dict] = None
    ) -> tuple[list, List[list]]:
        """Turn the results into a tabular format.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param pretty_labels: Optional mapping from (possibly prefix-stripped) column name to display label.
        :return: ``(header, rows)``; an empty dataset yields ``([], [])``.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling')._make_tabular(remove_prefix = True)
        (['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])

        >>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
        (['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
        """
        tabular_repr = {}
        for entry in self.data:
            key, list_of_values = list(entry.items())[0]
            tabular_repr[key] = list_of_values

        full_header = [list(entry.keys())[0] for entry in self]

        # num_observations() is None for an empty dataset; treat that as zero
        # rows instead of failing inside range().
        row_count = self.num_observations() or 0
        rows = [[tabular_repr[h][i] for h in full_header] for i in range(row_count)]

        if remove_prefix:
            header = [h.split(".")[-1] for h in full_header]
        else:
            header = full_header

        if pretty_labels is not None:
            header = [pretty_labels.get(h, h) for h in header]

        return header, rows

    def print_long(self):
        """Print the results in a long format, one ``column: value`` line per value.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').print_long()
        answer.how_feeling: OK
        answer.how_feeling: Great
        answer.how_feeling: Terrible
        answer.how_feeling: OK
        """
        for entry in self:
            key, list_of_values = list(entry.items())[0]
            for value in list_of_values:
                print(f"{key}: {value}")

    def _get_tabular_data(
        self,
        remove_prefix: bool = False,
        pretty_labels: Optional[dict] = None,
    ) -> Tuple[List[str], List[List]]:
        """Internal method to get tabular data in a standard format.

        Args:
            remove_prefix: Whether to remove the prefix from column names
            pretty_labels: Dictionary mapping original column names to pretty labels

        Returns:
            Tuple containing (header_row, data_rows)
        """
        if pretty_labels is None:
            pretty_labels = {}

        return self._make_tabular(
            remove_prefix=remove_prefix, pretty_labels=pretty_labels
        )

    def to_jsonl(self, filename: Optional[str] = None) -> Optional["FileStore"]:
        """Export the results as JSONL by delegating to ``JSONLExport``."""
        return JSONLExport(data=self, filename=filename).export()

    def to_sqlite(
        self,
        filename: Optional[str] = None,
        remove_prefix: bool = False,
        pretty_labels: Optional[dict] = None,
        table_name: str = "results",
        if_exists: str = "replace",
    ) -> Optional["FileStore"]:
        """Export the results to a SQLite database by delegating to ``SQLiteExport``."""
        return SQLiteExport(
            data=self,
            filename=filename,
            remove_prefix=remove_prefix,
            pretty_labels=pretty_labels,
            table_name=table_name,
            if_exists=if_exists,
        ).export()

    def to_csv(
        self,
        filename: Optional[str] = None,
        remove_prefix: bool = False,
        pretty_labels: Optional[dict] = None,
    ) -> Optional["FileStore"]:
        """Export the results as CSV by delegating to ``CSVExport``."""
        return CSVExport(
            data=self,
            filename=filename,
            remove_prefix=remove_prefix,
            pretty_labels=pretty_labels,
        ).export()

    def to_excel(
        self,
        filename: Optional[str] = None,
        remove_prefix: bool = False,
        pretty_labels: Optional[dict] = None,
        sheet_name: Optional[str] = None,
    ) -> Optional["FileStore"]:
        """Export the results as an Excel file by delegating to ``ExcelExport``."""
        return ExcelExport(
            data=self,
            filename=filename,
            remove_prefix=remove_prefix,
            pretty_labels=pretty_labels,
            sheet_name=sheet_name,
        ).export()

    def _db(self, remove_prefix: bool = True):
        """Create a SQLite database in memory and return the connection.

        The dataset is written to a single table named ``self``.

        Args:
            remove_prefix: Whether to remove the prefix from the column names

        Returns:
            A database connection; the caller is responsible for closing it.
        """
        from sqlalchemy import create_engine

        engine = create_engine("sqlite:///:memory:")
        source = self.remove_prefix() if remove_prefix else self
        df = source.to_pandas(lists_as_strings=True)
        df.to_sql(
            "self",
            engine,
            index=False,
            if_exists="replace",
        )
        return engine.connect()

    def sql(
        self,
        query: str,
        transpose: Optional[bool] = None,
        transpose_by: Optional[str] = None,
        remove_prefix: bool = True,
    ) -> "Dataset":
        """Execute a SQL query against the dataset and return the result as a Dataset.

        The data is available to the query as a table named ``self``.

        Args:
            query: The SQL query to execute
            transpose: Whether to transpose the result, using its first column as the index
            transpose_by: The column to use as the index when transposing (implies transposing)
            remove_prefix: Whether to remove the prefix from the column names

        Returns:
            A Dataset built from the query result.
        """
        import pandas as pd

        conn = self._db(remove_prefix=remove_prefix)
        try:
            df = pd.read_sql_query(query, conn)
        finally:
            # The in-memory database is only needed for this one query.
            conn.close()

        if transpose or transpose_by:
            index_column = transpose_by if transpose_by else df.columns[0]
            df = df.set_index(index_column).transpose()

        from edsl.results.Dataset import Dataset

        return Dataset.from_pandas_dataframe(df)

    def to_pandas(
        self, remove_prefix: bool = False, lists_as_strings=False
    ) -> "DataFrame":
        """Convert the results to a pandas DataFrame.

        The conversion goes through CSV, so list values arrive as strings.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param lists_as_strings: Accepted for compatibility; currently has no effect.
        """
        return self._to_pandas_strings(remove_prefix)

    def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
        """Convert the results to a pandas DataFrame by parsing the CSV export.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_pandas()
          answer.how_feeling
        0                 OK
        1              Great
        2           Terrible
        3                 OK
        """
        import pandas as pd

        csv_string = self.to_csv(remove_prefix=remove_prefix).text
        return pd.read_csv(io.StringIO(csv_string))

    def to_polars(
        self, remove_prefix: bool = False, lists_as_strings=False
    ) -> "pl.DataFrame":
        """Convert the results to a Polars DataFrame.

        :param remove_prefix: Whether to remove the prefix from the column names.
        :param lists_as_strings: Accepted for compatibility; currently has no effect.
        """
        return self._to_polars_strings(remove_prefix)

    def _to_polars_strings(self, remove_prefix: bool = False) -> "pl.DataFrame":
        """Convert the results to a Polars DataFrame by parsing the CSV export.

        :param remove_prefix: Whether to remove the prefix from the column names.
        """
        import polars as pl

        csv_string = self.to_csv(remove_prefix=remove_prefix).text
        return pl.read_csv(io.StringIO(csv_string))

    def to_scenario_list(self, remove_prefix: bool = True) -> "ScenarioList":
        """Convert the results to a ScenarioList, one Scenario per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_scenario_list()
        ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
        """
        from edsl.scenarios.ScenarioList import ScenarioList
        from edsl.scenarios.Scenario import Scenario

        rows = self.to_dicts(remove_prefix=remove_prefix)
        return ScenarioList([Scenario(row) for row in rows])

    def to_agent_list(self, remove_prefix: bool = True):
        """Convert the results to an AgentList, exactly one Agent per observation.

        A ``name`` column is renamed to the ``agent_name`` trait and used as the
        agent's name.  An ``agent_parameters`` column is removed from the traits
        and supplies the agent's ``instruction`` and, when there is no ``name``
        column, its name.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_agent_list()
        AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
        """
        from edsl.agents import Agent
        from edsl.agents.AgentList import AgentList

        agents = []
        for d in self.to_dicts(remove_prefix=remove_prefix):
            # Collect constructor arguments first so each row produces a
            # single agent regardless of which special columns it carries.
            agent_kwargs = {}
            if "agent_parameters" in d:
                agent_parameters = d.pop("agent_parameters")
                agent_kwargs["name"] = agent_parameters.get("name", None)
                agent_kwargs["instruction"] = agent_parameters.get("instruction", None)
            if "name" in d:
                d["agent_name"] = d.pop("name")
                agent_kwargs["name"] = d["agent_name"]
            agents.append(Agent(d, **agent_kwargs))
        return AgentList(agents)

    def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
        """Convert the results to a list of dictionaries, one per observation.

        :param remove_prefix: Whether to remove the prefix from the column names.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').to_dicts()
        [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

        """
        list_of_keys = []
        list_of_values = []
        for entry in self:
            key, values = list(entry.items())[0]
            list_of_keys.append(key)
            list_of_values.append(values)

        if remove_prefix:
            list_of_keys = [key.split(".")[-1] for key in list_of_keys]

        # zip(*columns) walks the columns in lockstep, yielding one row at a time.
        return [dict(zip(list_of_keys, row)) for row in zip(*list_of_values)]

    def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
        """Convert the results to a list (one column) or a list of tuples (several columns).

        :param flatten: Whether to flatten the list of lists.
        :param remove_none: Whether to remove None values from the list.
        :param unzipped: Accepted for compatibility; currently has no effect.
        :raises ValueError: If ``flatten`` is requested with more than one column.

        >>> from edsl.results import Results
        >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

        >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
        [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

        >>> r = Results.example()
        >>> r.select('how_feeling').to_list()
        ['OK', 'Great', 'Terrible', 'OK']

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
        [1, 9, 2, 3, 4]

        >>> from edsl.results.Dataset import Dataset
        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
        Traceback (most recent call last):
        ...
        ValueError: Cannot flatten a list of lists when there are multiple columns selected.


        """
        keys = self.relevant_columns()

        if len(keys) > 1 and flatten:
            raise ValueError(
                "Cannot flatten a list of lists when there are multiple columns selected."
            )

        if len(keys) == 1:
            # Only one column is selected (the typical case for this method).
            list_to_return = list(self[0].values())[0]
        else:
            data = self.to_dicts(remove_prefix=False)
            list_to_return = [tuple(d[key] for key in keys) for d in data]

        if remove_none:
            list_to_return = [item for item in list_to_return if item is not None]

        if flatten:
            new_list = []
            for item in list_to_return:
                if isinstance(item, list):
                    new_list.extend(item)
                else:
                    new_list.append(item)
            list_to_return = new_list

        from edsl.utilities.PrettyList import PrettyList

        return PrettyList(list_to_return)

    def html(
        self,
        filename: Optional[str] = None,
        cta: str = "Open in browser",
        return_link: bool = False,
    ):
        """Write the dataset as an HTML table and open or link to it.

        :param filename: Destination path; a new ``.html`` file in the current directory is used when omitted.
        :param cta: Link text shown when running inside a notebook.
        :param return_link: Whether to return the path of the written file.
        """
        import os
        import tempfile
        from edsl.utilities.utilities import is_notebook

        df = self.to_pandas()

        if filename is None:
            # Only the generated name is needed; close the handle right away
            # so the file can be reopened for writing below.
            with tempfile.NamedTemporaryFile(
                "w", delete=False, suffix=".html", dir=os.getcwd()
            ) as tmp:
                filename = tmp.name

        with open(filename, "w") as f:
            f.write(df.to_html())

        if is_notebook():
            # Imported here so that IPython is only required inside notebooks.
            from IPython.display import HTML, display

            # NOTE(review): filename may be an absolute path here -- confirm
            # the notebook server resolves "/files/<path>" as intended.
            html_url = f"/files/{filename}"
            html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
            display(HTML(html_link))
        else:
            import webbrowser

            print(f"Saved to {filename}")
            webbrowser.open(f"file://{os.path.abspath(filename)}")

        if return_link:
            return filename

    def tally(
        self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
    ) -> Union[dict, "Dataset"]:
        """Tally the values of a field or perform a cross-tab of multiple fields.

        :param fields: The field(s) to tally, multiple fields for cross-tabulation. All columns are used when omitted.
        :param top_n: If given, keep only the ``top_n`` most frequent entries.
        :param output: ``"Dataset"`` (default) or ``"dict"``.
        :raises ValueError: If a field is not in the dataset or ``output`` is not recognised.

        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
        {'OK': 2, 'Great': 1, 'Terrible': 1}
        >>> from edsl.results.Dataset import Dataset
        >>> expected = Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
        >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset") == expected
        True
        """
        from collections import Counter

        if output not in ("dict", "Dataset"):
            raise ValueError(
                f"Unknown output: {output!r}. Expected 'dict' or 'Dataset'."
            )

        known_columns = self.relevant_columns()
        if len(fields) == 0:
            fields = known_columns

        relevant_columns_without_prefix = [
            column.split(".")[-1] for column in known_columns
        ]

        if not all(
            f in known_columns or f in relevant_columns_without_prefix for f in fields
        ):
            raise ValueError("One or more specified fields are not in the dataset.")

        def as_hashable(value):
            # Counter needs hashable keys; list-valued answers (including
            # lists nested in a cross-tab row) are counted as tuples.
            if isinstance(value, (list, tuple)):
                return tuple(as_hashable(item) for item in value)
            return value

        if len(fields) == 1:
            values = self._key_to_value(fields[0])
        else:
            values = list(zip(*(self._key_to_value(field) for field in fields)))
        values = [as_hashable(value) for value in values]

        # Most frequent first; sorted() is stable, so ties keep first-seen order.
        ranked = sorted(Counter(values).items(), key=lambda item: -item[1])
        if top_n is not None:
            ranked = ranked[:top_n]
        sorted_tally = dict(ranked)

        if output == "dict":
            warnings.warn(
                textwrap.dedent(
                    """\
                    The default output from tally will change to Dataset in the future.
                    Use output='Dataset' to get the Dataset object for now.
                    """
                )
            )
            return sorted_tally

        from edsl.results.Dataset import Dataset

        dataset = Dataset(
            [
                {"value": list(sorted_tally.keys())},
                {"count": list(sorted_tally.values())},
            ]
        )
        # NOTE(review): for a single list-valued field the tallied values are
        # tuples -- confirm unpack() handles that case as intended.
        sl = dataset.to_scenario_list().unpack(
            "value",
            new_names=[fields] if isinstance(fields, str) else fields,
            keep_original=False,
        )
        # Move "count" to the end so the tallied fields come first.
        keys = list(sl[0].keys())
        keys.remove("count")
        keys.append("count")
        return sl.reorder_keys(keys).to_dataset()
589
+
590
+
591
# Run this module's doctests when it is executed as a script; ELLIPSIS lets
# "..." in expected output match arbitrary text.
if __name__ == "__main__":
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)