edsl 0.1.15__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (407) hide show
  1. edsl/Base.py +348 -38
  2. edsl/BaseDiff.py +260 -0
  3. edsl/TemplateLoader.py +24 -0
  4. edsl/__init__.py +45 -10
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +842 -144
  7. edsl/agents/AgentList.py +521 -25
  8. edsl/agents/Invigilator.py +250 -374
  9. edsl/agents/InvigilatorBase.py +257 -0
  10. edsl/agents/PromptConstructor.py +272 -0
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/descriptors.py +43 -13
  14. edsl/agents/prompt_helpers.py +129 -0
  15. edsl/agents/question_option_processor.py +172 -0
  16. edsl/auto/AutoStudy.py +130 -0
  17. edsl/auto/StageBase.py +243 -0
  18. edsl/auto/StageGenerateSurvey.py +178 -0
  19. edsl/auto/StageLabelQuestions.py +125 -0
  20. edsl/auto/StagePersona.py +61 -0
  21. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  22. edsl/auto/StagePersonaDimensionValues.py +74 -0
  23. edsl/auto/StagePersonaDimensions.py +69 -0
  24. edsl/auto/StageQuestions.py +74 -0
  25. edsl/auto/SurveyCreatorPipeline.py +21 -0
  26. edsl/auto/utilities.py +218 -0
  27. edsl/base/Base.py +279 -0
  28. edsl/config.py +115 -113
  29. edsl/conversation/Conversation.py +290 -0
  30. edsl/conversation/car_buying.py +59 -0
  31. edsl/conversation/chips.py +95 -0
  32. edsl/conversation/mug_negotiation.py +81 -0
  33. edsl/conversation/next_speaker_utilities.py +93 -0
  34. edsl/coop/CoopFunctionsMixin.py +15 -0
  35. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  36. edsl/coop/PriceFetcher.py +54 -0
  37. edsl/coop/__init__.py +1 -0
  38. edsl/coop/coop.py +1029 -134
  39. edsl/coop/utils.py +131 -0
  40. edsl/data/Cache.py +560 -89
  41. edsl/data/CacheEntry.py +230 -0
  42. edsl/data/CacheHandler.py +168 -0
  43. edsl/data/RemoteCacheSync.py +186 -0
  44. edsl/data/SQLiteDict.py +292 -0
  45. edsl/data/__init__.py +5 -3
  46. edsl/data/orm.py +6 -33
  47. edsl/data_transfer_models.py +74 -27
  48. edsl/enums.py +165 -8
  49. edsl/exceptions/BaseException.py +21 -0
  50. edsl/exceptions/__init__.py +52 -46
  51. edsl/exceptions/agents.py +33 -15
  52. edsl/exceptions/cache.py +5 -0
  53. edsl/exceptions/coop.py +8 -0
  54. edsl/exceptions/general.py +34 -0
  55. edsl/exceptions/inference_services.py +5 -0
  56. edsl/exceptions/jobs.py +15 -0
  57. edsl/exceptions/language_models.py +46 -1
  58. edsl/exceptions/questions.py +80 -5
  59. edsl/exceptions/results.py +16 -5
  60. edsl/exceptions/scenarios.py +29 -0
  61. edsl/exceptions/surveys.py +13 -10
  62. edsl/inference_services/AnthropicService.py +106 -0
  63. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  64. edsl/inference_services/AvailableModelFetcher.py +215 -0
  65. edsl/inference_services/AwsBedrock.py +118 -0
  66. edsl/inference_services/AzureAI.py +215 -0
  67. edsl/inference_services/DeepInfraService.py +18 -0
  68. edsl/inference_services/GoogleService.py +143 -0
  69. edsl/inference_services/GroqService.py +20 -0
  70. edsl/inference_services/InferenceServiceABC.py +80 -0
  71. edsl/inference_services/InferenceServicesCollection.py +138 -0
  72. edsl/inference_services/MistralAIService.py +120 -0
  73. edsl/inference_services/OllamaService.py +18 -0
  74. edsl/inference_services/OpenAIService.py +236 -0
  75. edsl/inference_services/PerplexityService.py +160 -0
  76. edsl/inference_services/ServiceAvailability.py +135 -0
  77. edsl/inference_services/TestService.py +90 -0
  78. edsl/inference_services/TogetherAIService.py +172 -0
  79. edsl/inference_services/data_structures.py +134 -0
  80. edsl/inference_services/models_available_cache.py +118 -0
  81. edsl/inference_services/rate_limits_cache.py +25 -0
  82. edsl/inference_services/registry.py +41 -0
  83. edsl/inference_services/write_available.py +10 -0
  84. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  85. edsl/jobs/Answers.py +21 -20
  86. edsl/jobs/FetchInvigilator.py +47 -0
  87. edsl/jobs/InterviewTaskManager.py +98 -0
  88. edsl/jobs/InterviewsConstructor.py +50 -0
  89. edsl/jobs/Jobs.py +684 -206
  90. edsl/jobs/JobsChecks.py +172 -0
  91. edsl/jobs/JobsComponentConstructor.py +189 -0
  92. edsl/jobs/JobsPrompts.py +270 -0
  93. edsl/jobs/JobsRemoteInferenceHandler.py +311 -0
  94. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  95. edsl/jobs/RequestTokenEstimator.py +30 -0
  96. edsl/jobs/async_interview_runner.py +138 -0
  97. edsl/jobs/buckets/BucketCollection.py +104 -0
  98. edsl/jobs/buckets/ModelBuckets.py +65 -0
  99. edsl/jobs/buckets/TokenBucket.py +283 -0
  100. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  101. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  102. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  103. edsl/jobs/data_structures.py +120 -0
  104. edsl/jobs/decorators.py +35 -0
  105. edsl/jobs/interviews/Interview.py +392 -0
  106. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -0
  107. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -0
  108. edsl/jobs/interviews/InterviewStatistic.py +63 -0
  109. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -0
  110. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -0
  111. edsl/jobs/interviews/InterviewStatusLog.py +92 -0
  112. edsl/jobs/interviews/ReportErrors.py +66 -0
  113. edsl/jobs/interviews/interview_status_enum.py +9 -0
  114. edsl/jobs/jobs_status_enums.py +9 -0
  115. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  116. edsl/jobs/results_exceptions_handler.py +98 -0
  117. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -110
  118. edsl/jobs/runners/JobsRunnerStatus.py +298 -0
  119. edsl/jobs/tasks/QuestionTaskCreator.py +244 -0
  120. edsl/jobs/tasks/TaskCreators.py +64 -0
  121. edsl/jobs/tasks/TaskHistory.py +470 -0
  122. edsl/jobs/tasks/TaskStatusLog.py +23 -0
  123. edsl/jobs/tasks/task_status_enum.py +161 -0
  124. edsl/jobs/tokens/InterviewTokenUsage.py +27 -0
  125. edsl/jobs/tokens/TokenUsage.py +34 -0
  126. edsl/language_models/ComputeCost.py +63 -0
  127. edsl/language_models/LanguageModel.py +507 -386
  128. edsl/language_models/ModelList.py +164 -0
  129. edsl/language_models/PriceManager.py +127 -0
  130. edsl/language_models/RawResponseHandler.py +106 -0
  131. edsl/language_models/RegisterLanguageModelsMeta.py +184 -0
  132. edsl/language_models/__init__.py +1 -8
  133. edsl/language_models/fake_openai_call.py +15 -0
  134. edsl/language_models/fake_openai_service.py +61 -0
  135. edsl/language_models/key_management/KeyLookup.py +63 -0
  136. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  137. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  138. edsl/language_models/key_management/__init__.py +0 -0
  139. edsl/language_models/key_management/models.py +131 -0
  140. edsl/language_models/model.py +256 -0
  141. edsl/language_models/repair.py +109 -41
  142. edsl/language_models/utilities.py +65 -0
  143. edsl/notebooks/Notebook.py +263 -0
  144. edsl/notebooks/NotebookToLaTeX.py +142 -0
  145. edsl/notebooks/__init__.py +1 -0
  146. edsl/prompts/Prompt.py +222 -93
  147. edsl/prompts/__init__.py +1 -1
  148. edsl/questions/ExceptionExplainer.py +77 -0
  149. edsl/questions/HTMLQuestion.py +103 -0
  150. edsl/questions/QuestionBase.py +518 -0
  151. edsl/questions/QuestionBasePromptsMixin.py +221 -0
  152. edsl/questions/QuestionBudget.py +164 -67
  153. edsl/questions/QuestionCheckBox.py +281 -62
  154. edsl/questions/QuestionDict.py +343 -0
  155. edsl/questions/QuestionExtract.py +136 -50
  156. edsl/questions/QuestionFreeText.py +79 -55
  157. edsl/questions/QuestionFunctional.py +138 -41
  158. edsl/questions/QuestionList.py +184 -57
  159. edsl/questions/QuestionMatrix.py +265 -0
  160. edsl/questions/QuestionMultipleChoice.py +293 -69
  161. edsl/questions/QuestionNumerical.py +109 -56
  162. edsl/questions/QuestionRank.py +244 -49
  163. edsl/questions/Quick.py +41 -0
  164. edsl/questions/SimpleAskMixin.py +74 -0
  165. edsl/questions/__init__.py +9 -6
  166. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +153 -38
  167. edsl/questions/compose_questions.py +13 -7
  168. edsl/questions/data_structures.py +20 -0
  169. edsl/questions/decorators.py +21 -0
  170. edsl/questions/derived/QuestionLikertFive.py +28 -26
  171. edsl/questions/derived/QuestionLinearScale.py +41 -28
  172. edsl/questions/derived/QuestionTopK.py +34 -26
  173. edsl/questions/derived/QuestionYesNo.py +40 -27
  174. edsl/questions/descriptors.py +228 -74
  175. edsl/questions/loop_processor.py +149 -0
  176. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  177. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  178. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  179. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  180. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  181. edsl/questions/prompt_templates/question_list.jinja +17 -0
  182. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  183. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  184. edsl/questions/question_base_gen_mixin.py +168 -0
  185. edsl/questions/question_registry.py +130 -46
  186. edsl/questions/register_questions_meta.py +71 -0
  187. edsl/questions/response_validator_abc.py +188 -0
  188. edsl/questions/response_validator_factory.py +34 -0
  189. edsl/questions/settings.py +5 -2
  190. edsl/questions/templates/__init__.py +0 -0
  191. edsl/questions/templates/budget/__init__.py +0 -0
  192. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  193. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  194. edsl/questions/templates/checkbox/__init__.py +0 -0
  195. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  196. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  197. edsl/questions/templates/dict/__init__.py +0 -0
  198. edsl/questions/templates/dict/answering_instructions.jinja +21 -0
  199. edsl/questions/templates/dict/question_presentation.jinja +1 -0
  200. edsl/questions/templates/extract/__init__.py +0 -0
  201. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  202. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  203. edsl/questions/templates/free_text/__init__.py +0 -0
  204. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  205. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  206. edsl/questions/templates/likert_five/__init__.py +0 -0
  207. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  208. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  209. edsl/questions/templates/linear_scale/__init__.py +0 -0
  210. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  211. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  212. edsl/questions/templates/list/__init__.py +0 -0
  213. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  214. edsl/questions/templates/list/question_presentation.jinja +5 -0
  215. edsl/questions/templates/matrix/__init__.py +1 -0
  216. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  217. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  218. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  219. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  220. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  221. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  222. edsl/questions/templates/numerical/__init__.py +0 -0
  223. edsl/questions/templates/numerical/answering_instructions.jinja +7 -0
  224. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  225. edsl/questions/templates/rank/__init__.py +0 -0
  226. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  227. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  228. edsl/questions/templates/top_k/__init__.py +0 -0
  229. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  230. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  231. edsl/questions/templates/yes_no/__init__.py +0 -0
  232. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  233. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  234. edsl/results/CSSParameterizer.py +108 -0
  235. edsl/results/Dataset.py +550 -19
  236. edsl/results/DatasetExportMixin.py +594 -0
  237. edsl/results/DatasetTree.py +295 -0
  238. edsl/results/MarkdownToDocx.py +122 -0
  239. edsl/results/MarkdownToPDF.py +111 -0
  240. edsl/results/Result.py +477 -173
  241. edsl/results/Results.py +987 -269
  242. edsl/results/ResultsExportMixin.py +28 -125
  243. edsl/results/ResultsGGMixin.py +83 -15
  244. edsl/results/TableDisplay.py +125 -0
  245. edsl/results/TextEditor.py +50 -0
  246. edsl/results/__init__.py +1 -1
  247. edsl/results/file_exports.py +252 -0
  248. edsl/results/results_fetch_mixin.py +33 -0
  249. edsl/results/results_selector.py +145 -0
  250. edsl/results/results_tools_mixin.py +98 -0
  251. edsl/results/smart_objects.py +96 -0
  252. edsl/results/table_data_class.py +12 -0
  253. edsl/results/table_display.css +78 -0
  254. edsl/results/table_renderers.py +118 -0
  255. edsl/results/tree_explore.py +115 -0
  256. edsl/scenarios/ConstructDownloadLink.py +109 -0
  257. edsl/scenarios/DocumentChunker.py +102 -0
  258. edsl/scenarios/DocxScenario.py +16 -0
  259. edsl/scenarios/FileStore.py +543 -0
  260. edsl/scenarios/PdfExtractor.py +40 -0
  261. edsl/scenarios/Scenario.py +431 -62
  262. edsl/scenarios/ScenarioHtmlMixin.py +65 -0
  263. edsl/scenarios/ScenarioList.py +1415 -45
  264. edsl/scenarios/ScenarioListExportMixin.py +45 -0
  265. edsl/scenarios/ScenarioListPdfMixin.py +239 -0
  266. edsl/scenarios/__init__.py +2 -0
  267. edsl/scenarios/directory_scanner.py +96 -0
  268. edsl/scenarios/file_methods.py +85 -0
  269. edsl/scenarios/handlers/__init__.py +13 -0
  270. edsl/scenarios/handlers/csv.py +49 -0
  271. edsl/scenarios/handlers/docx.py +76 -0
  272. edsl/scenarios/handlers/html.py +37 -0
  273. edsl/scenarios/handlers/json.py +111 -0
  274. edsl/scenarios/handlers/latex.py +5 -0
  275. edsl/scenarios/handlers/md.py +51 -0
  276. edsl/scenarios/handlers/pdf.py +68 -0
  277. edsl/scenarios/handlers/png.py +39 -0
  278. edsl/scenarios/handlers/pptx.py +105 -0
  279. edsl/scenarios/handlers/py.py +294 -0
  280. edsl/scenarios/handlers/sql.py +313 -0
  281. edsl/scenarios/handlers/sqlite.py +149 -0
  282. edsl/scenarios/handlers/txt.py +33 -0
  283. edsl/scenarios/scenario_join.py +131 -0
  284. edsl/scenarios/scenario_selector.py +156 -0
  285. edsl/shared.py +1 -0
  286. edsl/study/ObjectEntry.py +173 -0
  287. edsl/study/ProofOfWork.py +113 -0
  288. edsl/study/SnapShot.py +80 -0
  289. edsl/study/Study.py +521 -0
  290. edsl/study/__init__.py +4 -0
  291. edsl/surveys/ConstructDAG.py +92 -0
  292. edsl/surveys/DAG.py +92 -11
  293. edsl/surveys/EditSurvey.py +221 -0
  294. edsl/surveys/InstructionHandler.py +100 -0
  295. edsl/surveys/Memory.py +9 -4
  296. edsl/surveys/MemoryManagement.py +72 -0
  297. edsl/surveys/MemoryPlan.py +156 -35
  298. edsl/surveys/Rule.py +221 -74
  299. edsl/surveys/RuleCollection.py +241 -61
  300. edsl/surveys/RuleManager.py +172 -0
  301. edsl/surveys/Simulator.py +75 -0
  302. edsl/surveys/Survey.py +1079 -339
  303. edsl/surveys/SurveyCSS.py +273 -0
  304. edsl/surveys/SurveyExportMixin.py +235 -40
  305. edsl/surveys/SurveyFlowVisualization.py +181 -0
  306. edsl/surveys/SurveyQualtricsImport.py +284 -0
  307. edsl/surveys/SurveyToApp.py +141 -0
  308. edsl/surveys/__init__.py +4 -2
  309. edsl/surveys/base.py +19 -3
  310. edsl/surveys/descriptors.py +17 -6
  311. edsl/surveys/instructions/ChangeInstruction.py +48 -0
  312. edsl/surveys/instructions/Instruction.py +56 -0
  313. edsl/surveys/instructions/InstructionCollection.py +82 -0
  314. edsl/surveys/instructions/__init__.py +0 -0
  315. edsl/templates/error_reporting/base.html +24 -0
  316. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  317. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  318. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  319. edsl/templates/error_reporting/interview_details.html +116 -0
  320. edsl/templates/error_reporting/interviews.html +19 -0
  321. edsl/templates/error_reporting/overview.html +5 -0
  322. edsl/templates/error_reporting/performance_plot.html +2 -0
  323. edsl/templates/error_reporting/report.css +74 -0
  324. edsl/templates/error_reporting/report.html +118 -0
  325. edsl/templates/error_reporting/report.js +25 -0
  326. edsl/tools/__init__.py +1 -0
  327. edsl/tools/clusters.py +192 -0
  328. edsl/tools/embeddings.py +27 -0
  329. edsl/tools/embeddings_plotting.py +118 -0
  330. edsl/tools/plotting.py +112 -0
  331. edsl/tools/summarize.py +18 -0
  332. edsl/utilities/PrettyList.py +56 -0
  333. edsl/utilities/SystemInfo.py +5 -0
  334. edsl/utilities/__init__.py +21 -20
  335. edsl/utilities/ast_utilities.py +3 -0
  336. edsl/utilities/data/Registry.py +2 -0
  337. edsl/utilities/decorators.py +41 -0
  338. edsl/utilities/gcp_bucket/__init__.py +0 -0
  339. edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
  340. edsl/utilities/interface.py +310 -60
  341. edsl/utilities/is_notebook.py +18 -0
  342. edsl/utilities/is_valid_variable_name.py +11 -0
  343. edsl/utilities/naming_utilities.py +263 -0
  344. edsl/utilities/remove_edsl_version.py +24 -0
  345. edsl/utilities/repair_functions.py +28 -0
  346. edsl/utilities/restricted_python.py +70 -0
  347. edsl/utilities/utilities.py +203 -13
  348. edsl-0.1.40.dist-info/METADATA +111 -0
  349. edsl-0.1.40.dist-info/RECORD +362 -0
  350. {edsl-0.1.15.dist-info → edsl-0.1.40.dist-info}/WHEEL +1 -1
  351. edsl/agents/AgentListExportMixin.py +0 -24
  352. edsl/coop/old.py +0 -31
  353. edsl/data/Database.py +0 -141
  354. edsl/data/crud.py +0 -121
  355. edsl/jobs/Interview.py +0 -435
  356. edsl/jobs/JobsRunner.py +0 -63
  357. edsl/jobs/JobsRunnerStatusMixin.py +0 -115
  358. edsl/jobs/base.py +0 -47
  359. edsl/jobs/buckets.py +0 -178
  360. edsl/jobs/runners/JobsRunnerDryRun.py +0 -19
  361. edsl/jobs/runners/JobsRunnerStreaming.py +0 -54
  362. edsl/jobs/task_management.py +0 -215
  363. edsl/jobs/token_tracking.py +0 -78
  364. edsl/language_models/DeepInfra.py +0 -69
  365. edsl/language_models/OpenAI.py +0 -98
  366. edsl/language_models/model_interfaces/GeminiPro.py +0 -66
  367. edsl/language_models/model_interfaces/LanguageModelOpenAIFour.py +0 -8
  368. edsl/language_models/model_interfaces/LanguageModelOpenAIThreeFiveTurbo.py +0 -8
  369. edsl/language_models/model_interfaces/LlamaTwo13B.py +0 -21
  370. edsl/language_models/model_interfaces/LlamaTwo70B.py +0 -21
  371. edsl/language_models/model_interfaces/Mixtral8x7B.py +0 -24
  372. edsl/language_models/registry.py +0 -81
  373. edsl/language_models/schemas.py +0 -15
  374. edsl/language_models/unused/ReplicateBase.py +0 -83
  375. edsl/prompts/QuestionInstructionsBase.py +0 -6
  376. edsl/prompts/library/agent_instructions.py +0 -29
  377. edsl/prompts/library/agent_persona.py +0 -17
  378. edsl/prompts/library/question_budget.py +0 -26
  379. edsl/prompts/library/question_checkbox.py +0 -32
  380. edsl/prompts/library/question_extract.py +0 -19
  381. edsl/prompts/library/question_freetext.py +0 -14
  382. edsl/prompts/library/question_linear_scale.py +0 -20
  383. edsl/prompts/library/question_list.py +0 -22
  384. edsl/prompts/library/question_multiple_choice.py +0 -44
  385. edsl/prompts/library/question_numerical.py +0 -31
  386. edsl/prompts/library/question_rank.py +0 -21
  387. edsl/prompts/prompt_config.py +0 -33
  388. edsl/prompts/registry.py +0 -185
  389. edsl/questions/Question.py +0 -240
  390. edsl/report/InputOutputDataTypes.py +0 -134
  391. edsl/report/RegressionMixin.py +0 -28
  392. edsl/report/ReportOutputs.py +0 -1228
  393. edsl/report/ResultsFetchMixin.py +0 -106
  394. edsl/report/ResultsOutputMixin.py +0 -14
  395. edsl/report/demo.ipynb +0 -645
  396. edsl/results/ResultsDBMixin.py +0 -184
  397. edsl/surveys/SurveyFlowVisualizationMixin.py +0 -92
  398. edsl/trackers/Tracker.py +0 -91
  399. edsl/trackers/TrackerAPI.py +0 -196
  400. edsl/trackers/TrackerTasks.py +0 -70
  401. edsl/utilities/pastebin.py +0 -141
  402. edsl-0.1.15.dist-info/METADATA +0 -69
  403. edsl-0.1.15.dist-info/RECORD +0 -142
  404. /edsl/{language_models/model_interfaces → inference_services}/__init__.py +0 -0
  405. /edsl/{report/__init__.py → jobs/runners/JobsRunnerStatusData.py} +0 -0
  406. /edsl/{trackers/__init__.py → language_models/ServiceDataSources.py} +0 -0
  407. {edsl-0.1.15.dist-info → edsl-0.1.40.dist-info}/LICENSE +0 -0
@@ -1,1228 +0,0 @@
1
- import platform
2
- import subprocess
3
- import tempfile
4
-
5
- import copy
6
- import base64
7
- import functools
8
- import inspect
9
- import markdown2
10
- import math
11
- import matplotlib.pyplot as plt
12
- import numpy as np
13
- import os
14
- import pandas as pd
15
- import seaborn as sns
16
- import string
17
- import tempfile
18
- import textwrap
19
- import warnings
20
- import webbrowser
21
- from abc import ABC, abstractmethod, ABCMeta
22
- from collections import Counter
23
- from dataclasses import asdict
24
- from io import BytesIO
25
- from IPython.display import display, HTML
26
- from scipy import stats
27
- from scipy.stats import chisquare
28
- from statsmodels.miscmodels.ordinal_model import OrderedModel
29
- from statsmodels.tools.sm_exceptions import HessianInversionWarning, ConvergenceWarning
30
- from typing import Callable
31
- from wordcloud import WordCloud
32
- from edsl.report.InputOutputDataTypes import (
33
- CategoricalData,
34
- NumericalData,
35
- PlotData,
36
- TallyData,
37
- CrossTabData,
38
- FreeTextData,
39
- ChiSquareData,
40
- RegressionData,
41
- )
42
- from edsl.utilities import is_notebook
43
-
44
-
45
- def save_figure(filename):
46
- base, ext = os.path.splitext(filename)
47
- if ext.lower() == ".png":
48
- plt.savefig(filename, format="png")
49
- elif ext.lower() == ".jpg" or ext.lower() == ".jpeg":
50
- plt.savefig(filename, format="jpeg")
51
- elif ext.lower() == ".svg":
52
- plt.savefig(filename, format="svg")
53
- else:
54
- print("Unsupported file extension. Saving as PNG by default.")
55
- plt.savefig(base + ".png", format="png")
56
-
57
-
58
- warnings.filterwarnings(
59
- "ignore",
60
- category=FutureWarning,
61
- module="seaborn._oldcore",
62
- message=".*is_categorical_dtype is deprecated.*",
63
- )
64
- warnings.filterwarnings("ignore", category=RuntimeWarning, module="statsmodels.*")
65
- warnings.filterwarnings("ignore", category=RuntimeWarning, module="scipy.optimize.*")
66
- warnings.filterwarnings("ignore", category=HessianInversionWarning)
67
- warnings.filterwarnings("ignore", category=ConvergenceWarning)
68
-
69
-
70
- def open_temp_file(file_path):
71
- system = platform.system()
72
- if system == "Linux":
73
- subprocess.run(["xdg-open", file_path])
74
- elif system == "Windows":
75
- os.startfile(file_path)
76
- elif system == "Darwin": # macOS
77
- subprocess.run(["open", file_path])
78
- else:
79
- print("Unsupported operating system")
80
-
81
-
82
- def convert_svg_to_png_in_memory(svg_bytes):
83
- # Create a temporary SVG file
84
- with tempfile.NamedTemporaryFile(suffix=".svg", delete=False) as temp_svg:
85
- temp_svg.write(svg_bytes)
86
- open_temp_file(temp_svg.name)
87
-
88
-
89
- class RegisterElementMeta(ABCMeta):
90
- "Metaclass to register output elements in a registry i.e., those that have a parent"
91
- _registry = {} # Initialize the registry as a dictionary
92
-
93
- def __init__(cls, name, bases, dct):
94
- super(RegisterElementMeta, cls).__init__(name, bases, dct)
95
- if cls.LeftInputType is not None or cls.RightInputType is not None:
96
- # Register the class in the registry
97
- RegisterElementMeta._registry[name] = cls
98
-
99
- @classmethod
100
- def get_registered_classes(cls):
101
- return cls._registry
102
-
103
-
104
- def camel_to_snake(name: str) -> str:
105
- """Converts a camel case string to snake case, e.g.,
106
- >>> camel_to_snake("HelloWorld")
107
- 'hello_world'
108
- """
109
- snake_name = ""
110
- for index, char in enumerate(name):
111
- if char.isupper() and index != 0:
112
- snake_name += "_"
113
- snake_name += char.lower()
114
-
115
- return snake_name
116
-
117
-
118
- class CustomFunctionWrapper:
119
- """A wrapper for a function that adds a name and docstring."""
120
-
121
- def __init__(self, func, name, doc):
122
- self._func = func
123
- self.name = name
124
- self.doc = doc
125
-
126
- def __call__(self, *args, **kwargs):
127
- return self._func(*args, **kwargs)
128
-
129
- def __repr__(self):
130
- return f"Method: `{self.name}`\nDescription: {self.doc or 'No description available'}"
131
-
132
- def _repr_html_(self):
133
- html = markdown2.markdown(
134
- f"**Method:** {self.name}\n\n**Description:** {self.doc or 'No description available'}"
135
- )
136
- # return markdown2.markdown(f"**Method:** {self.name}\n\n**Description:** {self.doc or 'No description available'}")
137
- # return f"<b>Method:</b> {self.name}<br><b>Description:</b> {self.doc or 'No description available'}"
138
- return html
139
-
140
-
141
- def html_decorator(func: Callable) -> Callable:
142
- "A decorator that displays the output of a function as HTML."
143
-
144
- @functools.wraps(func)
145
- def wrapper(*args, **kwargs):
146
- obj = func(*args, **kwargs)
147
- if is_notebook(): # if in a jupyter notebook
148
- html = obj.html()
149
- return display(HTML(html))
150
- else:
151
- return obj.view() # otherwise open in a browser
152
-
153
- return wrapper
154
-
155
-
156
- class Element(ABC, metaclass=RegisterElementMeta):
157
- """Base class for all elements.
158
-
159
-
160
- LeftInputType: The type of the left parent. Could be None.
161
- RightInputType: The type of the right parent. Could Be None.
162
- OutputDataType: The type of the output data.
163
-
164
- "Root" elements are those that do not have a parent, and are created from the results.
165
-
166
- """
167
-
168
- LeftInputType = None
169
- RightInputType = None
170
- OutputDataType = None
171
-
172
- def __init__(self, left_parent=None, right_parent=None, output_data=None, **kwargs):
173
- self.left_parent = left_parent
174
- self.right_parent = right_parent
175
- self.left_data = getattr(left_parent, "output_data", None)
176
- self.right_data = getattr(right_parent, "output_data", None)
177
- self.filename = None
178
-
179
- for key, value in kwargs.items():
180
- setattr(self, key, value)
181
-
182
- if (
183
- self.LeftInputType is not None
184
- and type(self.left_data) != self.LeftInputType
185
- ):
186
- raise TypeError(f"Left parent must be of type {self.LeftInputType}")
187
-
188
- if (
189
- self.RightInputType is not None
190
- and type(self.right_data) != self.RightInputType
191
- ):
192
- raise TypeError(f"Right parent must be of type {self.RightInputType}")
193
-
194
- if output_data is None:
195
- self.output_data = self.create_output(
196
- self.left_data, self.right_data, **kwargs
197
- )
198
- else:
199
- self.output_data = output_data
200
-
201
- @classmethod
202
- def unary(cls):
203
- print("Switch to using the cls.element_type method instead")
204
- return cls.RightInputType is None
205
-
206
- @property
207
- def data(self):
208
- print("Shift to using self.output_data")
209
- return self.output_data
210
-
211
- @classmethod
212
- @property
213
- def function_name(cls):
214
- return camel_to_snake(cls.__name__)
215
-
216
- @classmethod
217
- def element_type(cls):
218
- if cls.LeftInputType is None and cls.RightInputType is None:
219
- return "root"
220
- if cls.LeftInputType is not None and cls.RightInputType is None:
221
- return "unary"
222
- if cls.LeftInputType is not None and cls.RightInputType is not None:
223
- return "binary"
224
-
225
- @classmethod
226
- def code_generation(cls, results_name: str, left_column, right_column=None):
227
- if cls.element_type() == "unary":
228
- return f'{results_name}.{cls.function_name}("{left_column}")'
229
- elif cls.element_type() == "binary":
230
- return (
231
- f'{results_name}.{cls.function_name}("{left_column}", "{right_column}")'
232
- )
233
- elif cls.element_type() == "root":
234
- raise Exception("Should not be called on a root element")
235
-
236
- @abstractmethod
237
- def _primary_function(self):
238
- "The function that creates the output data, as a dictionary."
239
- raise NotImplementedError
240
-
241
- @abstractmethod
242
- def _html(self):
243
- "The function that creates the HTML representation of the output data"
244
- raise NotImplementedError
245
-
246
- def create_output(self, LeftInput, RightInput, **kwargs):
247
- if self.element_type() == "unary":
248
- output_data = self._primary_function(LeftInput, **kwargs)
249
- elif self.element_type() == "binary":
250
- output_data = self._primary_function(LeftInput, RightInput, **kwargs)
251
- elif self.element_type() == "root":
252
- raise Exception("Should not be called on a root element")
253
- else:
254
- raise Exception("Unknown element type")
255
-
256
- if output_data is None:
257
- self.filename = kwargs.get("filename", None)
258
- return None
259
-
260
- return self.OutputDataType(**output_data)
261
-
262
- @classmethod
263
- def example(cls, **kwargs):
264
- class MockParent:
265
- def __init__(self, data):
266
- self.output_data = data
267
-
268
- left_parent = MockParent(cls.LeftInputType.example())
269
- right_parent = (
270
- None
271
- if cls.RightInputType is None
272
- else MockParent(cls.RightInputType.example())
273
- )
274
-
275
- return cls(left_parent, right_parent, **kwargs)
276
-
277
- def html(self):
278
- return self._html(**asdict(self.output_data))
279
-
280
- def view(self, **kwargs):
281
- if hasattr(self.output_data, "buffer"):
282
- svg_bytes = self.output_data.buffer.getvalue()
283
- convert_svg_to_png_in_memory(svg_bytes)
284
- else:
285
- if self.filename:
286
- print(f"Output was written to file: {self.filename}")
287
- else:
288
- temporary_directory = tempfile.mkdtemp()
289
- with open(os.path.join(temporary_directory, "temp.html"), "w") as f:
290
- f.write(self.html(**kwargs))
291
- webbrowser.open(os.path.join(temporary_directory, "temp.html"))
292
-
293
- @classmethod
294
- def parameters(cls):
295
- return inspect.signature(cls._primary_function).parameters
296
-
297
@classmethod
def create_external_function(cls, results) -> Callable:
    """Build the callable that exposes this output element for ``results``.

    The returned object is a ``CustomFunctionWrapper`` around an
    ``html_decorator``-wrapped function, documented with ``cls.help()`` and
    named ``cls.function_name``.

    When ``cls.RightInputType`` is ``None`` the wrapped function takes a
    single ``column``; otherwise it takes ``left_column`` and
    ``right_column``. In both cases every column is parsed with
    ``results._parse_column`` and turned into a root element that serves as
    a parent of the new ``cls`` instance. Extra keyword arguments are passed
    through to ``cls``.
    """

    def _make_parent(data_type, key, input_type):
        # ``data_type`` is accepted for call-site symmetry but is not used.
        root_cls = create_root_element(input_type)
        return root_cls.from_results(results, key, input_type)

    if cls.RightInputType is None:

        def func(column, **kwargs):
            parsed = results._parse_column(column)
            parent = _make_parent(*parsed, input_type=cls.LeftInputType)
            return cls(left_parent=parent, **kwargs)

    else:

        def func(left_column, right_column, **kwargs):
            left = _make_parent(
                *results._parse_column(left_column), cls.LeftInputType
            )
            right = _make_parent(
                *results._parse_column(right_column), cls.RightInputType
            )
            return cls(left_parent=left, right_parent=right, **kwargs)

    wrapped = html_decorator(func)
    return CustomFunctionWrapper(wrapped, doc=cls.help(), name=cls.function_name)
344
-
345
@classmethod
def help(cls):
    """Return the help text for this element.

    The text is the docstring of ``cls._primary_function`` followed by a
    newline, passed through ``textwrap.dedent``.
    """
    docstring = cls._primary_function.__doc__
    return textwrap.dedent(f"{docstring}\n")
354
-
355
-
356
def create_root_element(output_data_type):
    """Create an ``Element`` subclass that has no parents.

    The returned ``Container`` class has both input types set to ``None``
    and ``OutputDataType`` set to ``output_data_type``. Instances are meant
    to be built with ``Container.from_results``.
    """

    class Container(Element):
        LeftInputType = None
        RightInputType = None
        OutputDataType = output_data_type

        def _primary_function(self):
            # A root element only carries data fetched from results.
            raise Exception("Should not be called directly")

        @classmethod
        def from_results(cls, results, data_name, index=None):
            """Fetch ``data_name`` from ``results`` and wrap it in a root element."""
            data_type, key = results._parse_column(data_name)
            fetched = results._fetch_element(data_type, key, cls.OutputDataType)
            return cls(
                name=data_name,
                left_parent=None,
                right_parent=None,
                output_data=fetched,
                index=index,
            )

        def _html(self):
            """Delegate HTML rendering to the wrapped output data."""
            return self.output_data.html()

    return Container
381
-
382
-
383
class PlotMixin:
    """Shared behaviour for elements whose output is a rendered plot image."""

    OutputDataType = PlotData

    image_format = "svg"

    @staticmethod
    def plt_to_buf(plt, format=image_format):
        """Save the current figure of ``plt`` into an in-memory buffer.

        The figure is written in ``format``, the buffer is rewound to its
        start, and ``plt.close()`` is called before the buffer is returned.
        """
        image_buffer = BytesIO()
        plt.savefig(image_buffer, format=format)
        image_buffer.seek(0)
        plt.close()
        return image_buffer

    def _html(
        self,
        buffer,
        title,
        format=image_format,
        option_codes=None,
        width_pct=100,
        **kwargs,
    ):
        """Return HTML embedding the image in ``buffer`` as a base64 data URI.

        ``title`` is emitted first, then the ``<img>`` tag. When
        ``option_codes`` is given, the entries under ``"left_option_codes"``
        and ``"right_option_codes"`` (if present and not ``None``) are listed
        as ``code: option`` paragraphs below the image.
        """
        encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
        mime_subtype = "png" if format == "png" else "svg+xml"
        parts = [
            title,
            f"""<img src="data:image/{mime_subtype};base64,{encoded}" style="width: {width_pct}%; height: auto;" />""",
        ]

        if option_codes is not None:
            sections = (
                ("left_option_codes", "<p>Codes:</p>"),
                ("right_option_codes", "<p>2nd variable Codes:</p>"),
            )
            for codes_key, heading in sections:
                codes = option_codes.get(codes_key, None)
                if codes is None:
                    continue
                parts.append(heading)
                parts.extend(
                    f"<p><b>{value}</b>: {key}</p>" for key, value in codes.items()
                )

        return "\n".join(parts)
425
-
426
-
427
def tally(responses, options):
    """Count how often each response occurs.

    Every entry of ``options`` that never appears in ``responses`` is added
    with a count of zero. Responses that are not in ``options`` keep their
    counts.
    """
    counts = dict(Counter(responses))
    for option in options:
        counts.setdefault(option, 0)
    return counts
433
-
434
-
435
def replace_with_alpha_codes(
    options: list[str], responses: list[str], prefix: str = ""
):
    """Replace options and responses with short alphabetical codes.

    Options are coded in order as ``A``, ``B``, ..., ``Z`` and then, when
    there are more than 26 options, ``AA``, ``AB``, ... (the previous
    implementation ran out of codes after ``Z`` and leaked ``StopIteration``).
    Each code is prefixed with ``prefix``.

    Args:
        options: The option texts, in display order.
        responses: The responses; each must be one of ``options``.
        prefix: Text prepended to every code.

    Returns:
        A tuple ``(new_options, new_responses, option_codes)`` where
        ``option_codes`` maps each original option to its code.

    Raises:
        KeyError: If a response is not one of ``options``.
    """
    import itertools
    import string

    def _alpha_codes():
        # A..Z, then AA..ZZ, then AAA.. and so on; never exhausted.
        for length in itertools.count(1):
            for letters in itertools.product(string.ascii_uppercase, repeat=length):
                yield "".join(letters)

    code_gen = _alpha_codes()
    option_codes = {}
    for option in options:
        # Assigned per position, so a repeated option keeps its last code.
        option_codes[option] = prefix + next(code_gen)
    new_options = [option_codes[option] for option in options]
    new_responses = [option_codes[response] for response in responses]
    return new_options, new_responses, option_codes
445
-
446
-
447
def _split_cleaned_options(options):
    """Strip punctuation, lower-case and split each option on single spaces."""
    strip_punctuation = str.maketrans("", "", string.punctuation)
    return [option.translate(strip_punctuation).lower().split(" ") for option in options]


def header_version(options, index):
    """Return a short name for each option built from its first ``index`` words.

    Punctuation is removed and the text lower-cased before splitting on
    spaces; the kept words are joined with underscores.
    """
    return ["_".join(words[:index]) for words in _split_cleaned_options(options)]


def find_version(options, index):
    """Return the shortest set of unique short names, starting at ``index`` words.

    The word count is increased one at a time until all short names are
    distinct.

    Raises:
        ValueError: If the options are still not distinct when every word is
            used (for example duplicates, or options that differ only in
            punctuation or case). The previous implementation recursed
            without bound in this situation.
    """
    split_options = _split_cleaned_options(options)
    longest = max((len(words) for words in split_options), default=0)
    while True:
        candidate = ["_".join(words[:index]) for words in split_options]
        if len(set(candidate)) == len(candidate):
            return candidate
        if index >= longest:
            # Using more words cannot change the candidates any further.
            raise ValueError(
                "Options cannot be given unique short names: "
                f"{list(options)!r}"
            )
        index += 1


def get_option_codes_short_name(options):
    """Map each option to the unique short name produced by ``find_version``."""
    return dict(zip(options, find_version(options, 1)))
467
-
468
-
469
def to_strings(split_versions):
    """Join each list of words into one underscore-separated string."""
    return ["_".join(version) for version in split_versions]


def is_unique(split_versions):
    """Return True when no two word lists join to the same string."""
    codes = to_strings(split_versions)
    return len(set(codes)) == len(codes)


def get_option_codes(options: list[str]):
    """Creates a dictionary mapping options to codes.

    Each option is stripped of punctuation, lower-cased, and split into
    words. Words are then removed as long as all codes stay distinct:

    1. Stop words are dropped, first occurrence only, one at a time.
    2. Trailing words are popped repeatedly until no option can lose
       another word.

    An option is never reduced below one word, so a code is only empty if
    the cleaned option itself was empty. (Previously a lone stop word such
    as "I" could be removed entirely, producing an empty code.)

    Returns:
        A dict mapping each original option to its underscore-joined code.
    """
    strip_punctuation = str.maketrans("", "", string.punctuation)
    # Multi-word phrases that should survive as a single token.
    shorthands = {"not sure": "not-sure", "need more": "need-more"}

    split_versions = []
    for option in options:
        cleaned = option.translate(strip_punctuation).lower()
        for phrase, shorthand in shorthands.items():
            cleaned = cleaned.replace(phrase, shorthand)
        split_versions.append(cleaned.split(" "))

    stop_words = [
        "a",
        "an",
        "am",
        "the",
        "of",
        "and",
        "or",
        "to",
        "for",
        "in",
        "on",
        "im",
        "that",
        "are",
        "i",
    ]
    # Remove stop words so long as doing so keeps the codes unique and
    # leaves at least one word in the option.
    for version in split_versions:
        for stop_word in stop_words:
            if len(version) > 1 and stop_word in version:
                index = version.index(stop_word)
                del version[index]
                if not is_unique(split_versions):
                    version.insert(index, stop_word)

    # Pop trailing words while the codes stay unique. A pass in which no
    # option could be shortened ends the loop.
    while True:
        not_shortened = 0
        for version in split_versions:
            if len(version) > 1:
                removed = version.pop()
                if not is_unique(split_versions):
                    # That word was needed to tell options apart; restore it.
                    version.append(removed)
                    not_shortened += 1
            else:
                not_shortened += 1
        if not_shortened == len(split_versions):
            break

    return dict(zip(options, to_strings(split_versions)))
544
-
545
-
546
def replace_with_codes(
    options: list[str], responses: list[str], short_names_dict=None, prefix=""
):
    """Translate options and responses into their short codes.

    The mapping is ``short_names_dict`` when one is supplied, otherwise it
    is computed with ``get_option_codes(options)``.

    Note: ``prefix`` is accepted but is not applied to the codes.

    Returns:
        A tuple ``(new_options, new_responses, option_codes)``.

    Raises:
        KeyError: If an option or response is missing from the mapping.
    """
    option_codes = (
        get_option_codes(options) if short_names_dict is None else short_names_dict
    )
    coded_options = [option_codes[option] for option in options]
    coded_responses = [option_codes[response] for response in responses]
    return coded_options, coded_responses, option_codes
557
-
558
-
559
class BarChart(PlotMixin, Element):
    "Creates a bar chart plot for categorical data."
    LeftInputType = CategoricalData
    RightInputType = None

    def _primary_function(
        self,
        CategoricalDataObject,
        width=10,
        height=5,
        xlabel="Counts",
        ylabel="",
        footer_fontsize=8,
        title=None,
        use_code=None,
        width_pct=100,
        show_percentage=True,
        filename=None,
    ) -> dict:
        """
        Generates a horizontal bar chart from the provided categorical data object.

        ### Args:
        - CategoricalDataObject (CategoricalData): An object containing categorical data to be plotted.
        - width (int, optional): Width of the figure. Defaults to 10.
        - height (int, optional): Height of the figure. Defaults to 5.
        - xlabel (str, optional): Label for the x-axis. Defaults to "Counts".
        - ylabel (str, optional): Label for the y-axis. Defaults to an empty string.
        - footer_fontsize (int, optional): Currently unused. Defaults to 8.
        - title (str, optional): Title of the plot. If None, CategoricalDataObject.text is used.
        - use_code (bool, optional): Whether to replace option texts with short codes.
          If None (the default), codes are used when the longest option has more than 10 characters.
        - width_pct (int, optional): Passed through in the result for HTML rendering. Defaults to 100.
        - show_percentage (bool, optional): Whether to annotate each bar with its share of all responses.
        - filename (str, optional): If given, the figure is saved there and None is returned.

        ### Returns:
        None when `filename` is given; otherwise a dict with the keys
        "buffer", "title", "option_codes" and "width_pct".

        Note:
            When codes are used, the mapping from option text to code is returned under
            "option_codes" -> "left_option_codes" so it can be listed alongside the plot.
        """
        responses = CategoricalDataObject.responses
        options = CategoricalDataObject.options
        if title is None:
            title = CategoricalDataObject.text

        option_codes = None

        if use_code is None:
            # default=0 keeps an empty option list from raising in max().
            use_code = max((len(option) for option in options), default=0) > 10

        if use_code:
            # An empty short-name mapping means "derive the codes automatically".
            options, responses, option_codes = replace_with_codes(
                options,
                responses,
                short_names_dict=CategoricalDataObject.short_names_dict or None,
            )

        response_count = tally(responses, options)
        total_responses = sum(response_count.values())
        data = {key: response_count[key] for key in options}
        data_df = pd.DataFrame(list(data.items()), columns=["Keys", "Counts"])
        sns.set(style="whitegrid")
        plt.figure(figsize=(width, height))
        ax = sns.barplot(
            x="Counts",
            y="Keys",
            data=data_df,
            palette="Blues_d",
            hue="Keys",
            legend=False,
        )

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(f"{title}")

        # With no responses there is nothing to take a percentage of; skipping
        # the labels avoids a division by zero.
        if show_percentage and total_responses > 0:
            for p in ax.patches:
                percentage = f"{100 * p.get_width() / total_responses:.1f}%"
                # Place the label just past the end of the bar, vertically centred.
                x = p.get_x() + p.get_width() + 0.5
                y = p.get_y() + p.get_height() / 2
                ax.text(x, y, percentage, ha="center", va="center")

        plt.tight_layout()

        if filename:
            save_figure(filename)
            return None

        return {
            "buffer": self.plt_to_buf(plt),
            "title": title,
            "option_codes": {
                "left_option_codes": option_codes,
                "right_option_codes": None,
            },
            "width_pct": width_pct,
        }
660
-
661
-
662
class HistogramPlot(PlotMixin, Element):
    LeftInputType = NumericalData
    RightInputType = None

    def _primary_function(
        self,
        NumericalDataObject,
        alpha=0.7,
        bins=30,
        xlabel="Value",
        ylabel="Frequency",
        color="blue",
        title=None,
        max_title_length=40,
        width_pct=100,
        filename=None,
    ):
        """
        Generates a histogram plot from a NumericalDataObject.

        Responses are converted to float; missing (None) responses are left out
        of the histogram.

        Parameters:
            NumericalDataObject (NumericalData): An object containing numerical responses and a text.
            alpha (float, optional): The transparency level of the histogram bars. Defaults to 0.7.
            bins (int, optional): The number of bins in the histogram. Defaults to 30.
            xlabel (str, optional): Label for the x-axis. Defaults to "Value".
            ylabel (str, optional): Label for the y-axis. Defaults to "Frequency".
            color (str, optional): Color of the histogram bars. Defaults to "blue".
            title (str, optional): Custom title. If None, the 'text' attribute of
                NumericalDataObject is used, shortened to max_title_length characters
                plus "..." when it is longer.
            max_title_length (int, optional): Longest default title kept unshortened. Defaults to 40.
            width_pct (int, optional): Passed through in the result for HTML rendering.
            filename (str, optional): If given, the figure is saved there and None is returned.

        Returns:
            None when filename is given; otherwise a dict with the keys
            "buffer", "title", "option_codes" and "width_pct".
        """
        # None cannot be binned, so missing responses are skipped.
        responses = [float(x) for x in NumericalDataObject.responses if x is not None]

        if title is not None:
            text = title
        elif len(NumericalDataObject.text) > max_title_length:
            text = NumericalDataObject.text[:max_title_length] + "..."
        else:
            text = NumericalDataObject.text

        plt.hist(responses, bins=bins, alpha=alpha, color=color)
        plt.title(f"{text}")
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.tight_layout()

        if filename is not None:
            save_figure(filename)
            return None

        return {
            "buffer": self.plt_to_buf(plt),
            "title": text,
            "option_codes": None,
            "width_pct": width_pct,
        }
724
-
725
-
726
class ScatterPlot(PlotMixin, Element):
    LeftInputType = NumericalData
    RightInputType = NumericalData

    def _primary_function(
        self,
        LeftNumericalDataObject,
        RightNumericalDataObject,
        alpha=0.5,
        title=None,
        regression_line=True,
        x_text=None,
        y_text=None,
        width_pct=100,
        filename=None,
    ):
        """
        Generates a scatter plot using numerical data from two provided data objects.

        Args:
            LeftNumericalDataObject (NumericalData): Supplies the x-axis values.
            RightNumericalDataObject (NumericalData): Supplies the y-axis values.
            alpha (float, optional): Transparency of the points, between 0 (transparent)
                and 1 (opaque). Defaults to 0.5.
            title (str, optional): Title drawn on the plot. When None (the default) the
                plot is left untitled, as the axis labels already carry both texts.
            regression_line (bool, optional): Whether to overlay a least-squares line and
                a box with its slope, standard error and intercept. Defaults to True.
            x_text (str, optional): x-axis label. Defaults to the left object's text.
            y_text (str, optional): y-axis label. Defaults to the right object's text.
            width_pct (int, optional): Passed through in the result for HTML rendering.
            filename (str, optional): If given, the figure is saved there and None is returned.

        Returns:
            None when filename is given; otherwise a dict with the keys
            "buffer", "title", "option_codes" and "width_pct".
        """
        x = LeftNumericalDataObject.responses
        y = RightNumericalDataObject.responses
        if x_text is None:
            x_text = LeftNumericalDataObject.text
        if y_text is None:
            y_text = RightNumericalDataObject.text

        # An explicitly supplied title used to be discarded; honour it now,
        # while keeping the plot untitled by default.
        plt.title(title if title is not None else "")
        plt.xlabel(x_text)
        plt.ylabel(y_text)
        plt.scatter(x, y, alpha=alpha)

        if regression_line:
            slope, intercept, _r_value, _p_value, std_err = stats.linregress(x, y)
            x_array = np.array(x)
            plt.plot(x, intercept + slope * x_array, color="red")
            regression_info_text = (
                f"Slope: {slope:.3f}\n"
                f"Std Error in Slope: {std_err:.3f}\n"
                f"Intercept: {intercept:.2f}\n"
            )
            # Axes-relative coordinates pin the box to the top-left corner.
            plt.text(
                0.05,
                0.95,
                regression_info_text,
                transform=plt.gca().transAxes,
                fontsize=9,
                verticalalignment="top",
                bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.5),
            )

        plt.tight_layout()

        if filename is not None:
            save_figure(filename)
            return None

        return {
            "buffer": self.plt_to_buf(plt),
            "title": "",
            "option_codes": None,
            "width_pct": width_pct,
        }
809
-
810
-
811
class WordCloudPlot(PlotMixin, Element):
    LeftInputType = FreeTextData
    RightInputType = None

    def _primary_function(
        self,
        FreeTextDataObject,
        width=800,
        height=400,
        background_color="white",
        width_pct=100,
        filename=None,
    ):
        """Creates a word cloud plot for free text data.

        All responses are joined with spaces and fed to ``WordCloud``; the
        resulting image is drawn without axes and titled with the data
        object's text.

        Parameters
        ----------
        FreeTextDataObject : FreeTextData
            Supplies the responses and the title text.
        width : int
            Width passed to ``WordCloud``.
        height : int
            Height passed to ``WordCloud``.
        background_color : str
            Background color passed to ``WordCloud``.
        width_pct : int
            Passed through in the result for HTML rendering.
        filename : str, optional
            If given, the figure is saved there and None is returned.
        """
        combined_text = " ".join(FreeTextDataObject.responses)
        plot_title = FreeTextDataObject.text

        cloud = WordCloud(
            width=width, height=height, background_color=background_color
        )
        cloud_image = cloud.generate(combined_text)

        plt.figure(figsize=(10, 5))
        plt.imshow(cloud_image, interpolation="bilinear")
        plt.axis("off")
        plt.title(f"{plot_title}")

        if filename is not None:
            save_figure(filename)
            return None

        result = {
            "buffer": self.plt_to_buf(plt),
            "title": "",
            "option_codes": None,
            "width_pct": width_pct,
        }
        return result
862
-
863
-
864
class Tally(Element):
    LeftInputType = CategoricalData
    RightInputType = None
    OutputDataType = TallyData

    def _primary_function(self, CategoricalDataObject, **kwargs):
        """Creates a tally of responses to a categorical question.

        Returns a dict with "responses", mapping each option (in reverse
        option order) to its count, and "text", the question text. Options
        that were never selected get a count of 0.
        """
        responses = CategoricalDataObject.responses
        text = CategoricalDataObject.text
        options = CategoricalDataObject.options

        # Counter yields 0 for options that were never selected.
        response_count = Counter(responses)

        # Iterate in reverse without reversing the data object's own list;
        # an in-place reverse would flip the option order on every call.
        return {
            "responses": {key: response_count[key] for key in reversed(options)},
            "text": text,
        }

    def _html(self, responses, text, **kwargs):
        """Render the tally as a two-column HTML table below the question text."""
        report_html = [
            "<div>",
            f"<p>{text}</p><table>",
        ]
        report_html.extend(
            f"<tr><td>{key}</td><td>{value}</td></tr>"
            for key, value in responses.items()
        )
        report_html.append("</table>")
        report_html.append("</div>")
        return "\n".join(report_html)
897
-
898
-
899
def compute_cross_tab(left_responses, left_options, right_responses, right_options):
    """Cross-tabulate two parallel lists of categorical responses.

    Args:
        left_responses: Responses for the first variable.
        left_options: All options of the first variable.
        right_responses: Responses for the second variable, paired by
            position with ``left_responses``.
        right_options: All options of the second variable.

    Returns:
        A nested dict ``{left_option: {right_option: count}}``. Both levels
        list the options in reverse of the order given, and every
        combination is present (0 when it never occurred).

    Raises:
        KeyError: If a response is not among the corresponding options.

    The option lists are not modified; earlier versions reversed the
    caller's lists in place.
    """
    left_order = list(reversed(left_options))
    right_order = list(reversed(right_options))

    cross_tab = {
        left_option: {right_option: 0 for right_option in right_order}
        for left_option in left_order
    }
    for left_response, right_response in zip(left_responses, right_responses):
        cross_tab[left_response][right_response] += 1
    return cross_tab
921
-
922
-
923
class CrossTab(Element):
    LeftInputType = CategoricalData
    RightInputType = CategoricalData
    OutputDataType = CrossTabData

    def _primary_function(
        self, LeftCategoricalDataObject, RightCategoricalDataObject, **kwargs
    ):
        """Creates a cross tabulation of two categorical variables.

        Parameters
        ----------
        LeftCategoricalDataObject : CategoricalData
            Data for the left side; its options become the table rows.
        RightCategoricalDataObject : CategoricalData
            Data for the right side; its options become the table columns.

        Returns
        -------
        dict
            "cross_tab" (nested counts from ``compute_cross_tab``),
            "left_title" and "right_title" (the two texts).
        """
        cross_tab = compute_cross_tab(
            LeftCategoricalDataObject.responses,
            LeftCategoricalDataObject.options,
            RightCategoricalDataObject.responses,
            RightCategoricalDataObject.options,
        )
        return {
            "cross_tab": cross_tab,
            "left_title": LeftCategoricalDataObject.text,
            "right_title": RightCategoricalDataObject.text,
        }

    def _html(self, cross_tab, left_title, right_title, **kwargs):
        """Render the cross tabulation as an HTML table.

        Outer keys of ``cross_tab`` become rows and inner keys become
        columns. An empty ``cross_tab`` produces a table with only the
        empty corner header cell instead of raising ``StopIteration``.
        """
        report_html = [
            "<div>",
            f"<p>Cross tabulation of: {left_title} and {right_title}</p>",
            "<table>",
        ]

        # All inner dicts share the same keys, so the first row defines the columns.
        column_labels = list(next(iter(cross_tab.values()), {}))
        header_cells = "".join(f"<th>{label}</th>" for label in [""] + column_labels)
        report_html.append("<tr>" + header_cells + "</tr>")

        for row_label, counts in cross_tab.items():
            cells = [f"<td>{row_label}</td>"]
            cells.extend(f"<td>{counts[label]}</td>" for label in column_labels)
            report_html.append("<tr>" + "".join(cells) + "</tr>")

        report_html.append("</table>")
        report_html.append("</div>")
        return "\n".join(report_html)
980
-
981
-
982
class FacetedBarChart(PlotMixin, Element):
    LeftInputType = CategoricalData
    RightInputType = CategoricalData

    def _primary_function(
        self,
        LeftCategoricalDataObject,
        RightCategoricalDataObject,
        num_cols=None,
        height=5,
        label_angle=45,
        title=None,
        use_code_left=None,
        use_code_right=None,
        sharey=True,
        width_pct=100,
        filename=None,
    ):
        """
        Generates a grid of bar plots comparing two categorical data sets.

        One bar plot is drawn per category of RightCategoricalDataObject, each showing
        the counts of the categories of LeftCategoricalDataObject.

        Args:
            LeftCategoricalDataObject (CategoricalData): Supplies the x-axis categories.
            RightCategoricalDataObject (CategoricalData): Its categories define the facets.
            num_cols (int, optional): Number of grid columns. If None, the number of right
                options is used when there are fewer than 6, otherwise the ceiling of
                its square root.
            height (int, optional): Height of each subplot. Defaults to 5.
            label_angle (int, optional): Rotation of the x-axis tick labels. Defaults to 45.
            title (str, optional): Overall title. If None, it is built from both texts.
            use_code_left (bool, optional): Whether to replace the left options with short
                codes. If None, codes are used when the longest option exceeds 10 characters.
            use_code_right (bool, optional): Same as use_code_left, for the right options.
            sharey (bool, optional): Whether all facets share the y-axis. Defaults to True.
            width_pct (int, optional): Passed through in the result for HTML rendering.
            filename (str, optional): If given, the figure is saved there and None is returned.

        Returns:
            None when filename is given; otherwise a dict with the keys
            "buffer", "title", "option_codes" and "width_pct".
        """
        left_responses = LeftCategoricalDataObject.responses
        right_responses = RightCategoricalDataObject.responses
        left_text = LeftCategoricalDataObject.text
        right_text = RightCategoricalDataObject.text
        left_options = LeftCategoricalDataObject.options
        right_options = RightCategoricalDataObject.options

        if use_code_left is None:
            use_code_left = max(len(option) for option in left_options) > 10
        if use_code_right is None:
            use_code_right = max(len(option) for option in right_options) > 10

        # The title uses the full texts, so build it before they are shortened.
        if title is None:
            title = f'"{left_text}" \n by "{right_text}"'

        if len(left_text) > 40:
            left_text = left_text[:20] + "..."
        if len(right_text) > 40:
            right_text = right_text[:20] + "..."

        left_option_codes = None
        right_option_codes = None

        if use_code_left:
            left_options, left_responses, left_option_codes = replace_with_codes(
                left_options, left_responses, prefix="L-"
            )
        if use_code_right:
            right_options, right_responses, right_option_codes = replace_with_codes(
                right_options, right_responses, prefix="R-"
            )

        if num_cols is None:
            facet_count = len(right_options)
            num_cols = (
                facet_count if facet_count < 6 else math.ceil(math.sqrt(facet_count))
            )

        # Right-hand categories form the outer level, i.e. the DataFrame columns.
        cross_tab = compute_cross_tab(
            right_responses, right_options, left_responses, left_options
        )

        # The lookup table is deliberately carried over from the left step to
        # the right step when the right side does not use codes.
        code_to_option = {}
        if use_code_left:
            code_to_option = {
                code: option for option, code in left_option_codes.items()
            }
        left_option_name = code_to_option.get(left_text, left_text)
        if use_code_right:
            code_to_option = {
                code: option for option, code in right_option_codes.items()
            }
        right_option_name = code_to_option.get(right_text, right_text)

        # Wide table -> index as a named column -> long format for FacetGrid.
        df = pd.DataFrame(cross_tab).reset_index()
        df = df.rename(columns={"index": left_option_name})
        df_long = df.melt(
            id_vars=left_option_name, var_name=right_option_name, value_name="Count"
        )

        sns.set(style="whitegrid")
        g = sns.FacetGrid(
            df_long,
            col=right_option_name,
            col_wrap=num_cols,
            sharey=sharey,
            height=height,
        )
        g = g.map(
            sns.barplot,
            left_option_name,
            "Count",
            order=df_long[left_option_name].unique(),
            palette="viridis",
            hue=df_long[left_option_name].unique(),
            legend=False,
        )

        for ax in g.axes.ravel():
            for tick_label in ax.get_xticklabels():
                tick_label.set_rotation(label_angle)

        g.fig.suptitle(f"{title}", fontsize=16)

        # Leave room at the top for the overall title.
        g.fig.subplots_adjust(top=0.9)

        plt.tight_layout()

        if filename is not None:
            save_figure(filename)
            return None

        return {
            "buffer": self.plt_to_buf(plt),
            "title": "",
            "option_codes": {
                "left_option_codes": left_option_codes,
                "right_option_codes": right_option_codes,
            },
            "width_pct": width_pct,
        }
1146
-
1147
-
1148
class ChiSquare(Element):
    LeftInputType = CategoricalData
    RightInputType = None
    OutputDataType = ChiSquareData

    def _primary_function(self, CategoricalDataObject, **kwargs):
        """Run a chi-square test on the response counts of a categorical question.

        Options that were never selected are included with a count of 0.
        The counts are passed to ``chisquare`` without explicit expected
        frequencies.

        Returns a dict with "chi_square", "p_value" and "text".
        """
        responses = CategoricalDataObject.responses
        text = CategoricalDataObject.text
        options = CategoricalDataObject.options

        response_count = dict(Counter(responses))
        # Add 0s for options that weren't selected even once.
        for key in options:
            response_count.setdefault(key, 0)

        observed_counts = list(response_count.values())
        chi_square, p_value = chisquare(observed_counts)
        return {"chi_square": chi_square, "p_value": p_value, "text": text}

    def _html(self, chi_square, p_value, text, digits=3, **kwargs):
        """Render the test result as HTML, rounding both numbers to ``digits``.

        The markup no longer opens a ``<table>`` element: it was never
        closed and only contained paragraphs.
        """
        report_html = [
            "<div>",
            f"<p>Chi-square test for: {text}</p>",
            f"<p>Chi-square statistic: {round(chi_square, digits)}</p>",
            f"<p>p-value: {round(p_value, digits)}</p>",
            "</div>",
        ]
        return "\n".join(report_html)
1174
-
1175
-
1176
class OrderedLogit(Element):
    LeftInputType = CategoricalData
    RightInputType = CategoricalData
    OutputDataType = RegressionData

    def _primary_function(
        self, LeftSideCategoricalData, RightSideCategoricalData, **kwargs
    ):
        """Fit an ordered logit of the left variable on the right variable.

        The left responses are the ordered outcome, with the order taken
        from the left object's options. The right responses are the
        predictor, expanded into dummy variables with the first level
        dropped.

        Returns:
            A dict with "model_outcome" (the fitted model's summary as HTML,
            or ``"Error: ..."`` text if fitting failed) and
            "outcome_description" (the left object's text).

        Raises:
            ValueError: If the two response collections are not lists of
                equal length.
        """
        y = LeftSideCategoricalData.responses
        category_order = LeftSideCategoricalData.options
        X = RightSideCategoricalData.responses
        outcome_description = LeftSideCategoricalData.text
        if not (isinstance(y, list) and isinstance(X, list) and len(y) == len(X)):
            # The full response lists are no longer printed to stdout here.
            raise ValueError("y and X must be lists of the same length.")

        y_ordered = pd.Categorical(y, categories=category_order, ordered=True)
        data = pd.DataFrame({"Outcome": y_ordered, "Predictor": X})

        # One indicator column per predictor level, except the first.
        data = pd.get_dummies(data, columns=["Predictor"], drop_first=True)

        for col in data.columns.drop("Outcome"):
            data[col] = pd.to_numeric(data[col], errors="coerce")

        # Cast any boolean indicator columns to integers.
        for col in data.select_dtypes(include=["bool"]).columns:
            data[col] = data[col].astype(int)

        try:
            model = OrderedModel(
                data["Outcome"], data.drop(columns=["Outcome"]), distr="logit"
            )
            result = model.fit()
            model_outcome = result.summary().as_html()
        except Exception as e:
            # Best effort: a failed fit is reported in the output instead of
            # aborting the whole report.
            model_outcome = f"Error: {e}"

        return {
            "model_outcome": model_outcome,
            "outcome_description": outcome_description,
        }

    def _html(self, model_outcome: str, outcome_description: str):
        """Wrap the model summary (or error text) in a titled HTML section."""
        report_html = [
            "<h1>Ordered logit</h1><div>",
            f"<p>Outcome: {outcome_description}</p>",
            model_outcome,
            "</div>",
        ]
        return "\n".join(report_html)