edsl 0.1.14__py3-none-any.whl → 0.1.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +348 -38
- edsl/BaseDiff.py +260 -0
- edsl/TemplateLoader.py +24 -0
- edsl/__init__.py +46 -10
- edsl/__version__.py +1 -0
- edsl/agents/Agent.py +842 -144
- edsl/agents/AgentList.py +521 -25
- edsl/agents/Invigilator.py +250 -374
- edsl/agents/InvigilatorBase.py +257 -0
- edsl/agents/PromptConstructor.py +272 -0
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/descriptors.py +43 -13
- edsl/agents/prompt_helpers.py +129 -0
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -0
- edsl/auto/StageBase.py +243 -0
- edsl/auto/StageGenerateSurvey.py +178 -0
- edsl/auto/StageLabelQuestions.py +125 -0
- edsl/auto/StagePersona.py +61 -0
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
- edsl/auto/StagePersonaDimensionValues.py +74 -0
- edsl/auto/StagePersonaDimensions.py +69 -0
- edsl/auto/StageQuestions.py +74 -0
- edsl/auto/SurveyCreatorPipeline.py +21 -0
- edsl/auto/utilities.py +218 -0
- edsl/base/Base.py +279 -0
- edsl/config.py +121 -104
- edsl/conversation/Conversation.py +290 -0
- edsl/conversation/car_buying.py +59 -0
- edsl/conversation/chips.py +95 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -0
- edsl/coop/__init__.py +1 -0
- edsl/coop/coop.py +1029 -134
- edsl/coop/utils.py +131 -0
- edsl/data/Cache.py +560 -89
- edsl/data/CacheEntry.py +230 -0
- edsl/data/CacheHandler.py +168 -0
- edsl/data/RemoteCacheSync.py +186 -0
- edsl/data/SQLiteDict.py +292 -0
- edsl/data/__init__.py +5 -3
- edsl/data/orm.py +6 -33
- edsl/data_transfer_models.py +74 -27
- edsl/enums.py +165 -8
- edsl/exceptions/BaseException.py +21 -0
- edsl/exceptions/__init__.py +52 -46
- edsl/exceptions/agents.py +33 -15
- edsl/exceptions/cache.py +5 -0
- edsl/exceptions/coop.py +8 -0
- edsl/exceptions/general.py +34 -0
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +15 -0
- edsl/exceptions/language_models.py +46 -1
- edsl/exceptions/questions.py +80 -5
- edsl/exceptions/results.py +16 -5
- edsl/exceptions/scenarios.py +29 -0
- edsl/exceptions/surveys.py +13 -10
- edsl/inference_services/AnthropicService.py +106 -0
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -0
- edsl/inference_services/AzureAI.py +215 -0
- edsl/inference_services/DeepInfraService.py +18 -0
- edsl/inference_services/GoogleService.py +143 -0
- edsl/inference_services/GroqService.py +20 -0
- edsl/inference_services/InferenceServiceABC.py +80 -0
- edsl/inference_services/InferenceServicesCollection.py +138 -0
- edsl/inference_services/MistralAIService.py +120 -0
- edsl/inference_services/OllamaService.py +18 -0
- edsl/inference_services/OpenAIService.py +236 -0
- edsl/inference_services/PerplexityService.py +160 -0
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -0
- edsl/inference_services/TogetherAIService.py +172 -0
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/registry.py +41 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +21 -20
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +684 -204
- edsl/jobs/JobsChecks.py +172 -0
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -0
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -0
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -0
- edsl/jobs/buckets/ModelBuckets.py +65 -0
- edsl/jobs/buckets/TokenBucket.py +283 -0
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +392 -0
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -0
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -0
- edsl/jobs/interviews/InterviewStatistic.py +63 -0
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -0
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -0
- edsl/jobs/interviews/InterviewStatusLog.py +92 -0
- edsl/jobs/interviews/ReportErrors.py +66 -0
- edsl/jobs/interviews/interview_status_enum.py +9 -0
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -110
- edsl/jobs/runners/JobsRunnerStatus.py +298 -0
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -0
- edsl/jobs/tasks/TaskCreators.py +64 -0
- edsl/jobs/tasks/TaskHistory.py +470 -0
- edsl/jobs/tasks/TaskStatusLog.py +23 -0
- edsl/jobs/tasks/task_status_enum.py +161 -0
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -0
- edsl/jobs/tokens/TokenUsage.py +34 -0
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +507 -386
- edsl/language_models/ModelList.py +164 -0
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -0
- edsl/language_models/__init__.py +1 -8
- edsl/language_models/fake_openai_call.py +15 -0
- edsl/language_models/fake_openai_service.py +61 -0
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +109 -41
- edsl/language_models/utilities.py +65 -0
- edsl/notebooks/Notebook.py +263 -0
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -0
- edsl/prompts/Prompt.py +222 -93
- edsl/prompts/__init__.py +1 -1
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -0
- edsl/questions/QuestionBasePromptsMixin.py +221 -0
- edsl/questions/QuestionBudget.py +164 -67
- edsl/questions/QuestionCheckBox.py +281 -62
- edsl/questions/QuestionDict.py +343 -0
- edsl/questions/QuestionExtract.py +136 -50
- edsl/questions/QuestionFreeText.py +79 -55
- edsl/questions/QuestionFunctional.py +138 -41
- edsl/questions/QuestionList.py +184 -57
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +293 -69
- edsl/questions/QuestionNumerical.py +109 -56
- edsl/questions/QuestionRank.py +244 -49
- edsl/questions/Quick.py +41 -0
- edsl/questions/SimpleAskMixin.py +74 -0
- edsl/questions/__init__.py +9 -6
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +153 -38
- edsl/questions/compose_questions.py +13 -7
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -0
- edsl/questions/derived/QuestionLikertFive.py +28 -26
- edsl/questions/derived/QuestionLinearScale.py +41 -28
- edsl/questions/derived/QuestionTopK.py +34 -26
- edsl/questions/derived/QuestionYesNo.py +40 -27
- edsl/questions/descriptors.py +228 -74
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -0
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
- edsl/questions/prompt_templates/question_extract.jinja +11 -0
- edsl/questions/prompt_templates/question_free_text.jinja +3 -0
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
- edsl/questions/prompt_templates/question_list.jinja +17 -0
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
- edsl/questions/prompt_templates/question_numerical.jinja +37 -0
- edsl/questions/question_base_gen_mixin.py +168 -0
- edsl/questions/question_registry.py +130 -46
- edsl/questions/register_questions_meta.py +71 -0
- edsl/questions/response_validator_abc.py +188 -0
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +5 -2
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/budget/__init__.py +0 -0
- edsl/questions/templates/budget/answering_instructions.jinja +7 -0
- edsl/questions/templates/budget/question_presentation.jinja +7 -0
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
- edsl/questions/templates/dict/__init__.py +0 -0
- edsl/questions/templates/dict/answering_instructions.jinja +21 -0
- edsl/questions/templates/dict/question_presentation.jinja +1 -0
- edsl/questions/templates/extract/__init__.py +0 -0
- edsl/questions/templates/extract/answering_instructions.jinja +7 -0
- edsl/questions/templates/extract/question_presentation.jinja +1 -0
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +1 -0
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
- edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +4 -0
- edsl/questions/templates/list/question_presentation.jinja +5 -0
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +7 -0
- edsl/questions/templates/numerical/question_presentation.jinja +7 -0
- edsl/questions/templates/rank/__init__.py +0 -0
- edsl/questions/templates/rank/answering_instructions.jinja +11 -0
- edsl/questions/templates/rank/question_presentation.jinja +15 -0
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
- edsl/questions/templates/top_k/question_presentation.jinja +22 -0
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
- edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
- edsl/results/CSSParameterizer.py +108 -0
- edsl/results/Dataset.py +550 -19
- edsl/results/DatasetExportMixin.py +594 -0
- edsl/results/DatasetTree.py +295 -0
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +477 -173
- edsl/results/Results.py +987 -269
- edsl/results/ResultsExportMixin.py +28 -125
- edsl/results/ResultsGGMixin.py +83 -15
- edsl/results/TableDisplay.py +125 -0
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +1 -1
- edsl/results/file_exports.py +252 -0
- edsl/results/results_fetch_mixin.py +33 -0
- edsl/results/results_selector.py +145 -0
- edsl/results/results_tools_mixin.py +98 -0
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +78 -0
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -0
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +543 -0
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +431 -62
- edsl/scenarios/ScenarioHtmlMixin.py +65 -0
- edsl/scenarios/ScenarioList.py +1415 -45
- edsl/scenarios/ScenarioListExportMixin.py +45 -0
- edsl/scenarios/ScenarioListPdfMixin.py +239 -0
- edsl/scenarios/__init__.py +2 -0
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +49 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/scenario_join.py +131 -0
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -0
- edsl/study/ObjectEntry.py +173 -0
- edsl/study/ProofOfWork.py +113 -0
- edsl/study/SnapShot.py +80 -0
- edsl/study/Study.py +521 -0
- edsl/study/__init__.py +4 -0
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +92 -11
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +9 -4
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +156 -35
- edsl/surveys/Rule.py +221 -74
- edsl/surveys/RuleCollection.py +241 -61
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1079 -339
- edsl/surveys/SurveyCSS.py +273 -0
- edsl/surveys/SurveyExportMixin.py +235 -40
- edsl/surveys/SurveyFlowVisualization.py +181 -0
- edsl/surveys/SurveyQualtricsImport.py +284 -0
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +4 -2
- edsl/surveys/base.py +19 -3
- edsl/surveys/descriptors.py +17 -6
- edsl/surveys/instructions/ChangeInstruction.py +48 -0
- edsl/surveys/instructions/Instruction.py +56 -0
- edsl/surveys/instructions/InstructionCollection.py +82 -0
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +24 -0
- edsl/templates/error_reporting/exceptions_by_model.html +35 -0
- edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
- edsl/templates/error_reporting/exceptions_by_type.html +17 -0
- edsl/templates/error_reporting/interview_details.html +116 -0
- edsl/templates/error_reporting/interviews.html +19 -0
- edsl/templates/error_reporting/overview.html +5 -0
- edsl/templates/error_reporting/performance_plot.html +2 -0
- edsl/templates/error_reporting/report.css +74 -0
- edsl/templates/error_reporting/report.html +118 -0
- edsl/templates/error_reporting/report.js +25 -0
- edsl/tools/__init__.py +1 -0
- edsl/tools/clusters.py +192 -0
- edsl/tools/embeddings.py +27 -0
- edsl/tools/embeddings_plotting.py +118 -0
- edsl/tools/plotting.py +112 -0
- edsl/tools/summarize.py +18 -0
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +5 -0
- edsl/utilities/__init__.py +21 -20
- edsl/utilities/ast_utilities.py +3 -0
- edsl/utilities/data/Registry.py +2 -0
- edsl/utilities/decorators.py +41 -0
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/interface.py +310 -60
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -0
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/restricted_python.py +70 -0
- edsl/utilities/utilities.py +203 -13
- edsl-0.1.40.dist-info/METADATA +111 -0
- edsl-0.1.40.dist-info/RECORD +362 -0
- {edsl-0.1.14.dist-info → edsl-0.1.40.dist-info}/WHEEL +1 -1
- edsl/agents/AgentListExportMixin.py +0 -24
- edsl/coop/old.py +0 -31
- edsl/data/Database.py +0 -141
- edsl/data/crud.py +0 -121
- edsl/jobs/Interview.py +0 -417
- edsl/jobs/JobsRunner.py +0 -63
- edsl/jobs/JobsRunnerStatusMixin.py +0 -115
- edsl/jobs/base.py +0 -47
- edsl/jobs/buckets.py +0 -166
- edsl/jobs/runners/JobsRunnerDryRun.py +0 -19
- edsl/jobs/runners/JobsRunnerStreaming.py +0 -54
- edsl/jobs/task_management.py +0 -218
- edsl/jobs/token_tracking.py +0 -78
- edsl/language_models/DeepInfra.py +0 -69
- edsl/language_models/OpenAI.py +0 -98
- edsl/language_models/model_interfaces/GeminiPro.py +0 -66
- edsl/language_models/model_interfaces/LanguageModelOpenAIFour.py +0 -8
- edsl/language_models/model_interfaces/LanguageModelOpenAIThreeFiveTurbo.py +0 -8
- edsl/language_models/model_interfaces/LlamaTwo13B.py +0 -21
- edsl/language_models/model_interfaces/LlamaTwo70B.py +0 -21
- edsl/language_models/model_interfaces/Mixtral8x7B.py +0 -24
- edsl/language_models/registry.py +0 -81
- edsl/language_models/schemas.py +0 -15
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/prompts/QuestionInstructionsBase.py +0 -6
- edsl/prompts/library/agent_instructions.py +0 -29
- edsl/prompts/library/agent_persona.py +0 -17
- edsl/prompts/library/question_budget.py +0 -26
- edsl/prompts/library/question_checkbox.py +0 -32
- edsl/prompts/library/question_extract.py +0 -19
- edsl/prompts/library/question_freetext.py +0 -14
- edsl/prompts/library/question_linear_scale.py +0 -20
- edsl/prompts/library/question_list.py +0 -22
- edsl/prompts/library/question_multiple_choice.py +0 -44
- edsl/prompts/library/question_numerical.py +0 -31
- edsl/prompts/library/question_rank.py +0 -21
- edsl/prompts/prompt_config.py +0 -33
- edsl/prompts/registry.py +0 -185
- edsl/questions/Question.py +0 -240
- edsl/report/InputOutputDataTypes.py +0 -134
- edsl/report/RegressionMixin.py +0 -28
- edsl/report/ReportOutputs.py +0 -1228
- edsl/report/ResultsFetchMixin.py +0 -106
- edsl/report/ResultsOutputMixin.py +0 -14
- edsl/report/demo.ipynb +0 -645
- edsl/results/ResultsDBMixin.py +0 -184
- edsl/surveys/SurveyFlowVisualizationMixin.py +0 -92
- edsl/trackers/Tracker.py +0 -91
- edsl/trackers/TrackerAPI.py +0 -196
- edsl/trackers/TrackerTasks.py +0 -70
- edsl/utilities/pastebin.py +0 -141
- edsl-0.1.14.dist-info/METADATA +0 -69
- edsl-0.1.14.dist-info/RECORD +0 -141
- /edsl/{language_models/model_interfaces → inference_services}/__init__.py +0 -0
- /edsl/{report/__init__.py → jobs/runners/JobsRunnerStatusData.py} +0 -0
- /edsl/{trackers/__init__.py → language_models/ServiceDataSources.py} +0 -0
- {edsl-0.1.14.dist-info → edsl-0.1.40.dist-info}/LICENSE +0 -0
edsl/report/ReportOutputs.py
DELETED
@@ -1,1228 +0,0 @@
|
|
1
|
-
import platform
|
2
|
-
import subprocess
|
3
|
-
import tempfile
|
4
|
-
|
5
|
-
import copy
|
6
|
-
import base64
|
7
|
-
import functools
|
8
|
-
import inspect
|
9
|
-
import markdown2
|
10
|
-
import math
|
11
|
-
import matplotlib.pyplot as plt
|
12
|
-
import numpy as np
|
13
|
-
import os
|
14
|
-
import pandas as pd
|
15
|
-
import seaborn as sns
|
16
|
-
import string
|
17
|
-
import tempfile
|
18
|
-
import textwrap
|
19
|
-
import warnings
|
20
|
-
import webbrowser
|
21
|
-
from abc import ABC, abstractmethod, ABCMeta
|
22
|
-
from collections import Counter
|
23
|
-
from dataclasses import asdict
|
24
|
-
from io import BytesIO
|
25
|
-
from IPython.display import display, HTML
|
26
|
-
from scipy import stats
|
27
|
-
from scipy.stats import chisquare
|
28
|
-
from statsmodels.miscmodels.ordinal_model import OrderedModel
|
29
|
-
from statsmodels.tools.sm_exceptions import HessianInversionWarning, ConvergenceWarning
|
30
|
-
from typing import Callable
|
31
|
-
from wordcloud import WordCloud
|
32
|
-
from edsl.report.InputOutputDataTypes import (
|
33
|
-
CategoricalData,
|
34
|
-
NumericalData,
|
35
|
-
PlotData,
|
36
|
-
TallyData,
|
37
|
-
CrossTabData,
|
38
|
-
FreeTextData,
|
39
|
-
ChiSquareData,
|
40
|
-
RegressionData,
|
41
|
-
)
|
42
|
-
from edsl.utilities import is_notebook
|
43
|
-
|
44
|
-
|
45
|
-
def save_figure(filename):
|
46
|
-
base, ext = os.path.splitext(filename)
|
47
|
-
if ext.lower() == ".png":
|
48
|
-
plt.savefig(filename, format="png")
|
49
|
-
elif ext.lower() == ".jpg" or ext.lower() == ".jpeg":
|
50
|
-
plt.savefig(filename, format="jpeg")
|
51
|
-
elif ext.lower() == ".svg":
|
52
|
-
plt.savefig(filename, format="svg")
|
53
|
-
else:
|
54
|
-
print("Unsupported file extension. Saving as PNG by default.")
|
55
|
-
plt.savefig(base + ".png", format="png")
|
56
|
-
|
57
|
-
|
58
|
-
warnings.filterwarnings(
|
59
|
-
"ignore",
|
60
|
-
category=FutureWarning,
|
61
|
-
module="seaborn._oldcore",
|
62
|
-
message=".*is_categorical_dtype is deprecated.*",
|
63
|
-
)
|
64
|
-
warnings.filterwarnings("ignore", category=RuntimeWarning, module="statsmodels.*")
|
65
|
-
warnings.filterwarnings("ignore", category=RuntimeWarning, module="scipy.optimize.*")
|
66
|
-
warnings.filterwarnings("ignore", category=HessianInversionWarning)
|
67
|
-
warnings.filterwarnings("ignore", category=ConvergenceWarning)
|
68
|
-
|
69
|
-
|
70
|
-
def open_temp_file(file_path):
|
71
|
-
system = platform.system()
|
72
|
-
if system == "Linux":
|
73
|
-
subprocess.run(["xdg-open", file_path])
|
74
|
-
elif system == "Windows":
|
75
|
-
os.startfile(file_path)
|
76
|
-
elif system == "Darwin": # macOS
|
77
|
-
subprocess.run(["open", file_path])
|
78
|
-
else:
|
79
|
-
print("Unsupported operating system")
|
80
|
-
|
81
|
-
|
82
|
-
def convert_svg_to_png_in_memory(svg_bytes):
|
83
|
-
# Create a temporary SVG file
|
84
|
-
with tempfile.NamedTemporaryFile(suffix=".svg", delete=False) as temp_svg:
|
85
|
-
temp_svg.write(svg_bytes)
|
86
|
-
open_temp_file(temp_svg.name)
|
87
|
-
|
88
|
-
|
89
|
-
class RegisterElementMeta(ABCMeta):
|
90
|
-
"Metaclass to register output elements in a registry i.e., those that have a parent"
|
91
|
-
_registry = {} # Initialize the registry as a dictionary
|
92
|
-
|
93
|
-
def __init__(cls, name, bases, dct):
|
94
|
-
super(RegisterElementMeta, cls).__init__(name, bases, dct)
|
95
|
-
if cls.LeftInputType is not None or cls.RightInputType is not None:
|
96
|
-
# Register the class in the registry
|
97
|
-
RegisterElementMeta._registry[name] = cls
|
98
|
-
|
99
|
-
@classmethod
|
100
|
-
def get_registered_classes(cls):
|
101
|
-
return cls._registry
|
102
|
-
|
103
|
-
|
104
|
-
def camel_to_snake(name: str) -> str:
|
105
|
-
"""Converts a camel case string to snake case, e.g.,
|
106
|
-
>>> camel_to_snake("HelloWorld")
|
107
|
-
'hello_world'
|
108
|
-
"""
|
109
|
-
snake_name = ""
|
110
|
-
for index, char in enumerate(name):
|
111
|
-
if char.isupper() and index != 0:
|
112
|
-
snake_name += "_"
|
113
|
-
snake_name += char.lower()
|
114
|
-
|
115
|
-
return snake_name
|
116
|
-
|
117
|
-
|
118
|
-
class CustomFunctionWrapper:
|
119
|
-
"""A wrapper for a function that adds a name and docstring."""
|
120
|
-
|
121
|
-
def __init__(self, func, name, doc):
|
122
|
-
self._func = func
|
123
|
-
self.name = name
|
124
|
-
self.doc = doc
|
125
|
-
|
126
|
-
def __call__(self, *args, **kwargs):
|
127
|
-
return self._func(*args, **kwargs)
|
128
|
-
|
129
|
-
def __repr__(self):
|
130
|
-
return f"Method: `{self.name}`\nDescription: {self.doc or 'No description available'}"
|
131
|
-
|
132
|
-
def _repr_html_(self):
|
133
|
-
html = markdown2.markdown(
|
134
|
-
f"**Method:** {self.name}\n\n**Description:** {self.doc or 'No description available'}"
|
135
|
-
)
|
136
|
-
# return markdown2.markdown(f"**Method:** {self.name}\n\n**Description:** {self.doc or 'No description available'}")
|
137
|
-
# return f"<b>Method:</b> {self.name}<br><b>Description:</b> {self.doc or 'No description available'}"
|
138
|
-
return html
|
139
|
-
|
140
|
-
|
141
|
-
def html_decorator(func: Callable) -> Callable:
|
142
|
-
"A decorator that displays the output of a function as HTML."
|
143
|
-
|
144
|
-
@functools.wraps(func)
|
145
|
-
def wrapper(*args, **kwargs):
|
146
|
-
obj = func(*args, **kwargs)
|
147
|
-
if is_notebook(): # if in a jupyter notebook
|
148
|
-
html = obj.html()
|
149
|
-
return display(HTML(html))
|
150
|
-
else:
|
151
|
-
return obj.view() # otherwise open in a browser
|
152
|
-
|
153
|
-
return wrapper
|
154
|
-
|
155
|
-
|
156
|
-
class Element(ABC, metaclass=RegisterElementMeta):
|
157
|
-
"""Base class for all elements.
|
158
|
-
|
159
|
-
|
160
|
-
LeftInputType: The type of the left parent. Could be None.
|
161
|
-
RightInputType: The type of the right parent. Could Be None.
|
162
|
-
OutputDataType: The type of the output data.
|
163
|
-
|
164
|
-
"Root" elements are those that do not have a parent, and are created from the results.
|
165
|
-
|
166
|
-
"""
|
167
|
-
|
168
|
-
LeftInputType = None
|
169
|
-
RightInputType = None
|
170
|
-
OutputDataType = None
|
171
|
-
|
172
|
-
def __init__(self, left_parent=None, right_parent=None, output_data=None, **kwargs):
|
173
|
-
self.left_parent = left_parent
|
174
|
-
self.right_parent = right_parent
|
175
|
-
self.left_data = getattr(left_parent, "output_data", None)
|
176
|
-
self.right_data = getattr(right_parent, "output_data", None)
|
177
|
-
self.filename = None
|
178
|
-
|
179
|
-
for key, value in kwargs.items():
|
180
|
-
setattr(self, key, value)
|
181
|
-
|
182
|
-
if (
|
183
|
-
self.LeftInputType is not None
|
184
|
-
and type(self.left_data) != self.LeftInputType
|
185
|
-
):
|
186
|
-
raise TypeError(f"Left parent must be of type {self.LeftInputType}")
|
187
|
-
|
188
|
-
if (
|
189
|
-
self.RightInputType is not None
|
190
|
-
and type(self.right_data) != self.RightInputType
|
191
|
-
):
|
192
|
-
raise TypeError(f"Right parent must be of type {self.RightInputType}")
|
193
|
-
|
194
|
-
if output_data is None:
|
195
|
-
self.output_data = self.create_output(
|
196
|
-
self.left_data, self.right_data, **kwargs
|
197
|
-
)
|
198
|
-
else:
|
199
|
-
self.output_data = output_data
|
200
|
-
|
201
|
-
@classmethod
|
202
|
-
def unary(cls):
|
203
|
-
print("Switch to using the cls.element_type method instead")
|
204
|
-
return cls.RightInputType is None
|
205
|
-
|
206
|
-
@property
|
207
|
-
def data(self):
|
208
|
-
print("Shift to using self.output_data")
|
209
|
-
return self.output_data
|
210
|
-
|
211
|
-
@classmethod
|
212
|
-
@property
|
213
|
-
def function_name(cls):
|
214
|
-
return camel_to_snake(cls.__name__)
|
215
|
-
|
216
|
-
@classmethod
|
217
|
-
def element_type(cls):
|
218
|
-
if cls.LeftInputType is None and cls.RightInputType is None:
|
219
|
-
return "root"
|
220
|
-
if cls.LeftInputType is not None and cls.RightInputType is None:
|
221
|
-
return "unary"
|
222
|
-
if cls.LeftInputType is not None and cls.RightInputType is not None:
|
223
|
-
return "binary"
|
224
|
-
|
225
|
-
@classmethod
|
226
|
-
def code_generation(cls, results_name: str, left_column, right_column=None):
|
227
|
-
if cls.element_type() == "unary":
|
228
|
-
return f'{results_name}.{cls.function_name}("{left_column}")'
|
229
|
-
elif cls.element_type() == "binary":
|
230
|
-
return (
|
231
|
-
f'{results_name}.{cls.function_name}("{left_column}", "{right_column}")'
|
232
|
-
)
|
233
|
-
elif cls.element_type() == "root":
|
234
|
-
raise Exception("Should not be called on a root element")
|
235
|
-
|
236
|
-
@abstractmethod
|
237
|
-
def _primary_function(self):
|
238
|
-
"The function that creates the output data, as a dictionary."
|
239
|
-
raise NotImplementedError
|
240
|
-
|
241
|
-
@abstractmethod
|
242
|
-
def _html(self):
|
243
|
-
"The function that creates the HTML representation of the output data"
|
244
|
-
raise NotImplementedError
|
245
|
-
|
246
|
-
def create_output(self, LeftInput, RightInput, **kwargs):
|
247
|
-
if self.element_type() == "unary":
|
248
|
-
output_data = self._primary_function(LeftInput, **kwargs)
|
249
|
-
elif self.element_type() == "binary":
|
250
|
-
output_data = self._primary_function(LeftInput, RightInput, **kwargs)
|
251
|
-
elif self.element_type() == "root":
|
252
|
-
raise Exception("Should not be called on a root element")
|
253
|
-
else:
|
254
|
-
raise Exception("Unknown element type")
|
255
|
-
|
256
|
-
if output_data is None:
|
257
|
-
self.filename = kwargs.get("filename", None)
|
258
|
-
return None
|
259
|
-
|
260
|
-
return self.OutputDataType(**output_data)
|
261
|
-
|
262
|
-
@classmethod
|
263
|
-
def example(cls, **kwargs):
|
264
|
-
class MockParent:
|
265
|
-
def __init__(self, data):
|
266
|
-
self.output_data = data
|
267
|
-
|
268
|
-
left_parent = MockParent(cls.LeftInputType.example())
|
269
|
-
right_parent = (
|
270
|
-
None
|
271
|
-
if cls.RightInputType is None
|
272
|
-
else MockParent(cls.RightInputType.example())
|
273
|
-
)
|
274
|
-
|
275
|
-
return cls(left_parent, right_parent, **kwargs)
|
276
|
-
|
277
|
-
def html(self):
|
278
|
-
return self._html(**asdict(self.output_data))
|
279
|
-
|
280
|
-
def view(self, **kwargs):
|
281
|
-
if hasattr(self.output_data, "buffer"):
|
282
|
-
svg_bytes = self.output_data.buffer.getvalue()
|
283
|
-
convert_svg_to_png_in_memory(svg_bytes)
|
284
|
-
else:
|
285
|
-
if self.filename:
|
286
|
-
print(f"Output was written to file: {self.filename}")
|
287
|
-
else:
|
288
|
-
temporary_directory = tempfile.mkdtemp()
|
289
|
-
with open(os.path.join(temporary_directory, "temp.html"), "w") as f:
|
290
|
-
f.write(self.html(**kwargs))
|
291
|
-
webbrowser.open(os.path.join(temporary_directory, "temp.html"))
|
292
|
-
|
293
|
-
@classmethod
|
294
|
-
def parameters(cls):
|
295
|
-
return inspect.signature(cls._primary_function).parameters
|
296
|
-
|
297
|
-
@classmethod
|
298
|
-
def create_external_function(cls, results) -> Callable:
    """Build the callable that gets attached to a Results object for this element.

    The returned wrapper takes one column name (when ``cls.RightInputType`` is
    None) or two column names (otherwise), turns each column into a root
    element via ``create_root_element``, and instantiates ``cls`` with those
    roots as parents. Extra keyword arguments are forwarded to ``cls``.

    Args:
        results: The results object the columns are read from. It must offer
            ``_parse_column``; it is also handed to ``from_results``.

    Returns:
        A ``CustomFunctionWrapper`` around the HTML-decorated function, using
        ``cls.help()`` as its documentation and ``cls.function_name`` as its
        name.
    """

    def root_for(data_type, key, input_type):
        # data_type arrives from unpacking _parse_column but is not forwarded;
        # only the key and the input type reach from_results.
        root_class = create_root_element(input_type)
        return root_class.from_results(results, key, input_type)

    if cls.RightInputType is not None:

        def func(left_column, right_column, **kwargs):
            left_parent = root_for(
                *results._parse_column(left_column), cls.LeftInputType
            )
            right_parent = root_for(
                *results._parse_column(right_column), cls.RightInputType
            )
            return cls(left_parent=left_parent, right_parent=right_parent, **kwargs)

    else:

        def func(column, **kwargs):
            left_parent = root_for(
                *results._parse_column(column), input_type=cls.LeftInputType
            )
            return cls(left_parent=left_parent, **kwargs)

    decorated = html_decorator(func)
    return CustomFunctionWrapper(decorated, doc=cls.help(), name=cls.function_name)
|
344
|
-
|
345
|
-
@classmethod
|
346
|
-
def help(cls):
    """Return the user-facing help text for this element.

    The text is the docstring of ``cls._primary_function`` with its
    indentation normalised. If that method has no docstring, the class
    docstring is used instead, and failing that an empty text, so the literal
    string "None" is never shown to the user.

    Returns:
        str: The cleaned help text, terminated by a single newline.
    """
    doc = cls._primary_function.__doc__ or cls.__doc__ or ""
    # cleandoc strips the common indentation of the docstring body as well as
    # leading/trailing blank lines, which dedent on an f-string could not do
    # reliably because the first line has different indentation from the rest.
    return inspect.cleandoc(doc) + "\n"
|
354
|
-
|
355
|
-
|
356
|
-
def create_root_element(output_data_type):
    """Create an Element subclass that serves as a parent-less root node.

    Args:
        output_data_type: Stored as the new class's ``OutputDataType`` and
            passed to ``results._fetch_element`` when loading data.

    Returns:
        The newly defined ``Container`` class.
    """

    class Container(Element):
        # A root node takes no inputs; it only carries fetched output data.
        LeftInputType = None
        RightInputType = None
        OutputDataType = output_data_type

        def _primary_function(self):
            # Roots are populated through from_results, never computed.
            raise Exception("Should not be called directly")

        @classmethod
        def from_results(cls, results, data_name, index=None):
            """Instantiate the root from one column of ``results``."""
            parsed_column = results._parse_column(data_name)
            data_type, key = parsed_column
            fetched = results._fetch_element(data_type, key, cls.OutputDataType)
            init_kwargs = dict(
                name=data_name,
                left_parent=None,
                right_parent=None,
                output_data=fetched,
                index=index,
            )
            return cls(**init_kwargs)

        def _html(self):
            """Delegate HTML rendering to the wrapped output data."""
            return self.output_data.html()

    return Container
|
381
|
-
|
382
|
-
|
383
|
-
class PlotMixin:
    # Shared behaviour for elements whose output is a rendered figure.
    OutputDataType = PlotData

    image_format = "svg"

    @staticmethod
    def plt_to_buf(plt, format=image_format):
        """Save the current figure into an in-memory buffer and close it.

        Args:
            plt: Object offering ``savefig`` and ``close`` (pyplot in practice).
            format: Image format handed to ``savefig``.

        Returns:
            BytesIO: Buffer rewound to position 0.
        """
        buffer = BytesIO()
        plt.savefig(buffer, format=format)
        buffer.seek(0)
        plt.close()
        return buffer

    def _html(
        self,
        buffer,
        title,
        format=image_format,
        option_codes=None,
        width_pct=100,
        **kwargs,
    ):
        """Render the image buffer as an inline ``<img>`` plus optional code legends.

        Args:
            buffer: Buffer whose ``getvalue()`` returns the image bytes.
            title: Text emitted before the image.
            format: "png" selects the PNG MIME type; anything else is
                treated as SVG.
            option_codes: Optional dict with "left_option_codes" and/or
                "right_option_codes", each mapping option text to its code.
            width_pct: Width of the image as a percentage.

        Returns:
            str: The HTML fragments joined by newlines.
        """
        encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
        mime_subtype = "png" if format == "png" else "svg+xml"
        fragments = [
            title,
            f"""<img src="data:image/{mime_subtype};base64,{encoded_image}" style="width: {width_pct}%; height: auto;" />""",
        ]
        if option_codes is not None:
            legends = (
                ("left_option_codes", "<p>Codes:</p>"),
                ("right_option_codes", "<p>2nd variable Codes:</p>"),
            )
            for codes_key, heading in legends:
                codes = option_codes.get(codes_key, None)
                if codes is None:
                    continue
                fragments.append(heading)
                for option_text, code in codes.items():
                    fragments.append(f"<p><b>{code}</b>: {option_text}</p>")
        return "\n".join(fragments)
|
425
|
-
|
426
|
-
|
427
|
-
def tally(responses, options):
    """Count how often each response occurs, including unchosen options.

    Args:
        responses: Iterable of hashable responses.
        options: Iterable of options that must appear in the result.

    Returns:
        dict: Response -> count, in first-seen order, followed by any option
        that never occurred with a count of 0.
    """
    counts = dict(Counter(responses))
    for option in options:
        counts.setdefault(option, 0)
    return counts
|
433
|
-
|
434
|
-
|
435
|
-
def _alpha_code(position):
    """Return the spreadsheet-style label for a zero-based position.

    0 -> "A", 25 -> "Z", 26 -> "AA", 27 -> "AB", ...
    """
    label = ""
    remaining = position + 1
    while remaining > 0:
        remaining, offset = divmod(remaining - 1, 26)
        label = chr(ord("A") + offset) + label
    return label


def replace_with_alpha_codes(
    options: list[str], responses: list[str], prefix: str = ""
):
    """Replace options and responses with alphabetical codes.

    Codes are assigned by position: A..Z for the first 26 options, then
    AA, AB, ... so that any number of options is supported (a fixed 26-letter
    generator would run dry on the 27th option).

    Args:
        options: The option texts, in display order.
        responses: The responses; each must be one of ``options``.
        prefix: Text prepended to every code.

    Returns:
        tuple: ``(new_options, new_responses, option_codes)`` where
        ``option_codes`` maps each option text to its code.

    Raises:
        KeyError: If a response is not among ``options``.
    """
    # A repeated option keeps the code of its last position, because later
    # assignments overwrite earlier ones.
    option_codes = {
        option: prefix + _alpha_code(position)
        for position, option in enumerate(options)
    }
    new_options = [option_codes[option] for option in options]
    new_responses = [option_codes[response] for response in responses]
    return new_options, new_responses, option_codes
|
445
|
-
|
446
|
-
|
447
|
-
def header_version(options, index):
    """Build a short header for each option from its first ``index`` words.

    Punctuation is removed and the text lower-cased before splitting on single
    spaces; the kept words are joined with underscores.

    Args:
        options: Iterable of option strings.
        index: Number of leading words to keep.

    Returns:
        list[str]: One header per option, in order.
    """
    strip_punctuation = str.maketrans("", "", string.punctuation)
    headers = []
    for option in options:
        words = option.translate(strip_punctuation).lower().split(" ")
        headers.append("_".join(words[:index]))
    return headers


def find_version(options, index):
    """Find the shortest headers, from ``index`` words upward, that are all distinct.

    Args:
        options: Sequence of option strings.
        index: Number of leading words to start with.

    Returns:
        list[str]: The first candidate from ``header_version`` whose entries
        are unique. If the options cannot be told apart even with all of their
        words (duplicates, or options differing only in case/punctuation), the
        full-length headers are returned as a best effort rather than
        recursing without end.
    """
    # Removing punctuation never adds or removes spaces, so the raw word count
    # equals the cleaned word count used by header_version.
    most_words = max((len(option.split(" ")) for option in options), default=0)
    while True:
        candidate = header_version(options, index)
        if len(set(candidate)) == len(candidate) or index >= most_words:
            return candidate
        index += 1


def get_option_codes_short_name(options):
    """Map each option to the shortest distinguishing header from ``find_version``."""
    return dict(zip(options, find_version(options, 1)))
|
467
|
-
|
468
|
-
|
469
|
-
def to_strings(split_versions):
    """Join each list of words into one underscore-separated string."""
    joined = []
    for words in split_versions:
        joined.append("_".join(words))
    return joined


def is_unique(split_versions):
    """Return True when no two word lists join to the same string."""
    joined = to_strings(split_versions)
    return len(joined) == len(set(joined))
|
475
|
-
|
476
|
-
|
477
|
-
def get_option_codes(options: list[str]):
    """Create a dictionary mapping each option to a short, unique code.

    Each option is stripped of punctuation, lower-cased and split into words.
    Stop words are then removed, and trailing words popped, for as long as the
    resulting codes stay distinct from one another. The remaining words are
    joined with underscores.

    Args:
        options: The option texts.

    Returns:
        dict: Option text -> code, in the order of ``options``.
    """
    strip_punctuation = str.maketrans("", "", string.punctuation)
    # Phrases that should survive as a single token.
    shorthands = {"not sure": "not-sure", "need more": "need-more"}
    stop_words = [
        "a",
        "an",
        "am",
        "the",
        "of",
        "and",
        "or",
        "to",
        "for",
        "in",
        "on",
        "im",
        "that",
        "are",
        "i",
    ]

    split_versions = []
    for option in options:
        cleaned = option.translate(strip_punctuation).lower()
        for phrase, replacement in shorthands.items():
            cleaned = cleaned.replace(phrase, replacement)
        split_versions.append(cleaned.split(" "))

    def codes_are_unique():
        codes = ["_".join(words) for words in split_versions]
        return len(set(codes)) == len(codes)

    # Remove stop words (first occurrence of each) as long as doing so keeps
    # the codes unique. The last remaining word is never removed; otherwise an
    # option made only of stop words (e.g. "I am") would end up with the
    # empty code "".
    for words in split_versions:
        for stop_word in stop_words:
            if len(words) > 1 and stop_word in words:
                position = words.index(stop_word)
                del words[position]
                if not codes_are_unique():
                    words.insert(position, stop_word)

    # Pop trailing words while the codes stay unique. A pass in which no
    # option could be shortened ends the loop.
    while True:
        failed_attempts = 0
        for words in split_versions:
            if len(words) > 1:
                removed = words.pop()
                if not codes_are_unique():
                    # Removing this word made two codes collide; restore it.
                    words.append(removed)
                    failed_attempts += 1
            else:
                # Already down to a single word.
                failed_attempts += 1
        if failed_attempts == len(split_versions):
            break

    return dict(zip(options, ["_".join(words) for words in split_versions]))
|
544
|
-
|
545
|
-
|
546
|
-
def replace_with_codes(
    options: list[str], responses: list[str], short_names_dict=None, prefix=""
):
    """Translate options and responses into their short codes.

    Args:
        options: The option texts.
        responses: The responses; each must be a key of the code mapping.
        short_names_dict: Optional ready-made option -> code mapping. When
            None, the mapping is generated with ``get_option_codes``.
        prefix: Accepted for call compatibility; it is not applied to the
            codes.

    Returns:
        tuple: ``(new_options, new_responses, option_codes)``.
    """
    option_codes = (
        get_option_codes(options) if short_names_dict is None else short_names_dict
    )
    coded_options = [option_codes[option] for option in options]
    coded_responses = [option_codes[response] for response in responses]
    return coded_options, coded_responses, option_codes
|
557
|
-
|
558
|
-
|
559
|
-
class BarChart(PlotMixin, Element):
    "Creates a bar chart plot for categorical data."
    LeftInputType = CategoricalData
    RightInputType = None

    def _primary_function(
        self,
        CategoricalDataObject,
        width=10,
        height=5,
        xlabel="Counts",
        ylabel="",
        footer_fontsize=8,
        title=None,
        use_code=None,
        width_pct=100,
        show_percentage=True,
        filename=None,
    ) -> dict:
        """
        Generates a bar chart from the provided categorical data object.

        ### Args:
            - CategoricalDataObject (CategoricalData): An object containing categorical data to be plotted.
            - `width (int, optional)`: Width of the plot. Defaults to 10.
            - height (int, optional): Height of the plot. Defaults to 5.
            - xlabel (str, optional): Label for the x-axis. Defaults to "Counts".
            - ylabel (str, optional): Label for the y-axis. Defaults to an empty string.
            - footer_fontsize (int, optional): Currently unused. Defaults to 8.
            - title (str, optional): Title of the plot. If None, title is taken from CategoricalDataObject.text. Defaults to None.
            - use_code (bool, optional): Whether to show short codes instead of the full option texts. If None, codes are used when any option is longer than 10 characters. Defaults to None.
            - width_pct (int, optional): Width of the image in the HTML output, in percent. Defaults to 100.
            - show_percentage (bool, optional): Whether to annotate each bar with its share of all responses. Defaults to True.
            - filename (str, optional): If given, the figure is saved to this file and nothing is returned.

        Note:
            If codes are used, each category in the plot is represented by a short name (taken from the
            data object's short names when available, otherwise generated from the option text),
            and the mapping from option text to code is returned alongside the plot.
        """
        responses = CategoricalDataObject.responses
        options = CategoricalDataObject.options
        if title is None:
            title = CategoricalDataObject.text

        option_codes = None

        if use_code is None:
            # default=0 keeps the auto-detection from failing on an empty
            # option list.
            longest_option = max((len(option) for option in options), default=0)
            use_code = longest_option > 10

        if use_code:
            # An empty (or missing) short-name mapping means the codes have to
            # be generated from the option texts.
            short_names = CategoricalDataObject.short_names_dict or None
            options, responses, option_codes = replace_with_codes(
                options, responses, short_names_dict=short_names
            )

        response_count = tally(responses, options)
        total_responses = sum(response_count.values())
        data = {key: response_count[key] for key in options}
        data_df = pd.DataFrame(list(data.items()), columns=["Keys", "Counts"])
        sns.set(style="whitegrid")
        plt.figure(figsize=(width, height))
        ax = sns.barplot(
            x="Counts",
            y="Keys",
            data=data_df,
            palette="Blues_d",
            hue="Keys",
            legend=False,
        )

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(f"{title}")

        # Without any responses there is no meaningful share to display (and
        # the division below would be by zero).
        if show_percentage and total_responses > 0:
            for p in ax.patches:
                percentage = f"{100 * p.get_width() / total_responses:.1f}%"
                # Place the label just past the end of the bar, vertically
                # centred on it.
                x = p.get_x() + p.get_width() + 0.5
                y = p.get_y() + p.get_height() / 2
                ax.text(x, y, percentage, ha="center", va="center")

        plt.tight_layout()

        if filename:
            save_figure(filename)
            return None

        return {
            "buffer": self.plt_to_buf(plt),
            "title": title,
            "option_codes": {
                "left_option_codes": option_codes,
                "right_option_codes": None,
            },
            "width_pct": width_pct,
        }
|
660
|
-
|
661
|
-
|
662
|
-
class HistogramPlot(PlotMixin, Element):
    LeftInputType = NumericalData
    RightInputType = None

    def _primary_function(
        self,
        NumericalDataObject,
        alpha=0.7,
        bins=30,
        xlabel="Value",
        ylabel="Frequency",
        color="blue",
        title=None,
        max_title_length=40,
        width_pct=100,
        filename=None,
    ):
        """
        Generates a histogram plot from a NumericalDataObject.

        This method plots a histogram based on the responses contained within the NumericalDataObject.
        It allows customization of the plot's appearance including the number of bins, transparency,
        color, and axis labels. Optionally, a custom title can be set, or it defaults to the 'text'
        attribute of the NumericalDataObject. Missing (None) responses are left out of the histogram.

        Parameters:
            NumericalDataObject (NumericalData): An object containing numerical data and associated responses.
            alpha (float, optional): The transparency level of the histogram bars. Defaults to 0.7.
            bins (int, optional): The number of bins in the histogram. Defaults to 30.
            xlabel (str, optional): Label for the x-axis. Defaults to "Value".
            ylabel (str, optional): Label for the y-axis. Defaults to "Frequency".
            color (str, optional): Color of the histogram bars. Defaults to "blue".
            title (str, optional): Custom title for the histogram. If None, uses the 'text' attribute from NumericalDataObject.
            max_title_length (int, optional): Longest default title shown before it is cut off with "...". Defaults to 40.
            width_pct (int, optional): Width of the image in the HTML output, in percent. Defaults to 100.
            filename (str, optional): If given, the figure is saved to this file and nothing is returned.

        """
        # None marks a missing response; it cannot be binned, so drop it
        # instead of handing it to plt.hist.
        responses = [
            float(value)
            for value in NumericalDataObject.responses
            if value is not None
        ]
        # max_title_length is honoured as passed; only the default title taken
        # from the data object is ever truncated.
        if title is not None:
            text = title
        elif len(NumericalDataObject.text) > max_title_length:
            text = NumericalDataObject.text[:max_title_length] + "..."
        else:
            text = NumericalDataObject.text

        plt.hist(responses, bins=bins, alpha=alpha, color=color)
        plt.title(f"{text}")
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.tight_layout()

        if filename is not None:
            save_figure(filename)
            return None

        return {
            "buffer": self.plt_to_buf(plt),
            "title": text,
            "option_codes": None,
            "width_pct": width_pct,
        }
|
724
|
-
|
725
|
-
|
726
|
-
class ScatterPlot(PlotMixin, Element):
    LeftInputType = NumericalData
    RightInputType = NumericalData

    def _primary_function(
        self,
        LeftNumericalDataObject,
        RightNumericalDataObject,
        alpha=0.5,
        title=None,
        regression_line=True,
        x_text=None,
        y_text=None,
        width_pct=100,
        filename=None,
    ):
        """
        Generates a scatter plot using numerical data from two provided data objects.

        This method creates a scatter plot to visually represent the relationship between
        two sets of numerical data. It offers customization for the plot's transparency
        (alpha) and title.

        Args:
            LeftNumericalDataObject (NumericalData): The first numerical data object,
                used for the x-axis data.
            RightNumericalDataObject (NumericalData): The second numerical data object,
                used for the y-axis data.
            alpha (float, optional): The transparency level of the scatter plot points.
                A value between 0 (transparent) and 1 (opaque).
                Defaults to 0.5.
            title (str, optional): Title drawn above the scatter plot. If None, the
                plot has no title (the axis labels already carry the two texts).
                Defaults to None.
            regression_line (bool, optional): Whether to draw a fitted regression line
                together with its slope, standard error and intercept. Defaults to True.
            x_text (str, optional): Label for the x-axis. Defaults to the text of
                LeftNumericalDataObject.
            y_text (str, optional): Label for the y-axis. Defaults to the text of
                RightNumericalDataObject.
            width_pct (int, optional): Width of the image in the HTML output, in percent.
                Defaults to 100.
            filename (str, optional): If given, the figure is saved to this file and
                nothing is returned.
        """
        x = LeftNumericalDataObject.responses
        y = RightNumericalDataObject.responses
        if x_text is None:
            x_text = LeftNumericalDataObject.text
        if y_text is None:
            y_text = RightNumericalDataObject.text

        # An explicitly requested title is drawn; by default the figure stays
        # untitled, as it always has been.
        plt.title("" if title is None else title)
        plt.xlabel(x_text)
        plt.ylabel(y_text)
        plt.scatter(x, y, alpha=alpha)

        if regression_line:
            slope, intercept, _, _, std_err = stats.linregress(x, y)
            x_array = np.array(x)
            plt.plot(x, intercept + slope * x_array, color="red")
            regression_info_text = (
                f"Slope: {slope:.3f}\n"
                f"Std Error in Slope: {std_err:.3f}\n"
                f"Intercept: {intercept:.2f}\n"
            )
            # Axes-relative coordinates pin the box to the top-left corner
            # regardless of the data range.
            plt.text(
                0.05,
                0.95,
                regression_info_text,
                transform=plt.gca().transAxes,
                fontsize=9,
                verticalalignment="top",
                bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.5),
            )

        plt.tight_layout()

        if filename is not None:
            save_figure(filename)
            return None

        # The HTML title stays empty; any title is part of the figure itself.
        return {
            "buffer": self.plt_to_buf(plt),
            "title": "",
            "option_codes": None,
            "width_pct": width_pct,
        }
|
809
|
-
|
810
|
-
|
811
|
-
class WordCloudPlot(PlotMixin, Element):
    LeftInputType = FreeTextData
    RightInputType = None

    def _primary_function(
        self,
        FreeTextDataObject,
        width=800,
        height=400,
        background_color="white",
        width_pct=100,
        filename=None,
    ):
        """Creates a word cloud plot for free text data.

        Parameters
        ----------
        column: str
            Name of the column in the results to use.
        width : int
            Width of the plot in pixels.
        height : int
            Height of the plot in pixels.
        background_color : str
            Background color of the plot.
        width_pct : int
            Width of the image in the HTML output, in percent.
        filename : str
            If given, the figure is saved to this file and nothing is returned.
        """
        # Missing (None) answers cannot be joined into the text; skip them.
        responses = " ".join(
            response
            for response in FreeTextDataObject.responses
            if response is not None
        )
        text = FreeTextDataObject.text

        wordcloud = WordCloud(
            width=width, height=height, background_color=background_color
        ).generate(responses)
        plt.figure(figsize=(10, 5))
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.title(f"{text}")

        if filename is not None:
            save_figure(filename)
            return None

        return {
            "buffer": self.plt_to_buf(plt),
            "title": "",
            "option_codes": None,
            "width_pct": width_pct,
        }
|
862
|
-
|
863
|
-
|
864
|
-
class Tally(Element):
    LeftInputType = CategoricalData
    RightInputType = None
    OutputDataType = TallyData

    def _primary_function(self, CategoricalDataObject, **kwargs):
        """Creates a tally of responses to a categorical question."""
        responses = CategoricalDataObject.responses
        text = CategoricalDataObject.text
        options = CategoricalDataObject.options

        # A Counter reports 0 for options that were never selected.
        response_count = Counter(responses)

        # Iterate over a reversed view rather than reversing in place: the
        # option list belongs to the data object, and reversing it here would
        # flip its order again on every call.
        return {
            "responses": {key: response_count[key] for key in reversed(options)},
            "text": text,
        }

    def _html(self, responses, text, **kwargs):
        """Render the tally as a two-column HTML table below the question text.

        Args:
            responses: Mapping of option to count, in display order.
            text: The question text.

        Returns:
            str: The HTML fragments joined by newlines.
        """
        report_html = [
            "<div>",
            f"<p>{text}</p><table>",
        ]
        report_html.extend(
            f"<tr><td>{key}</td><td>{value}</td></tr>"
            for key, value in responses.items()
        )
        report_html.append("</table>")
        report_html.append("</div>")
        return "\n".join(report_html)
|
897
|
-
|
898
|
-
|
899
|
-
def compute_cross_tab(left_responses, left_options, right_responses, right_options):
    """Cross-tabulate two parallel lists of categorical responses.

    Args:
        left_responses: Responses for the outer (row) variable.
        left_options: All options of the outer variable.
        right_responses: Responses for the inner (column) variable, aligned
            with ``left_responses``.
        right_options: All options of the inner variable.

    Returns:
        dict: ``{left_option: {right_option: count}}`` with both levels listed
        in reverse option order and every combination present (0 if unseen).

    Raises:
        KeyError: If a response is not among the corresponding options.
    """
    # Work on reversed copies; the option lists belong to the caller and must
    # not be reordered as a side effect.
    left_order = left_options[::-1]
    right_order = right_options[::-1]

    cross_tab = {
        left_option: {right_option: 0 for right_option in right_order}
        for left_option in left_order
    }
    for left_response, right_response in zip(left_responses, right_responses):
        cross_tab[left_response][right_response] += 1
    return cross_tab
|
921
|
-
|
922
|
-
|
923
|
-
class CrossTab(Element):
    LeftInputType = CategoricalData
    RightInputType = CategoricalData
    OutputDataType = CrossTabData

    def _primary_function(
        self, LeftCategoricalDataObject, RightCategoricalDataObject, **kwargs
    ):
        """Creates a cross tabulation of two categorical variables.
        Parameters
        ----------
        left_column: str
            Name of the column in the results to use for the left side.
        right_column: str
            Name of the column in the results to use for the right side.
        """
        cross_tab = compute_cross_tab(
            LeftCategoricalDataObject.responses,
            LeftCategoricalDataObject.options,
            RightCategoricalDataObject.responses,
            RightCategoricalDataObject.options,
        )
        return {
            "cross_tab": cross_tab,
            "left_title": LeftCategoricalDataObject.text,
            "right_title": RightCategoricalDataObject.text,
        }

    def _html(self, cross_tab, left_title, right_title, **kwargs):
        """Render the cross tabulation as an HTML table.

        Args:
            cross_tab: Nested mapping ``{row_option: {column_option: count}}``.
            left_title: Text of the left variable.
            right_title: Text of the right variable.

        Returns:
            str: The HTML fragments joined by newlines. An empty ``cross_tab``
            yields a table with only an empty header cell.
        """
        report_html = [
            "<div>",
            f"<p>Cross tabulation of: {left_title} and {right_title}</p>",
            "<table>",
        ]

        # All inner dictionaries are assumed to share the same keys, so the
        # first one defines the columns. The default keeps an empty table from
        # raising StopIteration.
        first_row = next(iter(cross_tab.values()), {})
        column_options = list(first_row)
        headers = [""] + column_options
        report_html.append(
            "<tr>" + "".join(f"<th>{header}</th>" for header in headers) + "</tr>"
        )

        for row_option, counts in cross_tab.items():
            cells = [f"<td>{row_option}</td>"]
            cells.extend(
                f"<td>{counts[column_option]}</td>"
                for column_option in column_options
            )
            report_html.append("<tr>" + "".join(cells) + "</tr>")

        report_html.append("</table>")
        report_html.append("</div>")
        return "\n".join(report_html)
|
980
|
-
|
981
|
-
|
982
|
-
# Faceted bar chart: one bar plot of the left variable per option of the
# right variable, arranged in a seaborn FacetGrid.
class FacetedBarChart(PlotMixin, Element):
    LeftInputType = CategoricalData
    RightInputType = CategoricalData

    def _primary_function(
        self,
        LeftCategoricalDataObject,
        RightCategoricalDataObject,
        num_cols=None,
        height=5,
        label_angle=45,
        title=None,
        use_code_left=None,
        use_code_right=None,
        sharey=True,
        width_pct=100,
        filename=None,
    ):
        """ "
        Generates a set of bar plots as a FacetGrid to compare two categorical data sets.

        This method creates a series of bar plots, one for each category in the RightCategoricalDataObject,
        to compare the frequencies of categories from LeftCategoricalDataObject. The plots are
        arranged in a grid layout, with an option to specify the number of columns and the height of each plot.
        Additionally, the angle of the x-axis labels and the title of the grid can be customized.

        Args:
            LeftCategoricalDataObject (CategoricalData): The first categorical data object,
                used for the x-axis data in the bar plots.
            RightCategoricalDataObject (CategoricalData): The second categorical data object,
                whose categories define the grid columns.
            num_cols (int, optional): The number of columns in the FacetGrid. If None, it's calculated
                based on the number of categories in RightCategoricalDataObject.
                Defaults to None.
            height (int, optional): The height of each subplot in the grid. Defaults to 5.
            label_angle (int, optional): The angle for rotating the x-axis labels for readability.
                Defaults to 45 degrees.
            title (str, optional): The overall title of the FacetGrid. If None, a default title is
                generated based on the texts of the categorical data objects.
                Defaults to None.
            use_code_left (bool, optional): Whether to use alphabetical codes for categorical options
                in the left data object.
            use_code_right (bool, optional): Whether to use alphabetical codes for categorical options
            sharey (bool, optional): Whether to share the y-axis across all plots. Defaults to True.

        Notes:
            - The bar plots are generated using seaborn's barplot function within a FacetGrid.
            - The layout of the grid is adjusted to accommodate the overall title and to prevent
            overlap of plot elements.
        """
        left_responses = LeftCategoricalDataObject.responses
        right_responses = RightCategoricalDataObject.responses
        left_text = LeftCategoricalDataObject.text
        right_text = RightCategoricalDataObject.text
        left_options = LeftCategoricalDataObject.options
        right_options = RightCategoricalDataObject.options

        # When not specified, switch to codes as soon as any option text is
        # longer than 10 characters.
        if use_code_left is None:
            max_option_length_left = max([len(option) for option in left_options])
            use_code_left = max_option_length_left > 10
        if use_code_right is None:
            max_option_length_right = max([len(option) for option in right_options])
            use_code_right = max_option_length_right > 10

        # The default title is built from the full texts, before they are
        # shortened below.
        if title is None:
            title = f'"{left_text}" \n by "{right_text}"'

        # Texts longer than 40 characters are cut to their first 20 characters;
        # the shortened texts end up as DataFrame column names further down.
        if len(left_text) > 40:
            left_text = left_text[:20] + "..."
        if len(right_text) > 40:
            right_text = right_text[:20] + "..."

        left_option_codes = None
        right_option_codes = None

        # NOTE(review): a prefix is passed here, but confirm that
        # replace_with_codes actually applies it to the generated codes.
        if use_code_left:
            left_options, left_responses, left_option_codes = replace_with_codes(
                left_options, left_responses, prefix="L-"
            )
        if use_code_right:
            right_options, right_responses, right_option_codes = replace_with_codes(
                right_options, right_responses, prefix="R-"
            )

        # Figures out how many columns to use in the FacetGrid if not specified
        if num_cols is None:
            if len(right_options) < 6:
                num_cols = len(right_options)
            else:
                num_cols = math.ceil(math.sqrt(len(right_options)))

        # Right variable is passed first, so it forms the outer keys of the
        # cross tab (and therefore the DataFrame columns below).
        cross_tab = compute_cross_tab(
            right_responses, right_options, left_responses, left_options
        )

        # d maps code -> option text. The question text is looked up in it and
        # falls back to itself when absent.
        # NOTE(review): when only use_code_left is set, the right-hand lookup
        # below still uses the left mapping — confirm this is intended.
        d = {}
        if use_code_left:
            d = {v: k for k, v in left_option_codes.items()}
        left_option_name = d.get(left_text, left_text)
        if use_code_right:
            d = {v: k for k, v in right_option_codes.items()}
        right_option_name = d.get(right_text, right_text)

        df = pd.DataFrame(cross_tab)
        # Reset index to turn the index into a column
        df = df.reset_index()
        # Rename the columns to be more descriptive
        df.rename(columns={"index": left_option_name}, inplace=True)
        # Melt the DataFrame to long format
        df_long = df.melt(
            id_vars=left_option_name, var_name=right_option_name, value_name="Count"
        )
        sns.set(style="whitegrid")
        # Creating a FacetGrid
        g = sns.FacetGrid(
            df_long,
            col=right_option_name,
            col_wrap=num_cols,
            sharey=sharey,
            height=height,
        )
        # ax = sns.barplot(
        # x="Counts",
        # y="Keys",
        # data=data_df,
        # palette="Blues_d",
        # hue="Keys",
        # legend=False,
        # )
        # Adding bar plots to the FacetGrid
        g = g.map(
            sns.barplot,
            left_option_name,
            "Count",
            order=df_long[left_option_name].unique(),
            palette="viridis",
            hue=df_long[left_option_name].unique(),
            legend=False,
        )
        # Rotating x-axis labels for better readability
        for ax in g.axes.ravel():
            for label in ax.get_xticklabels():
                label.set_rotation(label_angle)

        g.fig.suptitle(f"{title}", fontsize=16)

        # Adjust the layout to make room for the title and prevent overlap
        g.fig.subplots_adjust(top=0.9)  # you can adjust the value as needed

        plt.tight_layout()

        # When a filename is given the figure is saved and nothing is returned.
        if filename is not None:
            save_figure(filename)
            return None

        # The HTML title is left empty; the title is drawn on the figure.
        return {
            "buffer": self.plt_to_buf(plt),
            "title": "",
            "option_codes": {
                "left_option_codes": left_option_codes,
                "right_option_codes": right_option_codes,
            },
            "width_pct": width_pct,
        }
|
1146
|
-
|
1147
|
-
|
1148
|
-
class ChiSquare(Element):
    LeftInputType = CategoricalData
    RightInputType = None
    OutputDataType = ChiSquareData

    def _primary_function(self, CategoricalDataObject, **kwargs):
        """Runs a chi-square test on the response counts of a categorical question.

        Every option is included in the observed counts, with 0 for options
        that were never selected.
        """
        responses = CategoricalDataObject.responses
        text = CategoricalDataObject.text
        options = CategoricalDataObject.options

        response_count = dict(Counter(responses))
        # Add 0s for things that weren't selected even once
        for key in options:
            response_count.setdefault(key, 0)

        observed_counts = list(response_count.values())
        chi_square, p_value = chisquare(observed_counts)
        return {"chi_square": chi_square, "p_value": p_value, "text": text}

    def _html(self, chi_square, p_value, text, digits=3, **kwargs):
        """Render the test result as HTML.

        Args:
            chi_square: The test statistic.
            p_value: The p-value of the test.
            text: The question text.
            digits: Number of decimal places shown.

        Returns:
            str: The HTML fragments joined by newlines.
        """
        # Only paragraphs are emitted, so no <table> is opened here (one used
        # to be opened and never closed, producing malformed HTML).
        report_html = [
            "<div>",
            f"<p>Chi-square test for: {text}</p>",
            f"<p>Chi-square statistic: {round(chi_square, digits)}</p>",
            f"<p>p-value: {round(p_value, digits)}</p>",
            "</div>",
        ]
        return "\n".join(report_html)
|
1174
|
-
|
1175
|
-
|
1176
|
-
class OrderedLogit(Element):
    """Ordered logit regression of one categorical question on another."""

    LeftInputType = CategoricalData
    RightInputType = CategoricalData
    OutputDataType = RegressionData

    def _primary_function(
        self, LeftSideCategoricalData, RightSideCategoricalData, **kwargs
    ):
        """Fit an ordered logit model and return its summary as HTML.

        The left-side responses are the outcome, ordered by the left-side
        ``options``; the right-side responses are the predictor, expanded
        into dummy variables with the first level dropped.

        Returns a dict with keys ``"model_outcome"`` and
        ``"outcome_description"``. If fitting raises, ``"model_outcome"``
        holds an ``"Error: ..."`` string instead of the summary.

        Raises:
            ValueError: if the two response sequences are not lists of the
                same length.
        """
        y = LeftSideCategoricalData.responses
        category_order = LeftSideCategoricalData.options
        X = RightSideCategoricalData.responses
        outcome_description = LeftSideCategoricalData.text
        if not (isinstance(y, list) and isinstance(X, list) and len(y) == len(X)):
            # Leftover debug prints of the full response lists were removed;
            # the exception alone reports the problem.
            raise ValueError("y and X must be lists of the same length.")

        y_ordered = pd.Categorical(y, categories=category_order, ordered=True)

        # Create a DataFrame from the inputs
        data = pd.DataFrame({"Outcome": y_ordered, "Predictor": X})

        # Convert the categorical variable into dummy/indicator variables
        data = pd.get_dummies(data, columns=["Predictor"], drop_first=True)

        for col in data.columns.drop("Outcome"):
            data[col] = pd.to_numeric(data[col], errors="coerce")

        # Cast any boolean indicator columns to integers before fitting.
        for col in data.select_dtypes(include=["bool"]).columns:
            data[col] = data[col].astype(int)
        try:
            model = OrderedModel(
                data["Outcome"], data.drop(columns=["Outcome"]), distr="logit"
            )  # Use 'logit' for logistic distribution
            result = model.fit()
            return {
                "model_outcome": result.summary().as_html(),
                "outcome_description": outcome_description,
            }
        except Exception as e:
            # Best-effort: surface the fitting failure in the report rather
            # than aborting the caller.
            return {
                "model_outcome": f"Error: {e}",
                "outcome_description": outcome_description,
            }

    def _html(self, model_outcome: str, outcome_description: str):
        """Wrap the model summary (or error text) in an HTML fragment."""
        report_html = [
            "<h1>Ordered logit</h1><div>",
            f"<p>Outcome: {outcome_description}</p>",
        ]
        report_html.append(model_outcome)
        report_html.append("</div>")
        return "\n".join(report_html)
|