edsl 0.1.37__py3-none-any.whl → 0.1.37.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +303 -303
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +48 -48
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +804 -855
- edsl/agents/AgentList.py +345 -350
- edsl/agents/Invigilator.py +222 -222
- edsl/agents/InvigilatorBase.py +305 -284
- edsl/agents/PromptConstructor.py +312 -353
- edsl/agents/__init__.py +3 -3
- edsl/agents/descriptors.py +86 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/auto/AutoStudy.py +117 -117
- edsl/auto/StageBase.py +230 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +73 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +224 -224
- edsl/base/Base.py +289 -289
- edsl/config.py +149 -149
- edsl/conjure/AgentConstructionMixin.py +152 -160
- edsl/conjure/Conjure.py +62 -62
- edsl/conjure/InputData.py +659 -659
- edsl/conjure/InputDataCSV.py +48 -48
- edsl/conjure/InputDataMixinQuestionStats.py +182 -182
- edsl/conjure/InputDataPyRead.py +91 -91
- edsl/conjure/InputDataSPSS.py +8 -8
- edsl/conjure/InputDataStata.py +8 -8
- edsl/conjure/QuestionOptionMixin.py +76 -76
- edsl/conjure/QuestionTypeMixin.py +23 -23
- edsl/conjure/RawQuestion.py +65 -65
- edsl/conjure/SurveyResponses.py +7 -7
- edsl/conjure/__init__.py +9 -9
- edsl/conjure/naming_utilities.py +263 -263
- edsl/conjure/utilities.py +201 -201
- edsl/conversation/Conversation.py +238 -290
- edsl/conversation/car_buying.py +58 -58
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +824 -958
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +527 -527
- edsl/data/CacheEntry.py +228 -228
- edsl/data/CacheHandler.py +149 -149
- edsl/data/RemoteCacheSync.py +97 -97
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +4 -4
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +73 -73
- edsl/enums.py +173 -173
- edsl/exceptions/__init__.py +50 -54
- edsl/exceptions/agents.py +40 -38
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +91 -91
- edsl/exceptions/results.py +26 -29
- edsl/exceptions/surveys.py +34 -37
- edsl/inference_services/AnthropicService.py +87 -87
- edsl/inference_services/AwsBedrock.py +115 -120
- edsl/inference_services/AzureAI.py +217 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +156 -156
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +147 -147
- edsl/inference_services/InferenceServicesCollection.py +74 -97
- edsl/inference_services/MistralAIService.py +123 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +224 -224
- edsl/inference_services/TestService.py +89 -89
- edsl/inference_services/TogetherAIService.py +170 -170
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +39 -39
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/Answers.py +56 -56
- edsl/jobs/Jobs.py +1121 -1347
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/buckets/BucketCollection.py +63 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +248 -248
- edsl/jobs/interviews/Interview.py +661 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +182 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/runners/JobsRunnerAsyncio.py +338 -338
- edsl/jobs/runners/JobsRunnerStatus.py +332 -332
- edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +441 -442
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +163 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/LanguageModel.py +718 -706
- edsl/language_models/ModelList.py +102 -102
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/__init__.py +2 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/registry.py +137 -137
- edsl/language_models/repair.py +156 -156
- edsl/language_models/unused/ReplicateBase.py +83 -83
- edsl/language_models/utilities.py +64 -64
- edsl/notebooks/Notebook.py +259 -259
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +353 -357
- edsl/prompts/__init__.py +2 -2
- edsl/questions/AnswerValidatorMixin.py +289 -289
- edsl/questions/QuestionBase.py +616 -656
- edsl/questions/QuestionBaseGenMixin.py +161 -161
- edsl/questions/QuestionBasePromptsMixin.py +266 -234
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +183 -183
- edsl/questions/QuestionFreeText.py +114 -114
- edsl/questions/QuestionFunctional.py +159 -159
- edsl/questions/QuestionList.py +231 -231
- edsl/questions/QuestionMultipleChoice.py +286 -286
- edsl/questions/QuestionNumerical.py +153 -153
- edsl/questions/QuestionRank.py +324 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/RegisterQuestionsMeta.py +71 -71
- edsl/questions/ResponseValidatorABC.py +174 -174
- edsl/questions/SimpleAskMixin.py +73 -73
- edsl/questions/__init__.py +26 -26
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +87 -87
- edsl/questions/derived/QuestionTopK.py +91 -91
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +418 -413
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/question_registry.py +147 -147
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/Dataset.py +293 -293
- edsl/results/DatasetExportMixin.py +693 -717
- edsl/results/DatasetTree.py +145 -145
- edsl/results/Result.py +435 -450
- edsl/results/Results.py +1160 -1071
- edsl/results/ResultsDBMixin.py +238 -238
- edsl/results/ResultsExportMixin.py +43 -43
- edsl/results/ResultsFetchMixin.py +33 -33
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/ResultsToolsMixin.py +98 -98
- edsl/results/Selector.py +118 -135
- edsl/results/__init__.py +2 -2
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/FileStore.py +458 -458
- edsl/scenarios/Scenario.py +510 -546
- edsl/scenarios/ScenarioHtmlMixin.py +59 -64
- edsl/scenarios/ScenarioList.py +1101 -1112
- edsl/scenarios/ScenarioListExportMixin.py +52 -52
- edsl/scenarios/ScenarioListPdfMixin.py +261 -261
- edsl/scenarios/__init__.py +4 -4
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +528 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +324 -330
- edsl/surveys/RuleCollection.py +387 -387
- edsl/surveys/Survey.py +1772 -1795
- edsl/surveys/SurveyCSS.py +261 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/SurveyFlowVisualizationMixin.py +121 -121
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/__init__.py +3 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +56 -56
- edsl/surveys/instructions/ChangeInstruction.py +47 -47
- edsl/surveys/instructions/Instruction.py +51 -51
- edsl/surveys/instructions/InstructionCollection.py +77 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +9 -9
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/interface.py +627 -627
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +391 -409
- {edsl-0.1.37.dist-info → edsl-0.1.37.dev3.dist-info}/LICENSE +21 -21
- {edsl-0.1.37.dist-info → edsl-0.1.37.dev3.dist-info}/METADATA +1 -1
- edsl-0.1.37.dev3.dist-info/RECORD +279 -0
- edsl/conversation/chips.py +0 -95
- edsl/exceptions/BaseException.py +0 -21
- edsl/exceptions/scenarios.py +0 -22
- edsl/language_models/KeyLookup.py +0 -30
- edsl-0.1.37.dist-info/RECORD +0 -283
- {edsl-0.1.37.dist-info → edsl-0.1.37.dev3.dist-info}/WHEEL +0 -0
edsl/conjure/InputData.py
CHANGED
@@ -1,659 +1,659 @@
|
|
1
|
-
import base64
|
2
|
-
from abc import ABC, abstractmethod
|
3
|
-
from typing import Dict, Callable, Optional, List, Generator, Tuple, Union
|
4
|
-
from collections import namedtuple
|
5
|
-
from typing import List, Union
|
6
|
-
|
7
|
-
from edsl.questions.QuestionBase import QuestionBase
|
8
|
-
|
9
|
-
from edsl.scenarios.ScenarioList import ScenarioList
|
10
|
-
from edsl.surveys.Survey import Survey
|
11
|
-
from edsl.conjure.SurveyResponses import SurveyResponses
|
12
|
-
from edsl.conjure.naming_utilities import sanitize_string
|
13
|
-
from edsl.utilities.utilities import is_valid_variable_name
|
14
|
-
|
15
|
-
from edsl.conjure.RawQuestion import RawQuestion
|
16
|
-
from edsl.conjure.AgentConstructionMixin import AgentConstructionMixin
|
17
|
-
|
18
|
-
from edsl.conjure.QuestionOptionMixin import QuestionOptionMixin
|
19
|
-
from edsl.conjure.InputDataMixinQuestionStats import InputDataMixinQuestionStats
|
20
|
-
from edsl.conjure.QuestionTypeMixin import QuestionTypeMixin
|
21
|
-
|
22
|
-
|
23
|
-
class InputDataABC(
|
24
|
-
ABC,
|
25
|
-
InputDataMixinQuestionStats,
|
26
|
-
AgentConstructionMixin,
|
27
|
-
QuestionOptionMixin,
|
28
|
-
QuestionTypeMixin,
|
29
|
-
):
|
30
|
-
"""A class to represent the input data for a survey."""
|
31
|
-
|
32
|
-
NUM_UNIQUE_THRESHOLD = 15
|
33
|
-
FRAC_NUMERICAL_THRESHOLD = 0.8
|
34
|
-
MULTIPLE_CHOICE_OTHER_THRESHOLD = 0.5
|
35
|
-
OTHER_STRING = "Other:"
|
36
|
-
|
37
|
-
question_attributes = [
|
38
|
-
"num_responses",
|
39
|
-
"num_unique_responses",
|
40
|
-
"missing",
|
41
|
-
"unique_responses",
|
42
|
-
"frac_numerical",
|
43
|
-
"top_5",
|
44
|
-
"frac_obs_from_top_5",
|
45
|
-
]
|
46
|
-
QuestionStats = namedtuple("QuestionStats", question_attributes)
|
47
|
-
|
48
|
-
def __init__(
    self,
    datafile_name: str,
    config: Optional[dict] = None,
    naming_function: Optional[Callable] = sanitize_string,
    raw_data: Optional[List] = None,
    binary: Optional[str] = None,
    question_names: Optional[List[str]] = None,
    question_texts: Optional[List[str]] = None,
    answer_codebook: Optional[Dict] = None,
    question_types: Optional[List[str]] = None,
    question_options: Optional[List] = None,
    order_options=False,
    question_name_repair_func: Callable = None,
):
    """Set up the survey input data.

    :param datafile_name: Path of the file holding the data.
    :param config: Configuration parameters for reading the data.
    :param naming_function: Stored on the instance as ``naming_function``.
    :param raw_data: Response data; when None it is obtained from ``get_raw_data()``.
    :param binary: Base64 text of the data file; when None the file is read and encoded.
    :param question_names: Names of the questions.
    :param question_texts: Texts of the questions.
    :param answer_codebook: Codebook for the answers, keyed by question name.
    :param question_types: Types of the questions.
    :param question_options: Options for the questions.
    :param order_options: When truthy, ``self.order_options()`` is called last.
    :param question_name_repair_func: Callable used to repair invalid question names.

    >>> id = InputDataABC.example(question_names = ['a','b'], answer_codebook = {'a': {'1':'yes', '2':'no'}, 'b': {'1':'yes', '2':'no'}})

    >>> id = InputDataABC.example(question_names = ['a','b'], answer_codebook = {'a': {'1':'yes', '2':'no'}, 'c': {'1':'yes', '2':'no'}})
    Traceback (most recent call last):
    ...
    Exception: The keys of the answer_codebook must match the question_names.
    """
    self.datafile_name = datafile_name
    self.config = config
    self.naming_function = naming_function

    # Use the supplied payload, otherwise encode the file; a missing file is tolerated.
    encoded = binary
    if encoded is None:
        try:
            with open(self.datafile_name, "rb") as handle:
                encoded = base64.b64encode(handle.read()).decode()
        except FileNotFoundError:
            encoded = None
    self.binary = encoded

    def default_repair_func(x):
        # Replacements are applied in this exact order.
        for bad, good in (
            ("#", "_num"),
            ("class", "social_class"),
            ("name", "respondent_name"),
        ):
            x = x.replace(bad, good)
        return x

    # Must be set before question_names is assigned: the setter calls it.
    self.question_name_repair_func = (
        question_name_repair_func or default_repair_func
    )

    have_names = question_names is not None
    if answer_codebook is not None and have_names:
        if set(answer_codebook.keys()) != set(question_names):
            raise Exception(
                "The keys of the answer_codebook must match the question_names."
            )
    if have_names and question_texts is not None:
        if len(question_names) != len(question_texts):
            raise Exception(
                "The question_names and question_texts must have the same length."
            )

    # Assignment order is significant: each of these goes through a property setter.
    self.question_texts = question_texts
    self.question_names = question_names
    self.answer_codebook = answer_codebook
    self.raw_data = raw_data

    self.apply_codebook()

    self.question_types = question_types
    self.question_options = question_options
    if order_options:
        self.order_options()
|
129
|
-
|
130
|
-
@property
def download_link(self):
    """Return an IPython ``HTML`` anchor embedding ``self.binary`` as a data URI."""
    from IPython.display import HTML

    # Only the last "/"-separated component is used as the download file name.
    base_name = self.datafile_name.rsplit("/", 1)[-1]
    anchor = f'<a href="data:text/plain;base64,{self.binary}" download="{base_name}">Download {self.datafile_name}</a>'
    return HTML(anchor)
|
137
|
-
|
138
|
-
@abstractmethod
def get_question_texts(self) -> List[str]:
    """Return the question texts; concrete subclasses must override this.

    >>> id = InputDataABC.example()
    >>> id.get_question_texts()
    ['how are you doing this morning?', 'how are you feeling?']

    """
    raise NotImplementedError
|
148
|
-
|
149
|
-
@abstractmethod
def get_raw_data(self) -> List[List[str]]:
    """Return the responses; concrete subclasses must override this.

    >>> id = InputDataABC.example()
    >>> id.get_raw_data()
    [['1', '4'], ['3', '6']]

    """
    raise NotImplementedError
|
159
|
-
|
160
|
-
@abstractmethod
def get_question_names(self) -> List[str]:
    """Return the question names; concrete subclasses must override this.

    >>> id = InputDataABC.example()
    >>> id.get_question_names()
    ['morning', 'feeling']

    """
    raise NotImplementedError
|
170
|
-
|
171
|
-
def rename_questions(
    self, rename_dict: Dict[str, str], ignore_missing=False
) -> "InputData":
    """Apply several renames, one ``rename`` call per mapping entry.

    :param rename_dict: Maps each current question name to its new name.
    :param ignore_missing: Forwarded to ``rename`` for every entry.

    >>> id = InputDataABC.example()
    >>> id.rename_questions({'morning': 'evening'}).question_names
    ['evening', 'feeling']

    """
    for current in rename_dict:
        self.rename(current, rename_dict[current], ignore_missing=ignore_missing)
    return self
|
184
|
-
|
185
|
-
def rename(self, old_name, new_name, ignore_missing=False) -> "InputData":
    """Rename a question and move its codebook entry to the new name.

    :param old_name: Current name of the question.
    :param new_name: Name to give the question.
    :param ignore_missing: When True, an unknown ``old_name`` is a no-op.
    :raises ValueError: If ``old_name`` is unknown (and not ignored), or if
        ``new_name`` is already used by a different question.

    >>> id = InputDataABC.example()
    >>> id.rename('morning', 'evening').question_names
    ['evening', 'feeling']

    """
    names = self.question_names
    if old_name not in names:
        if ignore_missing:
            return self
        raise ValueError(f"Question {old_name} not found.")

    # Writing into the list bypasses the setter's uniqueness check, so a
    # collision has to be rejected here or two questions would share a name.
    if new_name != old_name and new_name in names:
        raise ValueError(f"Question {new_name} already exists.")

    names[names.index(old_name)] = new_name
    # A question without a codebook entry gets an empty one under the new name.
    self.answer_codebook[new_name] = self.answer_codebook.pop(old_name, {})

    return self
|
204
|
-
|
205
|
-
def _drop_question(self, question_name, ignore_missing=False):
|
206
|
-
"""Drop a question
|
207
|
-
|
208
|
-
>>> id = InputDataABC.example()
|
209
|
-
>>> id._drop_question('morning').question_names
|
210
|
-
['feeling']
|
211
|
-
|
212
|
-
"""
|
213
|
-
if question_name not in self.question_names:
|
214
|
-
if ignore_missing:
|
215
|
-
return self
|
216
|
-
else:
|
217
|
-
raise ValueError(f"Question {question_name} not found.")
|
218
|
-
idx = self.question_names.index(question_name)
|
219
|
-
self._question_names.pop(idx)
|
220
|
-
self._question_texts.pop(idx)
|
221
|
-
self.question_types.pop(idx)
|
222
|
-
self.question_options.pop(idx)
|
223
|
-
self.raw_data.pop(idx)
|
224
|
-
self.answer_codebook.pop(question_name, None)
|
225
|
-
return self
|
226
|
-
|
227
|
-
def drop(self, *question_names_to_drop) -> "InputData":
    """Remove each named question in turn via ``_drop_question``.

    >>> id = InputDataABC.example()
    >>> id.drop('morning').question_names
    ['feeling']

    """
    for name in question_names_to_drop:
        self._drop_question(name)
    return self
|
238
|
-
|
239
|
-
def keep(self, *question_names_to_keep, ignore_missing=False) -> "InputDataABC":
    """Remove every question whose name is not among those given.

    :param ignore_missing: Forwarded to ``_drop_question``.

    >>> id = InputDataABC.example()
    >>> id.keep('morning').question_names
    ['morning']

    """
    # Decide up front, since dropping mutates the list being inspected.
    unwanted = [
        name for name in self._question_names if name not in question_names_to_keep
    ]
    for name in unwanted:
        self._drop_question(name, ignore_missing=ignore_missing)
    return self
|
252
|
-
|
253
|
-
def modify_question_type(
    self,
    question_name: str,
    new_type: str,
    drop_options: bool = False,
    new_options: Optional[List[str]] = None,
) -> "InputData":
    """Change a question's type, reverting if the question can no longer be built.

    :param question_name: Name of the question to change.
    :param new_type: New type; must be listed by ``Question.available()``.
    :param drop_options: When True, the question's options are set to None.
    :param new_options: When given, replaces the question's options.
    :raises ValueError: If ``new_type`` is not an available question type.

    >>> id = InputDataABC.example()
    >>> id.modify_question_type('morning', 'numerical', drop_options = True).question_types
    ['numerical', 'multiple_choice']

    >>> id = InputDataABC.example()
    >>> id.modify_question_type('morning', 'poop')
    Traceback (most recent call last):
    ...
    ValueError: Question type poop is not available.
    """
    idx = self.question_names.index(question_name)
    # Remembered so a failed conversion can be undone below.
    old_type = self.question_types[idx]
    old_options = self.question_options[idx]

    from edsl import Question

    if new_type not in Question.available():
        raise ValueError(f"Question type {new_type} is not available.")

    self.question_types[idx] = new_type
    if drop_options:
        self.question_options[idx] = None
    if new_options is not None:
        self.question_options[idx] = new_options

    # Building the question is the validity check; the result itself is unused.
    try:
        self.raw_question(idx).to_question()
    except Exception as e:
        print(f"Error with question {question_name} in {self.datafile_name}")
        print(e)
        print("Reverting changes")
        self.question_types[idx] = old_type
        self.question_options[idx] = old_options
    return self
|
298
|
-
|
299
|
-
@property
def num_observations(self):
    """Return the number of observations, taken from the first question's responses.

    Returns 0 when there are no questions instead of raising IndexError.

    >>> id = InputDataABC.example()
    >>> id.num_observations
    2

    """
    columns = self.raw_data
    if not columns:
        return 0
    return len(columns[0])
|
309
|
-
|
310
|
-
def to_dict(self):
    """Return the constructor keyword arguments needed to rebuild this object.

    ``question_options`` is included so that options survive a
    ``from_dict(to_dict())`` round trip; the constructor accepts that keyword.
    """
    return {
        "datafile_name": self.datafile_name,
        "config": self.config,
        "raw_data": self.raw_data,
        "question_names": self.question_names,
        "question_texts": self.question_texts,
        "binary": self.binary,
        "answer_codebook": self.answer_codebook,
        "question_types": self.question_types,
        "question_options": self.question_options,
    }
|
321
|
-
|
322
|
-
@classmethod
def from_dict(cls, d: Dict):
    """Build an instance by passing the entries of ``d`` as keyword arguments."""
    kwargs = {key: d[key] for key in d}
    return cls(**kwargs)
|
325
|
-
|
326
|
-
@property
def question_names(self) -> List[str]:
    """
    Return the list of question names, initialising it on first access.

    >>> id = InputDataABC.example()
    >>> id.question_names
    ['morning', 'feeling']

    We can pass question names instead:

    >>> id = InputDataABC.example(question_names = ['a','b'])
    >>> id.question_names
    ['a', 'b']

    """
    unset = object()
    if getattr(self, "_question_names", unset) is unset:
        # Assigning None makes the setter fall back to get_question_names().
        self.question_names = None
    return self._question_names
|
345
|
-
|
346
|
-
@question_names.setter
def question_names(self, value) -> None:
    """Validate, repair and store the question names.

    ``None`` means "use ``get_question_names()``". Names that are not valid
    variable names are passed through ``question_name_repair_func``. The
    stored list is a new list, so the caller's sequence is not modified.

    :raises ValueError: If names are not unique (before or after repair), or
        if a name is still invalid after repair.
    """
    if value is None:
        value = self.get_question_names()
    if len(set(value)) != len(value):
        raise ValueError("Question names must be unique.")

    repaired = []
    for qn in value:
        if is_valid_variable_name(qn, allow_name=False):
            repaired.append(qn)
            continue
        new_name = self.question_name_repair_func(qn)
        if not is_valid_variable_name(new_name, allow_name=False):
            # One message string: two positional arguments would make the
            # exception text render as a tuple.
            raise ValueError(
                f"""Question names must be valid Python identifiers. '{qn}' is not. """
                """You can pass an entry in question_name_repair_func to fix this."""
            )
        repaired.append(new_name)

    # Repair can map two distinct inputs onto the same name.
    if len(set(repaired)) != len(repaired):
        raise ValueError("Question names must be unique.")
    self._question_names = repaired
|
365
|
-
|
366
|
-
@property
def question_texts(self) -> List[str]:
    """
    Return the list of question texts, initialising it on first access.

    >>> id = InputDataABC.example()
    >>> id.question_texts
    ['how are you doing this morning?', 'how are you feeling?']
    """
    unset = object()
    if getattr(self, "_question_texts", unset) is unset:
        # Assigning None makes the setter fall back to get_question_texts().
        self.question_texts = None
    return self._question_texts
|
378
|
-
|
379
|
-
@question_texts.setter
def question_texts(self, value):
    """Store the question texts; ``None`` means use ``get_question_texts()``."""
    self._question_texts = self.get_question_texts() if value is None else value
|
384
|
-
|
385
|
-
@property
def raw_data(self):
    """
    Return the raw response data, initialising it on first access.

    >>> id = InputDataABC.example()
    >>> id.raw_data
    [['1', '4'], ['3', '6']]

    """
    unset = object()
    if getattr(self, "_raw_data", unset) is unset:
        # Assigning None makes the setter fall back to get_raw_data().
        self.raw_data = None
    return self._raw_data
|
397
|
-
|
398
|
-
@raw_data.setter
def raw_data(self, value):
    """Store the raw response data; ``None`` means use ``get_raw_data()``."""
    self._raw_data = self.get_raw_data() if value is None else value
|
405
|
-
|
406
|
-
def to_dataset(self) -> "Dataset":
    """Return a ``Dataset`` built from one ``{question_name: responses}`` dict per question."""
    from edsl.results.Dataset import Dataset

    columns = [
        {name: responses}
        for name, responses in zip(self.question_names, self.raw_data)
    ]
    return Dataset(columns)
|
413
|
-
|
414
|
-
def to_scenario_list(self) -> ScenarioList:
    """Return a ScenarioList object from the raw response data.

    Each question contributes one field, filled from the responses stored at
    the same position as its name.

    >>> id = InputDataABC.example()
    >>> s = id.to_scenario_list()
    >>> type(s) == ScenarioList
    True

    >>> s
    ScenarioList([Scenario({'morning': '1', 'feeling': '3'}), Scenario({'morning': '4', 'feeling': '6'})])

    """
    s = ScenarioList()
    # enumerate gives the position directly; list.index would rescan the
    # names for every question and always return the first match.
    for idx, qn in enumerate(self.question_names):
        s = s.add_list(qn, self.raw_data[idx])
    return s
|
431
|
-
|
432
|
-
@property
def names_to_texts(self) -> dict:
    """
    Return a mapping from each question name to its question text.

    >>> id = InputDataABC.example()
    >>> id.names_to_texts
    {'morning': 'how are you doing this morning?', 'feeling': 'how are you feeling?'}
    """
    return dict(zip(self.question_names, self.question_texts))
|
442
|
-
|
443
|
-
@property
def texts_to_names(self):
    """Return a mapping from each question text to its question name.

    >>> id = InputDataABC.example()
    >>> id.texts_to_names
    {'how are you doing this morning?': 'morning', 'how are you feeling?': 'feeling'}

    """
    inverted = {}
    for name, text in self.names_to_texts.items():
        # When two questions share a text, the later name wins.
        inverted[text] = name
    return inverted
|
453
|
-
|
454
|
-
def raw_question(self, index: int) -> RawQuestion:
    """Return a ``RawQuestion`` built from the entries stored at ``index``."""
    fields = {
        "question_type": self.question_types[index],
        "question_name": self.question_names[index],
        "question_text": self.question_texts[index],
        "responses": self.raw_data[index],
        "question_options": self.question_options[index],
    }
    return RawQuestion(**fields)
|
462
|
-
|
463
|
-
def raw_questions(self) -> Generator[RawQuestion, None, None]:
    """Yield one ``RawQuestion`` per question, in question order."""
    # enumerate gives the position directly; list.index would rescan the
    # names for every question and always return the first match.
    for idx, _ in enumerate(self.question_names):
        yield self.raw_question(idx)
|
468
|
-
|
469
|
-
def questions(self) -> Generator[Union[QuestionBase, None], None, None]:
    """Yield a question object per raw question, or ``None`` where conversion fails.

    A failed conversion is reported with ``print`` rather than raised.
    """
    for raw in self.raw_questions():
        try:
            yield raw.to_question()
        except Exception as exc:
            message = (
                f"Error with question '{raw.question_name}' in '{self.datafile_name}'"
            )
            print(message)
            print(exc)
            yield None
|
480
|
-
|
481
|
-
def select(self, *question_names: str) -> "InputData":
    """Return a new object of the same class holding only the named questions.

    The already-encoded ``binary`` payload is forwarded, so the new object
    does not re-read the data file.

    :param question_names: The names of the questions to select.
    :raises ValueError: If a name is not a known question (from ``list.index``).

    >>> id = InputDataABC.example()
    >>> id.select('morning').question_names
    ['morning']

    """
    idxs = [self.question_names.index(qn) for qn in question_names]
    new_data = [self.raw_data[i] for i in idxs]
    new_texts = [self.question_texts[i] for i in idxs]
    new_types = [self.question_types[i] for i in idxs]
    new_options = [self.question_options[i] for i in idxs]
    new_names = [self.question_names[i] for i in idxs]
    # Questions without a codebook entry get an empty one.
    answer_codebook = {
        qn: self.answer_codebook.get(qn, {}) for qn in question_names
    }
    return self.__class__(
        self.datafile_name,
        self.config,
        raw_data=new_data,
        binary=self.binary,
        question_names=new_names,
        question_texts=new_texts,
        question_types=new_types,
        question_options=new_options,
        answer_codebook=answer_codebook,
        question_name_repair_func=self.question_name_repair_func,
    )
|
512
|
-
|
513
|
-
def to_survey(self) -> Survey:
    """Return a ``Survey`` holding every question that could be built.

    >>> id = InputDataABC.example()
    >>> s = id.to_survey()
    >>> type(s) == Survey
    True

    """
    survey = Survey()
    # questions() yields None for a question that failed to convert; skip those.
    for question in self.questions():
        if question is None:
            continue
        survey.add_question(question)
    return survey
|
526
|
-
|
527
|
-
def print(self):
    """Print a per-question summary table built as a ``ScenarioList``."""
    table = ScenarioList.from_list("question_name", self.question_names)
    table = table.add_list("question_text", self.question_texts)
    table = table.add_list("inferred_question_type", self.question_types)
    table = table.add_list("num_responses", self.num_responses)
    table = table.add_list("num_unique_responses", self.num_unique_responses)
    table = table.add_list("missing", self.missing)
    table = table.add_list("frac_numerical", self.frac_numerical)
    table = table.add_list("top_5_items", self.top_k(5))
    table = table.add_list("frac_obs_from_top_5", self.frac_obs_from_top_k(5))
    table.print()
|
540
|
-
|
541
|
-
@property
|
542
|
-
def answer_codebook(self) -> dict:
|
543
|
-
"""Return the answer codebook.
|
544
|
-
>>> id = InputDataABC.example(answer_codebook = {'morning':{'1':'hello'}})
|
545
|
-
>>> id.answer_codebook
|
546
|
-
{'morning': {'1': 'hello'}}
|
547
|
-
|
548
|
-
"""
|
549
|
-
if not hasattr(self, "_answer_codebook"):
|
550
|
-
self._answer_codebook = None
|
551
|
-
return self._answer_codebook
|
552
|
-
|
553
|
-
@answer_codebook.setter
|
554
|
-
def answer_codebook(self, value):
|
555
|
-
if value is None:
|
556
|
-
value = self.get_answer_codebook()
|
557
|
-
self._answer_codebook = value
|
558
|
-
|
559
|
-
def get_answer_codebook(self):
|
560
|
-
return {}
|
561
|
-
|
562
|
-
def _drop_rows(self, indices: List[int]):
|
563
|
-
"""Drop rows from the raw data.
|
564
|
-
:param indices
|
565
|
-
|
566
|
-
>>> id = InputDataABC.example()
|
567
|
-
>>> id.num_observations
|
568
|
-
2
|
569
|
-
>>> _ = id._drop_rows([1])
|
570
|
-
>>> id.num_observations
|
571
|
-
1
|
572
|
-
|
573
|
-
"""
|
574
|
-
self.raw_data = [
|
575
|
-
[r for i, r in enumerate(row) if i not in indices] for row in self.raw_data
|
576
|
-
]
|
577
|
-
return self
|
578
|
-
|
579
|
-
def _missing_indices(self, question_name):
|
580
|
-
"""Return the indices of missing values for a question.
|
581
|
-
TODO: Could re-factor to use SimpleEval
|
582
|
-
|
583
|
-
>>> id = InputDataABC.example()
|
584
|
-
>>> id.raw_data[0][0] = 'missing'
|
585
|
-
>>> id._missing_indices('morning')
|
586
|
-
[0]
|
587
|
-
"""
|
588
|
-
idx = self.question_names.index(question_name)
|
589
|
-
return [i for i, r in enumerate(self.raw_data[idx]) if r == "missing"]
|
590
|
-
|
591
|
-
def drop_missing(self, question_name):
|
592
|
-
"""Drop missing values for a question.
|
593
|
-
|
594
|
-
>>> id = InputDataABC.example()
|
595
|
-
>>> id.num_observations
|
596
|
-
2
|
597
|
-
>>> id.raw_data[0][0] = 'missing'
|
598
|
-
>>> id.drop_missing('morning')
|
599
|
-
>>> id.num_observations
|
600
|
-
1
|
601
|
-
"""
|
602
|
-
self._drop_rows(self._missing_indices(question_name))
|
603
|
-
|
604
|
-
@property
|
605
|
-
def num_observations(self):
|
606
|
-
"""
|
607
|
-
Return the number of observations
|
608
|
-
|
609
|
-
>>> id = InputDataABC.example()
|
610
|
-
>>> id.num_observations
|
611
|
-
2
|
612
|
-
"""
|
613
|
-
return len(self.raw_data[0])
|
614
|
-
|
615
|
-
def apply_codebook(self) -> None:
|
616
|
-
"""Apply the codebook to the raw data.
|
617
|
-
|
618
|
-
>>> id = InputDataABC.example()
|
619
|
-
>>> id.raw_data
|
620
|
-
[['1', '4'], ['3', '6']]
|
621
|
-
|
622
|
-
>>> id = InputDataABC.example(answer_codebook = {'morning':{'1':'hello'}})
|
623
|
-
>>> id.raw_data
|
624
|
-
[['hello', '4'], ['3', '6']]
|
625
|
-
"""
|
626
|
-
for index, qn in enumerate(self.question_names):
|
627
|
-
if qn in self.answer_codebook:
|
628
|
-
new_responses = [
|
629
|
-
self.answer_codebook[qn].get(r, r) for r in self.raw_data[index]
|
630
|
-
]
|
631
|
-
self.raw_data[index] = new_responses
|
632
|
-
|
633
|
-
def __repr__(self):
|
634
|
-
return f"{self.__class__.__name__}: datafile_name:'{self.datafile_name}' num_questions:{len(self.question_names)}, num_observations:{len(self.raw_data[0])}"
|
635
|
-
|
636
|
-
@classmethod
|
637
|
-
def example(cls, **kwargs) -> "InputDataABC":
|
638
|
-
class InputDataExample(InputDataABC):
|
639
|
-
def get_question_texts(self) -> List[str]:
|
640
|
-
"""Get the text of the questions"""
|
641
|
-
return ["how are you doing this morning?", "how are you feeling?"]
|
642
|
-
|
643
|
-
def get_raw_data(self) -> SurveyResponses:
|
644
|
-
"""Returns a dataframe of responses by reading the datafile_name."""
|
645
|
-
return [["1", "4"], ["3", "6"]]
|
646
|
-
|
647
|
-
def get_question_names(self):
|
648
|
-
new_names = [self.naming_function(q) for q in self.question_texts]
|
649
|
-
if len(new_names) != len(set(new_names)):
|
650
|
-
new_names = [f"{q}_{i}" for i, q in enumerate(new_names)]
|
651
|
-
return new_names
|
652
|
-
|
653
|
-
return InputDataExample("notneeded", config={}, **kwargs)
|
654
|
-
|
655
|
-
|
656
|
-
if __name__ == "__main__":
|
657
|
-
import doctest
|
658
|
-
|
659
|
-
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
1
|
+
import base64
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Dict, Callable, Optional, List, Generator, Tuple, Union
|
4
|
+
from collections import namedtuple
|
5
|
+
from typing import List, Union
|
6
|
+
|
7
|
+
from edsl.questions.QuestionBase import QuestionBase
|
8
|
+
|
9
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
10
|
+
from edsl.surveys.Survey import Survey
|
11
|
+
from edsl.conjure.SurveyResponses import SurveyResponses
|
12
|
+
from edsl.conjure.naming_utilities import sanitize_string
|
13
|
+
from edsl.utilities.utilities import is_valid_variable_name
|
14
|
+
|
15
|
+
from edsl.conjure.RawQuestion import RawQuestion
|
16
|
+
from edsl.conjure.AgentConstructionMixin import AgentConstructionMixin
|
17
|
+
|
18
|
+
from edsl.conjure.QuestionOptionMixin import QuestionOptionMixin
|
19
|
+
from edsl.conjure.InputDataMixinQuestionStats import InputDataMixinQuestionStats
|
20
|
+
from edsl.conjure.QuestionTypeMixin import QuestionTypeMixin
|
21
|
+
|
22
|
+
|
23
|
+
class InputDataABC(
    ABC,
    InputDataMixinQuestionStats,
    AgentConstructionMixin,
    QuestionOptionMixin,
    QuestionTypeMixin,
):
    """A class to represent the input data for a survey.

    Data is held column-wise: ``raw_data[i]`` is the list of responses to the
    question whose name is ``question_names[i]`` and whose text is
    ``question_texts[i]``. Concrete subclasses implement
    ``get_question_texts``, ``get_raw_data`` and ``get_question_names``.
    """

    # Tuning constants. None of them is read inside this class body;
    # presumably the question-type / option mixins consume them -- TODO confirm.
    NUM_UNIQUE_THRESHOLD = 15
    FRAC_NUMERICAL_THRESHOLD = 0.8
    MULTIPLE_CHOICE_OTHER_THRESHOLD = 0.5
    OTHER_STRING = "Other:"

    # Field names of the ``QuestionStats`` record defined just below.
    question_attributes = [
        "num_responses",
        "num_unique_responses",
        "missing",
        "unique_responses",
        "frac_numerical",
        "top_5",
        "frac_obs_from_top_5",
    ]
    QuestionStats = namedtuple("QuestionStats", question_attributes)
|
47
|
+
|
48
|
+
def __init__(
    self,
    datafile_name: str,
    config: Optional[dict] = None,
    naming_function: Optional[Callable] = sanitize_string,
    raw_data: Optional[List] = None,
    binary: Optional[str] = None,
    question_names: Optional[List[str]] = None,
    question_texts: Optional[List[str]] = None,
    answer_codebook: Optional[Dict] = None,
    question_types: Optional[List[str]] = None,
    question_options: Optional[List] = None,
    order_options: bool = False,
    question_name_repair_func: Optional[Callable] = None,
) -> None:
    """Initialize the InputData object.

    Any of the list/dict arguments left as ``None`` is filled in by the
    matching property setter, which calls the corresponding ``get_*`` hook.

    :param datafile_name: The name of the file containing the data.
    :param config: The configuration parameters for reading the data.
    :param naming_function: Callable stored on the instance; the example subclass uses it to derive question names from question texts.
    :param raw_data: The raw data as a list with one list of responses per question.
    :param binary: Base64-encoded contents of the data file. When omitted the file is read and encoded here; if the file does not exist, ``binary`` is set to ``None``.
    :param question_names: The names of the questions.
    :param question_texts: The text of the questions.
    :param answer_codebook: The codebook for the answers.
    :param question_types: The types of the questions.
    :param question_options: The options for the questions.
    :param order_options: When truthy, ``self.order_options()`` is called at the end of initialization.
    :param question_name_repair_func: Callable applied by the ``question_names`` setter to names that are not valid identifiers. Defaults to a function replacing ``#``, ``class`` and ``name``.

    :raises Exception: if both ``answer_codebook`` and ``question_names`` are given and their key/name sets differ, or if ``question_names`` and ``question_texts`` are both given with different lengths.

    >>> id = InputDataABC.example(question_names = ['a','b'], answer_codebook = {'a': {'1':'yes', '2':'no'}, 'b': {'1':'yes', '2':'no'}})

    >>> id = InputDataABC.example(question_names = ['a','b'], answer_codebook = {'a': {'1':'yes', '2':'no'}, 'c': {'1':'yes', '2':'no'}})
    Traceback (most recent call last):
    ...
    Exception: The keys of the answer_codebook must match the question_names.
    """

    self.datafile_name = datafile_name
    self.config = config
    self.naming_function = naming_function

    if binary is not None:
        self.binary = binary
    else:
        # Best effort: keep an encoded copy of the file when it is readable.
        try:
            with open(self.datafile_name, "rb") as file:
                self.binary = base64.b64encode(file.read()).decode()
        except FileNotFoundError:
            self.binary = None

    def default_repair_func(x):
        # Plain substring replacement: every occurrence is rewritten, not
        # only whole-word matches.
        return (
            x.replace("#", "_num")
            .replace("class", "social_class")
            .replace("name", "respondent_name")
        )

    # Must be set before ``question_names`` below: its setter calls it.
    self.question_name_repair_func = (
        question_name_repair_func or default_repair_func
    )

    if answer_codebook is not None and question_names is not None:
        if set(answer_codebook.keys()) != set(question_names):
            raise Exception(
                "The keys of the answer_codebook must match the question_names."
            )

    if question_names is not None and question_texts is not None:
        if len(question_names) != len(question_texts):
            raise Exception(
                "The question_names and question_texts must have the same length."
            )

    # Order matters: texts are assigned before names because a subclass's
    # get_question_names() may read self.question_texts (the example does).
    self.question_texts = question_texts
    self.question_names = question_names
    self.answer_codebook = answer_codebook
    self.raw_data = raw_data

    # Needs question_names, answer_codebook and raw_data to be in place.
    self.apply_codebook()

    self.question_types = question_types
    self.question_options = question_options
    if order_options:
        self.order_options()
|
129
|
+
|
130
|
+
@property
def download_link(self):
    """Return an IPython ``HTML`` object holding a data-URI download anchor.

    The anchor embeds ``self.binary`` (the base64 text captured at
    construction) and suggests the last ``/``-separated component of
    ``datafile_name`` as the file name.
    """
    from IPython.display import HTML

    file_name = self.datafile_name.rsplit("/", 1)[-1]
    anchor = (
        f'<a href="data:text/plain;base64,{self.binary}" '
        f'download="{file_name}">Download {self.datafile_name}</a>'
    )
    return HTML(anchor)
|
137
|
+
|
138
|
+
@abstractmethod
def get_question_texts(self) -> List[str]:
    """Return the question texts; concrete subclasses must override this.

    >>> id = InputDataABC.example()
    >>> id.get_question_texts()
    ['how are you doing this morning?', 'how are you feeling?']

    """
    raise NotImplementedError()
|
148
|
+
|
149
|
+
@abstractmethod
def get_raw_data(self) -> List[List[str]]:
    """Return the responses read from ``datafile_name``; subclasses must override.

    The result holds one inner list per question.

    >>> id = InputDataABC.example()
    >>> id.get_raw_data()
    [['1', '4'], ['3', '6']]

    """
    raise NotImplementedError()
|
159
|
+
|
160
|
+
@abstractmethod
def get_question_names(self) -> List[str]:
    """Return the question names; concrete subclasses must override this.

    >>> id = InputDataABC.example()
    >>> id.get_question_names()
    ['morning', 'feeling']

    """
    raise NotImplementedError()
|
170
|
+
|
171
|
+
def rename_questions(
    self, rename_dict: Dict[str, str], ignore_missing=False
) -> "InputData":
    """Rename several questions in place and return ``self``.

    :param rename_dict: Mapping of current question name to new name; each
        pair is forwarded to :meth:`rename` in iteration order.
    :param ignore_missing: Forwarded unchanged to :meth:`rename`.

    >>> id = InputDataABC.example()
    >>> id.rename_questions({'morning': 'evening'}).question_names
    ['evening', 'feeling']

    """
    for pair in rename_dict.items():
        current, replacement = pair
        self.rename(current, replacement, ignore_missing=ignore_missing)
    return self
|
184
|
+
|
185
|
+
def rename(self, old_name, new_name, ignore_missing=False) -> "InputData":
    """Rename a question in place and return ``self``.

    The answer-codebook entry stored under ``old_name`` (or an empty dict
    when there is none) is moved to ``new_name``.

    :param old_name: Current name of the question.
    :param new_name: Name to give the question.
    :param ignore_missing: When true, an unknown ``old_name`` is a no-op
        instead of an error.
    :raises ValueError: if ``old_name`` is unknown and ``ignore_missing`` is
        false, or if ``new_name`` is already used by a different question.

    >>> id = InputDataABC.example()
    >>> id.rename('morning', 'evening').question_names
    ['evening', 'feeling']

    """
    names = self.question_names
    if old_name not in names:
        if ignore_missing:
            return self
        raise ValueError(f"Question {old_name} not found.")

    # Names are looked up with list.index() throughout this class, so a
    # duplicate would make the second question unreachable.
    if new_name != old_name and new_name in names:
        raise ValueError(f"Question {new_name} already exists.")

    names[names.index(old_name)] = new_name
    self.answer_codebook[new_name] = self.answer_codebook.pop(old_name, {})

    return self
|
204
|
+
|
205
|
+
def _drop_question(self, question_name, ignore_missing=False):
    """Remove one question and everything stored for it; return ``self``.

    The entry at the question's position is popped from the names, texts,
    types, options and raw data lists, and its codebook entry is discarded.

    :raises ValueError: if the question is unknown and ``ignore_missing`` is false.

    >>> id = InputDataABC.example()
    >>> id._drop_question('morning').question_names
    ['feeling']

    """
    if question_name not in self.question_names:
        if not ignore_missing:
            raise ValueError(f"Question {question_name} not found.")
        return self

    position = self.question_names.index(question_name)
    # Pop in the same order as always: the parallel lists must stay aligned.
    self._question_names.pop(position)
    self._question_texts.pop(position)
    self.question_types.pop(position)
    self.question_options.pop(position)
    self.raw_data.pop(position)
    self.answer_codebook.pop(question_name, None)
    return self
|
226
|
+
|
227
|
+
def drop(self, *question_names_to_drop) -> "InputData":
    """Drop every named question in place and return ``self``.

    Each name is handed to ``_drop_question`` with its default
    ``ignore_missing``, so an unknown name raises ``ValueError``.

    >>> id = InputDataABC.example()
    >>> id.drop('morning').question_names
    ['feeling']

    """
    for name in question_names_to_drop:
        self._drop_question(name)
    return self
|
238
|
+
|
239
|
+
def keep(self, *question_names_to_keep, ignore_missing=False) -> "InputDataABC":
    """Drop every question that is not named; return ``self``.

    :param question_names_to_keep: Names of the questions to retain.
    :param ignore_missing: Forwarded to ``_drop_question``.

    >>> id = InputDataABC.example()
    >>> id.keep('morning').question_names
    ['morning']

    """
    # Work from a copy: dropping mutates the underlying name list.
    snapshot = list(self._question_names)
    unwanted = [name for name in snapshot if name not in question_names_to_keep]
    for name in unwanted:
        self._drop_question(name, ignore_missing=ignore_missing)
    return self
|
252
|
+
|
253
|
+
def modify_question_type(
    self,
    question_name: str,
    new_type: str,
    drop_options: bool = False,
    new_options: Optional[List[str]] = None,
) -> "InputData":
    """Change a question's type (and optionally its options); return ``self``.

    After the change the question is rebuilt via ``raw_question(...).to_question()``.
    If that raises, the error is printed and the previous type and options
    are restored.

    :param question_name: Name of the question to modify.
    :param new_type: New question type; must be in ``Question.available()``.
    :param drop_options: When true, the question's options are set to ``None``.
    :param new_options: When given, replaces the options (applied after ``drop_options``).
    :raises ValueError: if ``question_name`` is unknown or ``new_type`` is not available.

    >>> id = InputDataABC.example()
    >>> id.modify_question_type('morning', 'numerical', drop_options = True).question_types
    ['numerical', 'multiple_choice']

    >>> id = InputDataABC.example()
    >>> id.modify_question_type('morning', 'poop')
    Traceback (most recent call last):
    ...
    ValueError: Question type poop is not available.
    """
    position = self.question_names.index(question_name)
    previous_type = self.question_types[position]
    previous_options = self.question_options[position]

    from edsl import Question

    if new_type not in Question.available():
        raise ValueError(f"Question type {new_type} is not available.")

    self.question_types[position] = new_type
    if drop_options:
        self.question_options[position] = None
    if new_options is not None:
        self.question_options[position] = new_options

    try:
        # Building the question is the validity check; the result is discarded.
        self.raw_question(position).to_question()
    except Exception as e:
        print(f"Error with question {question_name} in {self.datafile_name}")
        print(e)
        print("Reverting changes")
        self.question_types[position] = previous_type
        self.question_options[position] = previous_options
    return self
|
298
|
+
|
299
|
+
@property
def num_observations(self):
    """Return the number of observations.

    This is the length of the first question's response list, or ``0`` when
    there is no question data at all.

    >>> id = InputDataABC.example()
    >>> id.num_observations
    2

    """
    # NOTE: this class defines ``num_observations`` a second time further
    # down; that later definition is the one Python keeps.
    data = self.raw_data
    if len(data) == 0:
        return 0
    return len(data[0])
|
309
|
+
|
310
|
+
def to_dict(self):
    """Return the constructor arguments needed to rebuild this object.

    Every key is a keyword accepted by ``__init__``, so the result can be
    passed straight to :meth:`from_dict`. ``question_options`` is included
    so that options changed after construction are not lost in a round trip.
    """
    return {
        "datafile_name": self.datafile_name,
        "config": self.config,
        "raw_data": self.raw_data,
        "question_names": self.question_names,
        "question_texts": self.question_texts,
        "binary": self.binary,
        "answer_codebook": self.answer_codebook,
        "question_types": self.question_types,
        "question_options": self.question_options,
    }
|
321
|
+
|
322
|
+
@classmethod
def from_dict(cls, d: Dict):
    """Build an instance by passing the entries of ``d`` as keyword arguments."""
    instance = cls(**d)
    return instance
|
325
|
+
|
326
|
+
@property
def question_names(self) -> List[str]:
    """
    Return a list of question names.

    >>> id = InputDataABC.example()
    >>> id.question_names
    ['morning', 'feeling']

    We can pass question names instead:

    >>> id = InputDataABC.example(question_names = ['a','b'])
    >>> id.question_names
    ['a', 'b']

    """
    if not hasattr(self, "_question_names"):
        # First access: run the setter with None so the names are derived.
        self.question_names = None
    return self._question_names


@question_names.setter
def question_names(self, value) -> None:
    """Validate, repair and store the question names.

    ``None`` means "derive them" via ``get_question_names()``. Names that
    are not valid identifiers are passed through
    ``question_name_repair_func``. The result is stored as a new list, so
    the caller's sequence is never modified.

    :raises ValueError: if names are duplicated (before or after repair) or
        a name is still invalid after repair.
    """
    if value is None:
        value = self.get_question_names()
    if len(set(value)) != len(value):
        raise ValueError("Question names must be unique.")

    repaired = []
    for qn in value:
        if is_valid_variable_name(qn, allow_name=False):
            repaired.append(qn)
            continue
        new_name = self.question_name_repair_func(qn)
        if not is_valid_variable_name(new_name, allow_name=False):
            # One message string: passing two positional strings would make
            # the exception print as a tuple.
            raise ValueError(
                f"Question names must be valid Python identifiers. '{qn}' is not. "
                "You can pass an entry in question_name_repair_func to fix this."
            )
        repaired.append(new_name)

    # Repairing can map two distinct names onto the same identifier.
    if len(set(repaired)) != len(repaired):
        raise ValueError("Question names must be unique after repair.")
    self._question_names = repaired
|
365
|
+
|
366
|
+
@property
def question_texts(self) -> List[str]:
    """
    Return a list of question texts.

    On first access without a stored value, the texts are obtained from
    ``get_question_texts()``.

    >>> id = InputDataABC.example()
    >>> id.question_texts
    ['how are you doing this morning?', 'how are you feeling?']
    """
    if hasattr(self, "_question_texts"):
        return self._question_texts
    self.question_texts = None
    return self._question_texts


@question_texts.setter
def question_texts(self, value):
    """Store the texts; ``None`` means "load them with ``get_question_texts()``"."""
    self._question_texts = self.get_question_texts() if value is None else value
|
384
|
+
|
385
|
+
@property
def raw_data(self):
    """Return the responses, one inner list per question.

    On first access without a stored value, the data is obtained from
    ``get_raw_data()``.

    >>> id = InputDataABC.example()
    >>> id.raw_data
    [['1', '4'], ['3', '6']]

    """
    if hasattr(self, "_raw_data"):
        return self._raw_data
    self.raw_data = None
    return self._raw_data


@raw_data.setter
def raw_data(self, value):
    """Store the responses; ``None`` means "load them with ``get_raw_data()``".

    The answer codebook is not applied here.
    """
    self._raw_data = self.get_raw_data() if value is None else value
|
405
|
+
|
406
|
+
def to_dataset(self) -> "Dataset":
    """Return a ``Dataset`` built from one ``{question_name: responses}`` dict per question."""
    from edsl.results.Dataset import Dataset

    columns = [
        {name: responses}
        for name, responses in zip(self.question_names, self.raw_data)
    ]
    return Dataset(columns)
|
413
|
+
|
414
|
+
def to_scenario_list(self) -> ScenarioList:
    """Return a ScenarioList object from the raw response data.

    Each question contributes one field, added with ``add_list`` in
    question order.

    >>> id = InputDataABC.example()
    >>> s = id.to_scenario_list()
    >>> type(s) == ScenarioList
    True

    >>> s
    ScenarioList([Scenario({'morning': '1', 'feeling': '3'}), Scenario({'morning': '4', 'feeling': '6'})])

    """
    s = ScenarioList()
    # Names and data are parallel lists; pairing them directly avoids an
    # index() search per question.
    for qn, responses in zip(self.question_names, self.raw_data):
        s = s.add_list(qn, responses)
    return s
|
431
|
+
|
432
|
+
@property
def names_to_texts(self) -> dict:
    """
    Return a dictionary mapping each question name to its question text.

    >>> id = InputDataABC.example()
    >>> id.names_to_texts
    {'morning': 'how are you doing this morning?', 'feeling': 'how are you feeling?'}
    """
    return dict(zip(self.question_names, self.question_texts))
|
442
|
+
|
443
|
+
@property
def texts_to_names(self):
    """Return a dictionary mapping each question text to its question name.

    It is the inverse of ``names_to_texts``; if two questions share a text,
    the later one wins.

    >>> id = InputDataABC.example()
    >>> id.texts_to_names
    {'how are you doing this morning?': 'morning', 'how are you feeling?': 'feeling'}

    """
    inverted = {}
    for name, text in self.names_to_texts.items():
        inverted[text] = name
    return inverted
|
453
|
+
|
454
|
+
def raw_question(self, index: int) -> RawQuestion:
    """Return a ``RawQuestion`` assembled from the data stored at position ``index``."""
    question_type = self.question_types[index]
    question_name = self.question_names[index]
    question_text = self.question_texts[index]
    responses = self.raw_data[index]
    question_options = self.question_options[index]
    return RawQuestion(
        question_type=question_type,
        question_name=question_name,
        question_text=question_text,
        responses=responses,
        question_options=question_options,
    )
|
462
|
+
|
463
|
+
def raw_questions(self) -> Generator[RawQuestion, None, None]:
    """Return a generator of RawQuestion objects, one per question in order."""
    # Iterate positions directly rather than searching for each name with
    # index(), which is quadratic and returns the first match only.
    for idx, _ in enumerate(self.question_names):
        yield self.raw_question(idx)
|
468
|
+
|
469
|
+
def questions(self) -> Generator[Union[QuestionBase, None], None, None]:
    """Return a generator of Question objects.

    Each raw question is converted with ``to_question()``. When a conversion
    raises, the error is printed and ``None`` is yielded in that position.
    """
    for raw in self.raw_questions():
        try:
            yield raw.to_question()
        except Exception as error:
            location = (
                f"Error with question '{raw.question_name}' in '{self.datafile_name}'"
            )
            print(location)
            print(error)
            yield None
|
480
|
+
|
481
|
+
def select(self, *question_names: List[str]) -> "InputData":
    """Select a subset of the questions.

    Returns a new instance of the same class holding only the requested
    questions, in the order given. The new instance reuses this object's
    ``binary`` payload and ``naming_function`` rather than re-reading the
    data file and falling back to the default naming function.

    :param question_names: The names of the questions to select.
    :raises ValueError: if a requested name is not a known question.

    >>> id = InputDataABC.example()
    >>> id.select('morning').question_names
    ['morning']

    """
    idxs = [self.question_names.index(qn) for qn in question_names]
    new_data = [self.raw_data[i] for i in idxs]
    new_texts = [self.question_texts[i] for i in idxs]
    new_types = [self.question_types[i] for i in idxs]
    new_options = [self.question_options[i] for i in idxs]
    new_names = [self.question_names[i] for i in idxs]
    # The constructor requires codebook keys to equal the question names,
    # so questions without an entry get an empty mapping.
    answer_codebook = {
        qn: self.answer_codebook.get(qn, {}) for qn in question_names
    }
    # NOTE(review): the constructor applies the codebook again to data that
    # is already decoded; this only matters if a decoded label is itself a
    # code in the same mapping -- confirm.
    return self.__class__(
        self.datafile_name,
        self.config,
        naming_function=self.naming_function,
        raw_data=new_data,
        binary=self.binary,
        question_names=new_names,
        question_texts=new_texts,
        question_types=new_types,
        question_options=new_options,
        answer_codebook=answer_codebook,
        question_name_repair_func=self.question_name_repair_func,
    )
|
512
|
+
|
513
|
+
def to_survey(self) -> Survey:
    """Return a ``Survey`` holding every question that could be built.

    Positions for which ``questions()`` yields ``None`` are skipped.

    >>> id = InputDataABC.example()
    >>> s = id.to_survey()
    >>> type(s) == Survey
    True

    """
    survey = Survey()
    for question in self.questions():
        if question is None:
            continue
        survey.add_question(question)
    return survey
|
526
|
+
|
527
|
+
def print(self):
    """Print a per-question summary table.

    A ``ScenarioList`` is built with one field per summary column (name,
    text, inferred type and the response statistics) and then printed.
    """
    table = ScenarioList.from_list("question_name", self.question_names)
    table = table.add_list("question_text", self.question_texts)
    table = table.add_list("inferred_question_type", self.question_types)
    table = table.add_list("num_responses", self.num_responses)
    table = table.add_list("num_unique_responses", self.num_unique_responses)
    table = table.add_list("missing", self.missing)
    table = table.add_list("frac_numerical", self.frac_numerical)
    table = table.add_list("top_5_items", self.top_k(5))
    table = table.add_list("frac_obs_from_top_5", self.frac_obs_from_top_k(5))
    table.print()
|
540
|
+
|
541
|
+
@property
def answer_codebook(self) -> dict:
    """Return the answer codebook.

    If nothing has been stored yet, ``None`` is stored and returned.

    >>> id = InputDataABC.example(answer_codebook = {'morning':{'1':'hello'}})
    >>> id.answer_codebook
    {'morning': {'1': 'hello'}}

    """
    if hasattr(self, "_answer_codebook"):
        return self._answer_codebook
    self._answer_codebook = None
    return self._answer_codebook


@answer_codebook.setter
def answer_codebook(self, value):
    """Store the codebook; ``None`` means "use ``get_answer_codebook()``"."""
    self._answer_codebook = self.get_answer_codebook() if value is None else value
|
558
|
+
|
559
|
+
def get_answer_codebook(self):
    """Return the default answer codebook: a new, empty dict."""
    return dict()
|
561
|
+
|
562
|
+
def _drop_rows(self, indices: List[int]):
    """Drop rows (observations) from the raw data and return ``self``.

    The same positions are removed from every question's response list.

    :param indices: Zero-based observation positions to remove.

    >>> id = InputDataABC.example()
    >>> id.num_observations
    2
    >>> _ = id._drop_rows([1])
    >>> id.num_observations
    1

    """
    # Build the lookup once: membership is tested for every cell.
    to_drop = set(indices)
    self.raw_data = [
        [r for i, r in enumerate(row) if i not in to_drop] for row in self.raw_data
    ]
    return self
|
578
|
+
|
579
|
+
def _missing_indices(self, question_name):
    """Return the positions whose response to ``question_name`` equals ``"missing"``.

    TODO: Could re-factor to use SimpleEval

    >>> id = InputDataABC.example()
    >>> id.raw_data[0][0] = 'missing'
    >>> id._missing_indices('morning')
    [0]
    """
    responses = self.raw_data[self.question_names.index(question_name)]
    return [pos for pos, resp in enumerate(responses) if resp == "missing"]
|
590
|
+
|
591
|
+
def drop_missing(self, question_name):
    """Drop every observation whose response to ``question_name`` is missing.

    The affected rows are removed for all questions. Returns ``None``.

    >>> id = InputDataABC.example()
    >>> id.num_observations
    2
    >>> id.raw_data[0][0] = 'missing'
    >>> id.drop_missing('morning')
    >>> id.num_observations
    1
    """
    missing_positions = self._missing_indices(question_name)
    self._drop_rows(missing_positions)
|
603
|
+
|
604
|
+
@property
def num_observations(self):
    """
    Return the number of observations

    This is the length of the first question's response list, or ``0`` when
    there is no question data at all.

    >>> id = InputDataABC.example()
    >>> id.num_observations
    2
    """
    data = self.raw_data
    if len(data) == 0:
        # No questions left (e.g. all were dropped): nothing to index.
        return 0
    return len(data[0])
|
614
|
+
|
615
|
+
def apply_codebook(self) -> None:
    """Apply the codebook to the raw data.

    For every question that has a codebook entry, each response found in
    that entry is replaced by its mapped value; other responses are kept.

    >>> id = InputDataABC.example()
    >>> id.raw_data
    [['1', '4'], ['3', '6']]

    >>> id = InputDataABC.example(answer_codebook = {'morning':{'1':'hello'}})
    >>> id.raw_data
    [['hello', '4'], ['3', '6']]
    """
    codebook = self.answer_codebook
    for position, name in enumerate(self.question_names):
        if name not in codebook:
            continue
        mapping = codebook[name]
        decoded = [mapping.get(resp, resp) for resp in self.raw_data[position]]
        self.raw_data[position] = decoded
|
632
|
+
|
633
|
+
def __repr__(self):
    """Return a one-line summary: class name, data file, question and observation counts.

    The observation count is reported as 0 when there is no question data,
    so the representation never raises on an empty object.
    """
    data = self.raw_data
    observation_count = len(data[0]) if len(data) > 0 else 0
    return f"{self.__class__.__name__}: datafile_name:'{self.datafile_name}' num_questions:{len(self.question_names)}, num_observations:{observation_count}"
|
635
|
+
|
636
|
+
@classmethod
def example(cls, **kwargs) -> "InputDataABC":
    """Return a small in-memory instance for doctests and demos.

    A throwaway subclass supplies two questions with two observations each;
    ``kwargs`` are forwarded to the constructor.
    """

    class InputDataExample(InputDataABC):
        def get_question_texts(self) -> List[str]:
            """Get the text of the questions"""
            return ["how are you doing this morning?", "how are you feeling?"]

        def get_raw_data(self) -> SurveyResponses:
            """Return the fixed example responses, one list per question."""
            return [["1", "4"], ["3", "6"]]

        def get_question_names(self):
            """Derive names from the texts; suffix positions if any collide."""
            candidates = [self.naming_function(text) for text in self.question_texts]
            if len(set(candidates)) == len(candidates):
                return candidates
            return [f"{name}_{pos}" for pos, name in enumerate(candidates)]

    return InputDataExample("notneeded", config={}, **kwargs)
|
654
|
+
|
655
|
+
|
656
|
+
# When executed as a script, run this module's doctests. ELLIPSIS lets
# "..." in expected output match any text (used in the traceback examples).
if __name__ == "__main__":
    import doctest

    doctest.testmod(optionflags=doctest.ELLIPSIS)
|