edsl 0.1.37.dev6__py3-none-any.whl → 0.1.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271)
  1. edsl/Base.py +332 -303
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +49 -48
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +867 -855
  7. edsl/agents/AgentList.py +413 -350
  8. edsl/agents/Invigilator.py +233 -222
  9. edsl/agents/InvigilatorBase.py +265 -284
  10. edsl/agents/PromptConstructor.py +354 -353
  11. edsl/agents/__init__.py +3 -3
  12. edsl/agents/descriptors.py +99 -99
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +279 -289
  26. edsl/config.py +157 -149
  27. edsl/conversation/Conversation.py +290 -290
  28. edsl/conversation/car_buying.py +58 -58
  29. edsl/conversation/chips.py +95 -95
  30. edsl/conversation/mug_negotiation.py +81 -81
  31. edsl/conversation/next_speaker_utilities.py +93 -93
  32. edsl/coop/PriceFetcher.py +54 -54
  33. edsl/coop/__init__.py +2 -2
  34. edsl/coop/coop.py +1028 -958
  35. edsl/coop/utils.py +131 -131
  36. edsl/data/Cache.py +555 -527
  37. edsl/data/CacheEntry.py +233 -228
  38. edsl/data/CacheHandler.py +149 -149
  39. edsl/data/RemoteCacheSync.py +78 -97
  40. edsl/data/SQLiteDict.py +292 -292
  41. edsl/data/__init__.py +4 -4
  42. edsl/data/orm.py +10 -10
  43. edsl/data_transfer_models.py +73 -73
  44. edsl/enums.py +175 -173
  45. edsl/exceptions/BaseException.py +21 -21
  46. edsl/exceptions/__init__.py +54 -54
  47. edsl/exceptions/agents.py +42 -38
  48. edsl/exceptions/cache.py +5 -0
  49. edsl/exceptions/configuration.py +16 -16
  50. edsl/exceptions/coop.py +10 -10
  51. edsl/exceptions/data.py +14 -14
  52. edsl/exceptions/general.py +34 -34
  53. edsl/exceptions/jobs.py +33 -33
  54. edsl/exceptions/language_models.py +63 -63
  55. edsl/exceptions/prompts.py +15 -15
  56. edsl/exceptions/questions.py +91 -91
  57. edsl/exceptions/results.py +29 -29
  58. edsl/exceptions/scenarios.py +22 -22
  59. edsl/exceptions/surveys.py +37 -37
  60. edsl/inference_services/AnthropicService.py +87 -87
  61. edsl/inference_services/AwsBedrock.py +120 -120
  62. edsl/inference_services/AzureAI.py +217 -217
  63. edsl/inference_services/DeepInfraService.py +18 -18
  64. edsl/inference_services/GoogleService.py +148 -156
  65. edsl/inference_services/GroqService.py +20 -20
  66. edsl/inference_services/InferenceServiceABC.py +147 -147
  67. edsl/inference_services/InferenceServicesCollection.py +97 -97
  68. edsl/inference_services/MistralAIService.py +123 -123
  69. edsl/inference_services/OllamaService.py +18 -18
  70. edsl/inference_services/OpenAIService.py +224 -224
  71. edsl/inference_services/PerplexityService.py +163 -0
  72. edsl/inference_services/TestService.py +89 -89
  73. edsl/inference_services/TogetherAIService.py +170 -170
  74. edsl/inference_services/models_available_cache.py +118 -118
  75. edsl/inference_services/rate_limits_cache.py +25 -25
  76. edsl/inference_services/registry.py +41 -39
  77. edsl/inference_services/write_available.py +10 -10
  78. edsl/jobs/Answers.py +56 -56
  79. edsl/jobs/Jobs.py +898 -1347
  80. edsl/jobs/JobsChecks.py +147 -0
  81. edsl/jobs/JobsPrompts.py +268 -0
  82. edsl/jobs/JobsRemoteInferenceHandler.py +239 -0
  83. edsl/jobs/__init__.py +1 -1
  84. edsl/jobs/buckets/BucketCollection.py +63 -63
  85. edsl/jobs/buckets/ModelBuckets.py +65 -65
  86. edsl/jobs/buckets/TokenBucket.py +251 -248
  87. edsl/jobs/interviews/Interview.py +661 -661
  88. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  89. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  90. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  91. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  92. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  93. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  94. edsl/jobs/interviews/ReportErrors.py +66 -66
  95. edsl/jobs/interviews/interview_status_enum.py +9 -9
  96. edsl/jobs/runners/JobsRunnerAsyncio.py +466 -338
  97. edsl/jobs/runners/JobsRunnerStatus.py +330 -332
  98. edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
  99. edsl/jobs/tasks/TaskCreators.py +64 -64
  100. edsl/jobs/tasks/TaskHistory.py +450 -442
  101. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  102. edsl/jobs/tasks/task_status_enum.py +163 -163
  103. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  104. edsl/jobs/tokens/TokenUsage.py +34 -34
  105. edsl/language_models/KeyLookup.py +30 -30
  106. edsl/language_models/LanguageModel.py +668 -706
  107. edsl/language_models/ModelList.py +155 -102
  108. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  109. edsl/language_models/__init__.py +3 -3
  110. edsl/language_models/fake_openai_call.py +15 -15
  111. edsl/language_models/fake_openai_service.py +61 -61
  112. edsl/language_models/registry.py +190 -137
  113. edsl/language_models/repair.py +156 -156
  114. edsl/language_models/unused/ReplicateBase.py +83 -83
  115. edsl/language_models/utilities.py +64 -64
  116. edsl/notebooks/Notebook.py +258 -259
  117. edsl/notebooks/__init__.py +1 -1
  118. edsl/prompts/Prompt.py +362 -357
  119. edsl/prompts/__init__.py +2 -2
  120. edsl/questions/AnswerValidatorMixin.py +289 -289
  121. edsl/questions/QuestionBase.py +664 -656
  122. edsl/questions/QuestionBaseGenMixin.py +161 -161
  123. edsl/questions/QuestionBasePromptsMixin.py +217 -234
  124. edsl/questions/QuestionBudget.py +227 -227
  125. edsl/questions/QuestionCheckBox.py +359 -359
  126. edsl/questions/QuestionExtract.py +182 -183
  127. edsl/questions/QuestionFreeText.py +114 -114
  128. edsl/questions/QuestionFunctional.py +166 -159
  129. edsl/questions/QuestionList.py +231 -231
  130. edsl/questions/QuestionMultipleChoice.py +286 -286
  131. edsl/questions/QuestionNumerical.py +153 -153
  132. edsl/questions/QuestionRank.py +324 -324
  133. edsl/questions/Quick.py +41 -41
  134. edsl/questions/RegisterQuestionsMeta.py +71 -71
  135. edsl/questions/ResponseValidatorABC.py +174 -174
  136. edsl/questions/SimpleAskMixin.py +73 -73
  137. edsl/questions/__init__.py +26 -26
  138. edsl/questions/compose_questions.py +98 -98
  139. edsl/questions/decorators.py +21 -21
  140. edsl/questions/derived/QuestionLikertFive.py +76 -76
  141. edsl/questions/derived/QuestionLinearScale.py +87 -87
  142. edsl/questions/derived/QuestionTopK.py +93 -91
  143. edsl/questions/derived/QuestionYesNo.py +82 -82
  144. edsl/questions/descriptors.py +413 -413
  145. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  146. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  147. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  148. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  149. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  150. edsl/questions/prompt_templates/question_list.jinja +17 -17
  151. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  152. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  153. edsl/questions/question_registry.py +177 -147
  154. edsl/questions/settings.py +12 -12
  155. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  156. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  157. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  158. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  159. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  160. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  161. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  162. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  163. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  164. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  165. edsl/questions/templates/list/question_presentation.jinja +5 -5
  166. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  167. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  168. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  169. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  170. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  171. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  172. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  173. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  174. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  175. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  176. edsl/results/CSSParameterizer.py +108 -0
  177. edsl/results/Dataset.py +424 -293
  178. edsl/results/DatasetExportMixin.py +731 -717
  179. edsl/results/DatasetTree.py +275 -145
  180. edsl/results/Result.py +465 -450
  181. edsl/results/Results.py +1165 -1071
  182. edsl/results/ResultsDBMixin.py +238 -238
  183. edsl/results/ResultsExportMixin.py +43 -43
  184. edsl/results/ResultsFetchMixin.py +33 -33
  185. edsl/results/ResultsGGMixin.py +121 -121
  186. edsl/results/ResultsToolsMixin.py +98 -98
  187. edsl/results/Selector.py +135 -135
  188. edsl/results/TableDisplay.py +198 -0
  189. edsl/results/__init__.py +2 -2
  190. edsl/results/table_display.css +78 -0
  191. edsl/results/tree_explore.py +115 -115
  192. edsl/scenarios/FileStore.py +632 -458
  193. edsl/scenarios/Scenario.py +601 -546
  194. edsl/scenarios/ScenarioHtmlMixin.py +64 -64
  195. edsl/scenarios/ScenarioJoin.py +127 -0
  196. edsl/scenarios/ScenarioList.py +1287 -1112
  197. edsl/scenarios/ScenarioListExportMixin.py +52 -52
  198. edsl/scenarios/ScenarioListPdfMixin.py +261 -261
  199. edsl/scenarios/__init__.py +4 -4
  200. edsl/shared.py +1 -1
  201. edsl/study/ObjectEntry.py +173 -173
  202. edsl/study/ProofOfWork.py +113 -113
  203. edsl/study/SnapShot.py +80 -80
  204. edsl/study/Study.py +528 -528
  205. edsl/study/__init__.py +4 -4
  206. edsl/surveys/DAG.py +148 -148
  207. edsl/surveys/Memory.py +31 -31
  208. edsl/surveys/MemoryPlan.py +244 -244
  209. edsl/surveys/Rule.py +326 -330
  210. edsl/surveys/RuleCollection.py +387 -387
  211. edsl/surveys/Survey.py +1801 -1795
  212. edsl/surveys/SurveyCSS.py +261 -261
  213. edsl/surveys/SurveyExportMixin.py +259 -259
  214. edsl/surveys/SurveyFlowVisualizationMixin.py +179 -121
  215. edsl/surveys/SurveyQualtricsImport.py +284 -284
  216. edsl/surveys/__init__.py +3 -3
  217. edsl/surveys/base.py +53 -53
  218. edsl/surveys/descriptors.py +56 -56
  219. edsl/surveys/instructions/ChangeInstruction.py +49 -47
  220. edsl/surveys/instructions/Instruction.py +65 -51
  221. edsl/surveys/instructions/InstructionCollection.py +77 -77
  222. edsl/templates/error_reporting/base.html +23 -23
  223. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  224. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  225. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  226. edsl/templates/error_reporting/interview_details.html +115 -115
  227. edsl/templates/error_reporting/interviews.html +19 -10
  228. edsl/templates/error_reporting/overview.html +4 -4
  229. edsl/templates/error_reporting/performance_plot.html +1 -1
  230. edsl/templates/error_reporting/report.css +73 -73
  231. edsl/templates/error_reporting/report.html +117 -117
  232. edsl/templates/error_reporting/report.js +25 -25
  233. edsl/tools/__init__.py +1 -1
  234. edsl/tools/clusters.py +192 -192
  235. edsl/tools/embeddings.py +27 -27
  236. edsl/tools/embeddings_plotting.py +118 -118
  237. edsl/tools/plotting.py +112 -112
  238. edsl/tools/summarize.py +18 -18
  239. edsl/utilities/SystemInfo.py +28 -28
  240. edsl/utilities/__init__.py +22 -22
  241. edsl/utilities/ast_utilities.py +25 -25
  242. edsl/utilities/data/Registry.py +6 -6
  243. edsl/utilities/data/__init__.py +1 -1
  244. edsl/utilities/data/scooter_results.json +1 -1
  245. edsl/utilities/decorators.py +77 -77
  246. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  247. edsl/utilities/interface.py +627 -627
  248. edsl/{conjure → utilities}/naming_utilities.py +263 -263
  249. edsl/utilities/repair_functions.py +28 -28
  250. edsl/utilities/restricted_python.py +70 -70
  251. edsl/utilities/utilities.py +424 -409
  252. {edsl-0.1.37.dev6.dist-info → edsl-0.1.38.dist-info}/LICENSE +21 -21
  253. {edsl-0.1.37.dev6.dist-info → edsl-0.1.38.dist-info}/METADATA +2 -1
  254. edsl-0.1.38.dist-info/RECORD +277 -0
  255. edsl/conjure/AgentConstructionMixin.py +0 -160
  256. edsl/conjure/Conjure.py +0 -62
  257. edsl/conjure/InputData.py +0 -659
  258. edsl/conjure/InputDataCSV.py +0 -48
  259. edsl/conjure/InputDataMixinQuestionStats.py +0 -182
  260. edsl/conjure/InputDataPyRead.py +0 -91
  261. edsl/conjure/InputDataSPSS.py +0 -8
  262. edsl/conjure/InputDataStata.py +0 -8
  263. edsl/conjure/QuestionOptionMixin.py +0 -76
  264. edsl/conjure/QuestionTypeMixin.py +0 -23
  265. edsl/conjure/RawQuestion.py +0 -65
  266. edsl/conjure/SurveyResponses.py +0 -7
  267. edsl/conjure/__init__.py +0 -9
  268. edsl/conjure/examples/placeholder.txt +0 -0
  269. edsl/conjure/utilities.py +0 -201
  270. edsl-0.1.37.dev6.dist-info/RECORD +0 -283
  271. {edsl-0.1.37.dev6.dist-info → edsl-0.1.38.dist-info}/WHEEL +0 -0
edsl/conjure/InputDataMixinQuestionStats.py DELETED
@@ -1,182 +0,0 @@
1
- import functools
2
- from typing import List
3
- from edsl.conjure.utilities import Missing
4
- from collections import Counter
5
-
6
-
7
- class InputDataMixinQuestionStats:
8
- def question_statistics(self, question_name: str) -> "QuestionStats":
9
- """Return statistics for a question."""
10
- return self.QuestionStats(**self._compute_question_statistics(question_name))
11
-
12
- def _compute_question_statistics(self, question_name: str) -> dict:
13
- """
14
- Return a dictionary of statistics for a question.
15
-
16
- >>> from edsl.conjure.InputData import InputDataABC
17
- >>> id = InputDataABC.example()
18
- >>> id._compute_question_statistics('morning')
19
- {'num_responses': 2, 'num_unique_responses': 2, 'missing': 0, 'unique_responses': ..., 'frac_numerical': 0.0, 'top_5': [('1', 1), ('4', 1)], 'frac_obs_from_top_5': 1.0}
20
- """
21
- idx = self.question_names.index(question_name)
22
- return {attr: getattr(self, attr)[idx] for attr in self.question_attributes}
23
-
24
- @property
25
- def num_responses(self) -> List[int]:
26
- """
27
- Return the number of responses for each question.
28
-
29
- >>> from edsl.conjure.InputData import InputDataABC
30
- >>> id = InputDataABC.example()
31
- >>> id.num_responses
32
- [2, 2]
33
- """
34
- return self.compute_num_responses()
35
-
36
- @functools.lru_cache(maxsize=1)
37
- def compute_num_responses(self):
38
- return [len(responses) for responses in self.raw_data]
39
-
40
- @property
41
- def num_unique_responses(self) -> List[int]:
42
- """
43
- The number of unique responses for each question.
44
-
45
- >>> from edsl.conjure.InputData import InputDataABC
46
- >>> id = InputDataABC.example()
47
- >>> id.num_unique_responses
48
- [2, 2]
49
- """
50
- return self.compute_num_unique_responses()
51
-
52
- @functools.lru_cache(maxsize=1)
53
- def compute_num_unique_responses(self):
54
- return [len(set(responses)) for responses in self.raw_data]
55
-
56
- @property
57
- def missing(self) -> List[int]:
58
- """The number of observations that are missing.
59
-
60
- >>> from edsl.conjure.InputData import InputDataABC
61
- >>> input_data = InputDataABC.example(raw_data = [[1,2,Missing().value()]], question_texts = ['A question'])
62
- >>> input_data.missing
63
- [1]
64
-
65
- """
66
- return self.compute_missing()
67
-
68
- @functools.lru_cache(maxsize=1)
69
- def compute_missing(self):
70
- return [sum([1 for x in v if x == Missing().value()]) for v in self.raw_data]
71
-
72
- @property
73
- def frac_numerical(self) -> List[float]:
74
- """
75
- The fraction of responses that are numerical for each question.
76
-
77
- >>> from edsl.conjure.InputData import InputDataABC
78
- >>> input_data = InputDataABC.example(raw_data = [[1,2,"Poop", 3]], question_texts = ['A question'])
79
- >>> input_data.frac_numerical
80
- [0.75]
81
- """
82
- return self.compute_frac_numerical()
83
-
84
- @functools.lru_cache(maxsize=1)
85
- def compute_frac_numerical(self):
86
- return [
87
- sum([1 for x in v if isinstance(x, (int, float))]) / len(v)
88
- for v in self.raw_data
89
- ]
90
-
91
- @functools.lru_cache(maxsize=1)
92
- def top_k(self, k: int) -> List[List[tuple]]:
93
- """
94
- >>> from edsl.conjure.InputData import InputDataABC
95
- >>> input_data = InputDataABC.example(raw_data = [[1,1,1,1,1,2]], question_texts = ['A question'])
96
- >>> input_data.top_k(1)
97
- [[(1, 5)]]
98
- >>> input_data.top_k(2)
99
- [[(1, 5), (2, 1)]]
100
- """
101
- return [Counter(value).most_common(k) for value in self.raw_data]
102
-
103
- @functools.lru_cache(maxsize=1)
104
- def frac_obs_from_top_k(self, k):
105
- """
106
- Return the fraction of observations that are in the top k for each question.
107
-
108
- >>> from edsl.conjure.InputData import InputDataABC
109
- >>> input_data = InputDataABC.example(raw_data = [[1,1,1,1,1,1,1,1,2, 3]], question_names = ['a'])
110
- >>> input_data.frac_obs_from_top_k(1)
111
- [0.8]
112
- """
113
- return [
114
- round(
115
- sum([x[1] for x in Counter(value).most_common(k) if x[0] != "missing"])
116
- / len(value),
117
- 2,
118
- )
119
- for value in self.raw_data
120
- ]
121
-
122
- @property
123
- def frac_obs_from_top_5(self):
124
- """The fraction of observations that are in the top 5 for each question."""
125
- return self.frac_obs_from_top_k(5)
126
-
127
- @property
128
- def top_5(self):
129
- """The top 5 responses for each question."""
130
- return self.top_k(5)
131
-
132
- @property
133
- def unique_responses(self) -> List[List[str]]:
134
- """Return a list of unique responses for each question.
135
-
136
- >>> from edsl.conjure.InputData import InputDataABC
137
- >>> id = InputDataABC.example()
138
- >>> id.unique_responses
139
- [..., ...]
140
- """
141
- return self.compute_unique_responses()
142
-
143
- @functools.lru_cache(maxsize=1)
144
- def compute_unique_responses(self):
145
- return [
146
- list(set(self.filter_missing(responses))) for responses in self.raw_data
147
- ]
148
-
149
- @staticmethod
150
- def filter_missing(responses) -> List[str]:
151
- """Return a list of responses with missing values removed."""
152
- return [
153
- v
154
- for v in responses
155
- if v != Missing().value() and v != "missing" and v != ""
156
- ]
157
-
158
- def unique_responses_more_than_k(self, k, remove_missing=True) -> List[List[str]]:
159
- """Return a list of unique responses that occur more than k times for each question.
160
-
161
- >>> from edsl.conjure.InputData import InputDataABC
162
- >>> id = InputDataABC.example()
163
- >>> id.unique_responses_more_than_k(1)
164
- [[...], [...]]
165
-
166
- """
167
- counters = [Counter(responses) for responses in self.raw_data]
168
- new_counters = []
169
- for question in counters:
170
- top_options = []
171
- for option, count in question.items():
172
- if count > k and (option != "missing" or not remove_missing):
173
- top_options.append(option)
174
- new_counters.append(top_options)
175
- return new_counters
176
-
177
-
178
- if __name__ == "__main__":
179
- from edsl.conjure.InputData import InputDataABC
180
- import doctest
181
-
182
- doctest.testmod(optionflags=doctest.ELLIPSIS)
edsl/conjure/InputDataPyRead.py DELETED
@@ -1,91 +0,0 @@
1
- import pandas as pd
2
- from typing import List
3
-
4
- from edsl.conjure.InputData import InputDataABC
5
- from edsl.conjure.utilities import convert_value
6
- from edsl.utilities.utilities import is_valid_variable_name
7
-
8
- try:
9
- import pyreadstat
10
- except ImportError as e:
11
- raise ImportError(
12
- "The 'pyreadstat' package is required for this feature. Please install it by running:\n"
13
- "pip install pyreadstat\n"
14
- ) from e
15
-
16
-
17
- class InputDataPyRead(InputDataABC):
18
- def pyread_function(self, datafile_name):
19
- raise NotImplementedError
20
-
21
- def _parse(self) -> None:
22
- try:
23
- df, meta = self.pyread_function(self.datafile_name)
24
- except Exception as e:
25
- raise ValueError(
26
- f"An error occurred while reading the file {self.datafile_name}."
27
- ) from e
28
- float_columns = df.select_dtypes(include=["float64"]).columns
29
- df[float_columns] = df[float_columns].astype(str)
30
-
31
- df.fillna("", inplace=True)
32
- df = df.astype(str)
33
- self._df = df
34
- self._meta = meta
35
-
36
- def get_df(self) -> pd.DataFrame:
37
- if not hasattr(self, "_df"):
38
- self._parse()
39
- return self._df
40
-
41
- def get_answer_codebook(self):
42
- if not hasattr(self, "_meta"):
43
- self._parse()
44
-
45
- question_name_to_label_name = self._meta.variable_to_label
46
- label_name_to_labels = self._meta.value_labels
47
- return {
48
- qn: label_name_to_labels[label_name]
49
- for qn, label_name in question_name_to_label_name.items()
50
- }
51
-
52
- def get_raw_data(self) -> List[List[str]]:
53
- df = self.get_df()
54
- data = [
55
- [convert_value(obs) for obs in v]
56
- for k, v in df.to_dict(orient="list").items()
57
- ]
58
- return data
59
-
60
- @property
61
- def question_names_to_question_texts(self):
62
- """Return a dictionary of question names to question texts.
63
- This will repair the question names if they are not valid Python identifiers using the
64
- same question_name_repair_func that was passed in.
65
- """
66
- if not hasattr(self, "_meta"):
67
- self._parse()
68
- d = {}
69
- for qn, label in self._meta.column_names_to_labels.items():
70
- new_name = qn
71
- if not is_valid_variable_name(qn):
72
- new_name = self.question_name_repair_func(qn)
73
- if not is_valid_variable_name(new_name):
74
- raise ValueError(
75
- f"""Question names must be valid Python identifiers. '{qn}' is not.""",
76
- """You can pass an entry in question_name_repair_dict to fix this.""",
77
- )
78
- if label is not None:
79
- d[new_name] = label
80
- return d
81
-
82
- def get_question_texts(self):
83
- if not hasattr(self, "_meta"):
84
- self._parse()
85
- return [
86
- self.question_names_to_question_texts.get(qn, qn)
87
- for qn in self.question_names
88
- ]
89
-
90
- def get_question_names(self):
91
- return self.get_df().columns.tolist()
edsl/conjure/InputDataSPSS.py DELETED
@@ -1,8 +0,0 @@
1
- from edsl.conjure.InputDataPyRead import InputDataPyRead
2
-
3
-
4
- class InputDataSPSS(InputDataPyRead):
5
- def pyread_function(self, datafile_name):
6
- from pyreadstat import read_sav
7
-
8
- return read_sav(datafile_name)
edsl/conjure/InputDataStata.py DELETED
@@ -1,8 +0,0 @@
1
- from edsl.conjure.InputDataPyRead import InputDataPyRead
2
-
3
-
4
- class InputDataStata(InputDataPyRead):
5
- def pyread_function(self, datafile_name):
6
- from pyreadstat import read_dta
7
-
8
- return read_dta(datafile_name)
edsl/conjure/QuestionOptionMixin.py DELETED
@@ -1,76 +0,0 @@
1
- from typing import Union, List
2
-
3
-
4
- class QuestionOptionMixin:
5
- @property
6
- def question_options(self):
7
- if not hasattr(self, "_question_options"):
8
- self.question_options = None
9
- return self._question_options
10
-
11
- @question_options.setter
12
- def question_options(self, value):
13
- if value is None:
14
- value = [self._get_question_options(qn) for qn in self.question_names]
15
- self._question_options = value
16
-
17
- def _get_question_options(self, question_name) -> Union[List[str], None]:
18
- """Return the options for a question.
19
-
20
- >>> from edsl.conjure.InputData import InputDataABC
21
- >>> id = InputDataABC.example()
22
- >>> sorted(id._get_question_options('morning'))
23
- ['1', '4']
24
-
25
- """
26
- qt = self.question_statistics(question_name)
27
- idx = self.question_names.index(question_name)
28
- question_type = self.question_types[idx]
29
- if question_type == "multiple_choice":
30
- return [str(o) for o in qt.unique_responses]
31
- else:
32
- if question_type == "multiple_choice_with_other":
33
- options = self.unique_responses_more_than_k(2)[
34
- self.question_names.index(question_name)
35
- ] + [self.OTHER_STRING]
36
- return [str(o) for o in options]
37
- else:
38
- return None
39
-
40
- def order_options(self) -> None:
41
- """Order the options for multiple choice questions using an LLM."""
42
- from edsl import QuestionList, ScenarioList
43
- import textwrap
44
-
45
- scenarios = (
46
- ScenarioList.from_list("example_question_name", self.question_names)
47
- .add_list("example_question_text", self.question_texts)
48
- .add_list("example_question_type", self.question_types)
49
- .add_list("example_question_options", self.question_options)
50
- ).filter(
51
- 'example_question_type == "multiple_choice" or example_question_type == "multiple_choice_with_other"'
52
- )
53
-
54
- question = QuestionList(
55
- question_text=textwrap.dedent(
56
- """\
57
- We have a survey question: `{{ example_question_text }}`.
58
-
59
- The survey had following options: '{{ example_question_options }}'.
60
- The options might be out of order. Please put them in the correct order.
61
- If there is not natural order, just put then in order they were presented.
62
- """
63
- ),
64
- question_name="ordering",
65
- )
66
- proposed_ordering = question.by(scenarios).run()
67
- d = dict(
68
- proposed_ordering.select("example_question_name", "ordering").to_list()
69
- )
70
- self._question_options = [d.get(qn, None) for qn in self.question_names]
71
-
72
-
73
- if __name__ == "__main__":
74
- import doctest
75
-
76
- doctest.testmod(optionflags=doctest.ELLIPSIS)
edsl/conjure/QuestionTypeMixin.py DELETED
@@ -1,23 +0,0 @@
1
- class QuestionTypeMixin:
2
- @property
3
- def question_types(self):
4
- if not hasattr(self, "_question_types"):
5
- self.question_types = None
6
- return self._question_types
7
-
8
- @question_types.setter
9
- def question_types(self, value):
10
- if value is None:
11
- value = [self._infer_question_type(qn) for qn in self.question_names]
12
- self._question_types = value
13
-
14
- def _infer_question_type(self, question_name) -> str:
15
- qt = self.question_statistics(question_name)
16
- if qt.num_unique_responses > self.NUM_UNIQUE_THRESHOLD:
17
- if qt.frac_numerical > self.FRAC_NUMERICAL_THRESHOLD:
18
- return "numerical"
19
- if qt.frac_obs_from_top_5 > self.MULTIPLE_CHOICE_OTHER_THRESHOLD:
20
- return "multiple_choice_with_other"
21
- return "free_text"
22
- else:
23
- return "multiple_choice"
edsl/conjure/RawQuestion.py DELETED
@@ -1,65 +0,0 @@
1
- from dataclasses import dataclass, field
2
- from typing import List, Optional, Union
3
- from edsl.questions import QuestionBase
4
- from edsl import Question
5
-
6
- from edsl.conjure.utilities import convert_value
7
-
8
-
9
- @dataclass
10
- class RawQuestion:
11
- """
12
- A class to represent a question before it is converted to edsl class.
13
-
14
- >>> rq = RawQuestion.example()
15
- >>> rq.to_question()
16
- Question('multiple_choice', question_name = \"""how_are_you\""", question_text = \"""How are you doing?\""", question_options = ['Good', 'Bad'])
17
- """
18
-
19
- question_type: str
20
- question_name: str
21
- question_text: str
22
- responses: List[str] = field(default_factory=list)
23
- question_options: Optional[List[str]] = None
24
-
25
- @classmethod
26
- def example(cls):
27
- return cls(
28
- question_type="multiple_choice",
29
- question_name="how_are_you",
30
- question_text="How are you doing?",
31
- responses=["Good", "Bad", "Bad", "Good"],
32
- question_options=["Good", "Bad"],
33
- )
34
-
35
- def __post_init__(self):
36
- self.responses = [convert_value(r) for r in self.responses]
37
-
38
- def to_question(self) -> QuestionBase:
39
- """Return a Question object from the RawQuestion."""
40
-
41
- # TODO: Remove this once we have a better way to handle multiple_choice_with_other
42
- if self.question_type == "multiple_choice_with_other":
43
- question_type = "multiple_choice"
44
- else:
45
- question_type = self.question_type
46
-
47
- # exclude responses from the dictionary if they have a None value; don't inlcude responses in the dictionary
48
- d = {
49
- k: v
50
- for k, v in {
51
- "question_type": question_type,
52
- "question_name": self.question_name,
53
- "question_text": self.question_text,
54
- "responses": self.responses,
55
- "question_options": self.question_options,
56
- }.items()
57
- if v is not None and k != "responses"
58
- }
59
- return Question(**d)
60
-
61
-
62
- if __name__ == "__main__":
63
- import doctest
64
-
65
- doctest.testmod(optionflags=doctest.ELLIPSIS)
edsl/conjure/SurveyResponses.py DELETED
@@ -1,7 +0,0 @@
1
- from typing import Dict, List
2
- from collections import UserDict
3
-
4
-
5
- class SurveyResponses(UserDict):
6
- def __init__(self, responses: Dict[str, List[str]]):
7
- super().__init__(responses)
edsl/conjure/__init__.py DELETED
@@ -1,9 +0,0 @@
1
- # from edsl.conjure.SurveyBuilder import SurveyBuilder
2
- # from edsl.conjure.SurveyBuilderCSV import SurveyBuilderCSV
3
- # from edsl.conjure.SurveyBuilderCSV import SurveyBuilderStata
4
- # from edsl.conjure.SurveyBuilderSPSS import SurveyBuilderSPSS
5
- # from edsl.conjure.InputData.InputDataSPSS import InputDataSPSS
6
- # from edsl.conjure.InputData.InputDataCSV import InputDataCSV
7
- # from edsl.conjure.InputData.InputDataStata import InputDataStata
8
- # from edsl.conjure.InputData import InputDataSPSS
9
- # from edsl.conjure.InputData import InputData
edsl/conjure/examples/placeholder.txt DELETED
File without changes
edsl/conjure/utilities.py DELETED
@@ -1,201 +0,0 @@
1
- import requests
2
- import subprocess
3
- from io import StringIO
4
- import os
5
- import pandas as pd
6
-
7
-
8
- class ValidFilename:
9
- """A descriptor that checks if a file exists.
10
-
11
-
12
- >>> f = ValidFilename()
13
- >>> f = "hello"
14
- """
15
-
16
- def __set_name__(self, owner, name):
17
- self.name = name
18
-
19
- def __get__(self, instance, owner):
20
- return instance.__dict__.get(self.name, None)
21
-
22
- def __set__(self, instance, value):
23
- if not isinstance(value, str):
24
- raise ValueError(
25
- f"The filename must be a string, not {type(value).__name__}"
26
- )
27
-
28
- if not os.path.exists(value):
29
- raise ValueError(f"The file '{value}' does not exist.")
30
-
31
- instance.__dict__[self.name] = value
32
-
33
-
34
- class DummyClassToTestDescriptor:
35
- """
36
-
37
- >>> d = DummyClassToTestDescriptor(1)
38
- Traceback (most recent call last):
39
- ...
40
- ValueError: The filename must be a string, not int
41
-
42
- >>> d = DummyClassToTestDescriptor("hello")
43
- Traceback (most recent call last):
44
- ...
45
- ValueError: The file 'hello' does not exist.
46
- """
47
-
48
- filename = ValidFilename()
49
-
50
- def __init__(self, filename):
51
- self.filename = filename
52
-
53
- def __repr__(self):
54
- return f"DummyClassToTestDescriptor({self.filename})"
55
-
56
-
57
- class Missing:
58
- def __repr__(self):
59
- return "Missing()"
60
-
61
- def __str__(self):
62
- return "Missing()"
63
-
64
- def value(self):
65
- return "missing"
66
-
67
-
68
- def convert_value(x):
69
- """Takes a string and tries to convert it.
70
-
71
- >>> convert_value('1')
72
- 1
73
- >>> convert_value('1.2')
74
- 1.2
75
- >>> convert_value("how are you?")
76
- 'how are you?'
77
- >>> convert_value("")
78
- 'missing'
79
-
80
- """
81
- try:
82
- float_val = float(x)
83
- if float_val.is_integer():
84
- return int(float_val)
85
- else:
86
- return float_val
87
- except ValueError:
88
- if len(x) == 0:
89
- return Missing().value()
90
- else:
91
- return str(x)
92
-
93
-
94
- # class RCodeSnippet:
95
- # def __init__(self, r_code):
96
- # self.r_code = r_code
97
-
98
- # def __call__(self, data_file_name):
99
- # return self.run_R_stdin(self.r_code, data_file_name)
100
-
101
- # def __add__(self, other):
102
- # return RCodeSnippet(self.r_code + other.r_code)
103
-
104
- # def write_to_file(self, filename) -> None:
105
- # """Writes the R code to a file; useful for debugging."""
106
- # if filename.endswith(".R") or filename.endswith(".r"):
107
- # pass
108
- # else:
109
- # filename += ".R"
110
-
111
- # with open(filename, "w") as f:
112
- # f.write(self.r_code)
113
-
114
- # @staticmethod
115
- # def run_R_stdin(r_code, data_file_name, transform_func=lambda x: pd.read_csv(x)):
116
- # """Runs an R script and returns the stdout as a string."""
117
- # cmd = ["Rscript", "-e", r_code, data_file_name]
118
- # process = subprocess.Popen(
119
- # cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
120
- # )
121
- # stdout, stderr = process.communicate()
122
- # if stderr != "":
123
- # print("Warning: stderr is not empty.")
124
- # print(f"Problem running: {r_code}")
125
- # raise Exception(stderr)
126
- # return transform_func(StringIO(stdout))
127
-
128
-
129
- def infer_question_type(question_text, responses, sample_size=15):
130
- from edsl.questions import QuestionMultipleChoice
131
-
132
- q = QuestionMultipleChoice(
133
- question_text="""We have a survey question and we are trying to infer its type.
134
- The question text is: '{{question_text}}'.
135
- The first {{ sample_size }} responses are: '{{responses}}'.
136
- There are {{ total }} responses in total.
137
- If a response is a command-separated list, it is likely a checkbox question.
138
- """,
139
- question_name="infer_question_type",
140
- question_options=[
141
- "budget",
142
- "checkbox",
143
- "extract",
144
- "free_text",
145
- "likert_five",
146
- "linear_scale",
147
- "list",
148
- "multiple_choice",
149
- "numerical",
150
- "rank",
151
- "top_k",
152
- "yes_no",
153
- ],
154
- )
155
- response = (
156
- q.to_survey()(
157
- question_text=question_text,
158
- sample_zize=sample_size,
159
- responses=responses[:sample_size],
160
- )
161
- .select("infer_question_type")
162
- .first()
163
- )
164
- return response
165
-
166
-
167
- def download_file(url, filename):
168
- """
169
- Downloads a file from a given URL and saves it to the specified filename.
170
-
171
- Parameters:
172
- url (str): The URL of the file to download.
173
- filename (str): The name of the file to save the downloaded content.
174
-
175
- Returns:
176
- str: The path to the saved file.
177
- """
178
- headers = {
179
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
180
- }
181
-
182
- # Sending the GET request
183
- response = requests.get(url, headers=headers)
184
-
185
- # Checking if the request was successful
186
- if response.status_code == 200:
187
- # Writing the content to the specified file
188
- with open(filename, "wb") as file:
189
- file.write(response.content)
190
- print(f"File downloaded successfully and saved to {filename}")
191
- return filename
192
- else:
193
- print(f"Failed to download file: {response.status_code}")
194
- return None
195
-
196
-
197
- # Example usage
198
- if __name__ == "__main__":
199
- import doctest
200
-
201
- doctest.testmod(optionflags=doctest.ELLIPSIS)