edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. edsl/Base.py +107 -30
  2. edsl/BaseDiff.py +260 -0
  3. edsl/__init__.py +25 -21
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +103 -46
  6. edsl/agents/AgentList.py +97 -13
  7. edsl/agents/Invigilator.py +23 -10
  8. edsl/agents/InvigilatorBase.py +19 -14
  9. edsl/agents/PromptConstructionMixin.py +342 -100
  10. edsl/agents/descriptors.py +5 -2
  11. edsl/base/Base.py +289 -0
  12. edsl/config.py +2 -1
  13. edsl/conjure/AgentConstructionMixin.py +152 -0
  14. edsl/conjure/Conjure.py +56 -0
  15. edsl/conjure/InputData.py +659 -0
  16. edsl/conjure/InputDataCSV.py +48 -0
  17. edsl/conjure/InputDataMixinQuestionStats.py +182 -0
  18. edsl/conjure/InputDataPyRead.py +91 -0
  19. edsl/conjure/InputDataSPSS.py +8 -0
  20. edsl/conjure/InputDataStata.py +8 -0
  21. edsl/conjure/QuestionOptionMixin.py +76 -0
  22. edsl/conjure/QuestionTypeMixin.py +23 -0
  23. edsl/conjure/RawQuestion.py +65 -0
  24. edsl/conjure/SurveyResponses.py +7 -0
  25. edsl/conjure/__init__.py +9 -4
  26. edsl/conjure/examples/placeholder.txt +0 -0
  27. edsl/conjure/naming_utilities.py +263 -0
  28. edsl/conjure/utilities.py +165 -28
  29. edsl/conversation/Conversation.py +238 -0
  30. edsl/conversation/car_buying.py +58 -0
  31. edsl/conversation/mug_negotiation.py +81 -0
  32. edsl/conversation/next_speaker_utilities.py +93 -0
  33. edsl/coop/coop.py +337 -121
  34. edsl/coop/utils.py +56 -70
  35. edsl/data/Cache.py +74 -22
  36. edsl/data/CacheHandler.py +10 -9
  37. edsl/data/SQLiteDict.py +11 -3
  38. edsl/inference_services/AnthropicService.py +1 -0
  39. edsl/inference_services/DeepInfraService.py +20 -13
  40. edsl/inference_services/GoogleService.py +7 -1
  41. edsl/inference_services/InferenceServicesCollection.py +33 -7
  42. edsl/inference_services/OpenAIService.py +17 -10
  43. edsl/inference_services/models_available_cache.py +69 -0
  44. edsl/inference_services/rate_limits_cache.py +25 -0
  45. edsl/inference_services/write_available.py +10 -0
  46. edsl/jobs/Answers.py +15 -1
  47. edsl/jobs/Jobs.py +322 -73
  48. edsl/jobs/buckets/BucketCollection.py +9 -3
  49. edsl/jobs/buckets/ModelBuckets.py +4 -2
  50. edsl/jobs/buckets/TokenBucket.py +1 -2
  51. edsl/jobs/interviews/Interview.py +7 -10
  52. edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
  53. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
  54. edsl/jobs/interviews/retry_management.py +4 -4
  55. edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
  56. edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
  57. edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
  58. edsl/jobs/tasks/TaskHistory.py +4 -3
  59. edsl/language_models/LanguageModel.py +42 -55
  60. edsl/language_models/ModelList.py +96 -0
  61. edsl/language_models/registry.py +14 -0
  62. edsl/language_models/repair.py +97 -25
  63. edsl/notebooks/Notebook.py +157 -32
  64. edsl/prompts/Prompt.py +31 -19
  65. edsl/questions/QuestionBase.py +145 -23
  66. edsl/questions/QuestionBudget.py +5 -6
  67. edsl/questions/QuestionCheckBox.py +7 -3
  68. edsl/questions/QuestionExtract.py +5 -3
  69. edsl/questions/QuestionFreeText.py +3 -3
  70. edsl/questions/QuestionFunctional.py +0 -3
  71. edsl/questions/QuestionList.py +3 -4
  72. edsl/questions/QuestionMultipleChoice.py +16 -8
  73. edsl/questions/QuestionNumerical.py +4 -3
  74. edsl/questions/QuestionRank.py +5 -3
  75. edsl/questions/__init__.py +4 -3
  76. edsl/questions/descriptors.py +9 -4
  77. edsl/questions/question_registry.py +27 -31
  78. edsl/questions/settings.py +1 -1
  79. edsl/results/Dataset.py +31 -0
  80. edsl/results/DatasetExportMixin.py +493 -0
  81. edsl/results/Result.py +42 -82
  82. edsl/results/Results.py +178 -66
  83. edsl/results/ResultsDBMixin.py +10 -9
  84. edsl/results/ResultsExportMixin.py +23 -507
  85. edsl/results/ResultsGGMixin.py +3 -3
  86. edsl/results/ResultsToolsMixin.py +9 -9
  87. edsl/scenarios/FileStore.py +140 -0
  88. edsl/scenarios/Scenario.py +59 -6
  89. edsl/scenarios/ScenarioList.py +138 -52
  90. edsl/scenarios/ScenarioListExportMixin.py +32 -0
  91. edsl/scenarios/ScenarioListPdfMixin.py +2 -1
  92. edsl/scenarios/__init__.py +1 -0
  93. edsl/study/ObjectEntry.py +173 -0
  94. edsl/study/ProofOfWork.py +113 -0
  95. edsl/study/SnapShot.py +73 -0
  96. edsl/study/Study.py +498 -0
  97. edsl/study/__init__.py +4 -0
  98. edsl/surveys/MemoryPlan.py +11 -4
  99. edsl/surveys/Survey.py +124 -37
  100. edsl/surveys/SurveyExportMixin.py +25 -5
  101. edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
  102. edsl/tools/plotting.py +4 -2
  103. edsl/utilities/__init__.py +21 -20
  104. edsl/utilities/gcp_bucket/__init__.py +0 -0
  105. edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
  106. edsl/utilities/gcp_bucket/simple_example.py +9 -0
  107. edsl/utilities/interface.py +90 -73
  108. edsl/utilities/repair_functions.py +28 -0
  109. edsl/utilities/utilities.py +59 -6
  110. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
  111. edsl-0.1.29.dist-info/RECORD +203 -0
  112. edsl/conjure/RawResponseColumn.py +0 -327
  113. edsl/conjure/SurveyBuilder.py +0 -308
  114. edsl/conjure/SurveyBuilderCSV.py +0 -78
  115. edsl/conjure/SurveyBuilderSPSS.py +0 -118
  116. edsl/data/RemoteDict.py +0 -103
  117. edsl-0.1.27.dev2.dist-info/RECORD +0 -172
  118. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
  119. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
@@ -1,308 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- import os
3
- import random
4
- from typing import Dict, Any, List, Callable, Optional
5
- from collections import UserDict
6
-
7
- from pydantic import ValidationError
8
-
9
- from edsl.utilities.utilities import create_valid_var_name
10
- from edsl.surveys.Survey import Survey
11
- from edsl.conjure.RawResponseColumn import (
12
- RawResponseColumn,
13
- get_replacement_name,
14
- CustomDict,
15
- )
16
-
17
-
18
class ValidFilename:
    """Data descriptor that only accepts the path of an existing file.

    The value is stored in the owning instance's ``__dict__`` under the
    attribute name the descriptor was assigned to.
    """

    def __set_name__(self, owner, name):
        """Remember the attribute name this descriptor is bound to."""
        self.name = name

    def __get__(self, instance, owner=None):
        """Return the stored filename, or ``None`` if none has been set.

        Accessing the attribute on the class itself (``instance is None``)
        returns the descriptor object instead of failing on ``None.__dict__``.
        """
        if instance is None:
            return self
        return instance.__dict__.get(self.name, None)

    def __set__(self, instance, value):
        """Validate and store ``value``.

        :raises ValueError: if ``value`` is not a string or does not name an
            existing path.
        """
        if not isinstance(value, str):
            raise ValueError(
                f"The filename must be a string, not {type(value).__name__}"
            )

        if not os.path.exists(value):
            raise ValueError(f"The file '{value}' does not exist.")

        instance.__dict__[self.name] = value
35
-
36
-
37
class SurveyBuilder(ABC, UserDict):
    """An abstract base class for building a survey and results from an external format.

    The instance is a mapping from question name to the ``RawResponseColumn``
    built for that question. Subclasses implement ``get_responses``,
    ``get_question_name_to_text`` and ``get_question_name_to_answer_book``.
    Call ``process()`` to populate ``survey``, ``agents`` and ``results``.
    """

    datafile_name = ValidFilename()

    def lookup_dict(self):
        """Return the ``lookup_dict`` attribute of ``get_replacement_name``."""
        return get_replacement_name.lookup_dict

    def __init__(
        self,
        datafile_name: str,
        sample_size: Optional[int] = None,
        compute_results: bool = True,
    ):
        """Initialize the SurveyBuilder with the given datafile_name.

        :param datafile_name: The name of the datafile to be used.
        :param sample_size: The number of observations to sample from the dataset.
        :param compute_results: Whether to compute the results or not.

        The SurveyBuilder will read the datafile_name and create a survey from it.

        >>> sb = SurveyBuilder.example()
        >>> sb.responses
        {'q1': ['1', '4'], 'q2': ['2', '5'], 'q3': ['3', '6']}
        >>> sb.question_name_to_text
        {'q1': 'Q1', 'q2': 'Q2', 'q3': 'Q3'}

        >>> sb.data['q1']
        RawResponseColumn(question_name="q1", question_text="Q1", raw_responses=['1', '4'], responses=['1', '4'], unqiue_responses=defaultdict(<class 'int'>, {'1': 1, '4': 1}), answer_codebook={})
        """
        self.datafile_name = datafile_name
        self.sample_size = sample_size

        # Defined up front so that to_dict() and save() can test for None
        # instead of raising AttributeError when process() has not run yet.
        self.survey = None
        self.survey_failures = {}
        self.agents = None
        self.agent_failures = {}
        self.results = None

        self.responses = CustomDict(self.get_responses())

        self.question_name_to_text = CustomDict(self.get_question_name_to_text())

        self.question_name_to_answer_book = CustomDict(
            self.get_question_name_to_answer_book()
        )
        self.compute_results = compute_results

        data = {}
        for question_name, raw_responses in self.responses.items():
            data[question_name] = RawResponseColumn(
                question_name=question_name,
                raw_responses=raw_responses,
                answer_codebook=self.question_name_to_answer_book[question_name],
                question_text=self.question_name_to_text[question_name],
            )

        super().__init__(data)

    def process(self) -> None:
        """Create the survey and, if ``compute_results`` is set, the agents and results."""
        self.survey, self.survey_failures = self.create_survey()

        if self.compute_results:
            self.agents, self.agent_failures = self.create_agents()
            self.results = self.create_results()
            # remove the direct question answering method
            for agent in self.agents:
                agent.remove_direct_question_answering_method()
        else:
            self.agents = None
            self.results = None

    def get_observations(self) -> List[Dict[str, Any]]:
        """Returns a list of dictionaries, where each dictionary is an observation.

        >>> sb = SurveyBuilder.example()
        >>> sb.get_observations()
        [{'q1': '1', 'q2': '2', 'q3': '3'}, {'q1': '4', 'q2': '5', 'q3': '6'}]

        """
        observations = []
        for question_name, question_responses in self.items():
            for index, response in enumerate(question_responses.responses):
                if len(observations) <= index:
                    # First column to reach this row index starts the row.
                    observations.append({question_name: response})
                else:
                    observations[index][question_name] = response
        return observations

    def create_agents(self, question_keys_as_traits: Optional[List[str]] = None):
        """Returns an AgentList, and a dictionary of failures.

        :param question_keys_as_traits: A list of question keys to use as traits.
            Defaults to every question key in the builder.

        If ``self.sample_size`` is set and at least that many agents were
        built, a random sample of that size is returned (still as an
        ``AgentList``, so callers can use ``to_dict()`` / ``save()`` on it).

        These agents are special in that they have an 'answer_question_directly'
        method that allows them to answer questions directly when presented with
        the question_name. This is useful because in self.Agents, these agents can
        bypass the LLM call.
        """
        if question_keys_as_traits is None:
            question_keys_as_traits = list(self.data.keys())

        from edsl.agents.Agent import Agent
        from edsl.agents.AgentList import AgentList

        failures = {}

        def construct_answer_dict_function(answer_dict: dict) -> Callable:
            def func(self, question, scenario=None):
                return answer_dict.get(question.question_name, None)

            return func

        agent_list = AgentList()

        for observation in self.get_observations():  # iterate through the observations
            traits = {}
            for trait_name in question_keys_as_traits:
                if trait_name not in observation:
                    failures[trait_name] = f"Question name {trait_name} not found."
                    continue
                traits[trait_name] = observation[trait_name]

            agent = Agent(traits=traits)
            f = construct_answer_dict_function(observation.copy())
            agent.add_direct_question_answering_method(f)
            agent_list.append(agent)

        if self.sample_size is not None and len(agent_list) >= self.sample_size:
            # random.sample returns a plain list; rebuild an AgentList so both
            # branches return the same type.
            sampled = AgentList()
            for agent in random.sample(list(agent_list), self.sample_size):
                sampled.append(agent)
            return sampled, failures

        return agent_list, failures

    def create_survey(self):
        """Iterate through the question columns and create a survey.

        Returns a ``(Survey, failures)`` pair, where ``failures`` maps the name
        of each question that could not be converted to the exception raised.
        """
        questions = []
        failures = {}
        for question_responses in self.values():
            try:
                proposed_question = question_responses.to_question()
            except Exception as e:
                # Best effort: record the failure and keep going.
                print(f"Could not convert to question: {question_responses}: {e}")
                failures[question_responses.question_name] = e
                continue
            else:
                questions.append(proposed_question)
        if failures:
            print(
                f"Attempted {len(self.keys())} questions; there were {len(failures)} failures."
            )
        return Survey(questions), failures

    @classmethod
    def from_url(cls, url: str):
        """Create a SurveyBuilder from a URL.

        The response body is saved to a temporary file that is kept on disk
        and then used as the datafile.

        :raises Exception: if the server does not answer with status 200.
        """
        import tempfile
        import requests

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
            "Accept": "text/csv,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/csv;q=0.9,application/excel;q=0.8",
        }

        # Fetch before creating the file so a failed download does not leave
        # an empty temporary file behind.
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            raise Exception(
                f"Failed to fetch the file from {url}, status code: {response.status_code}"
            )

        with tempfile.NamedTemporaryFile(delete=False, mode="wb") as localfile:
            localfile.write(response.content)
            localfile_path = localfile.name

        print("Data saved to", localfile_path)
        return cls(localfile_path)

    def create_results(self):
        """Run the survey with the agents and return the results."""
        return self.survey.by(self.agents).run()

    @abstractmethod
    def get_responses(self) -> Dict:
        """Return the raw responses as a dict keyed by question name."""
        pass

    @abstractmethod
    def get_question_name_to_text(self) -> Dict[str, str]:
        """Return a dict mapping each question name to its question text."""
        pass

    @abstractmethod
    def get_question_name_to_answer_book(self) -> Dict[str, Dict[str, str]]:
        """Return a dict mapping each question name to its answer codebook."""
        pass

    @classmethod
    def example(cls):
        """Return a small in-memory example builder with three questions and two rows."""

        class SurveyBuilderExample(SurveyBuilder):
            @staticmethod
            def get_dataframe(datafile_name):
                import pandas as pd

                return pd.DataFrame(
                    {"Q1": ["1", "4"], "Q2": ["2", "5"], "Q3": ["3", "6"]}
                )

            def get_responses(self) -> Dict:
                df = self.get_dataframe(self.datafile_name)
                df.fillna("", inplace=True)
                df = df.astype(str)
                data_dict = df.to_dict(orient="list")
                return {k.lower(): v for k, v in data_dict.items()}

            def get_question_name_to_text(self) -> Dict:
                d = {}
                df = self.get_dataframe(self.datafile_name)
                for col in df.columns:
                    if col in self.lookup_dict():
                        d[col] = self.lookup_dict()[col]
                    else:
                        d[col] = col

                return d

            def get_question_name_to_answer_book(self):
                d = self.get_question_name_to_text()
                return {k: {} for k in d}

        import tempfile

        # The file only has to exist for the ValidFilename check; close it so
        # the handle is not leaked and the bytes are flushed to disk.
        with tempfile.NamedTemporaryFile(delete=False) as named_temp_file:
            named_temp_file.write(b"Q1,Q2,Q3\n1,2,3\n4,5,6\n")

        return SurveyBuilderExample(named_temp_file.name)

    def to_dict(self):
        """Return a dictionary summary; parts not built yet are reported as ``None``."""
        return {
            "datafile_name": self.datafile_name,
            "survey": None if self.survey is None else self.survey.to_dict(),
            "agents": None if self.agents is None else self.agents.to_dict(),
            "results": None if self.results is None else self.results.to_dict(),
            "sample_size": self.sample_size,
            "num_survey_failures": len(self.survey_failures),
        }

    def save(self, filename: str):
        """Save the survey, agents and results to ``<filename>_<part>.json.gz``.

        A warning is issued for every part that has not been created yet.
        """
        import warnings

        parts = (
            ("survey", self.survey, "The survey has not been created yet."),
            ("agents", self.agents, "The agents have not been created yet."),
            ("results", self.results, "The results have not been created yet."),
        )
        for label, obj, missing_message in parts:
            if obj is None:
                warnings.warn(missing_message)
                continue
            full_filename = filename + f"_{label}.json.gz"
            print(f"Saving {label} to", full_filename)
            obj.save(full_filename)
302
-
303
-
304
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod

    testmod()
@@ -1,78 +0,0 @@
1
- from typing import Dict
2
-
3
- import pandas as pd
4
-
5
- from edsl.conjure.SurveyBuilder import SurveyBuilder
6
- from edsl.conjure.utilities import RCodeSnippet
7
-
8
-
9
class SurveyBuilderCSV(SurveyBuilder):
    """Survey builder whose datafile is read with ``pandas.read_csv``."""

    @staticmethod
    def get_dataframe(datafile_name):
        """Read ``datafile_name`` as CSV and return the resulting DataFrame."""
        return pd.read_csv(datafile_name)

    def get_responses(self) -> Dict:
        """Read the datafile and return the responses as a dictionary.

        Keys are the lower-cased column names; values are the column contents
        as lists of strings, with missing values replaced by "".

        For example, {"Q1": [1, 2, 3], "Q2": [4, 5, 6]}

        >>> sb = SurveyBuilderCSV.example()
        >>> sb.get_responses()
        {'q1': ['1', '4'], 'q2': ['2', '5'], 'q3': ['3', '6']}

        """
        frame = self.get_dataframe(self.datafile_name)
        frame.fillna("", inplace=True)
        columns = frame.astype(str).to_dict(orient="list")
        return {name.lower(): answers for name, answers in columns.items()}

    def get_question_name_to_text(self) -> Dict:
        """
        Map every column name to its question text.

        A column found in ``lookup_dict()`` maps to the stored value; any
        other column maps to itself.

        >>> sb = SurveyBuilderCSV.example()
        >>> sb.get_question_name_to_text()
        {'Q1': 'Q1', 'Q2': 'Q2', 'Q3': 'Q3'}

        """
        frame = self.get_dataframe(self.datafile_name)
        name_to_text = {}
        for column in frame.columns:
            known = column in self.lookup_dict()
            name_to_text[column] = self.lookup_dict()[column] if known else column
        return name_to_text

    def get_question_name_to_answer_book(self):
        """Return an empty answer codebook for every question name."""
        return {name: {} for name in self.get_question_name_to_text()}

    @classmethod
    def example(cls):
        """Build an instance from a temporary three-column, two-row CSV file."""
        import tempfile

        scratch = tempfile.NamedTemporaryFile(delete=False)
        try:
            scratch.write(b"Q1,Q2,Q3\n1,2,3\n4,5,6\n")
        finally:
            scratch.close()
        return cls(scratch.name)
65
-
66
-
67
class SurveyBuilderStata(SurveyBuilderCSV):
    """Same as ``SurveyBuilderCSV`` except the datafile is read with ``pandas.read_stata``."""

    @staticmethod
    def get_dataframe(datafile_name):
        """Read ``datafile_name`` as a Stata file and return the DataFrame."""
        stata_frame = pd.read_stata(datafile_name)
        return stata_frame
71
-
72
-
73
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod

    testmod()
    # Example usage:
    #   sb = SurveyBuilderCSV("responses.csv")
    #   sb.save("podcast_survey")
@@ -1,118 +0,0 @@
1
- import textwrap
2
- from typing import Dict
3
- import json
4
-
5
- import pandas as pd
6
-
7
- from edsl.conjure.SurveyBuilder import SurveyBuilder
8
- from edsl.conjure.utilities import RCodeSnippet
9
-
10
-
11
class SurveyBuilderSPSS(SurveyBuilder):
    """Survey builder for SPSS ``.sav`` files.

    The data is extracted with the R snippets below (``haven`` reads the
    file, each snippet writes CSV to stdout).
    NOTE(review): calling an ``RCodeSnippet`` with the datafile name is assumed
    to return a pandas DataFrame parsed from that CSV - confirm against
    ``edsl.conjure.utilities.RCodeSnippet``.
    """

    # Shared preamble: load the .sav file given as first command-line argument.
    header_r_code = RCodeSnippet(
        textwrap.dedent(
            """
            library(haven)
            library(jsonlite)
            args <- commandArgs(trailingOnly = TRUE)
            sav_file_path <- args[1]
            data <- read_sav(sav_file_path)
            """
        )
    )

    # Dump the whole dataset as CSV.
    get_responses_r_code = header_r_code + RCodeSnippet(
        """
        write.csv(data, file = stdout(), row.names = FALSE)
        """
    )

    # One row per column: its name and its "label" attribute ("" if absent).
    get_question_code_to_question_text_r_code = header_r_code + RCodeSnippet(
        textwrap.dedent(
            """
            question_codes <- colnames(data)
            question_labels <- as.character(sapply(data, function(x) {
                lbl <- attr(x, "label")
                if (is.null(lbl)) "" else lbl
            }))
            df <- data.frame(question_codes, question_labels)
            write.csv(df, file = stdout(), row.names = FALSE)
            """
        )
    )

    # One row per column: its name and its "labels" attribute as a JSON list
    # of {name, value} records.
    get_answer_code_to_answer_text_r_code = header_r_code + RCodeSnippet(
        textwrap.dedent(
            """
            convert_label <- function(d){
                df <- data.frame(name = names(d), value = as.numeric(d))
                json_representation <- toJSON(df, pretty = TRUE)
                json_representation
            }

            question_codes <- colnames(data)
            answer_codes <- sapply(data, function(x) convert_label(attr(x, "labels")))

            df <- data.frame(question_codes, answer_codes)
            write.csv(df, file = stdout(), row.names = FALSE)
            """
        )
    )

    @staticmethod
    def _as_string_columns(df) -> Dict:
        """Return ``df`` as ``{column: [str, ...]}`` with missing values as ""."""
        df.fillna("", inplace=True)
        return df.astype(str).to_dict(orient="list")

    def get_responses(self):
        """Return the responses as a dictionary.

        The keys are the lower-cased question codes, and the values are the
        responses as lists of strings.

        For example, {"Q1": [1, 2, 3], "Q2": [4, 5, 6]}
        """
        data_dict = self._as_string_columns(
            self.get_responses_r_code(self.datafile_name)
        )
        return {k.lower(): v for k, v in data_dict.items()}

    def get_question_name_to_text(self) -> Dict:
        """Return a dictionary mapping lower-cased question codes to question labels.

        :raises ValueError: if two question codes are equal after lower-casing.
        """
        data_dict = self._as_string_columns(
            self.get_question_code_to_question_text_r_code(self.datafile_name)
        )

        question_codes = [q.lower() for q in data_dict["question_codes"]]
        question_text = data_dict["question_labels"]
        d = dict(zip(question_codes, question_text))
        # Explicit check rather than `assert`, which is stripped under -O.
        if len(d) != len(question_codes):
            raise ValueError("Duplicate question codes found.")

        return d

    def get_question_name_to_answer_book(self):
        """Returns a dictionary mapping question codes to a dictionary mapping answer codes to answer text.

        e.g., {'q1': {1: 'yes', 2:'no'}}

        When the answer codes of a question cannot be parsed into that shape,
        a warning is printed and the raw string is used instead.
        """
        data_dict = self._as_string_columns(
            self.get_answer_code_to_answer_text_r_code(self.datafile_name)
        )
        question_codes = [q.lower() for q in data_dict["question_codes"]]
        answer_dicts = []
        for answer_code in data_dict["answer_codes"]:
            try:
                parsed_list = json.loads(answer_code)
                value = {entry["value"]: entry["name"] for entry in parsed_list}
            except (json.JSONDecodeError, TypeError, KeyError):
                # TypeError/KeyError: valid JSON that is not a list of
                # {"name", "value"} records gets the same raw-value fallback.
                value = answer_code
                print(
                    f"Warning: Could not parse answer_codes for {answer_code} as JSON. Using raw value instead."
                )
            answer_dicts.append(value)

        return dict(zip(question_codes, answer_dicts))
115
-
116
-
117
if __name__ == "__main__":
    # Manual check: build from a local SPSS file, keeping a sample of 100.
    spss_builder = SurveyBuilderSPSS("job_satisfaction.sav", sample_size=100)
edsl/data/RemoteDict.py DELETED
@@ -1,103 +0,0 @@
1
- import functools
2
- import json
3
- import requests
4
- from typing import Optional
5
- from edsl.data.CacheEntry import CacheEntry
6
-
7
-
8
def handle_request_exceptions(reraise=False):
    """Build a decorator that reports ``requests`` errors and ``ValueError``.

    :param reraise: when true, the caught exception is re-raised after its
        message has been printed; otherwise it is swallowed and the wrapped
        call returns ``None``.

    Any other exception type propagates untouched.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except (requests.exceptions.RequestException, ValueError) as e:
                # Most specific first; ConnectionError, Timeout and HTTPError
                # are all RequestException subclasses.
                labels = (
                    (
                        requests.exceptions.ConnectionError,
                        "Could not connect to remote server",
                    ),
                    (requests.exceptions.Timeout, "Request timed out"),
                    (requests.exceptions.HTTPError, "HTTP error occurred"),
                    (
                        requests.exceptions.RequestException,
                        "An error occurred during the request",
                    ),
                    (ValueError, "Invalid data format"),
                )
                label = next(
                    text for exc_type, text in labels if isinstance(e, exc_type)
                )
                print(f"{label}: {e}")

                # The bare raise must run inside the except block: once the
                # handler has exited there is no active exception to re-raise.
                if reraise:
                    raise
                return None

        return wrapper

    return decorator
31
-
32
-
33
class RemoteDict:
    """
    A dictionary-like object that is an interface for a remote database.
    - You can use RemoteDict as a regular dictionary.
    - TODO: Implement the methods.

    Every operation is an HTTP request against ``base_url``; HTTP error
    statuses surface as ``requests`` exceptions, except that a 404 on item
    lookup or deletion is translated to ``KeyError``.
    NOTE(review): the ``/items/count`` and ``/items/values`` endpoints share
    the path space of item keys, so keys named "count" or "values" look
    ambiguous - confirm against the server.
    """

    def __init__(self, base_url: Optional[str] = None, timeout: Optional[float] = None):
        """
        :param base_url: root URL of the server; defaults to the local server.
        :param timeout: seconds passed as ``timeout=`` to every request.
            ``None`` (the default) means no timeout.
        """
        self.base_url = base_url or "http://127.0.0.1:8000"
        self.timeout = timeout

    def _item_url(self, key: str) -> str:
        """Return the URL of a single item, with ``key`` percent-encoded.

        Without encoding, a key containing ``?`` or ``#`` would be cut off
        at that character and address a different item.
        """
        from urllib.parse import quote

        return f"{self.base_url}/items/{quote(str(key), safe='')}"

    def __bool__(self):
        # Always truthy, so truth-testing never falls back to __len__ (a request).
        return True

    def __setitem__(self, key: str, value: "CacheEntry"):
        response = requests.post(
            self._item_url(key), json=value.to_dict(), timeout=self.timeout
        )
        response.raise_for_status()

    def __getitem__(self, key: str):
        response = requests.get(self._item_url(key), timeout=self.timeout)
        if response.status_code == 404:
            raise KeyError(f"Key '{key}' not found.")
        response.raise_for_status()
        return CacheEntry.from_dict(response.json())

    def get(self, key: str, default: Optional["CacheEntry"] = None):
        """Return the entry stored under ``key``, or ``default`` if it is missing."""
        try:
            return self[key]
        except KeyError:
            return default

    def __delitem__(self, key: str):
        response = requests.delete(self._item_url(key), timeout=self.timeout)
        if response.status_code == 404:
            raise KeyError(f"Key '{key}' not found.")
        response.raise_for_status()

    def __contains__(self, key: str):
        # Membership is decided client-side from the full key listing.
        return key in self.keys()

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        response = requests.get(f"{self.base_url}/items/count", timeout=self.timeout)
        response.raise_for_status()
        return response.json()["count"]

    def keys(self):
        """Return the decoded JSON body of ``GET /items/``."""
        response = requests.get(f"{self.base_url}/items/", timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def values(self) -> "list[CacheEntry]":
        """Return every stored entry; each listed item is a JSON string of CacheEntry fields."""
        response = requests.get(f"{self.base_url}/items/values", timeout=self.timeout)
        response.raise_for_status()
        items = response.json()
        return [CacheEntry(**json.loads(item)) for item in items]
91
-
92
-
93
if __name__ == "__main__":
    # Manual smoke test against the default server: store an example entry,
    # read it back, then test membership.
    remote_cache = RemoteDict()
    demo_key = "example"

    remote_cache[demo_key] = CacheEntry.example()
    print(remote_cache[demo_key])
    print(demo_key in remote_cache)