edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +107 -30
- edsl/BaseDiff.py +260 -0
- edsl/__init__.py +25 -21
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +103 -46
- edsl/agents/AgentList.py +97 -13
- edsl/agents/Invigilator.py +23 -10
- edsl/agents/InvigilatorBase.py +19 -14
- edsl/agents/PromptConstructionMixin.py +342 -100
- edsl/agents/descriptors.py +5 -2
- edsl/base/Base.py +289 -0
- edsl/config.py +2 -1
- edsl/conjure/AgentConstructionMixin.py +152 -0
- edsl/conjure/Conjure.py +56 -0
- edsl/conjure/InputData.py +659 -0
- edsl/conjure/InputDataCSV.py +48 -0
- edsl/conjure/InputDataMixinQuestionStats.py +182 -0
- edsl/conjure/InputDataPyRead.py +91 -0
- edsl/conjure/InputDataSPSS.py +8 -0
- edsl/conjure/InputDataStata.py +8 -0
- edsl/conjure/QuestionOptionMixin.py +76 -0
- edsl/conjure/QuestionTypeMixin.py +23 -0
- edsl/conjure/RawQuestion.py +65 -0
- edsl/conjure/SurveyResponses.py +7 -0
- edsl/conjure/__init__.py +9 -4
- edsl/conjure/examples/placeholder.txt +0 -0
- edsl/conjure/naming_utilities.py +263 -0
- edsl/conjure/utilities.py +165 -28
- edsl/conversation/Conversation.py +238 -0
- edsl/conversation/car_buying.py +58 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/coop.py +337 -121
- edsl/coop/utils.py +56 -70
- edsl/data/Cache.py +74 -22
- edsl/data/CacheHandler.py +10 -9
- edsl/data/SQLiteDict.py +11 -3
- edsl/inference_services/AnthropicService.py +1 -0
- edsl/inference_services/DeepInfraService.py +20 -13
- edsl/inference_services/GoogleService.py +7 -1
- edsl/inference_services/InferenceServicesCollection.py +33 -7
- edsl/inference_services/OpenAIService.py +17 -10
- edsl/inference_services/models_available_cache.py +69 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/Answers.py +15 -1
- edsl/jobs/Jobs.py +322 -73
- edsl/jobs/buckets/BucketCollection.py +9 -3
- edsl/jobs/buckets/ModelBuckets.py +4 -2
- edsl/jobs/buckets/TokenBucket.py +1 -2
- edsl/jobs/interviews/Interview.py +7 -10
- edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
- edsl/jobs/interviews/retry_management.py +4 -4
- edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
- edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
- edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
- edsl/jobs/tasks/TaskHistory.py +4 -3
- edsl/language_models/LanguageModel.py +42 -55
- edsl/language_models/ModelList.py +96 -0
- edsl/language_models/registry.py +14 -0
- edsl/language_models/repair.py +97 -25
- edsl/notebooks/Notebook.py +157 -32
- edsl/prompts/Prompt.py +31 -19
- edsl/questions/QuestionBase.py +145 -23
- edsl/questions/QuestionBudget.py +5 -6
- edsl/questions/QuestionCheckBox.py +7 -3
- edsl/questions/QuestionExtract.py +5 -3
- edsl/questions/QuestionFreeText.py +3 -3
- edsl/questions/QuestionFunctional.py +0 -3
- edsl/questions/QuestionList.py +3 -4
- edsl/questions/QuestionMultipleChoice.py +16 -8
- edsl/questions/QuestionNumerical.py +4 -3
- edsl/questions/QuestionRank.py +5 -3
- edsl/questions/__init__.py +4 -3
- edsl/questions/descriptors.py +9 -4
- edsl/questions/question_registry.py +27 -31
- edsl/questions/settings.py +1 -1
- edsl/results/Dataset.py +31 -0
- edsl/results/DatasetExportMixin.py +493 -0
- edsl/results/Result.py +42 -82
- edsl/results/Results.py +178 -66
- edsl/results/ResultsDBMixin.py +10 -9
- edsl/results/ResultsExportMixin.py +23 -507
- edsl/results/ResultsGGMixin.py +3 -3
- edsl/results/ResultsToolsMixin.py +9 -9
- edsl/scenarios/FileStore.py +140 -0
- edsl/scenarios/Scenario.py +59 -6
- edsl/scenarios/ScenarioList.py +138 -52
- edsl/scenarios/ScenarioListExportMixin.py +32 -0
- edsl/scenarios/ScenarioListPdfMixin.py +2 -1
- edsl/scenarios/__init__.py +1 -0
- edsl/study/ObjectEntry.py +173 -0
- edsl/study/ProofOfWork.py +113 -0
- edsl/study/SnapShot.py +73 -0
- edsl/study/Study.py +498 -0
- edsl/study/__init__.py +4 -0
- edsl/surveys/MemoryPlan.py +11 -4
- edsl/surveys/Survey.py +124 -37
- edsl/surveys/SurveyExportMixin.py +25 -5
- edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
- edsl/tools/plotting.py +4 -2
- edsl/utilities/__init__.py +21 -20
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/gcp_bucket/simple_example.py +9 -0
- edsl/utilities/interface.py +90 -73
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/utilities.py +59 -6
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
- edsl-0.1.29.dist-info/RECORD +203 -0
- edsl/conjure/RawResponseColumn.py +0 -327
- edsl/conjure/SurveyBuilder.py +0 -308
- edsl/conjure/SurveyBuilderCSV.py +0 -78
- edsl/conjure/SurveyBuilderSPSS.py +0 -118
- edsl/data/RemoteDict.py +0 -103
- edsl-0.1.27.dev2.dist-info/RECORD +0 -172
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
edsl/conjure/SurveyBuilder.py
DELETED
@@ -1,308 +0,0 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
import os
|
3
|
-
import random
|
4
|
-
from typing import Dict, Any, List, Callable, Optional
|
5
|
-
from collections import UserDict
|
6
|
-
|
7
|
-
from pydantic import ValidationError
|
8
|
-
|
9
|
-
from edsl.utilities.utilities import create_valid_var_name
|
10
|
-
from edsl.surveys.Survey import Survey
|
11
|
-
from edsl.conjure.RawResponseColumn import (
|
12
|
-
RawResponseColumn,
|
13
|
-
get_replacement_name,
|
14
|
-
CustomDict,
|
15
|
-
)
|
16
|
-
|
17
|
-
|
18
|
-
class ValidFilename:
|
19
|
-
def __set_name__(self, owner, name):
|
20
|
-
self.name = name
|
21
|
-
|
22
|
-
def __get__(self, instance, owner):
|
23
|
-
return instance.__dict__.get(self.name, None)
|
24
|
-
|
25
|
-
def __set__(self, instance, value):
|
26
|
-
if not isinstance(value, str):
|
27
|
-
raise ValueError(
|
28
|
-
f"The filename must be a string, not {type(value).__name__}"
|
29
|
-
)
|
30
|
-
|
31
|
-
if not os.path.exists(value):
|
32
|
-
raise ValueError(f"The file '{value}' does not exist.")
|
33
|
-
|
34
|
-
instance.__dict__[self.name] = value
|
35
|
-
|
36
|
-
|
37
|
-
class SurveyBuilder(ABC, UserDict):
|
38
|
-
"""A ABC class to represent the process of building a survey and results from an external format"""
|
39
|
-
|
40
|
-
datafile_name = ValidFilename()
|
41
|
-
|
42
|
-
def lookup_dict(self):
|
43
|
-
return get_replacement_name.lookup_dict
|
44
|
-
|
45
|
-
def __init__(
|
46
|
-
self,
|
47
|
-
datafile_name: str,
|
48
|
-
sample_size: Optional[int] = None,
|
49
|
-
compute_results: bool = True,
|
50
|
-
):
|
51
|
-
"""Initialize the SurveyBuilder with the given datafile_name.
|
52
|
-
|
53
|
-
:param datafile_name: The name of the datafile to be used.
|
54
|
-
:param sample_size: The number of observations to sample from the dataset.
|
55
|
-
:param compute_results: Whether to compute the results or not.
|
56
|
-
|
57
|
-
The SurveyBuilder will read the datafile_name and create a survey from it.
|
58
|
-
|
59
|
-
>>> sb = SurveyBuilder.example()
|
60
|
-
>>> sb.responses
|
61
|
-
{'q1': ['1', '4'], 'q2': ['2', '5'], 'q3': ['3', '6']}
|
62
|
-
>>> sb.question_name_to_text
|
63
|
-
{'q1': 'Q1', 'q2': 'Q2', 'q3': 'Q3'}
|
64
|
-
|
65
|
-
>>> sb.data['q1']
|
66
|
-
RawResponseColumn(question_name="q1", question_text="Q1", raw_responses=['1', '4'], responses=['1', '4'], unqiue_responses=defaultdict(<class 'int'>, {'1': 1, '4': 1}), answer_codebook={})
|
67
|
-
"""
|
68
|
-
self.datafile_name = datafile_name
|
69
|
-
self.sample_size = sample_size
|
70
|
-
self.responses = CustomDict(self.get_responses())
|
71
|
-
|
72
|
-
self.question_name_to_text = CustomDict(self.get_question_name_to_text())
|
73
|
-
|
74
|
-
self.question_name_to_answer_book = CustomDict(
|
75
|
-
self.get_question_name_to_answer_book()
|
76
|
-
)
|
77
|
-
self.compute_results = compute_results
|
78
|
-
|
79
|
-
data = {}
|
80
|
-
for question_name, raw_responses in self.responses.items():
|
81
|
-
raw_question_response = RawResponseColumn(
|
82
|
-
question_name=question_name,
|
83
|
-
raw_responses=raw_responses,
|
84
|
-
answer_codebook=self.question_name_to_answer_book[question_name],
|
85
|
-
question_text=self.question_name_to_text[question_name],
|
86
|
-
)
|
87
|
-
data[question_name] = raw_question_response
|
88
|
-
|
89
|
-
super().__init__(data)
|
90
|
-
|
91
|
-
def process(self) -> None:
|
92
|
-
self.survey, self.survey_failures = self.create_survey()
|
93
|
-
|
94
|
-
if self.compute_results:
|
95
|
-
self.agents, self.agent_failures = self.create_agents()
|
96
|
-
self.results = self.create_results()
|
97
|
-
# remove the direct question answering method
|
98
|
-
[agent.remove_direct_question_answering_method() for agent in self.agents]
|
99
|
-
else:
|
100
|
-
self.agents = None
|
101
|
-
self.results = None
|
102
|
-
|
103
|
-
def get_observations(self) -> List[Dict[str, Any]]:
|
104
|
-
"""Returns a list of dictionaries, where each dictionary is an observation.
|
105
|
-
|
106
|
-
>>> sb = SurveyBuilder.example()
|
107
|
-
>>> sb.get_observations()
|
108
|
-
[{'q1': '1', 'q2': '2', 'q3': '3'}, {'q1': '4', 'q2': '5', 'q3': '6'}]
|
109
|
-
|
110
|
-
"""
|
111
|
-
observations = []
|
112
|
-
for question_name, question_responses in self.items():
|
113
|
-
for index, response in enumerate(question_responses.responses):
|
114
|
-
if len(observations) <= index:
|
115
|
-
observations.append({question_name: response})
|
116
|
-
else:
|
117
|
-
observations[index][question_name] = response
|
118
|
-
return observations
|
119
|
-
|
120
|
-
def create_agents(self, question_keys_as_traits: List[str] = None):
|
121
|
-
"""Returns a list of agents, and a dictionary of failures.
|
122
|
-
|
123
|
-
:param sample_size: The number of agents to sample from the dataset.
|
124
|
-
:param question_keys_as_traits: A list of question keys to use as traits.
|
125
|
-
|
126
|
-
These agents are special in that they have an 'answer_question_directly'
|
127
|
-
method that allows them to answer questions directly when presented with
|
128
|
-
the question_name. This is useful because in self.Agents, these agents can
|
129
|
-
bypass the LLM call.
|
130
|
-
"""
|
131
|
-
if question_keys_as_traits is None:
|
132
|
-
question_keys_as_traits = list(self.data.keys())
|
133
|
-
|
134
|
-
from edsl.agents.Agent import Agent
|
135
|
-
from edsl.agents.AgentList import AgentList
|
136
|
-
|
137
|
-
failures = {}
|
138
|
-
|
139
|
-
def construct_answer_dict_function(answer_dict: dict) -> Callable:
|
140
|
-
def func(self, question, scenario=None):
|
141
|
-
return answer_dict.get(question.question_name, None)
|
142
|
-
|
143
|
-
return func
|
144
|
-
|
145
|
-
agent_list = AgentList()
|
146
|
-
|
147
|
-
for observation in self.get_observations(): # iterate through the observations
|
148
|
-
traits = {}
|
149
|
-
for trait_name in question_keys_as_traits:
|
150
|
-
if trait_name not in observation:
|
151
|
-
failures[trait_name] = f"Question name {trait_name} not found."
|
152
|
-
continue
|
153
|
-
else:
|
154
|
-
traits[trait_name] = observation[trait_name]
|
155
|
-
|
156
|
-
agent = Agent(traits=traits)
|
157
|
-
f = construct_answer_dict_function(observation.copy())
|
158
|
-
agent.add_direct_question_answering_method(f)
|
159
|
-
agent_list.append(agent)
|
160
|
-
|
161
|
-
if self.sample_size is not None and len(agent_list) >= self.sample_size:
|
162
|
-
return random.sample(agent_list, self.sample_size), failures
|
163
|
-
else:
|
164
|
-
return agent_list, failures
|
165
|
-
|
166
|
-
def create_survey(self):
|
167
|
-
"Iterates through the question keys and creates a survey."
|
168
|
-
questions = []
|
169
|
-
failures = {}
|
170
|
-
for question_responses in self.values():
|
171
|
-
try:
|
172
|
-
proposed_question = question_responses.to_question()
|
173
|
-
except Exception as e:
|
174
|
-
print(f"Could not convert to question: {question_responses}: {e}")
|
175
|
-
failures[question_responses.question_name] = e
|
176
|
-
continue
|
177
|
-
else:
|
178
|
-
questions.append(proposed_question)
|
179
|
-
if len(failures) > 0:
|
180
|
-
print(
|
181
|
-
f"Attempted {len(self.keys())} questions; there were {len(failures)} failures."
|
182
|
-
)
|
183
|
-
return Survey(questions), failures
|
184
|
-
|
185
|
-
@classmethod
|
186
|
-
def from_url(cls, url: str):
|
187
|
-
"""Create a SurveyBuilder from a URL."""
|
188
|
-
import tempfile
|
189
|
-
import requests
|
190
|
-
|
191
|
-
headers = {
|
192
|
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
|
193
|
-
"Accept": "text/csv,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/csv;q=0.9,application/excel;q=0.8",
|
194
|
-
}
|
195
|
-
|
196
|
-
with tempfile.NamedTemporaryFile(delete=False, mode="wb") as localfile:
|
197
|
-
response = requests.get(url, headers=headers)
|
198
|
-
if response.status_code == 200:
|
199
|
-
localfile.write(response.content)
|
200
|
-
localfile_path = localfile.name
|
201
|
-
else:
|
202
|
-
raise Exception(
|
203
|
-
f"Failed to fetch the file from {url}, status code: {response.status_code}"
|
204
|
-
)
|
205
|
-
|
206
|
-
print("Data saved to", localfile_path)
|
207
|
-
return cls(localfile_path)
|
208
|
-
|
209
|
-
def create_results(self):
|
210
|
-
return self.survey.by(self.agents).run()
|
211
|
-
|
212
|
-
@abstractmethod
|
213
|
-
def get_responses(self) -> Dict:
|
214
|
-
"""Returns all of the raw responses, as a dataframe"""
|
215
|
-
pass
|
216
|
-
|
217
|
-
@abstractmethod
|
218
|
-
def get_question_name_to_text(self) -> Dict[str, str]:
|
219
|
-
pass
|
220
|
-
|
221
|
-
@abstractmethod
|
222
|
-
def get_question_name_to_answer_book(self) -> Dict[str, Dict[str, str]]:
|
223
|
-
pass
|
224
|
-
|
225
|
-
@classmethod
|
226
|
-
def example(cls):
|
227
|
-
class SurveyBuilderExample(SurveyBuilder):
|
228
|
-
@staticmethod
|
229
|
-
def get_dataframe(datafile_name):
|
230
|
-
import pandas as pd
|
231
|
-
|
232
|
-
return pd.DataFrame(
|
233
|
-
{"Q1": ["1", "4"], "Q2": ["2", "5"], "Q3": ["3", "6"]}
|
234
|
-
)
|
235
|
-
|
236
|
-
def get_responses(self) -> Dict:
|
237
|
-
df = self.get_dataframe(self.datafile_name)
|
238
|
-
df.fillna("", inplace=True)
|
239
|
-
df = df.astype(str)
|
240
|
-
data_dict = df.to_dict(orient="list")
|
241
|
-
return {k.lower(): v for k, v in data_dict.items()}
|
242
|
-
|
243
|
-
def get_question_name_to_text(self) -> Dict:
|
244
|
-
d = {}
|
245
|
-
df = self.get_dataframe(self.datafile_name)
|
246
|
-
for col in df.columns:
|
247
|
-
if col in self.lookup_dict():
|
248
|
-
d[col] = self.lookup_dict()[col]
|
249
|
-
else:
|
250
|
-
d[col] = col
|
251
|
-
|
252
|
-
return d
|
253
|
-
|
254
|
-
def get_question_name_to_answer_book(self):
|
255
|
-
d = self.get_question_name_to_text()
|
256
|
-
return {k: {} for k, v in d.items()}
|
257
|
-
|
258
|
-
import tempfile
|
259
|
-
|
260
|
-
named_temp_file = tempfile.NamedTemporaryFile(delete=False)
|
261
|
-
named_temp_file.write(b"Q1,Q2,Q3\n1,2,3\n4,5,6\n")
|
262
|
-
|
263
|
-
return SurveyBuilderExample(named_temp_file.name)
|
264
|
-
|
265
|
-
def to_dict(self):
|
266
|
-
return {
|
267
|
-
"datafile_name": self.datafile_name,
|
268
|
-
"survey": self.survey.to_dict(),
|
269
|
-
"agents": None if self.agents is None else self.agents.to_dict(),
|
270
|
-
"results": None if self.results is None else self.results.to_dict(),
|
271
|
-
"sample_size": self.sample_size,
|
272
|
-
"num_survey_failures": len(self.survey_failures),
|
273
|
-
}
|
274
|
-
|
275
|
-
def save(self, filename: str):
|
276
|
-
if self.survey is None:
|
277
|
-
import warnings
|
278
|
-
|
279
|
-
warnings.warn("The survey has not been created yet.")
|
280
|
-
else:
|
281
|
-
full_filename = filename + "_survey.json.gz"
|
282
|
-
print("Saving survey to", full_filename)
|
283
|
-
self.survey.save(full_filename)
|
284
|
-
|
285
|
-
if self.agents is None:
|
286
|
-
import warnings
|
287
|
-
|
288
|
-
warnings.warn("The agents have not been created yet.")
|
289
|
-
else:
|
290
|
-
full_filename = filename + "_agents.json.gz"
|
291
|
-
print("Saving agents to", full_filename)
|
292
|
-
self.agents.save(full_filename)
|
293
|
-
|
294
|
-
if self.results is None:
|
295
|
-
import warnings
|
296
|
-
|
297
|
-
warnings.warn("The results have not been created yet.")
|
298
|
-
else:
|
299
|
-
full_filename = filename + "_results.json.gz"
|
300
|
-
print("Saving results to", full_filename)
|
301
|
-
self.results.save(full_filename)
|
302
|
-
|
303
|
-
|
304
|
-
if __name__ == "__main__":
|
305
|
-
# q = RawResponseColumn(question_name="Sample question")
|
306
|
-
import doctest
|
307
|
-
|
308
|
-
doctest.testmod()
|
edsl/conjure/SurveyBuilderCSV.py
DELETED
@@ -1,78 +0,0 @@
|
|
1
|
-
from typing import Dict
|
2
|
-
|
3
|
-
import pandas as pd
|
4
|
-
|
5
|
-
from edsl.conjure.SurveyBuilder import SurveyBuilder
|
6
|
-
from edsl.conjure.utilities import RCodeSnippet
|
7
|
-
|
8
|
-
|
9
|
-
class SurveyBuilderCSV(SurveyBuilder):
|
10
|
-
@staticmethod
|
11
|
-
def get_dataframe(datafile_name):
|
12
|
-
return pd.read_csv(datafile_name)
|
13
|
-
|
14
|
-
def get_responses(self) -> Dict:
|
15
|
-
"""Returns a dataframe of responses by reading the datafile_name.
|
16
|
-
|
17
|
-
The structure should be a dictionary, where the keys are the question codes,
|
18
|
-
and the values are the responses.
|
19
|
-
|
20
|
-
For example, {"Q1": [1, 2, 3], "Q2": [4, 5, 6]}
|
21
|
-
|
22
|
-
>>> sb = SurveyBuilderCSV.example()
|
23
|
-
>>> sb.get_responses()
|
24
|
-
{'q1': ['1', '4'], 'q2': ['2', '5'], 'q3': ['3', '6']}
|
25
|
-
|
26
|
-
"""
|
27
|
-
df = self.get_dataframe(self.datafile_name)
|
28
|
-
df.fillna("", inplace=True)
|
29
|
-
df = df.astype(str)
|
30
|
-
data_dict = df.to_dict(orient="list")
|
31
|
-
return {k.lower(): v for k, v in data_dict.items()}
|
32
|
-
|
33
|
-
def get_question_name_to_text(self) -> Dict:
|
34
|
-
"""
|
35
|
-
Get the question name to text mapping.
|
36
|
-
|
37
|
-
>>> sb = SurveyBuilderCSV.example()
|
38
|
-
>>> sb.get_question_name_to_text()
|
39
|
-
{'Q1': 'Q1', 'Q2': 'Q2', 'Q3': 'Q3'}
|
40
|
-
|
41
|
-
"""
|
42
|
-
d = {}
|
43
|
-
df = self.get_dataframe(self.datafile_name)
|
44
|
-
for col in df.columns:
|
45
|
-
if col in self.lookup_dict():
|
46
|
-
d[col] = self.lookup_dict()[col]
|
47
|
-
else:
|
48
|
-
d[col] = col
|
49
|
-
|
50
|
-
return d
|
51
|
-
|
52
|
-
def get_question_name_to_answer_book(self):
|
53
|
-
"""Returns a dictionary mapping question codes to a dictionary mapping answer codes to answer text."""
|
54
|
-
d = self.get_question_name_to_text()
|
55
|
-
return {k: {} for k, v in d.items()}
|
56
|
-
|
57
|
-
@classmethod
|
58
|
-
def example(cls):
|
59
|
-
import tempfile
|
60
|
-
|
61
|
-
named_temp_file = tempfile.NamedTemporaryFile(delete=False)
|
62
|
-
named_temp_file.write(b"Q1,Q2,Q3\n1,2,3\n4,5,6\n")
|
63
|
-
named_temp_file.close()
|
64
|
-
return cls(named_temp_file.name)
|
65
|
-
|
66
|
-
|
67
|
-
class SurveyBuilderStata(SurveyBuilderCSV):
|
68
|
-
@staticmethod
|
69
|
-
def get_dataframe(datafile_name):
|
70
|
-
return pd.read_stata(datafile_name)
|
71
|
-
|
72
|
-
|
73
|
-
if __name__ == "__main__":
|
74
|
-
import doctest
|
75
|
-
|
76
|
-
doctest.testmod()
|
77
|
-
# sb = SurveyBuilderCSV("responses.csv")
|
78
|
-
# sb.save("podcast_survey")
|
@@ -1,118 +0,0 @@
|
|
1
|
-
import textwrap
|
2
|
-
from typing import Dict
|
3
|
-
import json
|
4
|
-
|
5
|
-
import pandas as pd
|
6
|
-
|
7
|
-
from edsl.conjure.SurveyBuilder import SurveyBuilder
|
8
|
-
from edsl.conjure.utilities import RCodeSnippet
|
9
|
-
|
10
|
-
|
11
|
-
class SurveyBuilderSPSS(SurveyBuilder):
|
12
|
-
header_r_code = RCodeSnippet(
|
13
|
-
textwrap.dedent(
|
14
|
-
"""
|
15
|
-
library(haven)
|
16
|
-
library(jsonlite)
|
17
|
-
args <- commandArgs(trailingOnly = TRUE)
|
18
|
-
sav_file_path <- args[1]
|
19
|
-
data <- read_sav(sav_file_path)
|
20
|
-
"""
|
21
|
-
)
|
22
|
-
)
|
23
|
-
|
24
|
-
get_responses_r_code = header_r_code + RCodeSnippet(
|
25
|
-
"""
|
26
|
-
write.csv(data, file = stdout(), row.names = FALSE)
|
27
|
-
"""
|
28
|
-
)
|
29
|
-
|
30
|
-
get_question_code_to_question_text_r_code = header_r_code + RCodeSnippet(
|
31
|
-
textwrap.dedent(
|
32
|
-
"""
|
33
|
-
question_codes <- colnames(data)
|
34
|
-
question_labels <- as.character(sapply(data, function(x) {
|
35
|
-
lbl <- attr(x, "label")
|
36
|
-
if (is.null(lbl)) "" else lbl
|
37
|
-
}))
|
38
|
-
df <- data.frame(question_codes, question_labels)
|
39
|
-
write.csv(df, file = stdout(), row.names = FALSE)
|
40
|
-
"""
|
41
|
-
)
|
42
|
-
)
|
43
|
-
|
44
|
-
get_answer_code_to_answer_text_r_code = header_r_code + RCodeSnippet(
|
45
|
-
textwrap.dedent(
|
46
|
-
"""
|
47
|
-
convert_label <- function(d){
|
48
|
-
df <- data.frame(name = names(d), value = as.numeric(d))
|
49
|
-
json_representation <- toJSON(df, pretty = TRUE)
|
50
|
-
json_representation
|
51
|
-
}
|
52
|
-
|
53
|
-
question_codes <- colnames(data)
|
54
|
-
answer_codes <- sapply(data, function(x) convert_label(attr(x, "labels")))
|
55
|
-
|
56
|
-
df <- data.frame(question_codes, answer_codes)
|
57
|
-
write.csv(df, file = stdout(), row.names = FALSE)
|
58
|
-
"""
|
59
|
-
)
|
60
|
-
)
|
61
|
-
|
62
|
-
def get_responses(self):
|
63
|
-
"""Returns a dataframe of responses.
|
64
|
-
The structure should be a dictionary, where the keys are the question codes,
|
65
|
-
and the values are the responses.
|
66
|
-
|
67
|
-
For example, {"Q1": [1, 2, 3], "Q2": [4, 5, 6]}
|
68
|
-
"""
|
69
|
-
df = self.get_responses_r_code(self.datafile_name)
|
70
|
-
df.fillna("", inplace=True)
|
71
|
-
df = df.astype(str)
|
72
|
-
data_dict = df.to_dict(orient="list")
|
73
|
-
return {k.lower(): v for k, v in data_dict.items()}
|
74
|
-
|
75
|
-
def get_question_name_to_text(self) -> Dict:
|
76
|
-
df = self.get_question_code_to_question_text_r_code(self.datafile_name)
|
77
|
-
df.fillna("", inplace=True)
|
78
|
-
df = df.astype(str)
|
79
|
-
data_dict = df.to_dict(orient="list")
|
80
|
-
|
81
|
-
question_codes = [q.lower() for q in data_dict["question_codes"]]
|
82
|
-
question_text = data_dict["question_labels"]
|
83
|
-
d = dict(zip(question_codes, question_text))
|
84
|
-
try:
|
85
|
-
assert len(d) == len(question_codes)
|
86
|
-
except AssertionError:
|
87
|
-
raise ValueError("Duplicate question codes found.")
|
88
|
-
|
89
|
-
return d
|
90
|
-
|
91
|
-
def get_question_name_to_answer_book(self):
|
92
|
-
"""Returns a dictionary mapping question codes to a dictionary mapping answer codes to answer text.
|
93
|
-
|
94
|
-
e.g., {'q1': {1: 'yes', 2:'no'}}
|
95
|
-
"""
|
96
|
-
df = self.get_answer_code_to_answer_text_r_code(self.datafile_name)
|
97
|
-
df.fillna("", inplace=True)
|
98
|
-
df = df.astype(str)
|
99
|
-
data_dict = df.to_dict(orient="list")
|
100
|
-
question_codes = [q.lower() for q in data_dict["question_codes"]]
|
101
|
-
answer_dicts = []
|
102
|
-
for answer_code in data_dict["answer_codes"]:
|
103
|
-
try:
|
104
|
-
parsed_list = json.loads(answer_code)
|
105
|
-
value = {entry["value"]: entry["name"] for entry in parsed_list}
|
106
|
-
except json.JSONDecodeError as e:
|
107
|
-
value = answer_code
|
108
|
-
print(
|
109
|
-
f"Warning: Could not parse answer_codes for {answer_code} as JSON. Using raw value instead."
|
110
|
-
)
|
111
|
-
answer_dicts.append(value)
|
112
|
-
|
113
|
-
d = dict(zip(question_codes, answer_dicts))
|
114
|
-
return d
|
115
|
-
|
116
|
-
|
117
|
-
if __name__ == "__main__":
|
118
|
-
spss_builder = SurveyBuilderSPSS("job_satisfaction.sav", 100)
|
edsl/data/RemoteDict.py
DELETED
@@ -1,103 +0,0 @@
|
|
1
|
-
import functools
|
2
|
-
import json
|
3
|
-
import requests
|
4
|
-
from typing import Optional
|
5
|
-
from edsl.data.CacheEntry import CacheEntry
|
6
|
-
|
7
|
-
|
8
|
-
def handle_request_exceptions(reraise=False):
|
9
|
-
def decorator(func):
|
10
|
-
@functools.wraps(func)
|
11
|
-
def wrapper(*args, **kwargs):
|
12
|
-
try:
|
13
|
-
return func(*args, **kwargs)
|
14
|
-
except requests.exceptions.ConnectionError as e:
|
15
|
-
print(f"Could not connect to remote server: {e}")
|
16
|
-
except requests.exceptions.Timeout as e:
|
17
|
-
print(f"Request timed out: {e}")
|
18
|
-
except requests.exceptions.HTTPError as e:
|
19
|
-
print(f"HTTP error occurred: {e}")
|
20
|
-
except requests.exceptions.RequestException as e:
|
21
|
-
print(f"An error occurred during the request: {e}")
|
22
|
-
except ValueError as e:
|
23
|
-
print(f"Invalid data format: {e}")
|
24
|
-
|
25
|
-
if reraise:
|
26
|
-
raise
|
27
|
-
|
28
|
-
return wrapper
|
29
|
-
|
30
|
-
return decorator
|
31
|
-
|
32
|
-
|
33
|
-
class RemoteDict:
|
34
|
-
"""
|
35
|
-
A dictionary-like object that is an interface for a remote database.
|
36
|
-
- You can use RemoteDict as a regular dictionary.
|
37
|
-
- TODO: Implement the methods.
|
38
|
-
"""
|
39
|
-
|
40
|
-
def __init__(self, base_url: str = None):
|
41
|
-
self.base_url = base_url or "http://127.0.0.1:8000"
|
42
|
-
|
43
|
-
def __bool__(self):
|
44
|
-
return True
|
45
|
-
|
46
|
-
def __setitem__(self, key: str, value: CacheEntry):
|
47
|
-
response = requests.post(f"{self.base_url}/items/{key}", json=value.to_dict())
|
48
|
-
response.raise_for_status()
|
49
|
-
|
50
|
-
def __getitem__(self, key: str):
|
51
|
-
response = requests.get(f"{self.base_url}/items/{key}")
|
52
|
-
if response.status_code == 404:
|
53
|
-
raise KeyError(f"Key '{key}' not found.")
|
54
|
-
response.raise_for_status()
|
55
|
-
return CacheEntry.from_dict(response.json())
|
56
|
-
|
57
|
-
def get(self, key: str, default: Optional[CacheEntry] = None):
|
58
|
-
try:
|
59
|
-
return self[key]
|
60
|
-
except KeyError:
|
61
|
-
return default
|
62
|
-
|
63
|
-
def __delitem__(self, key: str):
|
64
|
-
response = requests.delete(f"{self.base_url}/items/{key}")
|
65
|
-
if response.status_code == 404:
|
66
|
-
raise KeyError(f"Key '{key}' not found.")
|
67
|
-
response.raise_for_status()
|
68
|
-
|
69
|
-
def __contains__(self, key: str):
|
70
|
-
keys = self.keys()
|
71
|
-
return key in keys
|
72
|
-
|
73
|
-
def __iter__(self):
|
74
|
-
return iter(self.keys())
|
75
|
-
|
76
|
-
def __len__(self):
|
77
|
-
response = requests.get(f"{self.base_url}/items/count")
|
78
|
-
response.raise_for_status()
|
79
|
-
return response.json()["count"]
|
80
|
-
|
81
|
-
def keys(self):
|
82
|
-
response = requests.get(f"{self.base_url}/items/")
|
83
|
-
response.raise_for_status()
|
84
|
-
return response.json()
|
85
|
-
|
86
|
-
def values(self) -> list[CacheEntry]:
|
87
|
-
response = requests.get(f"{self.base_url}/items/values")
|
88
|
-
response.raise_for_status()
|
89
|
-
items = response.json()
|
90
|
-
return [CacheEntry(**json.loads(item)) for item in items]
|
91
|
-
|
92
|
-
|
93
|
-
if __name__ == "__main__":
|
94
|
-
api_dict = RemoteDict()
|
95
|
-
|
96
|
-
# Add an item
|
97
|
-
api_dict["example"] = CacheEntry.example()
|
98
|
-
|
99
|
-
# Retrieve an item
|
100
|
-
print(api_dict["example"])
|
101
|
-
|
102
|
-
# Check if an item exists
|
103
|
-
print("example" in api_dict)
|