edsl 0.1.38.dev1__py3-none-any.whl → 0.1.38.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. edsl/Base.py +3 -3
  2. edsl/BaseDiff.py +7 -7
  3. edsl/__init__.py +2 -1
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +17 -14
  6. edsl/agents/AgentList.py +29 -17
  7. edsl/auto/SurveyCreatorPipeline.py +1 -1
  8. edsl/auto/utilities.py +1 -1
  9. edsl/base/Base.py +3 -13
  10. edsl/coop/coop.py +3 -0
  11. edsl/data/Cache.py +18 -15
  12. edsl/exceptions/agents.py +4 -0
  13. edsl/exceptions/cache.py +5 -0
  14. edsl/jobs/Jobs.py +22 -11
  15. edsl/jobs/buckets/TokenBucket.py +3 -0
  16. edsl/jobs/interviews/Interview.py +18 -18
  17. edsl/jobs/runners/JobsRunnerAsyncio.py +38 -15
  18. edsl/jobs/runners/JobsRunnerStatus.py +196 -196
  19. edsl/jobs/tasks/TaskHistory.py +12 -3
  20. edsl/language_models/LanguageModel.py +9 -7
  21. edsl/language_models/ModelList.py +20 -13
  22. edsl/notebooks/Notebook.py +7 -8
  23. edsl/questions/QuestionBase.py +21 -17
  24. edsl/questions/QuestionBaseGenMixin.py +1 -1
  25. edsl/questions/QuestionBasePromptsMixin.py +0 -17
  26. edsl/questions/QuestionFunctional.py +10 -3
  27. edsl/questions/derived/QuestionTopK.py +2 -0
  28. edsl/results/Result.py +31 -25
  29. edsl/results/Results.py +22 -22
  30. edsl/scenarios/Scenario.py +12 -14
  31. edsl/scenarios/ScenarioList.py +16 -16
  32. edsl/surveys/MemoryPlan.py +1 -1
  33. edsl/surveys/Rule.py +1 -5
  34. edsl/surveys/RuleCollection.py +1 -1
  35. edsl/surveys/Survey.py +9 -17
  36. edsl/surveys/instructions/ChangeInstruction.py +9 -7
  37. edsl/surveys/instructions/Instruction.py +9 -7
  38. edsl/{conjure → utilities}/naming_utilities.py +1 -1
  39. {edsl-0.1.38.dev1.dist-info → edsl-0.1.38.dev2.dist-info}/METADATA +1 -1
  40. {edsl-0.1.38.dev1.dist-info → edsl-0.1.38.dev2.dist-info}/RECORD +42 -56
  41. edsl/conjure/AgentConstructionMixin.py +0 -160
  42. edsl/conjure/Conjure.py +0 -62
  43. edsl/conjure/InputData.py +0 -659
  44. edsl/conjure/InputDataCSV.py +0 -48
  45. edsl/conjure/InputDataMixinQuestionStats.py +0 -182
  46. edsl/conjure/InputDataPyRead.py +0 -91
  47. edsl/conjure/InputDataSPSS.py +0 -8
  48. edsl/conjure/InputDataStata.py +0 -8
  49. edsl/conjure/QuestionOptionMixin.py +0 -76
  50. edsl/conjure/QuestionTypeMixin.py +0 -23
  51. edsl/conjure/RawQuestion.py +0 -65
  52. edsl/conjure/SurveyResponses.py +0 -7
  53. edsl/conjure/__init__.py +0 -9
  54. edsl/conjure/examples/placeholder.txt +0 -0
  55. edsl/conjure/utilities.py +0 -201
  56. {edsl-0.1.38.dev1.dist-info → edsl-0.1.38.dev2.dist-info}/LICENSE +0 -0
  57. {edsl-0.1.38.dev1.dist-info → edsl-0.1.38.dev2.dist-info}/WHEEL +0 -0
edsl/conjure/InputData.py DELETED
@@ -1,659 +0,0 @@
1
- import base64
2
- from abc import ABC, abstractmethod
3
- from typing import Dict, Callable, Optional, List, Generator, Tuple, Union
4
- from collections import namedtuple
5
- from typing import List, Union
6
-
7
- from edsl.questions.QuestionBase import QuestionBase
8
-
9
- from edsl.scenarios.ScenarioList import ScenarioList
10
- from edsl.surveys.Survey import Survey
11
- from edsl.conjure.SurveyResponses import SurveyResponses
12
- from edsl.conjure.naming_utilities import sanitize_string
13
- from edsl.utilities.utilities import is_valid_variable_name
14
-
15
- from edsl.conjure.RawQuestion import RawQuestion
16
- from edsl.conjure.AgentConstructionMixin import AgentConstructionMixin
17
-
18
- from edsl.conjure.QuestionOptionMixin import QuestionOptionMixin
19
- from edsl.conjure.InputDataMixinQuestionStats import InputDataMixinQuestionStats
20
- from edsl.conjure.QuestionTypeMixin import QuestionTypeMixin
21
-
22
-
23
class InputDataABC(
    ABC,
    InputDataMixinQuestionStats,
    AgentConstructionMixin,
    QuestionOptionMixin,
    QuestionTypeMixin,
):
    """Abstract container for survey input data read from a data file.

    Subclasses supply the three loaders ``get_question_texts``,
    ``get_raw_data`` and ``get_question_names``; everything else (renaming,
    dropping, selecting, conversion to surveys / scenario lists) is built on
    top of them here.

    ``raw_data`` is stored question-major: it is a list holding one inner list
    of responses per question, in the same order as ``question_names``.
    """

    # Thresholds and marker string; not read in this class body.
    # NOTE(review): presumably consumed by the question type/option mixins - confirm.
    NUM_UNIQUE_THRESHOLD = 15
    FRAC_NUMERICAL_THRESHOLD = 0.8
    MULTIPLE_CHOICE_OTHER_THRESHOLD = 0.5
    OTHER_STRING = "Other:"

    question_attributes = [
        "num_responses",
        "num_unique_responses",
        "missing",
        "unique_responses",
        "frac_numerical",
        "top_5",
        "frac_obs_from_top_5",
    ]
    QuestionStats = namedtuple("QuestionStats", question_attributes)

    def __init__(
        self,
        datafile_name: str,
        config: Optional[dict] = None,
        naming_function: Optional[Callable] = sanitize_string,
        raw_data: Optional[List] = None,
        binary: Optional[str] = None,
        question_names: Optional[List[str]] = None,
        question_texts: Optional[List[str]] = None,
        answer_codebook: Optional[Dict] = None,
        question_types: Optional[List[str]] = None,
        question_options: Optional[List] = None,
        order_options=False,
        question_name_repair_func: Callable = None,
    ):
        """Initialize the InputData object.

        :param datafile_name: The name of the file containing the data.
        :param config: The configuration parameters for reading the data.
        :param naming_function: Callable turning a question text into a question name.
        :param raw_data: The raw data as a list with one list of responses per question.
        :param binary: Base64-encoded file contents; read from ``datafile_name`` when omitted.
        :param question_names: The names of the questions.
        :param question_texts: The text of the questions.
        :param answer_codebook: The codebook for the answers, keyed by question name.
        :param question_types: The types of the questions.
        :param question_options: The options for the questions.
        :param order_options: If true, ``self.order_options()`` is called at the end.
        :param question_name_repair_func: Callable applied to question names that are
            not valid variable names; a default replacement-based repair is used when omitted.

        >>> id = InputDataABC.example(question_names = ['a','b'], answer_codebook = {'a': {'1':'yes', '2':'no'}, 'b': {'1':'yes', '2':'no'}})

        >>> id = InputDataABC.example(question_names = ['a','b'], answer_codebook = {'a': {'1':'yes', '2':'no'}, 'c': {'1':'yes', '2':'no'}})
        Traceback (most recent call last):
        ...
        Exception: The keys of the answer_codebook must match the question_names.
        """

        self.datafile_name = datafile_name
        self.config = config
        self.naming_function = naming_function

        if binary is not None:
            self.binary = binary
        else:
            # Best effort: a missing file simply leaves no binary payload.
            try:
                with open(self.datafile_name, "rb") as file:
                    self.binary = base64.b64encode(file.read()).decode()
            except FileNotFoundError:
                self.binary = None

        def default_repair_func(x):
            return (
                x.replace("#", "_num")
                .replace("class", "social_class")
                .replace("name", "respondent_name")
            )

        self.question_name_repair_func = (
            question_name_repair_func or default_repair_func
        )

        if answer_codebook is not None and question_names is not None:
            # Every codebook key must refer to a known question. An empty or
            # partial codebook is accepted so that the output of to_dict()
            # can be fed back through from_dict().
            if not set(answer_codebook.keys()) <= set(question_names):
                raise Exception(
                    "The keys of the answer_codebook must match the question_names."
                )

        if question_names is not None and question_texts is not None:
            if len(question_names) != len(question_texts):
                raise Exception(
                    "The question_names and question_texts must have the same length."
                )

        # Order matters: the loaders used for names may read the texts, and
        # apply_codebook() needs names, codebook and raw data in place.
        self.question_texts = question_texts
        self.question_names = question_names
        self.answer_codebook = answer_codebook
        self.raw_data = raw_data

        self.apply_codebook()

        self.question_types = question_types
        self.question_options = question_options
        if order_options:
            self.order_options()

    @property
    def download_link(self):
        """Return an IPython ``HTML`` object with a data-URI link to the stored binary."""
        from IPython.display import HTML

        actual_file_name = self.datafile_name.split("/")[-1]
        download_link = f'<a href="data:text/plain;base64,{self.binary}" download="{actual_file_name}">Download {self.datafile_name}</a>'
        return HTML(download_link)

    @abstractmethod
    def get_question_texts(self) -> List[str]:
        """Get the text of the questions

        >>> id = InputDataABC.example()
        >>> id.get_question_texts()
        ['how are you doing this morning?', 'how are you feeling?']

        """
        raise NotImplementedError

    @abstractmethod
    def get_raw_data(self) -> List[List[str]]:
        """Returns the responses by reading the datafile_name.

        >>> id = InputDataABC.example()
        >>> id.get_raw_data()
        [['1', '4'], ['3', '6']]

        """
        raise NotImplementedError

    @abstractmethod
    def get_question_names(self) -> List[str]:
        """Get the names of the questions.

        >>> id = InputDataABC.example()
        >>> id.get_question_names()
        ['morning', 'feeling']

        """
        raise NotImplementedError

    def rename_questions(
        self, rename_dict: Dict[str, str], ignore_missing=False
    ) -> "InputDataABC":
        """Rename several questions, given a mapping of old name to new name.

        >>> id = InputDataABC.example()
        >>> id.rename_questions({'morning': 'evening'}).question_names
        ['evening', 'feeling']

        """
        for old_name, new_name in rename_dict.items():
            self.rename(old_name, new_name, ignore_missing=ignore_missing)
        return self

    def rename(self, old_name, new_name, ignore_missing=False) -> "InputDataABC":
        """Rename a question in place and move its codebook entry along with it.

        :param old_name: Current question name.
        :param new_name: Replacement question name.
        :param ignore_missing: If true, an unknown ``old_name`` is silently skipped.
        :raises ValueError: If ``old_name`` is unknown (and not ignored), or if
            ``new_name`` is already used by a different question.

        >>> id = InputDataABC.example()
        >>> id.rename('morning', 'evening').question_names
        ['evening', 'feeling']

        """
        if old_name not in self.question_names:
            if ignore_missing:
                return self
            raise ValueError(f"Question {old_name} not found.")

        # Duplicate names would make every index()-based lookup ambiguous.
        if new_name != old_name and new_name in self.question_names:
            raise ValueError(f"Question {new_name} already exists.")

        idx = self.question_names.index(old_name)
        self.question_names[idx] = new_name
        self.answer_codebook[new_name] = self.answer_codebook.pop(old_name, {})

        return self

    def _drop_question(self, question_name, ignore_missing=False):
        """Drop a question

        >>> id = InputDataABC.example()
        >>> id._drop_question('morning').question_names
        ['feeling']

        """
        if question_name not in self.question_names:
            if ignore_missing:
                return self
            raise ValueError(f"Question {question_name} not found.")
        idx = self.question_names.index(question_name)
        # Remove the same position from every parallel per-question list.
        self._question_names.pop(idx)
        self._question_texts.pop(idx)
        self.question_types.pop(idx)
        self.question_options.pop(idx)
        self.raw_data.pop(idx)
        self.answer_codebook.pop(question_name, None)
        return self

    def drop(self, *question_names_to_drop) -> "InputDataABC":
        """Drop one or more questions by name.

        >>> id = InputDataABC.example()
        >>> id.drop('morning').question_names
        ['feeling']

        """
        for qn in question_names_to_drop:
            self._drop_question(qn)
        return self

    def keep(self, *question_names_to_keep, ignore_missing=False) -> "InputDataABC":
        """Keep only the named questions, dropping all others.

        >>> id = InputDataABC.example()
        >>> id.keep('morning').question_names
        ['morning']

        """
        # Iterate over a copy because _drop_question mutates the name list.
        for qn in self._question_names[:]:
            if qn not in question_names_to_keep:
                self._drop_question(qn, ignore_missing=ignore_missing)
        return self

    def modify_question_type(
        self,
        question_name: str,
        new_type: str,
        drop_options: bool = False,
        new_options: Optional[List[str]] = None,
    ) -> "InputDataABC":
        """Modify the question type of a question. Checks to make sure the new type is valid.

        If a question cannot be built with the new type/options, the error is
        printed and the previous type and options are restored.

        >>> id = InputDataABC.example()
        >>> id.modify_question_type('morning', 'numerical', drop_options = True).question_types
        ['numerical', 'multiple_choice']

        >>> id = InputDataABC.example()
        >>> id.modify_question_type('morning', 'poop')
        Traceback (most recent call last):
        ...
        ValueError: Question type poop is not available.
        """
        idx = self.question_names.index(question_name)
        old_type = self.question_types[idx]
        old_options = self.question_options[idx]

        from edsl import Question

        if new_type not in Question.available():
            raise ValueError(f"Question type {new_type} is not available.")

        self.question_types[idx] = new_type
        if drop_options:
            self.question_options[idx] = None
        if new_options is not None:
            self.question_options[idx] = new_options

        try:
            # Building the question is the validity check; the result is discarded.
            self.raw_question(idx).to_question()
        except Exception as e:
            print(f"Error with question {question_name} in {self.datafile_name}")
            print(e)
            print("Reverting changes")
            self.question_types[idx] = old_type
            self.question_options[idx] = old_options
        return self

    @property
    def num_observations(self):
        """Return the number of observations (0 when there are no questions).

        >>> id = InputDataABC.example()
        >>> id.num_observations
        2

        """
        data = self.raw_data
        return len(data[0]) if data else 0

    def to_dict(self):
        """Return the constructor arguments needed to rebuild this object."""
        return {
            "datafile_name": self.datafile_name,
            "config": self.config,
            "raw_data": self.raw_data,
            "question_names": self.question_names,
            "question_texts": self.question_texts,
            "binary": self.binary,
            "answer_codebook": self.answer_codebook,
            "question_types": self.question_types,
        }

    @classmethod
    def from_dict(cls, d: Dict):
        """Build an instance by passing ``d`` as keyword arguments to the constructor."""
        return cls(**d)

    @property
    def question_names(self) -> List[str]:
        """
        Return a list of question names.

        >>> id = InputDataABC.example()
        >>> id.question_names
        ['morning', 'feeling']

        We can pass question names instead:

        >>> id = InputDataABC.example(question_names = ['a','b'])
        >>> id.question_names
        ['a', 'b']

        """
        if not hasattr(self, "_question_names"):
            self.question_names = None
        return self._question_names

    @question_names.setter
    def question_names(self, value) -> None:
        """Store the question names, loading them when ``None`` and repairing invalid ones.

        :raises ValueError: If names are not unique, or a name is still not a
            valid variable name after ``question_name_repair_func`` was applied.
        """
        if value is None:
            value = self.get_question_names()
        if len(set(value)) != len(value):
            raise ValueError("Question names must be unique.")
        # Work on a copy so the caller's list is not modified by the repairs.
        value = list(value)
        for i, qn in enumerate(value):
            if is_valid_variable_name(qn, allow_name=False):
                continue
            new_name = self.question_name_repair_func(qn)
            if not is_valid_variable_name(new_name, allow_name=False):
                raise ValueError(
                    f"Question names must be valid Python identifiers. '{qn}' is not. "
                    "You can pass an entry in question_name_repair_func to fix this."
                )
            value[i] = new_name
        self._question_names = value

    @property
    def question_texts(self) -> List[str]:
        """
        Return a list of question texts.

        >>> id = InputDataABC.example()
        >>> id.question_texts
        ['how are you doing this morning?', 'how are you feeling?']
        """
        if not hasattr(self, "_question_texts"):
            self.question_texts = None
        return self._question_texts

    @question_texts.setter
    def question_texts(self, value):
        """Store the question texts, loading them via ``get_question_texts`` when ``None``."""
        if value is None:
            value = self.get_question_texts()
        self._question_texts = value

    @property
    def raw_data(self):
        """Return the raw responses, one inner list per question.

        >>> id = InputDataABC.example()
        >>> id.raw_data
        [['1', '4'], ['3', '6']]

        """
        if not hasattr(self, "_raw_data"):
            self.raw_data = None
        return self._raw_data

    @raw_data.setter
    def raw_data(self, value):
        """Store the raw responses, loading them via ``get_raw_data`` when ``None``."""
        if value is None:
            value = self.get_raw_data()
        self._raw_data = value

    def to_dataset(self) -> "Dataset":
        """Return a ``Dataset`` built from one ``{question_name: responses}`` dict per question."""
        from edsl.results.Dataset import Dataset

        return Dataset(
            [{key: value} for key, value in zip(self.question_names, self.raw_data)]
        )

    def to_scenario_list(self) -> ScenarioList:
        """Return a ScenarioList object from the raw response data.

        >>> id = InputDataABC.example()
        >>> s = id.to_scenario_list()
        >>> type(s) == ScenarioList
        True

        >>> s
        ScenarioList([Scenario({'morning': '1', 'feeling': '3'}), Scenario({'morning': '4', 'feeling': '6'})])

        """
        s = ScenarioList()
        for idx, qn in enumerate(self.question_names):
            s = s.add_list(qn, self.raw_data[idx])
        return s

    @property
    def names_to_texts(self) -> dict:
        """
        Return a dictionary of question names to question texts.

        >>> id = InputDataABC.example()
        >>> id.names_to_texts
        {'morning': 'how are you doing this morning?', 'feeling': 'how are you feeling?'}
        """
        return dict(zip(self.question_names, self.question_texts))

    @property
    def texts_to_names(self):
        """Return a dictionary of question texts to question names.

        >>> id = InputDataABC.example()
        >>> id.texts_to_names
        {'how are you doing this morning?': 'morning', 'how are you feeling?': 'feeling'}

        """
        return {t: n for n, t in self.names_to_texts.items()}

    def raw_question(self, index: int) -> RawQuestion:
        """Return the ``RawQuestion`` assembled from the per-question lists at ``index``."""
        return RawQuestion(
            question_type=self.question_types[index],
            question_name=self.question_names[index],
            question_text=self.question_texts[index],
            responses=self.raw_data[index],
            question_options=self.question_options[index],
        )

    def raw_questions(self) -> Generator[RawQuestion, None, None]:
        """Return a generator of RawQuestion objects."""
        for idx, _ in enumerate(self.question_names):
            yield self.raw_question(idx)

    def questions(self) -> Generator[Union[QuestionBase, None], None, None]:
        """Return a generator of Question objects.

        A question that cannot be built is reported on stdout and yielded as ``None``.
        """
        for rq in self.raw_questions():
            try:
                yield rq.to_question()
            except Exception as e:
                print(
                    f"Error with question '{rq.question_name}' in '{self.datafile_name}'"
                )
                print(e)
                yield None

    def select(self, *question_names: str) -> "InputDataABC":
        """Select a subset of the questions.

        :param question_names: The names of the questions to select.

        Returns a new instance of the same class; the current one is left untouched.

        >>> id = InputDataABC.example()
        >>> id.select('morning').question_names
        ['morning']

        """

        idxs = [self.question_names.index(qn) for qn in question_names]
        new_data = [self.raw_data[i] for i in idxs]
        new_texts = [self.question_texts[i] for i in idxs]
        new_types = [self.question_types[i] for i in idxs]
        new_options = [self.question_options[i] for i in idxs]
        new_names = [self.question_names[i] for i in idxs]
        answer_codebook = {
            qn: self.answer_codebook.get(qn, {}) for qn in question_names
        }
        return self.__class__(
            self.datafile_name,
            self.config,
            raw_data=new_data,
            # Reuse the payload already held instead of re-reading the file.
            binary=self.binary,
            question_names=new_names,
            question_texts=new_texts,
            question_types=new_types,
            question_options=new_options,
            answer_codebook=answer_codebook,
            question_name_repair_func=self.question_name_repair_func,
        )

    def to_survey(self) -> Survey:
        """Return a Survey containing every question that could be built.

        >>> id = InputDataABC.example()
        >>> s = id.to_survey()
        >>> type(s) == Survey
        True

        """
        s = Survey()
        for q in self.questions():
            if q is not None:
                s.add_question(q)
        return s

    def print(self):
        """Print a per-question summary table built as a ScenarioList."""
        sl = (
            ScenarioList.from_list("question_name", self.question_names)
            .add_list("question_text", self.question_texts)
            .add_list("inferred_question_type", self.question_types)
            .add_list("num_responses", self.num_responses)
            .add_list("num_unique_responses", self.num_unique_responses)
            .add_list("missing", self.missing)
            .add_list("frac_numerical", self.frac_numerical)
            .add_list("top_5_items", self.top_k(5))
            .add_list("frac_obs_from_top_5", self.frac_obs_from_top_k(5))
        )
        sl.print()

    @property
    def answer_codebook(self) -> dict:
        """Return the answer codebook.
        >>> id = InputDataABC.example(answer_codebook = {'morning':{'1':'hello'}})
        >>> id.answer_codebook
        {'morning': {'1': 'hello'}}

        """
        if not hasattr(self, "_answer_codebook"):
            # Go through the setter so the default comes from get_answer_codebook().
            self.answer_codebook = None
        return self._answer_codebook

    @answer_codebook.setter
    def answer_codebook(self, value):
        """Store the codebook, falling back to ``get_answer_codebook`` when ``None``."""
        if value is None:
            value = self.get_answer_codebook()
        self._answer_codebook = value

    def get_answer_codebook(self):
        """Return the default (empty) answer codebook."""
        return {}

    def _drop_rows(self, indices: List[int]):
        """Drop rows from the raw data.

        :param indices: Observation positions to remove from every question.

        >>> id = InputDataABC.example()
        >>> id.num_observations
        2
        >>> _ = id._drop_rows([1])
        >>> id.num_observations
        1

        """
        to_drop = set(indices)
        self.raw_data = [
            [r for i, r in enumerate(row) if i not in to_drop]
            for row in self.raw_data
        ]
        return self

    def _missing_indices(self, question_name):
        """Return the indices of missing values for a question.
        TODO: Could re-factor to use SimpleEval

        >>> id = InputDataABC.example()
        >>> id.raw_data[0][0] = 'missing'
        >>> id._missing_indices('morning')
        [0]
        """
        idx = self.question_names.index(question_name)
        return [i for i, r in enumerate(self.raw_data[idx]) if r == "missing"]

    def drop_missing(self, question_name):
        """Drop missing values for a question.

        Removes, from every question, the observations whose response to
        ``question_name`` equals ``"missing"``. Returns ``None``.

        >>> id = InputDataABC.example()
        >>> id.num_observations
        2
        >>> id.raw_data[0][0] = 'missing'
        >>> id.drop_missing('morning')
        >>> id.num_observations
        1
        """
        self._drop_rows(self._missing_indices(question_name))

    def apply_codebook(self) -> None:
        """Apply the codebook to the raw data.

        Responses without a codebook entry are left unchanged.

        >>> id = InputDataABC.example()
        >>> id.raw_data
        [['1', '4'], ['3', '6']]

        >>> id = InputDataABC.example(answer_codebook = {'morning':{'1':'hello'}})
        >>> id.raw_data
        [['hello', '4'], ['3', '6']]
        """
        for index, qn in enumerate(self.question_names):
            if qn in self.answer_codebook:
                mapping = self.answer_codebook[qn]
                self.raw_data[index] = [mapping.get(r, r) for r in self.raw_data[index]]

    def __repr__(self):
        return f"{self.__class__.__name__}: datafile_name:'{self.datafile_name}' num_questions:{len(self.question_names)}, num_observations:{self.num_observations}"

    @classmethod
    def example(cls, **kwargs) -> "InputDataABC":
        """Return a small in-memory example with two questions and two observations."""

        class InputDataExample(InputDataABC):
            def get_question_texts(self) -> List[str]:
                """Get the text of the questions"""
                return ["how are you doing this morning?", "how are you feeling?"]

            def get_raw_data(self) -> List[List[str]]:
                """Return the hard-coded example responses, one list per question."""
                return [["1", "4"], ["3", "6"]]

            def get_question_names(self):
                new_names = [self.naming_function(q) for q in self.question_texts]
                if len(new_names) != len(set(new_names)):
                    new_names = [f"{q}_{i}" for i, q in enumerate(new_names)]
                return new_names

        return InputDataExample("notneeded", config={}, **kwargs)
654
-
655
-
656
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module's docstrings.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
@@ -1,48 +0,0 @@
1
- from typing import List, Optional
2
- import pandas as pd
3
- from edsl.conjure.InputData import InputDataABC
4
- from edsl.conjure.utilities import convert_value
5
-
6
-
7
class InputDataCSV(InputDataABC):
    """Input data backed by a CSV file that is read with pandas.

    Each CSV column is treated as one question: the column header is the
    question text and the column's values are its responses.
    """

    def __init__(self, datafile_name: str, config: Optional[dict] = None, **kwargs):
        """Initialize from a CSV file.

        :param datafile_name: Path of the CSV file.
        :param config: Reader options; the keys ``skiprows`` and ``delimiter``
            are used. Defaults to no skipped rows and a comma delimiter.
        :param kwargs: Forwarded unchanged to ``InputDataABC.__init__``.
        """
        if config is None:
            config = {"skiprows": None, "delimiter": ","}

        super().__init__(datafile_name, config, **kwargs)

    def get_df(self) -> pd.DataFrame:
        """Return the file as an all-string DataFrame, reading it on first use only."""
        if not hasattr(self, "_df"):
            self._df = pd.read_csv(
                self.datafile_name,
                # .get() so a user-supplied config may omit either key.
                skiprows=self.config.get("skiprows"),
                delimiter=self.config.get("delimiter", ","),
                encoding_errors="ignore",
            )
            # NOTE(review): float columns are stringified before fillna, so a
            # missing value in such a column ends up as the text "nan" rather
            # than "" - confirm this is what convert_value expects.
            float_columns = self._df.select_dtypes(include=["float64"]).columns
            self._df[float_columns] = self._df[float_columns].astype(str)
            self._df.fillna("", inplace=True)
            self._df = self._df.astype(str)
        return self._df

    def get_raw_data(self) -> List[List[str]]:
        """Return one list of converted responses per CSV column."""
        columns = self.get_df().to_dict(orient="list")
        return [[convert_value(obs) for obs in values] for values in columns.values()]

    def get_question_texts(self):
        """Return the CSV column headers as the question texts."""
        return list(self.get_df().columns)

    def get_question_names(self):
        """Derive question names from the question texts, de-duplicating them.

        When several texts map to the same name, all but the last occurrence
        get a numeric suffix counting down from the number of occurrences
        (three ``a`` columns give ``a3``, ``a2``, ``a``). A suffix is bumped
        upwards if the suffixed name is already taken by another column.
        """
        new_names = [self.naming_function(q) for q in self.question_texts]

        if len(new_names) > len(set(new_names)):
            from collections import Counter

            remaining = Counter(new_names)
            # Every distinct original name survives unsuffixed (as its last
            # occurrence), so all of them are reserved from the start.
            taken = set(new_names)
            for i, name in enumerate(new_names):
                if remaining[name] > 1:
                    suffix = remaining[name]
                    candidate = f"{name}{suffix}"
                    while candidate in taken:
                        suffix += 1
                        candidate = f"{name}{suffix}"
                    taken.add(candidate)
                    new_names[i] = candidate
                    remaining[name] -= 1
        return new_names