edsl 0.1.27.dev2__py3-none-any.whl → 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. edsl/Base.py +99 -22
  2. edsl/BaseDiff.py +260 -0
  3. edsl/__init__.py +4 -0
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +26 -5
  6. edsl/agents/AgentList.py +62 -7
  7. edsl/agents/Invigilator.py +4 -9
  8. edsl/agents/InvigilatorBase.py +5 -5
  9. edsl/agents/descriptors.py +3 -1
  10. edsl/conjure/AgentConstructionMixin.py +152 -0
  11. edsl/conjure/Conjure.py +56 -0
  12. edsl/conjure/InputData.py +628 -0
  13. edsl/conjure/InputDataCSV.py +48 -0
  14. edsl/conjure/InputDataMixinQuestionStats.py +182 -0
  15. edsl/conjure/InputDataPyRead.py +91 -0
  16. edsl/conjure/InputDataSPSS.py +8 -0
  17. edsl/conjure/InputDataStata.py +8 -0
  18. edsl/conjure/QuestionOptionMixin.py +76 -0
  19. edsl/conjure/QuestionTypeMixin.py +23 -0
  20. edsl/conjure/RawQuestion.py +65 -0
  21. edsl/conjure/SurveyResponses.py +7 -0
  22. edsl/conjure/__init__.py +9 -4
  23. edsl/conjure/examples/placeholder.txt +0 -0
  24. edsl/conjure/naming_utilities.py +263 -0
  25. edsl/conjure/utilities.py +165 -28
  26. edsl/conversation/Conversation.py +238 -0
  27. edsl/conversation/car_buying.py +58 -0
  28. edsl/conversation/mug_negotiation.py +81 -0
  29. edsl/conversation/next_speaker_utilities.py +93 -0
  30. edsl/coop/coop.py +191 -12
  31. edsl/coop/utils.py +20 -2
  32. edsl/data/Cache.py +55 -17
  33. edsl/data/CacheHandler.py +10 -9
  34. edsl/inference_services/AnthropicService.py +1 -0
  35. edsl/inference_services/DeepInfraService.py +20 -13
  36. edsl/inference_services/GoogleService.py +7 -1
  37. edsl/inference_services/InferenceServicesCollection.py +33 -7
  38. edsl/inference_services/OpenAIService.py +17 -10
  39. edsl/inference_services/models_available_cache.py +69 -0
  40. edsl/inference_services/rate_limits_cache.py +25 -0
  41. edsl/inference_services/write_available.py +10 -0
  42. edsl/jobs/Jobs.py +240 -36
  43. edsl/jobs/buckets/BucketCollection.py +9 -3
  44. edsl/jobs/interviews/Interview.py +4 -1
  45. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +24 -10
  46. edsl/jobs/interviews/retry_management.py +4 -4
  47. edsl/jobs/runners/JobsRunnerAsyncio.py +87 -45
  48. edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
  49. edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
  50. edsl/language_models/LanguageModel.py +37 -44
  51. edsl/language_models/ModelList.py +96 -0
  52. edsl/language_models/registry.py +14 -0
  53. edsl/language_models/repair.py +95 -24
  54. edsl/notebooks/Notebook.py +119 -31
  55. edsl/questions/QuestionBase.py +109 -12
  56. edsl/questions/descriptors.py +5 -2
  57. edsl/questions/question_registry.py +7 -0
  58. edsl/results/Result.py +20 -8
  59. edsl/results/Results.py +85 -11
  60. edsl/results/ResultsDBMixin.py +3 -6
  61. edsl/results/ResultsExportMixin.py +47 -16
  62. edsl/results/ResultsToolsMixin.py +5 -5
  63. edsl/scenarios/Scenario.py +59 -5
  64. edsl/scenarios/ScenarioList.py +97 -40
  65. edsl/study/ObjectEntry.py +97 -0
  66. edsl/study/ProofOfWork.py +110 -0
  67. edsl/study/SnapShot.py +77 -0
  68. edsl/study/Study.py +491 -0
  69. edsl/study/__init__.py +2 -0
  70. edsl/surveys/Survey.py +79 -31
  71. edsl/surveys/SurveyExportMixin.py +21 -3
  72. edsl/utilities/__init__.py +1 -0
  73. edsl/utilities/gcp_bucket/__init__.py +0 -0
  74. edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
  75. edsl/utilities/gcp_bucket/simple_example.py +9 -0
  76. edsl/utilities/interface.py +24 -28
  77. edsl/utilities/repair_functions.py +28 -0
  78. edsl/utilities/utilities.py +57 -2
  79. {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/METADATA +43 -17
  80. {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/RECORD +83 -55
  81. edsl-0.1.28.dist-info/entry_points.txt +3 -0
  82. edsl/conjure/RawResponseColumn.py +0 -327
  83. edsl/conjure/SurveyBuilder.py +0 -308
  84. edsl/conjure/SurveyBuilderCSV.py +0 -78
  85. edsl/conjure/SurveyBuilderSPSS.py +0 -118
  86. edsl/data/RemoteDict.py +0 -103
  87. {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/LICENSE +0 -0
  88. {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/WHEEL +0 -0
edsl/study/Study.py ADDED
@@ -0,0 +1,491 @@
1
+ import os
2
+ import platform
3
+ import socket
4
+ import copy
5
+ import inspect
6
+ import json
7
+ from typing import Optional, List, Dict
8
+ from datetime import datetime
9
+
10
+ # from edsl.Base import Base
11
+ from edsl import Cache, set_session_cache, unset_session_cache
12
+ from edsl.utilities.utilities import dict_hash
13
+
14
+ from edsl.study.ObjectEntry import ObjectEntry
15
+ from edsl.study.ProofOfWork import ProofOfWork
16
+ from edsl.study.SnapShot import SnapShot
17
+
18
+
19
class _StudyFrameMarker:
    """Empty marker type with no attributes or behaviour of its own.

    NOTE(review): nothing in the visible part of this module references it;
    presumably it is meant to tag the frame that owns a ``Study`` context --
    confirm before relying on it.
    """
21
+
22
+
23
class Study:
    """A study organizes a series of EDSL objects.

    ```python
    with Study(name = "cool_study") as study:
        q = QuestionFreeText.example()
        results = q.run()
    ```

    The `study` object is a context manager.
    It lets you group a series of events and objects together.

    >>> with Study(name = "cool_study", verbose = False) as study:
    ...     from edsl import QuestionFreeText
    ...     q = QuestionFreeText.example()
    >>> len(study.objects)
    1
    >>> _ = os.system("rm cool_study.json")


    It records all the edsl objects that are created during the study.
    On exit, it saves them to a study file.

    """

    def __init__(
        self,
        name: Optional[str] = None,
        filename: Optional[str] = None,
        description: Optional[str] = None,
        objects: Optional[Dict[str, "ObjectEntry"]] = None,
        cache: Optional["Cache"] = None,
        coop: bool = False,
        use_study_cache=True,
        overwrite_on_change=True,
        proof_of_work=None,
        proof_of_work_difficulty: Optional[int] = None,
        namespace: Optional[dict] = None,
        verbose=True,
    ):
        """
        :param name: The name of the study.
        :param description: A description of the study.
        :param objects: A dictionary of objects to add to the study.
        :param cache: A cache object to (potentially) use for the study.
        :param filename: The path to the study file (without the ".json" suffix); defaults to `name`.
        :param coop: Whether to push the study to coop.
        :param use_study_cache: Whether to use the study cache.
        :param overwrite_on_change: Whether to overwrite the study file if it has changed.
        :param proof_of_work: An existing proof-of-work object; a fresh `ProofOfWork()` is used when omitted.
        :param proof_of_work_difficulty: When truthy, a proof of work of this difficulty is added on exit.
        :param namespace: Stored on `_namespace`; the `namespace` property does not read it.
        :param verbose: Whether to print progress messages.

        If `<filename>.json` exists and is non-empty, the name, description,
        objects, cache and proof of work are loaded from it and the
        corresponding arguments are ignored.

        >>> s = Study()
        Traceback (most recent call last):
        ...
        ValueError: You must provide a name or a filename for the study.

        """
        self.verbose = verbose

        if name is None and filename is None:
            raise ValueError("You must provide a name or a filename for the study.")

        self.filename = name if filename is None else filename

        study_path = self.filename + ".json" if self.filename else None
        if (
            study_path
            and os.path.exists(study_path)
            and os.path.getsize(study_path) > 0
        ):
            if self.verbose:
                print(f"Using existing study file {self.filename}.json")
            self._load_from_file()
        else:
            self.name = name
            self.description = description
            self.objects = objects or {}
            self.cache = cache or Cache()
            self.proof_of_work = proof_of_work or ProofOfWork()

        # These always overwrite the saved study
        self.coop = coop
        self.use_study_cache = use_study_cache
        self.overwrite_on_change = overwrite_on_change
        self.proof_of_work_difficulty = proof_of_work_difficulty

        # Remembered so that __exit__ can tell whether the study replicated.
        self.starting_objects = copy.deepcopy(self.objects)

        self._namespace = namespace if namespace else None

        self._create_mapping_dicts()

    @property
    def namespace(self):
        """Variables visible from the frames that have `Study` in their globals."""
        return self._find_stack()

    def _find_stack(self) -> dict:
        """Find the frames with the Study context and merge their variables.

        Walks the call stack outward from this frame, keeps every frame whose
        globals contain the name "Study", and merges each kept frame's globals
        and then locals into one dict. Frames are merged innermost first, so
        on a name clash the outermost frame's value wins.
        """
        frame = inspect.currentframe()
        candidate_frames = []
        while frame:
            if "Study" in frame.f_globals:
                candidate_frames.append(frame)

            frame = frame.f_back

        found_variables_dict = {}
        for frame in candidate_frames:
            found_variables_dict.update(frame.f_globals)
            found_variables_dict.update(frame.f_locals)

        return found_variables_dict

    def explore_stacks(self):
        """Debugging aid: print every frame's globals/locals names.

        :return: A dict mapping stack depth (0 = this frame) to whether that
            frame's globals contain the name "Study".
        """
        # Imported once, up front, rather than on every loop iteration.
        from rich import print as rprint

        frame = inspect.currentframe()
        count = 0
        d = {}
        while frame:
            d[count] = "Study" in frame.f_globals.keys()
            count += 1

            print("Globals:")
            rprint(frame.f_globals["__name__"])
            rprint(frame.f_globals.keys())
            print("Locals:")
            rprint(frame.f_locals.keys())
            print("\n")
            frame = frame.f_back
        return d

    def _create_mapping_dicts(self):
        """Rebuild the name -> object and hash -> name lookup tables.

        The first entry with a given variable name keeps that name; each later
        entry with the same variable name gets a "_<n>" suffix, starting at 2.
        """
        self._name_to_object = {}
        self._hash_to_name = {}
        self._name_to_oe = {}  # reset here but never populated in this class
        name_counts = {}
        for obj_hash, entry in self.objects.items():
            base_name = entry.variable_name
            if base_name in name_counts:
                name_counts[base_name] += 1
                study_name = base_name + "_" + str(name_counts[base_name])
            else:
                name_counts[base_name] = 1
                study_name = base_name
            self._name_to_object[study_name] = entry.object
            self._hash_to_name[obj_hash] = study_name

    def __len__(self):
        """Number of object entries in the study."""
        return len(self.objects)

    def __eq__(self, other):
        """Two studies are equal when they hold the same set of object hashes."""
        if not isinstance(other, Study):
            # Let Python try the reflected comparison / fall back to identity
            # instead of failing with AttributeError on `other.objects`.
            return NotImplemented
        return self.objects.keys() == other.objects.keys()

    @property
    def name_to_object(self):
        """
        >>> s = Study.example()
        >>> s.name_to_object
        {'q': Question('free_text', question_name = \"""how_are_you\""", question_text = \"""How are you?\""")}
        """
        self._create_mapping_dicts()
        return self._name_to_object

    @property
    def hash_to_name(self):
        """Mapping from object hash to the (de-duplicated) study name."""
        self._create_mapping_dicts()
        return self._hash_to_name

    def __getattr__(self, name):
        """Expose stored objects as attributes, e.g. ``study.q``.

        Only invoked when normal attribute lookup fails.

        :raises AttributeError: if no stored object has that study name.
        """
        # On a bare instance (e.g. while copying/unpickling) `objects` is not
        # set yet; looking it up through `name_to_object` would re-enter this
        # method without end.
        if "objects" not in self.__dict__:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        try:
            return self.name_to_object[name]
        except KeyError:
            # AttributeError (not KeyError) is what hasattr()/getattr() expect.
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None

    @classmethod
    def from_file(cls, filename: str):
        """Load a study from a file (a trailing ".json" is optional)."""
        if filename.endswith(".json"):
            filename = filename[:-5]
        return cls(filename=filename)

    def _load_from_file(self):
        """Load the study from a file.

        >>> import tempfile
        >>> filename = tempfile.NamedTemporaryFile(delete=False)
        >>> study = Study(name = "poo", filename = filename.name)
        >>> study.save()
        Saving study to ...
        >>> study2 = Study(filename = filename.name)
        Using existing study file ...
        >>> study2.name
        'poo'
        """
        with open(self.filename + ".json", "r", encoding="utf-8") as f:
            d = json.load(f)
        d["cache"] = Cache.from_dict(d["cache"])
        d["proof_of_work"] = ProofOfWork.from_dict(d["proof_of_work"])
        d["objects"] = {
            obj_hash: ObjectEntry.from_dict(obj_dict)
            for obj_hash, obj_dict in d["objects"].items()
        }
        # NOTE(review): every stored key is copied onto the instance, which
        # includes the "filename" written by to_dict() -- confirm that
        # replacing the path the caller opened is intended.
        self.__dict__.update(d)

    def __enter__(self):
        """
        >>> s = Study(name = "temp", use_study_cache = True, verbose = False)
        >>> _ = s.__enter__()
        >>> from edsl.config import CONFIG
        >>> hasattr(CONFIG, "EDSL_SESSION_CACHE")
        True
        >>> _ = s.__exit__(None, None, None)
        >>> len(s.objects)
        0
        >>> os.remove("temp.json")

        """
        if self.verbose:
            print("Existing objects in study:")
            self.print()
        snapshot = SnapShot(self.namespace, exclude=[self])

        # Validate before touching global state: __exit__ does not run when
        # __enter__ raises, so the session cache must not be installed yet.
        if snapshot.edsl_objects:
            raise ValueError(
                "You have EDSL objects in the global namespace. "
                "Please remove them before starting a study or put under the 'Study' context manager. "
                f"Objects found: {snapshot.edsl_objects}"
            )

        if self.use_study_cache:
            if self.verbose:
                print("Using study cache.")
            set_session_cache(self.cache)
        return self

    def __hash__(self) -> int:
        """Hash of the list of object hashes, in insertion order."""
        return dict_hash(list(self.objects.keys()))

    def study_diff(self):
        """Not implemented yet."""
        ## Need to also report missing.
        from edsl.BaseDiff import BaseDiff

        raise NotImplementedError("Need to implement this.")

    def print(self):
        """Print a rich table with one row per stored object plus the cache."""
        from rich.console import Console
        from rich.table import Table

        console = Console()
        table = Table(title="Study")
        for column in (
            "Original Name",
            "Study Name",
            "Class",
            "Description",
            "Hash",
            "Coop info",
            "Created",
        ):
            table.add_column(column)

        # Build the hash -> name table once instead of once per row.
        hash_to_name = self.hash_to_name
        for obj_hash, obj in self.objects.items():
            url = (
                ""
                if not hasattr(obj, "coop_info") or obj.coop_info is None
                else obj.coop_info.get("url", "")
            )
            table.add_row(
                obj.variable_name,
                hash_to_name[obj_hash],
                obj.edsl_class_name,
                obj.description,
                obj.hash,
                url,
                datetime.fromtimestamp(obj.created_at).strftime("%Y-%m-%d %H:%M:%S"),
            )
        # Add cache at the end
        table.add_row(
            "N/A - Study Cache",
            "cache",
            "Cache",
            f"Cache of study, entries: {len(self.cache)}",
            str(hash(self.cache)),
            "N/A",
            "N/A",
        )
        console.print(table)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Record the EDSL objects created in the block and persist the study.

        Returns None, so an exception raised inside the ``with`` block
        still propagates.

        :raises ValueError: if `coop` is set but `overwrite_on_change` is not.
        """
        snapshot = SnapShot(namespace=self.namespace, exclude=[self])
        if self.use_study_cache:
            unset_session_cache()

        for variable_name, edsl_object in snapshot.edsl_objects.items():
            self._add_edsl_object(object=edsl_object, variable_name=variable_name)

        if not self.starting_objects:
            if self.verbose:
                print(f"New study saved to {self.filename}.json")
            self.save()

        if self.starting_objects and list(self.starting_objects.keys()) == list(
            self.objects.keys()
        ):
            if self.verbose:
                print("Study perfectly replicated.")
        else:
            if self.verbose:
                print("Starting hashes:", self.starting_objects.keys())
                print("Current hashes:", self.objects.keys())
            if self.starting_objects:
                missing = set(self.starting_objects.keys()) - set(self.objects.keys())
                added = set(self.objects.keys()) - set(self.starting_objects.keys())
                print("Study did not perfectly replicate.")
                for obj_hash in missing:
                    print(f"Missing object: {self.starting_objects[obj_hash]}")
                for obj_hash in added:
                    print(f"Added object: {self.objects[obj_hash]}")
                if self.overwrite_on_change:
                    print("Overwriting study file.")
                    self.save()
                else:
                    print(
                        "Please save the study file with a new name or call study iwth 'overwrite_on_change=True' to overwrite the existing study file."
                    )

        if self.coop:
            self.push()
            if self.overwrite_on_change:
                # Saved again after pushing.
                # NOTE(review): presumably so coop info set by push() is
                # persisted -- confirm against ObjectEntry.push.
                self.save()
            else:
                raise ValueError(
                    "If you want to push to coop, you must save the study file with a new name or call study iwth 'overwrite_on_change=True' to overwrite the existing study file."
                )

        if self.verbose:
            print("Objects in study now:")
            self.print()

        if self.proof_of_work_difficulty:
            print("Adding proof of work to study...")

            # TODO: Need to check if hashes are the same.
            if not self.proof_of_work.input_data:
                self.proof_of_work.add_input_data(str(self.__hash__()))
            self.proof_of_work.add_proof(self.proof_of_work_difficulty)
            print(
                "Proof of work added to study with difficulty ",
                self.proof_of_work_difficulty,
            )
            print(self.proof_of_work)
            self.save()

    def to_dict(self):
        """Serialize the study; this is the structure `save` writes as JSON."""
        return {
            "name": self.name,
            "description": self.description,
            "objects": {
                obj_hash: entry.to_dict() for obj_hash, entry in self.objects.items()
            },
            "filename": self.filename,
            "cache": self.cache.to_dict(),
            "use_study_cache": self.use_study_cache,
            "overwrite_on_change": self.overwrite_on_change,
            "proof_of_work": self.proof_of_work.to_dict(),
        }

    def versions(self):
        """Return a dictionary of objects grouped by variable name."""
        grouped = {}
        for obj_entry in self.objects.values():
            grouped.setdefault(obj_entry.variable_name, []).append(obj_entry)
        return grouped

    def diff(self, variable_name: str, index1: int, index2: int):
        """Return the difference between the versions of an object.

        Computed as ``versions[index2].object - versions[index1].object``.
        """
        versions = self.versions()[variable_name]
        return versions[index2].object - versions[index1].object

    @classmethod
    def example(cls, verbose=False):
        """Return a small study that recorded one example free-text question."""
        import tempfile

        study_file = tempfile.NamedTemporaryFile()
        with cls(filename=study_file.name, verbose=verbose) as study:
            from edsl import QuestionFreeText

            # Looks unused, but the local variable is what the study records.
            q = QuestionFreeText.example()
        return study

    @classmethod
    def from_dict(cls, d):
        """Build a study from the structure produced by `to_dict`.

        The input dict is left untouched.
        """
        kwargs = dict(d)
        kwargs["cache"] = Cache.from_dict(d["cache"])
        kwargs["objects"] = {
            str(object_hash): ObjectEntry.from_dict(obj_dict)
            for object_hash, obj_dict in d["objects"].items()
        }
        kwargs["proof_of_work"] = ProofOfWork.from_dict(d["proof_of_work"])
        return cls(**kwargs)

    def save(self):
        """Write the study to `<filename>.json`."""
        if self.verbose:
            print(f"Saving study to {self.filename}.json")
        with open(self.filename + ".json", "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, indent=4)

    def _get_system_info(self):
        """Return platform, system, processor and hostname of this machine."""
        return {
            "platform": platform.platform(),
            "system": platform.system(),
            "processor": platform.processor(),
            "hostname": socket.gethostname(),
        }

    @staticmethod
    def _get_description(object):
        """Build a default description from the object's length and/or question name."""
        text = ""
        if hasattr(object, "__len__"):
            text += f"Num. entries: {len(object)}"
        if hasattr(object, "question_name"):
            text += f"Question name: {object.question_name}"
        return text

    def _add_edsl_object(self, object, variable_name, description=None) -> None:
        """
        >>> s = Study.example()
        >>> from edsl import QuestionLinearScale
        >>> s._add_edsl_object(QuestionLinearScale.example(), 'q')
        >>> len(s)
        2
        """
        if description is None:
            description = self._get_description(object)
        oe = ObjectEntry(
            variable_name=variable_name, object=object, description=description
        )
        # An object whose hash is already recorded is left as is.
        if oe.hash not in self.objects:
            self.objects[oe.hash] = oe

    def push(self, refresh=False) -> None:
        """Push the objects to coop."""
        for obj_entry in self.objects.values():
            obj_entry.push(refresh=refresh)

    def __repr__(self):
        return f"""Study(name = {self.name}, description = {self.description}, objects = {self.objects}, cache = {self.cache}, filename = {self.filename}, coop = {self.coop}, use_study_cache = {self.use_study_cache}, overwrite_on_change = {self.overwrite_on_change})"""
478
+
479
if __name__ == "__main__":
    # Run this module's doctests when executed as a script. ELLIPSIS lets
    # "..." in expected output stand in for text that varies between runs
    # (temporary file paths, for instance).
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
483
+
484
+ # with Study(name = "cool_study") as study:
485
+ # from edsl import QuestionFreeText
486
+ # q = QuestionFreeText.example()
487
+
488
+ # assert len(study.objects) == 1
489
+
490
+ # print(study.versions())
491
+ # {'q': [ObjectEntry(variable_name='q', object=Question('free_text', question_name = """how_are_you""", question_text = """How are you?"""), description='Question name: how_are_you', coop_info=None, created_at=1720276402.561273, edsl_class_name='QuestionFreeText')]}
edsl/study/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from edsl.study.ObjectEntry import ObjectEntry
2
+ from edsl.study.ProofOfWork import ProofOfWork