edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. edsl/Base.py +116 -197
  2. edsl/__init__.py +7 -15
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +147 -351
  5. edsl/agents/AgentList.py +73 -211
  6. edsl/agents/Invigilator.py +50 -101
  7. edsl/agents/InvigilatorBase.py +70 -62
  8. edsl/agents/PromptConstructor.py +225 -143
  9. edsl/agents/__init__.py +1 -0
  10. edsl/agents/prompt_helpers.py +3 -3
  11. edsl/auto/AutoStudy.py +5 -18
  12. edsl/auto/StageBase.py +40 -53
  13. edsl/auto/StageQuestions.py +1 -2
  14. edsl/auto/utilities.py +6 -0
  15. edsl/config.py +2 -22
  16. edsl/conversation/car_buying.py +1 -2
  17. edsl/coop/PriceFetcher.py +1 -1
  18. edsl/coop/coop.py +47 -125
  19. edsl/coop/utils.py +14 -14
  20. edsl/data/Cache.py +27 -45
  21. edsl/data/CacheEntry.py +15 -12
  22. edsl/data/CacheHandler.py +12 -31
  23. edsl/data/RemoteCacheSync.py +46 -154
  24. edsl/data/__init__.py +3 -4
  25. edsl/data_transfer_models.py +1 -2
  26. edsl/enums.py +0 -27
  27. edsl/exceptions/__init__.py +50 -50
  28. edsl/exceptions/agents.py +0 -12
  29. edsl/exceptions/questions.py +6 -24
  30. edsl/exceptions/scenarios.py +0 -7
  31. edsl/inference_services/AnthropicService.py +19 -38
  32. edsl/inference_services/AwsBedrock.py +2 -0
  33. edsl/inference_services/AzureAI.py +2 -0
  34. edsl/inference_services/GoogleService.py +12 -7
  35. edsl/inference_services/InferenceServiceABC.py +85 -18
  36. edsl/inference_services/InferenceServicesCollection.py +79 -120
  37. edsl/inference_services/MistralAIService.py +3 -0
  38. edsl/inference_services/OpenAIService.py +35 -47
  39. edsl/inference_services/PerplexityService.py +3 -0
  40. edsl/inference_services/TestService.py +10 -11
  41. edsl/inference_services/TogetherAIService.py +3 -5
  42. edsl/jobs/Answers.py +14 -1
  43. edsl/jobs/Jobs.py +431 -356
  44. edsl/jobs/JobsChecks.py +10 -35
  45. edsl/jobs/JobsPrompts.py +4 -6
  46. edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
  47. edsl/jobs/buckets/BucketCollection.py +3 -44
  48. edsl/jobs/buckets/TokenBucket.py +21 -53
  49. edsl/jobs/interviews/Interview.py +408 -143
  50. edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
  51. edsl/jobs/runners/JobsRunnerStatus.py +165 -133
  52. edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
  53. edsl/jobs/tasks/TaskHistory.py +18 -38
  54. edsl/jobs/tasks/task_status_enum.py +2 -0
  55. edsl/language_models/KeyLookup.py +30 -0
  56. edsl/language_models/LanguageModel.py +236 -194
  57. edsl/language_models/ModelList.py +19 -28
  58. edsl/language_models/__init__.py +2 -1
  59. edsl/language_models/registry.py +190 -0
  60. edsl/language_models/repair.py +2 -2
  61. edsl/language_models/unused/ReplicateBase.py +83 -0
  62. edsl/language_models/utilities.py +4 -5
  63. edsl/notebooks/Notebook.py +14 -19
  64. edsl/prompts/Prompt.py +39 -29
  65. edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
  66. edsl/questions/QuestionBase.py +214 -68
  67. edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
  68. edsl/questions/QuestionBasePromptsMixin.py +3 -7
  69. edsl/questions/QuestionBudget.py +1 -1
  70. edsl/questions/QuestionCheckBox.py +3 -3
  71. edsl/questions/QuestionExtract.py +7 -5
  72. edsl/questions/QuestionFreeText.py +3 -2
  73. edsl/questions/QuestionList.py +18 -10
  74. edsl/questions/QuestionMultipleChoice.py +23 -67
  75. edsl/questions/QuestionNumerical.py +4 -2
  76. edsl/questions/QuestionRank.py +17 -7
  77. edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
  78. edsl/questions/SimpleAskMixin.py +3 -4
  79. edsl/questions/__init__.py +1 -2
  80. edsl/questions/derived/QuestionLinearScale.py +3 -6
  81. edsl/questions/derived/QuestionTopK.py +1 -1
  82. edsl/questions/descriptors.py +3 -17
  83. edsl/questions/question_registry.py +1 -1
  84. edsl/results/CSSParameterizer.py +1 -1
  85. edsl/results/Dataset.py +7 -170
  86. edsl/results/DatasetExportMixin.py +305 -168
  87. edsl/results/DatasetTree.py +8 -28
  88. edsl/results/Result.py +206 -298
  89. edsl/results/Results.py +131 -149
  90. edsl/results/ResultsDBMixin.py +238 -0
  91. edsl/results/ResultsExportMixin.py +0 -2
  92. edsl/results/{results_selector.py → Selector.py} +13 -23
  93. edsl/results/TableDisplay.py +171 -98
  94. edsl/results/__init__.py +1 -1
  95. edsl/scenarios/FileStore.py +239 -150
  96. edsl/scenarios/Scenario.py +193 -90
  97. edsl/scenarios/ScenarioHtmlMixin.py +3 -4
  98. edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
  99. edsl/scenarios/ScenarioList.py +244 -415
  100. edsl/scenarios/ScenarioListExportMixin.py +7 -0
  101. edsl/scenarios/ScenarioListPdfMixin.py +37 -15
  102. edsl/scenarios/__init__.py +2 -1
  103. edsl/study/ObjectEntry.py +1 -1
  104. edsl/study/SnapShot.py +1 -1
  105. edsl/study/Study.py +12 -5
  106. edsl/surveys/Rule.py +4 -5
  107. edsl/surveys/RuleCollection.py +27 -25
  108. edsl/surveys/Survey.py +791 -270
  109. edsl/surveys/SurveyCSS.py +8 -20
  110. edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
  111. edsl/surveys/__init__.py +2 -4
  112. edsl/surveys/descriptors.py +2 -6
  113. edsl/surveys/instructions/ChangeInstruction.py +2 -1
  114. edsl/surveys/instructions/Instruction.py +13 -4
  115. edsl/surveys/instructions/InstructionCollection.py +6 -11
  116. edsl/templates/error_reporting/interview_details.html +1 -1
  117. edsl/templates/error_reporting/report.html +1 -1
  118. edsl/tools/plotting.py +1 -1
  119. edsl/utilities/utilities.py +23 -35
  120. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
  121. edsl-0.1.39.dev1.dist-info/RECORD +277 -0
  122. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
  123. edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
  124. edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
  125. edsl/agents/question_option_processor.py +0 -172
  126. edsl/coop/CoopFunctionsMixin.py +0 -15
  127. edsl/coop/ExpectedParrotKeyHandler.py +0 -125
  128. edsl/exceptions/inference_services.py +0 -5
  129. edsl/inference_services/AvailableModelCacheHandler.py +0 -184
  130. edsl/inference_services/AvailableModelFetcher.py +0 -215
  131. edsl/inference_services/ServiceAvailability.py +0 -135
  132. edsl/inference_services/data_structures.py +0 -134
  133. edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
  134. edsl/jobs/FetchInvigilator.py +0 -47
  135. edsl/jobs/InterviewTaskManager.py +0 -98
  136. edsl/jobs/InterviewsConstructor.py +0 -50
  137. edsl/jobs/JobsComponentConstructor.py +0 -189
  138. edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
  139. edsl/jobs/RequestTokenEstimator.py +0 -30
  140. edsl/jobs/async_interview_runner.py +0 -138
  141. edsl/jobs/buckets/TokenBucketAPI.py +0 -211
  142. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  143. edsl/jobs/check_survey_scenario_compatibility.py +0 -85
  144. edsl/jobs/data_structures.py +0 -120
  145. edsl/jobs/decorators.py +0 -35
  146. edsl/jobs/jobs_status_enums.py +0 -9
  147. edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
  148. edsl/jobs/results_exceptions_handler.py +0 -98
  149. edsl/language_models/ComputeCost.py +0 -63
  150. edsl/language_models/PriceManager.py +0 -127
  151. edsl/language_models/RawResponseHandler.py +0 -106
  152. edsl/language_models/ServiceDataSources.py +0 -0
  153. edsl/language_models/key_management/KeyLookup.py +0 -63
  154. edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
  155. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  156. edsl/language_models/key_management/__init__.py +0 -0
  157. edsl/language_models/key_management/models.py +0 -131
  158. edsl/language_models/model.py +0 -256
  159. edsl/notebooks/NotebookToLaTeX.py +0 -142
  160. edsl/questions/ExceptionExplainer.py +0 -77
  161. edsl/questions/HTMLQuestion.py +0 -103
  162. edsl/questions/QuestionMatrix.py +0 -265
  163. edsl/questions/data_structures.py +0 -20
  164. edsl/questions/loop_processor.py +0 -149
  165. edsl/questions/response_validator_factory.py +0 -34
  166. edsl/questions/templates/matrix/__init__.py +0 -1
  167. edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
  168. edsl/questions/templates/matrix/question_presentation.jinja +0 -20
  169. edsl/results/MarkdownToDocx.py +0 -122
  170. edsl/results/MarkdownToPDF.py +0 -111
  171. edsl/results/TextEditor.py +0 -50
  172. edsl/results/file_exports.py +0 -252
  173. edsl/results/smart_objects.py +0 -96
  174. edsl/results/table_data_class.py +0 -12
  175. edsl/results/table_renderers.py +0 -118
  176. edsl/scenarios/ConstructDownloadLink.py +0 -109
  177. edsl/scenarios/DocumentChunker.py +0 -102
  178. edsl/scenarios/DocxScenario.py +0 -16
  179. edsl/scenarios/PdfExtractor.py +0 -40
  180. edsl/scenarios/directory_scanner.py +0 -96
  181. edsl/scenarios/file_methods.py +0 -85
  182. edsl/scenarios/handlers/__init__.py +0 -13
  183. edsl/scenarios/handlers/csv.py +0 -49
  184. edsl/scenarios/handlers/docx.py +0 -76
  185. edsl/scenarios/handlers/html.py +0 -37
  186. edsl/scenarios/handlers/json.py +0 -111
  187. edsl/scenarios/handlers/latex.py +0 -5
  188. edsl/scenarios/handlers/md.py +0 -51
  189. edsl/scenarios/handlers/pdf.py +0 -68
  190. edsl/scenarios/handlers/png.py +0 -39
  191. edsl/scenarios/handlers/pptx.py +0 -105
  192. edsl/scenarios/handlers/py.py +0 -294
  193. edsl/scenarios/handlers/sql.py +0 -313
  194. edsl/scenarios/handlers/sqlite.py +0 -149
  195. edsl/scenarios/handlers/txt.py +0 -33
  196. edsl/scenarios/scenario_selector.py +0 -156
  197. edsl/surveys/ConstructDAG.py +0 -92
  198. edsl/surveys/EditSurvey.py +0 -221
  199. edsl/surveys/InstructionHandler.py +0 -100
  200. edsl/surveys/MemoryManagement.py +0 -72
  201. edsl/surveys/RuleManager.py +0 -172
  202. edsl/surveys/Simulator.py +0 -75
  203. edsl/surveys/SurveyToApp.py +0 -141
  204. edsl/utilities/PrettyList.py +0 -56
  205. edsl/utilities/is_notebook.py +0 -18
  206. edsl/utilities/is_valid_variable_name.py +0 -11
  207. edsl/utilities/remove_edsl_version.py +0 -24
  208. edsl-0.1.39.dist-info/RECORD +0 -358
  209. /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
  210. /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
  211. /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
  212. {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
edsl/results/Dataset.py CHANGED
@@ -1,22 +1,19 @@
1
1
  """A module to represent a dataset of observations."""
2
2
 
3
3
  from __future__ import annotations
4
- import sys
5
- import json
6
4
  import random
5
+ import json
7
6
  from collections import UserList
8
7
  from typing import Any, Union, Optional
8
+ import sys
9
+ import numpy as np
9
10
 
10
11
  from edsl.results.ResultsExportMixin import ResultsExportMixin
11
12
  from edsl.results.DatasetTree import Tree
12
13
  from edsl.results.TableDisplay import TableDisplay
13
- from edsl.Base import PersistenceMixin, HashingMixin
14
14
 
15
15
 
16
- from edsl.results.smart_objects import FirstObject
17
-
18
-
19
- class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
16
+ class Dataset(UserList, ResultsExportMixin):
20
17
  """A class to represent a dataset of observations."""
21
18
 
22
19
  def __init__(
@@ -39,46 +36,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
39
36
  _, values = list(self.data[0].items())[0]
40
37
  return len(values)
41
38
 
42
- def tail(self, n: int = 5) -> Dataset:
43
- """Return the last n observations in the dataset.
44
-
45
- >>> d = Dataset([{'a.b':[1,2,3,4]}])
46
- >>> d.tail(2)
47
- Dataset([{'a.b': [3, 4]}])
48
- """
49
- new_data = []
50
- for observation in self.data:
51
- key, values = list(observation.items())[0]
52
- new_data.append({key: values[-n:]})
53
- return Dataset(new_data)
54
-
55
- def head(self, n: int = 5) -> Dataset:
56
- """Return the first n observations in the dataset.
57
-
58
- >>> d = Dataset([{'a.b':[1,2,3,4]}])
59
- >>> d.head(2)
60
- Dataset([{'a.b': [1, 2]}])
61
- """
62
- new_data = []
63
- for observation in self.data:
64
- key, values = list(observation.items())[0]
65
- new_data.append({key: values[:n]})
66
- return Dataset(new_data)
67
-
68
- def expand(self, field):
69
- return self.to_scenario_list().expand(field)
70
-
71
- def view(self):
72
- from perspective.widget import PerspectiveWidget
73
-
74
- w = PerspectiveWidget(
75
- self.to_pandas(),
76
- plugin="Datagrid",
77
- aggregates={"datetime": "any"},
78
- sort=[["date", "desc"]],
79
- )
80
- return w
81
-
82
39
  def keys(self) -> list[str]:
83
40
  """Return the keys of the first observation in the dataset.
84
41
 
@@ -91,79 +48,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
91
48
  def filter(self, expression):
92
49
  return self.to_scenario_list().filter(expression).to_dataset()
93
50
 
94
- def long(self, exclude_fields: list[str] = None) -> Dataset:
95
- headers, data = self._tabular()
96
- exclude_fields = exclude_fields or []
97
-
98
- # Initialize result dictionaries for each column
99
- result_dict = {}
100
-
101
- for index, row in enumerate(data):
102
- row_values = dict(zip(headers, row))
103
- excluded_values = {field: row_values[field] for field in exclude_fields}
104
-
105
- # Transform non-excluded fields to long format
106
- for header, value in row_values.items():
107
- if header not in exclude_fields:
108
- # Initialize lists in result_dict if needed
109
- if not result_dict:
110
- result_dict = {
111
- "row": [],
112
- "key": [],
113
- "value": [],
114
- **{field: [] for field in exclude_fields},
115
- }
116
-
117
- # Add values to each column
118
- result_dict["row"].append(index)
119
- result_dict["key"].append(header)
120
- result_dict["value"].append(value)
121
- for field in exclude_fields:
122
- result_dict[field].append(excluded_values[field])
123
-
124
- return Dataset([{k: v} for k, v in result_dict.items()])
125
-
126
- def wide(self) -> "Dataset":
127
- """
128
- Convert a long-format dataset (with row, key, value columns) to wide format.
129
-
130
- Expected input format:
131
- - A dataset with three columns containing dictionaries:
132
- - row: list of row indices
133
- - key: list of column names
134
- - value: list of values
135
-
136
- Returns:
137
- - Dataset: A new dataset with columns corresponding to unique keys
138
- """
139
- # Extract the component arrays
140
- row_dict = next(col for col in self if "row" in col)
141
- key_dict = next(col for col in self if "key" in col)
142
- value_dict = next(col for col in self if "value" in col)
143
-
144
- rows = row_dict["row"]
145
- keys = key_dict["key"]
146
- values = value_dict["value"]
147
-
148
- if not (len(rows) == len(keys) == len(values)):
149
- raise ValueError("All input arrays must have the same length")
150
-
151
- # Get unique keys and row indices
152
- unique_keys = sorted(set(keys))
153
- unique_rows = sorted(set(rows))
154
-
155
- # Create a dictionary to store the result
156
- result = {key: [None] * len(unique_rows) for key in unique_keys}
157
-
158
- # Populate the result dictionary
159
- for row_idx, key, value in zip(rows, keys, values):
160
- # Find the position in the output array for this row
161
- output_row_idx = unique_rows.index(row_idx)
162
- result[key][output_row_idx] = value
163
-
164
- # Convert to list of column dictionaries format
165
- return Dataset([{key: values} for key, values in result.items()])
166
-
167
51
  def __repr__(self) -> str:
168
52
  """Return a string representation of the dataset."""
169
53
  return f"Dataset({self.data})"
@@ -242,21 +126,7 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
242
126
  """Get the values of the first key in the dictionary."""
243
127
  return list(d.values())[0]
244
128
 
245
- return FirstObject(get_values(self.data[0])[0])
246
-
247
- def latex(self, **kwargs):
248
- return self.table().latex()
249
-
250
- def remove_prefix(self) -> Dataset:
251
- new_data = []
252
- for observation in self.data:
253
- key, values = list(observation.items())[0]
254
- if "." in key:
255
- new_key = key.split(".")[1]
256
- new_data.append({new_key: values})
257
- else:
258
- new_data.append({key: values})
259
- return Dataset(new_data)
129
+ return get_values(self.data[0])[0]
260
130
 
261
131
  def print(self, pretty_labels=None, **kwargs):
262
132
  if "format" in kwargs:
@@ -276,25 +146,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
276
146
  new_data.append({new_key: values})
277
147
  return Dataset(new_data)
278
148
 
279
- def merge(self, other: Dataset, by_x, by_y) -> Dataset:
280
- """Merge the dataset with another dataset on the given keys.""
281
-
282
- merged_df = df1.merge(df2, how="left", on=["key1", "key2"])
283
- """
284
- df1 = self.to_pandas()
285
- df2 = other.to_pandas()
286
- merged_df = df1.merge(df2, how="left", left_on=by_x, right_on=by_y)
287
- return Dataset.from_pandas_dataframe(merged_df)
288
-
289
- def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
290
- from edsl.surveys.Survey import Survey
291
- from edsl.questions.QuestionBase import QuestionBase
292
-
293
- if isinstance(survey_or_question, Survey):
294
- return survey_or_question.by(self.to_scenario_list())
295
- elif isinstance(survey_or_question, QuestionBase):
296
- return Survey([survey_or_question]).by(self.to_scenario_list())
297
-
298
149
  def select(self, *keys) -> Dataset:
299
150
  """Return a new dataset with only the selected keys.
300
151
 
@@ -430,7 +281,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
430
281
 
431
282
 
432
283
  """
433
- import numpy as np
434
284
 
435
285
  def sort_indices(lst: list[Any]) -> list[int]:
436
286
  """
@@ -559,26 +409,13 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
559
409
  return Dataset([{"num_observations": [len(self)], "keys": [self.keys()]}])
560
410
 
561
411
  @classmethod
562
- def example(self, n: int = None):
412
+ def example(self):
563
413
  """Return an example dataset.
564
414
 
565
415
  >>> Dataset.example()
566
416
  Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
567
417
  """
568
- if n is None:
569
- return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
570
- else:
571
- return Dataset([{"a": [1] * n}, {"b": [2] * n}])
572
-
573
- @classmethod
574
- def from_edsl_object(cls, object):
575
- d = object.to_dict(add_edsl_version=False)
576
- return cls([{"key": list(d.keys())}, {"value": list(d.values())}])
577
-
578
- @classmethod
579
- def from_pandas_dataframe(cls, df):
580
- result = cls([{col: df[col].tolist()} for col in df.columns])
581
- return result
418
+ return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
582
419
 
583
420
 
584
421
  if __name__ == "__main__":