edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. edsl/Base.py +197 -116
  2. edsl/__init__.py +15 -7
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +351 -147
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +101 -50
  7. edsl/agents/InvigilatorBase.py +62 -70
  8. edsl/agents/PromptConstructor.py +143 -225
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  11. edsl/agents/__init__.py +0 -1
  12. edsl/agents/prompt_helpers.py +3 -3
  13. edsl/agents/question_option_processor.py +172 -0
  14. edsl/auto/AutoStudy.py +18 -5
  15. edsl/auto/StageBase.py +53 -40
  16. edsl/auto/StageQuestions.py +2 -1
  17. edsl/auto/utilities.py +0 -6
  18. edsl/config.py +22 -2
  19. edsl/conversation/car_buying.py +2 -1
  20. edsl/coop/CoopFunctionsMixin.py +15 -0
  21. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  22. edsl/coop/PriceFetcher.py +1 -1
  23. edsl/coop/coop.py +125 -47
  24. edsl/coop/utils.py +14 -14
  25. edsl/data/Cache.py +45 -27
  26. edsl/data/CacheEntry.py +12 -15
  27. edsl/data/CacheHandler.py +31 -12
  28. edsl/data/RemoteCacheSync.py +154 -46
  29. edsl/data/__init__.py +4 -3
  30. edsl/data_transfer_models.py +2 -1
  31. edsl/enums.py +27 -0
  32. edsl/exceptions/__init__.py +50 -50
  33. edsl/exceptions/agents.py +12 -0
  34. edsl/exceptions/inference_services.py +5 -0
  35. edsl/exceptions/questions.py +24 -6
  36. edsl/exceptions/scenarios.py +7 -0
  37. edsl/inference_services/AnthropicService.py +38 -19
  38. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  39. edsl/inference_services/AvailableModelFetcher.py +215 -0
  40. edsl/inference_services/AwsBedrock.py +0 -2
  41. edsl/inference_services/AzureAI.py +0 -2
  42. edsl/inference_services/GoogleService.py +7 -12
  43. edsl/inference_services/InferenceServiceABC.py +18 -85
  44. edsl/inference_services/InferenceServicesCollection.py +120 -79
  45. edsl/inference_services/MistralAIService.py +0 -3
  46. edsl/inference_services/OpenAIService.py +47 -35
  47. edsl/inference_services/PerplexityService.py +0 -3
  48. edsl/inference_services/ServiceAvailability.py +135 -0
  49. edsl/inference_services/TestService.py +11 -10
  50. edsl/inference_services/TogetherAIService.py +5 -3
  51. edsl/inference_services/data_structures.py +134 -0
  52. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  53. edsl/jobs/Answers.py +1 -14
  54. edsl/jobs/FetchInvigilator.py +47 -0
  55. edsl/jobs/InterviewTaskManager.py +98 -0
  56. edsl/jobs/InterviewsConstructor.py +50 -0
  57. edsl/jobs/Jobs.py +356 -431
  58. edsl/jobs/JobsChecks.py +35 -10
  59. edsl/jobs/JobsComponentConstructor.py +189 -0
  60. edsl/jobs/JobsPrompts.py +6 -4
  61. edsl/jobs/JobsRemoteInferenceHandler.py +205 -133
  62. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  63. edsl/jobs/RequestTokenEstimator.py +30 -0
  64. edsl/jobs/async_interview_runner.py +138 -0
  65. edsl/jobs/buckets/BucketCollection.py +44 -3
  66. edsl/jobs/buckets/TokenBucket.py +53 -21
  67. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  68. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  69. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  70. edsl/jobs/data_structures.py +120 -0
  71. edsl/jobs/decorators.py +35 -0
  72. edsl/jobs/interviews/Interview.py +143 -408
  73. edsl/jobs/jobs_status_enums.py +9 -0
  74. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  75. edsl/jobs/results_exceptions_handler.py +98 -0
  76. edsl/jobs/runners/JobsRunnerAsyncio.py +88 -403
  77. edsl/jobs/runners/JobsRunnerStatus.py +133 -165
  78. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  79. edsl/jobs/tasks/TaskHistory.py +38 -18
  80. edsl/jobs/tasks/task_status_enum.py +0 -2
  81. edsl/language_models/ComputeCost.py +63 -0
  82. edsl/language_models/LanguageModel.py +194 -236
  83. edsl/language_models/ModelList.py +28 -19
  84. edsl/language_models/PriceManager.py +127 -0
  85. edsl/language_models/RawResponseHandler.py +106 -0
  86. edsl/language_models/ServiceDataSources.py +0 -0
  87. edsl/language_models/__init__.py +1 -2
  88. edsl/language_models/key_management/KeyLookup.py +63 -0
  89. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  90. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  91. edsl/language_models/key_management/__init__.py +0 -0
  92. edsl/language_models/key_management/models.py +131 -0
  93. edsl/language_models/model.py +256 -0
  94. edsl/language_models/repair.py +2 -2
  95. edsl/language_models/utilities.py +5 -4
  96. edsl/notebooks/Notebook.py +19 -14
  97. edsl/notebooks/NotebookToLaTeX.py +142 -0
  98. edsl/prompts/Prompt.py +29 -39
  99. edsl/questions/ExceptionExplainer.py +77 -0
  100. edsl/questions/HTMLQuestion.py +103 -0
  101. edsl/questions/QuestionBase.py +68 -214
  102. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  103. edsl/questions/QuestionBudget.py +1 -1
  104. edsl/questions/QuestionCheckBox.py +3 -3
  105. edsl/questions/QuestionExtract.py +5 -7
  106. edsl/questions/QuestionFreeText.py +2 -3
  107. edsl/questions/QuestionList.py +10 -18
  108. edsl/questions/QuestionMatrix.py +265 -0
  109. edsl/questions/QuestionMultipleChoice.py +67 -23
  110. edsl/questions/QuestionNumerical.py +2 -4
  111. edsl/questions/QuestionRank.py +7 -17
  112. edsl/questions/SimpleAskMixin.py +4 -3
  113. edsl/questions/__init__.py +2 -1
  114. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +47 -2
  115. edsl/questions/data_structures.py +20 -0
  116. edsl/questions/derived/QuestionLinearScale.py +6 -3
  117. edsl/questions/derived/QuestionTopK.py +1 -1
  118. edsl/questions/descriptors.py +17 -3
  119. edsl/questions/loop_processor.py +149 -0
  120. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +57 -50
  121. edsl/questions/question_registry.py +1 -1
  122. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +40 -26
  123. edsl/questions/response_validator_factory.py +34 -0
  124. edsl/questions/templates/matrix/__init__.py +1 -0
  125. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  126. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  127. edsl/results/CSSParameterizer.py +1 -1
  128. edsl/results/Dataset.py +170 -7
  129. edsl/results/DatasetExportMixin.py +168 -305
  130. edsl/results/DatasetTree.py +28 -8
  131. edsl/results/MarkdownToDocx.py +122 -0
  132. edsl/results/MarkdownToPDF.py +111 -0
  133. edsl/results/Result.py +298 -206
  134. edsl/results/Results.py +149 -131
  135. edsl/results/ResultsExportMixin.py +2 -0
  136. edsl/results/TableDisplay.py +98 -171
  137. edsl/results/TextEditor.py +50 -0
  138. edsl/results/__init__.py +1 -1
  139. edsl/results/file_exports.py +252 -0
  140. edsl/results/{Selector.py → results_selector.py} +23 -13
  141. edsl/results/smart_objects.py +96 -0
  142. edsl/results/table_data_class.py +12 -0
  143. edsl/results/table_renderers.py +118 -0
  144. edsl/scenarios/ConstructDownloadLink.py +109 -0
  145. edsl/scenarios/DocumentChunker.py +102 -0
  146. edsl/scenarios/DocxScenario.py +16 -0
  147. edsl/scenarios/FileStore.py +150 -239
  148. edsl/scenarios/PdfExtractor.py +40 -0
  149. edsl/scenarios/Scenario.py +90 -193
  150. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  151. edsl/scenarios/ScenarioList.py +415 -244
  152. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  153. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  154. edsl/scenarios/__init__.py +1 -2
  155. edsl/scenarios/directory_scanner.py +96 -0
  156. edsl/scenarios/file_methods.py +85 -0
  157. edsl/scenarios/handlers/__init__.py +13 -0
  158. edsl/scenarios/handlers/csv.py +49 -0
  159. edsl/scenarios/handlers/docx.py +76 -0
  160. edsl/scenarios/handlers/html.py +37 -0
  161. edsl/scenarios/handlers/json.py +111 -0
  162. edsl/scenarios/handlers/latex.py +5 -0
  163. edsl/scenarios/handlers/md.py +51 -0
  164. edsl/scenarios/handlers/pdf.py +68 -0
  165. edsl/scenarios/handlers/png.py +39 -0
  166. edsl/scenarios/handlers/pptx.py +105 -0
  167. edsl/scenarios/handlers/py.py +294 -0
  168. edsl/scenarios/handlers/sql.py +313 -0
  169. edsl/scenarios/handlers/sqlite.py +149 -0
  170. edsl/scenarios/handlers/txt.py +33 -0
  171. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +10 -6
  172. edsl/scenarios/scenario_selector.py +156 -0
  173. edsl/study/ObjectEntry.py +1 -1
  174. edsl/study/SnapShot.py +1 -1
  175. edsl/study/Study.py +5 -12
  176. edsl/surveys/ConstructDAG.py +92 -0
  177. edsl/surveys/EditSurvey.py +221 -0
  178. edsl/surveys/InstructionHandler.py +100 -0
  179. edsl/surveys/MemoryManagement.py +72 -0
  180. edsl/surveys/Rule.py +5 -4
  181. edsl/surveys/RuleCollection.py +25 -27
  182. edsl/surveys/RuleManager.py +172 -0
  183. edsl/surveys/Simulator.py +75 -0
  184. edsl/surveys/Survey.py +270 -791
  185. edsl/surveys/SurveyCSS.py +20 -8
  186. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  187. edsl/surveys/SurveyToApp.py +141 -0
  188. edsl/surveys/__init__.py +4 -2
  189. edsl/surveys/descriptors.py +6 -2
  190. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  191. edsl/surveys/instructions/Instruction.py +4 -13
  192. edsl/surveys/instructions/InstructionCollection.py +11 -6
  193. edsl/templates/error_reporting/interview_details.html +1 -1
  194. edsl/templates/error_reporting/report.html +1 -1
  195. edsl/tools/plotting.py +1 -1
  196. edsl/utilities/PrettyList.py +56 -0
  197. edsl/utilities/is_notebook.py +18 -0
  198. edsl/utilities/is_valid_variable_name.py +11 -0
  199. edsl/utilities/remove_edsl_version.py +24 -0
  200. edsl/utilities/utilities.py +35 -23
  201. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/METADATA +12 -10
  202. edsl-0.1.39.dist-info/RECORD +358 -0
  203. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
  204. edsl/language_models/KeyLookup.py +0 -30
  205. edsl/language_models/registry.py +0 -190
  206. edsl/language_models/unused/ReplicateBase.py +0 -83
  207. edsl/results/ResultsDBMixin.py +0 -238
  208. edsl-0.1.38.dev4.dist-info/RECORD +0 -277
  209. /edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
  210. /edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
  211. /edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
  212. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/LICENSE +0 -0
edsl/results/Dataset.py CHANGED
@@ -1,19 +1,22 @@
1
1
  """A module to represent a dataset of observations."""
2
2
 
3
3
  from __future__ import annotations
4
- import random
4
+ import sys
5
5
  import json
6
+ import random
6
7
  from collections import UserList
7
8
  from typing import Any, Union, Optional
8
- import sys
9
- import numpy as np
10
9
 
11
10
  from edsl.results.ResultsExportMixin import ResultsExportMixin
12
11
  from edsl.results.DatasetTree import Tree
13
12
  from edsl.results.TableDisplay import TableDisplay
13
+ from edsl.Base import PersistenceMixin, HashingMixin
14
14
 
15
15
 
16
- class Dataset(UserList, ResultsExportMixin):
16
+ from edsl.results.smart_objects import FirstObject
17
+
18
+
19
+ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
17
20
  """A class to represent a dataset of observations."""
18
21
 
19
22
  def __init__(
@@ -36,6 +39,46 @@ class Dataset(UserList, ResultsExportMixin):
36
39
  _, values = list(self.data[0].items())[0]
37
40
  return len(values)
38
41
 
42
+ def tail(self, n: int = 5) -> Dataset:
43
+ """Return the last n observations in the dataset.
44
+
45
+ >>> d = Dataset([{'a.b':[1,2,3,4]}])
46
+ >>> d.tail(2)
47
+ Dataset([{'a.b': [3, 4]}])
48
+ """
49
+ new_data = []
50
+ for observation in self.data:
51
+ key, values = list(observation.items())[0]
52
+ new_data.append({key: values[-n:]})
53
+ return Dataset(new_data)
54
+
55
+ def head(self, n: int = 5) -> Dataset:
56
+ """Return the first n observations in the dataset.
57
+
58
+ >>> d = Dataset([{'a.b':[1,2,3,4]}])
59
+ >>> d.head(2)
60
+ Dataset([{'a.b': [1, 2]}])
61
+ """
62
+ new_data = []
63
+ for observation in self.data:
64
+ key, values = list(observation.items())[0]
65
+ new_data.append({key: values[:n]})
66
+ return Dataset(new_data)
67
+
68
+ def expand(self, field):
69
+ return self.to_scenario_list().expand(field)
70
+
71
+ def view(self):
72
+ from perspective.widget import PerspectiveWidget
73
+
74
+ w = PerspectiveWidget(
75
+ self.to_pandas(),
76
+ plugin="Datagrid",
77
+ aggregates={"datetime": "any"},
78
+ sort=[["date", "desc"]],
79
+ )
80
+ return w
81
+
39
82
  def keys(self) -> list[str]:
40
83
  """Return the keys of the first observation in the dataset.
41
84
 
@@ -48,6 +91,79 @@ class Dataset(UserList, ResultsExportMixin):
48
91
  def filter(self, expression):
49
92
  return self.to_scenario_list().filter(expression).to_dataset()
50
93
 
94
+ def long(self, exclude_fields: list[str] = None) -> Dataset:
95
+ headers, data = self._tabular()
96
+ exclude_fields = exclude_fields or []
97
+
98
+ # Initialize result dictionaries for each column
99
+ result_dict = {}
100
+
101
+ for index, row in enumerate(data):
102
+ row_values = dict(zip(headers, row))
103
+ excluded_values = {field: row_values[field] for field in exclude_fields}
104
+
105
+ # Transform non-excluded fields to long format
106
+ for header, value in row_values.items():
107
+ if header not in exclude_fields:
108
+ # Initialize lists in result_dict if needed
109
+ if not result_dict:
110
+ result_dict = {
111
+ "row": [],
112
+ "key": [],
113
+ "value": [],
114
+ **{field: [] for field in exclude_fields},
115
+ }
116
+
117
+ # Add values to each column
118
+ result_dict["row"].append(index)
119
+ result_dict["key"].append(header)
120
+ result_dict["value"].append(value)
121
+ for field in exclude_fields:
122
+ result_dict[field].append(excluded_values[field])
123
+
124
+ return Dataset([{k: v} for k, v in result_dict.items()])
125
+
126
+ def wide(self) -> "Dataset":
127
+ """
128
+ Convert a long-format dataset (with row, key, value columns) to wide format.
129
+
130
+ Expected input format:
131
+ - A dataset with three columns containing dictionaries:
132
+ - row: list of row indices
133
+ - key: list of column names
134
+ - value: list of values
135
+
136
+ Returns:
137
+ - Dataset: A new dataset with columns corresponding to unique keys
138
+ """
139
+ # Extract the component arrays
140
+ row_dict = next(col for col in self if "row" in col)
141
+ key_dict = next(col for col in self if "key" in col)
142
+ value_dict = next(col for col in self if "value" in col)
143
+
144
+ rows = row_dict["row"]
145
+ keys = key_dict["key"]
146
+ values = value_dict["value"]
147
+
148
+ if not (len(rows) == len(keys) == len(values)):
149
+ raise ValueError("All input arrays must have the same length")
150
+
151
+ # Get unique keys and row indices
152
+ unique_keys = sorted(set(keys))
153
+ unique_rows = sorted(set(rows))
154
+
155
+ # Create a dictionary to store the result
156
+ result = {key: [None] * len(unique_rows) for key in unique_keys}
157
+
158
+ # Populate the result dictionary
159
+ for row_idx, key, value in zip(rows, keys, values):
160
+ # Find the position in the output array for this row
161
+ output_row_idx = unique_rows.index(row_idx)
162
+ result[key][output_row_idx] = value
163
+
164
+ # Convert to list of column dictionaries format
165
+ return Dataset([{key: values} for key, values in result.items()])
166
+
51
167
  def __repr__(self) -> str:
52
168
  """Return a string representation of the dataset."""
53
169
  return f"Dataset({self.data})"
@@ -126,7 +242,21 @@ class Dataset(UserList, ResultsExportMixin):
126
242
  """Get the values of the first key in the dictionary."""
127
243
  return list(d.values())[0]
128
244
 
129
- return get_values(self.data[0])[0]
245
+ return FirstObject(get_values(self.data[0])[0])
246
+
247
+ def latex(self, **kwargs):
248
+ return self.table().latex()
249
+
250
+ def remove_prefix(self) -> Dataset:
251
+ new_data = []
252
+ for observation in self.data:
253
+ key, values = list(observation.items())[0]
254
+ if "." in key:
255
+ new_key = key.split(".")[1]
256
+ new_data.append({new_key: values})
257
+ else:
258
+ new_data.append({key: values})
259
+ return Dataset(new_data)
130
260
 
131
261
  def print(self, pretty_labels=None, **kwargs):
132
262
  if "format" in kwargs:
@@ -146,6 +276,25 @@ class Dataset(UserList, ResultsExportMixin):
146
276
  new_data.append({new_key: values})
147
277
  return Dataset(new_data)
148
278
 
279
+ def merge(self, other: Dataset, by_x, by_y) -> Dataset:
280
+ """Merge the dataset with another dataset on the given keys.""
281
+
282
+ merged_df = df1.merge(df2, how="left", on=["key1", "key2"])
283
+ """
284
+ df1 = self.to_pandas()
285
+ df2 = other.to_pandas()
286
+ merged_df = df1.merge(df2, how="left", left_on=by_x, right_on=by_y)
287
+ return Dataset.from_pandas_dataframe(merged_df)
288
+
289
+ def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
290
+ from edsl.surveys.Survey import Survey
291
+ from edsl.questions.QuestionBase import QuestionBase
292
+
293
+ if isinstance(survey_or_question, Survey):
294
+ return survey_or_question.by(self.to_scenario_list())
295
+ elif isinstance(survey_or_question, QuestionBase):
296
+ return Survey([survey_or_question]).by(self.to_scenario_list())
297
+
149
298
  def select(self, *keys) -> Dataset:
150
299
  """Return a new dataset with only the selected keys.
151
300
 
@@ -281,6 +430,7 @@ class Dataset(UserList, ResultsExportMixin):
281
430
 
282
431
 
283
432
  """
433
+ import numpy as np
284
434
 
285
435
  def sort_indices(lst: list[Any]) -> list[int]:
286
436
  """
@@ -409,13 +559,26 @@ class Dataset(UserList, ResultsExportMixin):
409
559
  return Dataset([{"num_observations": [len(self)], "keys": [self.keys()]}])
410
560
 
411
561
  @classmethod
412
- def example(self):
562
+ def example(self, n: int = None):
413
563
  """Return an example dataset.
414
564
 
415
565
  >>> Dataset.example()
416
566
  Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
417
567
  """
418
- return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
568
+ if n is None:
569
+ return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
570
+ else:
571
+ return Dataset([{"a": [1] * n}, {"b": [2] * n}])
572
+
573
+ @classmethod
574
+ def from_edsl_object(cls, object):
575
+ d = object.to_dict(add_edsl_version=False)
576
+ return cls([{"key": list(d.keys())}, {"value": list(d.values())}])
577
+
578
+ @classmethod
579
+ def from_pandas_dataframe(cls, df):
580
+ result = cls([{col: df[col].tolist()} for col in df.columns])
581
+ return result
419
582
 
420
583
 
421
584
  if __name__ == "__main__":