edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +197 -116
- edsl/__init__.py +15 -7
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +351 -147
- edsl/agents/AgentList.py +211 -73
- edsl/agents/Invigilator.py +101 -50
- edsl/agents/InvigilatorBase.py +62 -70
- edsl/agents/PromptConstructor.py +143 -225
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +0 -1
- edsl/agents/prompt_helpers.py +3 -3
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +18 -5
- edsl/auto/StageBase.py +53 -40
- edsl/auto/StageQuestions.py +2 -1
- edsl/auto/utilities.py +0 -6
- edsl/config.py +22 -2
- edsl/conversation/car_buying.py +2 -1
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +125 -47
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +45 -27
- edsl/data/CacheEntry.py +12 -15
- edsl/data/CacheHandler.py +31 -12
- edsl/data/RemoteCacheSync.py +154 -46
- edsl/data/__init__.py +4 -3
- edsl/data_transfer_models.py +2 -1
- edsl/enums.py +27 -0
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +12 -0
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/questions.py +24 -6
- edsl/exceptions/scenarios.py +7 -0
- edsl/inference_services/AnthropicService.py +38 -19
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +0 -2
- edsl/inference_services/AzureAI.py +0 -2
- edsl/inference_services/GoogleService.py +7 -12
- edsl/inference_services/InferenceServiceABC.py +18 -85
- edsl/inference_services/InferenceServicesCollection.py +120 -79
- edsl/inference_services/MistralAIService.py +0 -3
- edsl/inference_services/OpenAIService.py +47 -35
- edsl/inference_services/PerplexityService.py +0 -3
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +11 -10
- edsl/inference_services/TogetherAIService.py +5 -3
- edsl/inference_services/data_structures.py +134 -0
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +1 -14
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +356 -431
- edsl/jobs/JobsChecks.py +35 -10
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +6 -4
- edsl/jobs/JobsRemoteInferenceHandler.py +205 -133
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +44 -3
- edsl/jobs/buckets/TokenBucket.py +53 -21
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +143 -408
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +88 -403
- edsl/jobs/runners/JobsRunnerStatus.py +133 -165
- edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
- edsl/jobs/tasks/TaskHistory.py +38 -18
- edsl/jobs/tasks/task_status_enum.py +0 -2
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +194 -236
- edsl/language_models/ModelList.py +28 -19
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +1 -2
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +2 -2
- edsl/language_models/utilities.py +5 -4
- edsl/notebooks/Notebook.py +19 -14
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/prompts/Prompt.py +29 -39
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +68 -214
- edsl/questions/QuestionBasePromptsMixin.py +7 -3
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +3 -3
- edsl/questions/QuestionExtract.py +5 -7
- edsl/questions/QuestionFreeText.py +2 -3
- edsl/questions/QuestionList.py +10 -18
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +67 -23
- edsl/questions/QuestionNumerical.py +2 -4
- edsl/questions/QuestionRank.py +7 -17
- edsl/questions/SimpleAskMixin.py +4 -3
- edsl/questions/__init__.py +2 -1
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +47 -2
- edsl/questions/data_structures.py +20 -0
- edsl/questions/derived/QuestionLinearScale.py +6 -3
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +17 -3
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +57 -50
- edsl/questions/question_registry.py +1 -1
- edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +40 -26
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +170 -7
- edsl/results/DatasetExportMixin.py +168 -305
- edsl/results/DatasetTree.py +28 -8
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +298 -206
- edsl/results/Results.py +149 -131
- edsl/results/ResultsExportMixin.py +2 -0
- edsl/results/TableDisplay.py +98 -171
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +1 -1
- edsl/results/file_exports.py +252 -0
- edsl/results/{Selector.py → results_selector.py} +23 -13
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_renderers.py +118 -0
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +150 -239
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +90 -193
- edsl/scenarios/ScenarioHtmlMixin.py +4 -3
- edsl/scenarios/ScenarioList.py +415 -244
- edsl/scenarios/ScenarioListExportMixin.py +0 -7
- edsl/scenarios/ScenarioListPdfMixin.py +15 -37
- edsl/scenarios/__init__.py +1 -2
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +49 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +10 -6
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +5 -12
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/Rule.py +5 -4
- edsl/surveys/RuleCollection.py +25 -27
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +270 -791
- edsl/surveys/SurveyCSS.py +20 -8
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +4 -2
- edsl/surveys/descriptors.py +6 -2
- edsl/surveys/instructions/ChangeInstruction.py +1 -2
- edsl/surveys/instructions/Instruction.py +4 -13
- edsl/surveys/instructions/InstructionCollection.py +11 -6
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/utilities.py +35 -23
- {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/METADATA +12 -10
- edsl-0.1.39.dist-info/RECORD +358 -0
- {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/registry.py +0 -190
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.38.dev4.dist-info/RECORD +0 -277
- /edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
- /edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
- /edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
- {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/LICENSE +0 -0
edsl/results/Dataset.py
CHANGED
@@ -1,19 +1,22 @@
|
|
1
1
|
"""A module to represent a dataset of observations."""
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
|
-
import
|
4
|
+
import sys
|
5
5
|
import json
|
6
|
+
import random
|
6
7
|
from collections import UserList
|
7
8
|
from typing import Any, Union, Optional
|
8
|
-
import sys
|
9
|
-
import numpy as np
|
10
9
|
|
11
10
|
from edsl.results.ResultsExportMixin import ResultsExportMixin
|
12
11
|
from edsl.results.DatasetTree import Tree
|
13
12
|
from edsl.results.TableDisplay import TableDisplay
|
13
|
+
from edsl.Base import PersistenceMixin, HashingMixin
|
14
14
|
|
15
15
|
|
16
|
-
|
16
|
+
from edsl.results.smart_objects import FirstObject
|
17
|
+
|
18
|
+
|
19
|
+
class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
17
20
|
"""A class to represent a dataset of observations."""
|
18
21
|
|
19
22
|
def __init__(
|
@@ -36,6 +39,46 @@ class Dataset(UserList, ResultsExportMixin):
|
|
36
39
|
_, values = list(self.data[0].items())[0]
|
37
40
|
return len(values)
|
38
41
|
|
42
|
+
def tail(self, n: int = 5) -> Dataset:
|
43
|
+
"""Return the last n observations in the dataset.
|
44
|
+
|
45
|
+
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
46
|
+
>>> d.tail(2)
|
47
|
+
Dataset([{'a.b': [3, 4]}])
|
48
|
+
"""
|
49
|
+
new_data = []
|
50
|
+
for observation in self.data:
|
51
|
+
key, values = list(observation.items())[0]
|
52
|
+
new_data.append({key: values[-n:]})
|
53
|
+
return Dataset(new_data)
|
54
|
+
|
55
|
+
def head(self, n: int = 5) -> Dataset:
|
56
|
+
"""Return the first n observations in the dataset.
|
57
|
+
|
58
|
+
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
59
|
+
>>> d.head(2)
|
60
|
+
Dataset([{'a.b': [1, 2]}])
|
61
|
+
"""
|
62
|
+
new_data = []
|
63
|
+
for observation in self.data:
|
64
|
+
key, values = list(observation.items())[0]
|
65
|
+
new_data.append({key: values[:n]})
|
66
|
+
return Dataset(new_data)
|
67
|
+
|
68
|
+
def expand(self, field):
|
69
|
+
return self.to_scenario_list().expand(field)
|
70
|
+
|
71
|
+
def view(self):
|
72
|
+
from perspective.widget import PerspectiveWidget
|
73
|
+
|
74
|
+
w = PerspectiveWidget(
|
75
|
+
self.to_pandas(),
|
76
|
+
plugin="Datagrid",
|
77
|
+
aggregates={"datetime": "any"},
|
78
|
+
sort=[["date", "desc"]],
|
79
|
+
)
|
80
|
+
return w
|
81
|
+
|
39
82
|
def keys(self) -> list[str]:
|
40
83
|
"""Return the keys of the first observation in the dataset.
|
41
84
|
|
@@ -48,6 +91,79 @@ class Dataset(UserList, ResultsExportMixin):
|
|
48
91
|
def filter(self, expression):
|
49
92
|
return self.to_scenario_list().filter(expression).to_dataset()
|
50
93
|
|
94
|
+
def long(self, exclude_fields: list[str] = None) -> Dataset:
|
95
|
+
headers, data = self._tabular()
|
96
|
+
exclude_fields = exclude_fields or []
|
97
|
+
|
98
|
+
# Initialize result dictionaries for each column
|
99
|
+
result_dict = {}
|
100
|
+
|
101
|
+
for index, row in enumerate(data):
|
102
|
+
row_values = dict(zip(headers, row))
|
103
|
+
excluded_values = {field: row_values[field] for field in exclude_fields}
|
104
|
+
|
105
|
+
# Transform non-excluded fields to long format
|
106
|
+
for header, value in row_values.items():
|
107
|
+
if header not in exclude_fields:
|
108
|
+
# Initialize lists in result_dict if needed
|
109
|
+
if not result_dict:
|
110
|
+
result_dict = {
|
111
|
+
"row": [],
|
112
|
+
"key": [],
|
113
|
+
"value": [],
|
114
|
+
**{field: [] for field in exclude_fields},
|
115
|
+
}
|
116
|
+
|
117
|
+
# Add values to each column
|
118
|
+
result_dict["row"].append(index)
|
119
|
+
result_dict["key"].append(header)
|
120
|
+
result_dict["value"].append(value)
|
121
|
+
for field in exclude_fields:
|
122
|
+
result_dict[field].append(excluded_values[field])
|
123
|
+
|
124
|
+
return Dataset([{k: v} for k, v in result_dict.items()])
|
125
|
+
|
126
|
+
def wide(self) -> "Dataset":
|
127
|
+
"""
|
128
|
+
Convert a long-format dataset (with row, key, value columns) to wide format.
|
129
|
+
|
130
|
+
Expected input format:
|
131
|
+
- A dataset with three columns containing dictionaries:
|
132
|
+
- row: list of row indices
|
133
|
+
- key: list of column names
|
134
|
+
- value: list of values
|
135
|
+
|
136
|
+
Returns:
|
137
|
+
- Dataset: A new dataset with columns corresponding to unique keys
|
138
|
+
"""
|
139
|
+
# Extract the component arrays
|
140
|
+
row_dict = next(col for col in self if "row" in col)
|
141
|
+
key_dict = next(col for col in self if "key" in col)
|
142
|
+
value_dict = next(col for col in self if "value" in col)
|
143
|
+
|
144
|
+
rows = row_dict["row"]
|
145
|
+
keys = key_dict["key"]
|
146
|
+
values = value_dict["value"]
|
147
|
+
|
148
|
+
if not (len(rows) == len(keys) == len(values)):
|
149
|
+
raise ValueError("All input arrays must have the same length")
|
150
|
+
|
151
|
+
# Get unique keys and row indices
|
152
|
+
unique_keys = sorted(set(keys))
|
153
|
+
unique_rows = sorted(set(rows))
|
154
|
+
|
155
|
+
# Create a dictionary to store the result
|
156
|
+
result = {key: [None] * len(unique_rows) for key in unique_keys}
|
157
|
+
|
158
|
+
# Populate the result dictionary
|
159
|
+
for row_idx, key, value in zip(rows, keys, values):
|
160
|
+
# Find the position in the output array for this row
|
161
|
+
output_row_idx = unique_rows.index(row_idx)
|
162
|
+
result[key][output_row_idx] = value
|
163
|
+
|
164
|
+
# Convert to list of column dictionaries format
|
165
|
+
return Dataset([{key: values} for key, values in result.items()])
|
166
|
+
|
51
167
|
def __repr__(self) -> str:
|
52
168
|
"""Return a string representation of the dataset."""
|
53
169
|
return f"Dataset({self.data})"
|
@@ -126,7 +242,21 @@ class Dataset(UserList, ResultsExportMixin):
|
|
126
242
|
"""Get the values of the first key in the dictionary."""
|
127
243
|
return list(d.values())[0]
|
128
244
|
|
129
|
-
return get_values(self.data[0])[0]
|
245
|
+
return FirstObject(get_values(self.data[0])[0])
|
246
|
+
|
247
|
+
def latex(self, **kwargs):
|
248
|
+
return self.table().latex()
|
249
|
+
|
250
|
+
def remove_prefix(self) -> Dataset:
|
251
|
+
new_data = []
|
252
|
+
for observation in self.data:
|
253
|
+
key, values = list(observation.items())[0]
|
254
|
+
if "." in key:
|
255
|
+
new_key = key.split(".")[1]
|
256
|
+
new_data.append({new_key: values})
|
257
|
+
else:
|
258
|
+
new_data.append({key: values})
|
259
|
+
return Dataset(new_data)
|
130
260
|
|
131
261
|
def print(self, pretty_labels=None, **kwargs):
|
132
262
|
if "format" in kwargs:
|
@@ -146,6 +276,25 @@ class Dataset(UserList, ResultsExportMixin):
|
|
146
276
|
new_data.append({new_key: values})
|
147
277
|
return Dataset(new_data)
|
148
278
|
|
279
|
+
def merge(self, other: Dataset, by_x, by_y) -> Dataset:
|
280
|
+
"""Merge the dataset with another dataset on the given keys.""
|
281
|
+
|
282
|
+
merged_df = df1.merge(df2, how="left", on=["key1", "key2"])
|
283
|
+
"""
|
284
|
+
df1 = self.to_pandas()
|
285
|
+
df2 = other.to_pandas()
|
286
|
+
merged_df = df1.merge(df2, how="left", left_on=by_x, right_on=by_y)
|
287
|
+
return Dataset.from_pandas_dataframe(merged_df)
|
288
|
+
|
289
|
+
def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
|
290
|
+
from edsl.surveys.Survey import Survey
|
291
|
+
from edsl.questions.QuestionBase import QuestionBase
|
292
|
+
|
293
|
+
if isinstance(survey_or_question, Survey):
|
294
|
+
return survey_or_question.by(self.to_scenario_list())
|
295
|
+
elif isinstance(survey_or_question, QuestionBase):
|
296
|
+
return Survey([survey_or_question]).by(self.to_scenario_list())
|
297
|
+
|
149
298
|
def select(self, *keys) -> Dataset:
|
150
299
|
"""Return a new dataset with only the selected keys.
|
151
300
|
|
@@ -281,6 +430,7 @@ class Dataset(UserList, ResultsExportMixin):
|
|
281
430
|
|
282
431
|
|
283
432
|
"""
|
433
|
+
import numpy as np
|
284
434
|
|
285
435
|
def sort_indices(lst: list[Any]) -> list[int]:
|
286
436
|
"""
|
@@ -409,13 +559,26 @@ class Dataset(UserList, ResultsExportMixin):
|
|
409
559
|
return Dataset([{"num_observations": [len(self)], "keys": [self.keys()]}])
|
410
560
|
|
411
561
|
@classmethod
|
412
|
-
def example(self):
|
562
|
+
def example(self, n: int = None):
|
413
563
|
"""Return an example dataset.
|
414
564
|
|
415
565
|
>>> Dataset.example()
|
416
566
|
Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
|
417
567
|
"""
|
418
|
-
|
568
|
+
if n is None:
|
569
|
+
return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
|
570
|
+
else:
|
571
|
+
return Dataset([{"a": [1] * n}, {"b": [2] * n}])
|
572
|
+
|
573
|
+
@classmethod
|
574
|
+
def from_edsl_object(cls, object):
|
575
|
+
d = object.to_dict(add_edsl_version=False)
|
576
|
+
return cls([{"key": list(d.keys())}, {"value": list(d.values())}])
|
577
|
+
|
578
|
+
@classmethod
|
579
|
+
def from_pandas_dataframe(cls, df):
|
580
|
+
result = cls([{col: df[col].tolist()} for col in df.columns])
|
581
|
+
return result
|
419
582
|
|
420
583
|
|
421
584
|
if __name__ == "__main__":
|