edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +116 -197
- edsl/__init__.py +7 -15
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +147 -351
- edsl/agents/AgentList.py +73 -211
- edsl/agents/Invigilator.py +50 -101
- edsl/agents/InvigilatorBase.py +70 -62
- edsl/agents/PromptConstructor.py +225 -143
- edsl/agents/__init__.py +1 -0
- edsl/agents/prompt_helpers.py +3 -3
- edsl/auto/AutoStudy.py +5 -18
- edsl/auto/StageBase.py +40 -53
- edsl/auto/StageQuestions.py +1 -2
- edsl/auto/utilities.py +6 -0
- edsl/config.py +2 -22
- edsl/conversation/car_buying.py +1 -2
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +47 -125
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +27 -45
- edsl/data/CacheEntry.py +15 -12
- edsl/data/CacheHandler.py +12 -31
- edsl/data/RemoteCacheSync.py +46 -154
- edsl/data/__init__.py +3 -4
- edsl/data_transfer_models.py +1 -2
- edsl/enums.py +0 -27
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +0 -12
- edsl/exceptions/questions.py +6 -24
- edsl/exceptions/scenarios.py +0 -7
- edsl/inference_services/AnthropicService.py +19 -38
- edsl/inference_services/AwsBedrock.py +2 -0
- edsl/inference_services/AzureAI.py +2 -0
- edsl/inference_services/GoogleService.py +12 -7
- edsl/inference_services/InferenceServiceABC.py +85 -18
- edsl/inference_services/InferenceServicesCollection.py +79 -120
- edsl/inference_services/MistralAIService.py +3 -0
- edsl/inference_services/OpenAIService.py +35 -47
- edsl/inference_services/PerplexityService.py +3 -0
- edsl/inference_services/TestService.py +10 -11
- edsl/inference_services/TogetherAIService.py +3 -5
- edsl/jobs/Answers.py +14 -1
- edsl/jobs/Jobs.py +431 -356
- edsl/jobs/JobsChecks.py +10 -35
- edsl/jobs/JobsPrompts.py +4 -6
- edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
- edsl/jobs/buckets/BucketCollection.py +3 -44
- edsl/jobs/buckets/TokenBucket.py +21 -53
- edsl/jobs/interviews/Interview.py +408 -143
- edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
- edsl/jobs/runners/JobsRunnerStatus.py +165 -133
- edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
- edsl/jobs/tasks/TaskHistory.py +18 -38
- edsl/jobs/tasks/task_status_enum.py +2 -0
- edsl/language_models/KeyLookup.py +30 -0
- edsl/language_models/LanguageModel.py +236 -194
- edsl/language_models/ModelList.py +19 -28
- edsl/language_models/__init__.py +2 -1
- edsl/language_models/registry.py +190 -0
- edsl/language_models/repair.py +2 -2
- edsl/language_models/unused/ReplicateBase.py +83 -0
- edsl/language_models/utilities.py +4 -5
- edsl/notebooks/Notebook.py +14 -19
- edsl/prompts/Prompt.py +39 -29
- edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
- edsl/questions/QuestionBase.py +214 -68
- edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
- edsl/questions/QuestionBasePromptsMixin.py +3 -7
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +3 -3
- edsl/questions/QuestionExtract.py +7 -5
- edsl/questions/QuestionFreeText.py +3 -2
- edsl/questions/QuestionList.py +18 -10
- edsl/questions/QuestionMultipleChoice.py +23 -67
- edsl/questions/QuestionNumerical.py +4 -2
- edsl/questions/QuestionRank.py +17 -7
- edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
- edsl/questions/SimpleAskMixin.py +3 -4
- edsl/questions/__init__.py +1 -2
- edsl/questions/derived/QuestionLinearScale.py +3 -6
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +3 -17
- edsl/questions/question_registry.py +1 -1
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +7 -170
- edsl/results/DatasetExportMixin.py +305 -168
- edsl/results/DatasetTree.py +8 -28
- edsl/results/Result.py +206 -298
- edsl/results/Results.py +131 -149
- edsl/results/ResultsDBMixin.py +238 -0
- edsl/results/ResultsExportMixin.py +0 -2
- edsl/results/{results_selector.py → Selector.py} +13 -23
- edsl/results/TableDisplay.py +171 -98
- edsl/results/__init__.py +1 -1
- edsl/scenarios/FileStore.py +239 -150
- edsl/scenarios/Scenario.py +193 -90
- edsl/scenarios/ScenarioHtmlMixin.py +3 -4
- edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
- edsl/scenarios/ScenarioList.py +244 -415
- edsl/scenarios/ScenarioListExportMixin.py +7 -0
- edsl/scenarios/ScenarioListPdfMixin.py +37 -15
- edsl/scenarios/__init__.py +2 -1
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +12 -5
- edsl/surveys/Rule.py +4 -5
- edsl/surveys/RuleCollection.py +27 -25
- edsl/surveys/Survey.py +791 -270
- edsl/surveys/SurveyCSS.py +8 -20
- edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
- edsl/surveys/__init__.py +2 -4
- edsl/surveys/descriptors.py +2 -6
- edsl/surveys/instructions/ChangeInstruction.py +2 -1
- edsl/surveys/instructions/Instruction.py +13 -4
- edsl/surveys/instructions/InstructionCollection.py +6 -11
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/utilities.py +23 -35
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
- edsl-0.1.39.dev1.dist-info/RECORD +277 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
- edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
- edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
- edsl/agents/question_option_processor.py +0 -172
- edsl/coop/CoopFunctionsMixin.py +0 -15
- edsl/coop/ExpectedParrotKeyHandler.py +0 -125
- edsl/exceptions/inference_services.py +0 -5
- edsl/inference_services/AvailableModelCacheHandler.py +0 -184
- edsl/inference_services/AvailableModelFetcher.py +0 -215
- edsl/inference_services/ServiceAvailability.py +0 -135
- edsl/inference_services/data_structures.py +0 -134
- edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
- edsl/jobs/FetchInvigilator.py +0 -47
- edsl/jobs/InterviewTaskManager.py +0 -98
- edsl/jobs/InterviewsConstructor.py +0 -50
- edsl/jobs/JobsComponentConstructor.py +0 -189
- edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
- edsl/jobs/RequestTokenEstimator.py +0 -30
- edsl/jobs/async_interview_runner.py +0 -138
- edsl/jobs/buckets/TokenBucketAPI.py +0 -211
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/check_survey_scenario_compatibility.py +0 -85
- edsl/jobs/data_structures.py +0 -120
- edsl/jobs/decorators.py +0 -35
- edsl/jobs/jobs_status_enums.py +0 -9
- edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
- edsl/jobs/results_exceptions_handler.py +0 -98
- edsl/language_models/ComputeCost.py +0 -63
- edsl/language_models/PriceManager.py +0 -127
- edsl/language_models/RawResponseHandler.py +0 -106
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +0 -131
- edsl/language_models/model.py +0 -256
- edsl/notebooks/NotebookToLaTeX.py +0 -142
- edsl/questions/ExceptionExplainer.py +0 -77
- edsl/questions/HTMLQuestion.py +0 -103
- edsl/questions/QuestionMatrix.py +0 -265
- edsl/questions/data_structures.py +0 -20
- edsl/questions/loop_processor.py +0 -149
- edsl/questions/response_validator_factory.py +0 -34
- edsl/questions/templates/matrix/__init__.py +0 -1
- edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
- edsl/questions/templates/matrix/question_presentation.jinja +0 -20
- edsl/results/MarkdownToDocx.py +0 -122
- edsl/results/MarkdownToPDF.py +0 -111
- edsl/results/TextEditor.py +0 -50
- edsl/results/file_exports.py +0 -252
- edsl/results/smart_objects.py +0 -96
- edsl/results/table_data_class.py +0 -12
- edsl/results/table_renderers.py +0 -118
- edsl/scenarios/ConstructDownloadLink.py +0 -109
- edsl/scenarios/DocumentChunker.py +0 -102
- edsl/scenarios/DocxScenario.py +0 -16
- edsl/scenarios/PdfExtractor.py +0 -40
- edsl/scenarios/directory_scanner.py +0 -96
- edsl/scenarios/file_methods.py +0 -85
- edsl/scenarios/handlers/__init__.py +0 -13
- edsl/scenarios/handlers/csv.py +0 -49
- edsl/scenarios/handlers/docx.py +0 -76
- edsl/scenarios/handlers/html.py +0 -37
- edsl/scenarios/handlers/json.py +0 -111
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/scenarios/handlers/md.py +0 -51
- edsl/scenarios/handlers/pdf.py +0 -68
- edsl/scenarios/handlers/png.py +0 -39
- edsl/scenarios/handlers/pptx.py +0 -105
- edsl/scenarios/handlers/py.py +0 -294
- edsl/scenarios/handlers/sql.py +0 -313
- edsl/scenarios/handlers/sqlite.py +0 -149
- edsl/scenarios/handlers/txt.py +0 -33
- edsl/scenarios/scenario_selector.py +0 -156
- edsl/surveys/ConstructDAG.py +0 -92
- edsl/surveys/EditSurvey.py +0 -221
- edsl/surveys/InstructionHandler.py +0 -100
- edsl/surveys/MemoryManagement.py +0 -72
- edsl/surveys/RuleManager.py +0 -172
- edsl/surveys/Simulator.py +0 -75
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/utilities/PrettyList.py +0 -56
- edsl/utilities/is_notebook.py +0 -18
- edsl/utilities/is_valid_variable_name.py +0 -11
- edsl/utilities/remove_edsl_version.py +0 -24
- edsl-0.1.39.dist-info/RECORD +0 -358
- /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
- /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
- /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
edsl/results/Dataset.py
CHANGED
@@ -1,22 +1,19 @@
|
|
1
1
|
"""A module to represent a dataset of observations."""
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
|
-
import sys
|
5
|
-
import json
|
6
4
|
import random
|
5
|
+
import json
|
7
6
|
from collections import UserList
|
8
7
|
from typing import Any, Union, Optional
|
8
|
+
import sys
|
9
|
+
import numpy as np
|
9
10
|
|
10
11
|
from edsl.results.ResultsExportMixin import ResultsExportMixin
|
11
12
|
from edsl.results.DatasetTree import Tree
|
12
13
|
from edsl.results.TableDisplay import TableDisplay
|
13
|
-
from edsl.Base import PersistenceMixin, HashingMixin
|
14
14
|
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
16
|
+
class Dataset(UserList, ResultsExportMixin):
|
20
17
|
"""A class to represent a dataset of observations."""
|
21
18
|
|
22
19
|
def __init__(
|
@@ -39,46 +36,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
39
36
|
_, values = list(self.data[0].items())[0]
|
40
37
|
return len(values)
|
41
38
|
|
42
|
-
def tail(self, n: int = 5) -> Dataset:
|
43
|
-
"""Return the last n observations in the dataset.
|
44
|
-
|
45
|
-
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
46
|
-
>>> d.tail(2)
|
47
|
-
Dataset([{'a.b': [3, 4]}])
|
48
|
-
"""
|
49
|
-
new_data = []
|
50
|
-
for observation in self.data:
|
51
|
-
key, values = list(observation.items())[0]
|
52
|
-
new_data.append({key: values[-n:]})
|
53
|
-
return Dataset(new_data)
|
54
|
-
|
55
|
-
def head(self, n: int = 5) -> Dataset:
|
56
|
-
"""Return the first n observations in the dataset.
|
57
|
-
|
58
|
-
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
59
|
-
>>> d.head(2)
|
60
|
-
Dataset([{'a.b': [1, 2]}])
|
61
|
-
"""
|
62
|
-
new_data = []
|
63
|
-
for observation in self.data:
|
64
|
-
key, values = list(observation.items())[0]
|
65
|
-
new_data.append({key: values[:n]})
|
66
|
-
return Dataset(new_data)
|
67
|
-
|
68
|
-
def expand(self, field):
|
69
|
-
return self.to_scenario_list().expand(field)
|
70
|
-
|
71
|
-
def view(self):
|
72
|
-
from perspective.widget import PerspectiveWidget
|
73
|
-
|
74
|
-
w = PerspectiveWidget(
|
75
|
-
self.to_pandas(),
|
76
|
-
plugin="Datagrid",
|
77
|
-
aggregates={"datetime": "any"},
|
78
|
-
sort=[["date", "desc"]],
|
79
|
-
)
|
80
|
-
return w
|
81
|
-
|
82
39
|
def keys(self) -> list[str]:
|
83
40
|
"""Return the keys of the first observation in the dataset.
|
84
41
|
|
@@ -91,79 +48,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
91
48
|
def filter(self, expression):
|
92
49
|
return self.to_scenario_list().filter(expression).to_dataset()
|
93
50
|
|
94
|
-
def long(self, exclude_fields: list[str] = None) -> Dataset:
|
95
|
-
headers, data = self._tabular()
|
96
|
-
exclude_fields = exclude_fields or []
|
97
|
-
|
98
|
-
# Initialize result dictionaries for each column
|
99
|
-
result_dict = {}
|
100
|
-
|
101
|
-
for index, row in enumerate(data):
|
102
|
-
row_values = dict(zip(headers, row))
|
103
|
-
excluded_values = {field: row_values[field] for field in exclude_fields}
|
104
|
-
|
105
|
-
# Transform non-excluded fields to long format
|
106
|
-
for header, value in row_values.items():
|
107
|
-
if header not in exclude_fields:
|
108
|
-
# Initialize lists in result_dict if needed
|
109
|
-
if not result_dict:
|
110
|
-
result_dict = {
|
111
|
-
"row": [],
|
112
|
-
"key": [],
|
113
|
-
"value": [],
|
114
|
-
**{field: [] for field in exclude_fields},
|
115
|
-
}
|
116
|
-
|
117
|
-
# Add values to each column
|
118
|
-
result_dict["row"].append(index)
|
119
|
-
result_dict["key"].append(header)
|
120
|
-
result_dict["value"].append(value)
|
121
|
-
for field in exclude_fields:
|
122
|
-
result_dict[field].append(excluded_values[field])
|
123
|
-
|
124
|
-
return Dataset([{k: v} for k, v in result_dict.items()])
|
125
|
-
|
126
|
-
def wide(self) -> "Dataset":
|
127
|
-
"""
|
128
|
-
Convert a long-format dataset (with row, key, value columns) to wide format.
|
129
|
-
|
130
|
-
Expected input format:
|
131
|
-
- A dataset with three columns containing dictionaries:
|
132
|
-
- row: list of row indices
|
133
|
-
- key: list of column names
|
134
|
-
- value: list of values
|
135
|
-
|
136
|
-
Returns:
|
137
|
-
- Dataset: A new dataset with columns corresponding to unique keys
|
138
|
-
"""
|
139
|
-
# Extract the component arrays
|
140
|
-
row_dict = next(col for col in self if "row" in col)
|
141
|
-
key_dict = next(col for col in self if "key" in col)
|
142
|
-
value_dict = next(col for col in self if "value" in col)
|
143
|
-
|
144
|
-
rows = row_dict["row"]
|
145
|
-
keys = key_dict["key"]
|
146
|
-
values = value_dict["value"]
|
147
|
-
|
148
|
-
if not (len(rows) == len(keys) == len(values)):
|
149
|
-
raise ValueError("All input arrays must have the same length")
|
150
|
-
|
151
|
-
# Get unique keys and row indices
|
152
|
-
unique_keys = sorted(set(keys))
|
153
|
-
unique_rows = sorted(set(rows))
|
154
|
-
|
155
|
-
# Create a dictionary to store the result
|
156
|
-
result = {key: [None] * len(unique_rows) for key in unique_keys}
|
157
|
-
|
158
|
-
# Populate the result dictionary
|
159
|
-
for row_idx, key, value in zip(rows, keys, values):
|
160
|
-
# Find the position in the output array for this row
|
161
|
-
output_row_idx = unique_rows.index(row_idx)
|
162
|
-
result[key][output_row_idx] = value
|
163
|
-
|
164
|
-
# Convert to list of column dictionaries format
|
165
|
-
return Dataset([{key: values} for key, values in result.items()])
|
166
|
-
|
167
51
|
def __repr__(self) -> str:
|
168
52
|
"""Return a string representation of the dataset."""
|
169
53
|
return f"Dataset({self.data})"
|
@@ -242,21 +126,7 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
242
126
|
"""Get the values of the first key in the dictionary."""
|
243
127
|
return list(d.values())[0]
|
244
128
|
|
245
|
-
return
|
246
|
-
|
247
|
-
def latex(self, **kwargs):
|
248
|
-
return self.table().latex()
|
249
|
-
|
250
|
-
def remove_prefix(self) -> Dataset:
|
251
|
-
new_data = []
|
252
|
-
for observation in self.data:
|
253
|
-
key, values = list(observation.items())[0]
|
254
|
-
if "." in key:
|
255
|
-
new_key = key.split(".")[1]
|
256
|
-
new_data.append({new_key: values})
|
257
|
-
else:
|
258
|
-
new_data.append({key: values})
|
259
|
-
return Dataset(new_data)
|
129
|
+
return get_values(self.data[0])[0]
|
260
130
|
|
261
131
|
def print(self, pretty_labels=None, **kwargs):
|
262
132
|
if "format" in kwargs:
|
@@ -276,25 +146,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
276
146
|
new_data.append({new_key: values})
|
277
147
|
return Dataset(new_data)
|
278
148
|
|
279
|
-
def merge(self, other: Dataset, by_x, by_y) -> Dataset:
|
280
|
-
"""Merge the dataset with another dataset on the given keys.""
|
281
|
-
|
282
|
-
merged_df = df1.merge(df2, how="left", on=["key1", "key2"])
|
283
|
-
"""
|
284
|
-
df1 = self.to_pandas()
|
285
|
-
df2 = other.to_pandas()
|
286
|
-
merged_df = df1.merge(df2, how="left", left_on=by_x, right_on=by_y)
|
287
|
-
return Dataset.from_pandas_dataframe(merged_df)
|
288
|
-
|
289
|
-
def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
|
290
|
-
from edsl.surveys.Survey import Survey
|
291
|
-
from edsl.questions.QuestionBase import QuestionBase
|
292
|
-
|
293
|
-
if isinstance(survey_or_question, Survey):
|
294
|
-
return survey_or_question.by(self.to_scenario_list())
|
295
|
-
elif isinstance(survey_or_question, QuestionBase):
|
296
|
-
return Survey([survey_or_question]).by(self.to_scenario_list())
|
297
|
-
|
298
149
|
def select(self, *keys) -> Dataset:
|
299
150
|
"""Return a new dataset with only the selected keys.
|
300
151
|
|
@@ -430,7 +281,6 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
430
281
|
|
431
282
|
|
432
283
|
"""
|
433
|
-
import numpy as np
|
434
284
|
|
435
285
|
def sort_indices(lst: list[Any]) -> list[int]:
|
436
286
|
"""
|
@@ -559,26 +409,13 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
559
409
|
return Dataset([{"num_observations": [len(self)], "keys": [self.keys()]}])
|
560
410
|
|
561
411
|
@classmethod
|
562
|
-
def example(self
|
412
|
+
def example(self):
|
563
413
|
"""Return an example dataset.
|
564
414
|
|
565
415
|
>>> Dataset.example()
|
566
416
|
Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
|
567
417
|
"""
|
568
|
-
|
569
|
-
return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
|
570
|
-
else:
|
571
|
-
return Dataset([{"a": [1] * n}, {"b": [2] * n}])
|
572
|
-
|
573
|
-
@classmethod
|
574
|
-
def from_edsl_object(cls, object):
|
575
|
-
d = object.to_dict(add_edsl_version=False)
|
576
|
-
return cls([{"key": list(d.keys())}, {"value": list(d.values())}])
|
577
|
-
|
578
|
-
@classmethod
|
579
|
-
def from_pandas_dataframe(cls, df):
|
580
|
-
result = cls([{col: df[col].tolist()} for col in df.columns])
|
581
|
-
return result
|
418
|
+
return Dataset([{"a": [1, 2, 3, 4]}, {"b": [4, 3, 2, 1]}])
|
582
419
|
|
583
420
|
|
584
421
|
if __name__ == "__main__":
|