edsl 0.1.48__py3-none-any.whl → 0.1.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +124 -53
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +21 -21
- edsl/agents/agent_list.py +2 -5
- edsl/agents/exceptions.py +119 -5
- edsl/base/__init__.py +10 -35
- edsl/base/base_class.py +71 -36
- edsl/base/base_exception.py +204 -0
- edsl/base/data_transfer_models.py +1 -1
- edsl/base/exceptions.py +94 -0
- edsl/buckets/__init__.py +15 -1
- edsl/buckets/bucket_collection.py +3 -4
- edsl/buckets/exceptions.py +75 -0
- edsl/buckets/model_buckets.py +1 -2
- edsl/buckets/token_bucket.py +11 -6
- edsl/buckets/token_bucket_api.py +1 -2
- edsl/buckets/token_bucket_client.py +9 -7
- edsl/caching/cache.py +7 -2
- edsl/caching/cache_entry.py +10 -9
- edsl/caching/exceptions.py +113 -7
- edsl/caching/remote_cache_sync.py +1 -2
- edsl/caching/sql_dict.py +17 -12
- edsl/cli.py +43 -0
- edsl/config/config_class.py +30 -6
- edsl/conversation/Conversation.py +3 -2
- edsl/conversation/exceptions.py +58 -0
- edsl/conversation/mug_negotiation.py +0 -2
- edsl/coop/__init__.py +20 -1
- edsl/coop/coop.py +129 -38
- edsl/coop/exceptions.py +188 -9
- edsl/coop/price_fetcher.py +3 -6
- edsl/coop/utils.py +4 -6
- edsl/dataset/__init__.py +5 -4
- edsl/dataset/dataset.py +53 -43
- edsl/dataset/dataset_operations_mixin.py +86 -72
- edsl/dataset/dataset_tree.py +9 -5
- edsl/dataset/display/table_display.py +0 -2
- edsl/dataset/display/table_renderers.py +0 -1
- edsl/dataset/exceptions.py +125 -0
- edsl/dataset/file_exports.py +18 -11
- edsl/dataset/r/ggplot.py +13 -6
- edsl/display/__init__.py +27 -0
- edsl/display/core.py +147 -0
- edsl/display/plugin.py +189 -0
- edsl/display/utils.py +52 -0
- edsl/inference_services/__init__.py +9 -1
- edsl/inference_services/available_model_cache_handler.py +1 -1
- edsl/inference_services/available_model_fetcher.py +4 -5
- edsl/inference_services/data_structures.py +9 -6
- edsl/inference_services/exceptions.py +132 -1
- edsl/inference_services/inference_service_abc.py +2 -2
- edsl/inference_services/inference_services_collection.py +2 -6
- edsl/inference_services/registry.py +4 -3
- edsl/inference_services/service_availability.py +2 -1
- edsl/inference_services/services/anthropic_service.py +4 -1
- edsl/inference_services/services/aws_bedrock.py +13 -12
- edsl/inference_services/services/azure_ai.py +12 -10
- edsl/inference_services/services/deep_infra_service.py +1 -4
- edsl/inference_services/services/deep_seek_service.py +1 -5
- edsl/inference_services/services/google_service.py +6 -2
- edsl/inference_services/services/groq_service.py +1 -1
- edsl/inference_services/services/mistral_ai_service.py +4 -2
- edsl/inference_services/services/ollama_service.py +1 -1
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +8 -7
- edsl/inference_services/services/together_ai_service.py +2 -3
- edsl/inference_services/services/xai_service.py +1 -1
- edsl/instructions/__init__.py +1 -1
- edsl/instructions/change_instruction.py +3 -2
- edsl/instructions/exceptions.py +61 -0
- edsl/instructions/instruction.py +5 -2
- edsl/instructions/instruction_collection.py +2 -1
- edsl/instructions/instruction_handler.py +4 -9
- edsl/interviews/ReportErrors.py +0 -3
- edsl/interviews/__init__.py +9 -2
- edsl/interviews/answering_function.py +11 -13
- edsl/interviews/exception_tracking.py +14 -7
- edsl/interviews/exceptions.py +79 -0
- edsl/interviews/interview.py +32 -29
- edsl/interviews/interview_status_dictionary.py +4 -2
- edsl/interviews/interview_status_log.py +2 -1
- edsl/interviews/interview_task_manager.py +3 -3
- edsl/interviews/request_token_estimator.py +3 -1
- edsl/interviews/statistics.py +2 -3
- edsl/invigilators/__init__.py +7 -1
- edsl/invigilators/exceptions.py +79 -0
- edsl/invigilators/invigilator_base.py +0 -1
- edsl/invigilators/invigilators.py +8 -12
- edsl/invigilators/prompt_constructor.py +1 -5
- edsl/invigilators/prompt_helpers.py +8 -4
- edsl/invigilators/question_instructions_prompt_builder.py +1 -1
- edsl/invigilators/question_option_processor.py +9 -5
- edsl/invigilators/question_template_replacements_builder.py +3 -2
- edsl/jobs/__init__.py +3 -3
- edsl/jobs/async_interview_runner.py +24 -22
- edsl/jobs/check_survey_scenario_compatibility.py +7 -6
- edsl/jobs/data_structures.py +7 -4
- edsl/jobs/exceptions.py +177 -8
- edsl/jobs/fetch_invigilator.py +1 -1
- edsl/jobs/jobs.py +72 -67
- edsl/jobs/jobs_checks.py +2 -3
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_pricing_estimation.py +3 -2
- edsl/jobs/jobs_remote_inference_logger.py +5 -4
- edsl/jobs/jobs_runner_asyncio.py +1 -2
- edsl/jobs/jobs_runner_status.py +8 -9
- edsl/jobs/remote_inference.py +26 -23
- edsl/jobs/results_exceptions_handler.py +8 -5
- edsl/key_management/__init__.py +3 -1
- edsl/key_management/exceptions.py +62 -0
- edsl/key_management/key_lookup.py +1 -1
- edsl/key_management/key_lookup_builder.py +37 -14
- edsl/key_management/key_lookup_collection.py +2 -0
- edsl/language_models/__init__.py +1 -1
- edsl/language_models/exceptions.py +302 -14
- edsl/language_models/language_model.py +4 -7
- edsl/language_models/model.py +4 -4
- edsl/language_models/model_list.py +1 -1
- edsl/language_models/price_manager.py +1 -1
- edsl/language_models/raw_response_handler.py +14 -9
- edsl/language_models/registry.py +17 -21
- edsl/language_models/repair.py +0 -6
- edsl/language_models/unused/fake_openai_service.py +0 -1
- edsl/load_plugins.py +69 -0
- edsl/logger.py +146 -0
- edsl/notebooks/notebook.py +1 -1
- edsl/notebooks/notebook_to_latex.py +0 -1
- edsl/plugins/__init__.py +63 -0
- edsl/plugins/built_in/export_example.py +50 -0
- edsl/plugins/built_in/pig_latin.py +67 -0
- edsl/plugins/cli.py +372 -0
- edsl/plugins/cli_typer.py +283 -0
- edsl/plugins/exceptions.py +31 -0
- edsl/plugins/hookspec.py +51 -0
- edsl/plugins/plugin_host.py +128 -0
- edsl/plugins/plugin_manager.py +633 -0
- edsl/plugins/plugins_registry.py +168 -0
- edsl/prompts/__init__.py +2 -0
- edsl/prompts/exceptions.py +107 -5
- edsl/prompts/prompt.py +14 -6
- edsl/questions/HTMLQuestion.py +5 -11
- edsl/questions/Quick.py +0 -1
- edsl/questions/__init__.py +2 -0
- edsl/questions/answer_validator_mixin.py +318 -318
- edsl/questions/compose_questions.py +2 -2
- edsl/questions/descriptors.py +10 -49
- edsl/questions/exceptions.py +278 -22
- edsl/questions/loop_processor.py +7 -5
- edsl/questions/prompt_templates/question_list.jinja +3 -0
- edsl/questions/question_base.py +14 -16
- edsl/questions/question_base_gen_mixin.py +2 -2
- edsl/questions/question_base_prompts_mixin.py +9 -3
- edsl/questions/question_budget.py +9 -5
- edsl/questions/question_check_box.py +3 -5
- edsl/questions/question_dict.py +171 -194
- edsl/questions/question_extract.py +1 -1
- edsl/questions/question_free_text.py +4 -6
- edsl/questions/question_functional.py +4 -3
- edsl/questions/question_list.py +36 -9
- edsl/questions/question_matrix.py +95 -61
- edsl/questions/question_multiple_choice.py +6 -4
- edsl/questions/question_numerical.py +2 -4
- edsl/questions/question_registry.py +4 -2
- edsl/questions/register_questions_meta.py +0 -1
- edsl/questions/response_validator_abc.py +7 -13
- edsl/questions/templates/dict/answering_instructions.jinja +1 -0
- edsl/questions/templates/rank/question_presentation.jinja +1 -1
- edsl/results/__init__.py +1 -1
- edsl/results/exceptions.py +141 -7
- edsl/results/report.py +0 -1
- edsl/results/result.py +4 -5
- edsl/results/results.py +10 -51
- edsl/results/results_selector.py +8 -4
- edsl/scenarios/PdfExtractor.py +2 -2
- edsl/scenarios/construct_download_link.py +69 -35
- edsl/scenarios/directory_scanner.py +33 -14
- edsl/scenarios/document_chunker.py +1 -1
- edsl/scenarios/exceptions.py +238 -14
- edsl/scenarios/file_methods.py +1 -1
- edsl/scenarios/file_store.py +7 -3
- edsl/scenarios/handlers/__init__.py +17 -0
- edsl/scenarios/handlers/docx_file_store.py +0 -5
- edsl/scenarios/handlers/pdf_file_store.py +0 -1
- edsl/scenarios/handlers/pptx_file_store.py +0 -5
- edsl/scenarios/handlers/py_file_store.py +0 -1
- edsl/scenarios/handlers/sql_file_store.py +1 -4
- edsl/scenarios/handlers/sqlite_file_store.py +0 -1
- edsl/scenarios/handlers/txt_file_store.py +1 -1
- edsl/scenarios/scenario.py +0 -1
- edsl/scenarios/scenario_list.py +152 -18
- edsl/scenarios/scenario_list_pdf_tools.py +1 -0
- edsl/scenarios/scenario_selector.py +0 -1
- edsl/surveys/__init__.py +3 -4
- edsl/surveys/dag/__init__.py +4 -2
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/edit_survey.py +1 -0
- edsl/surveys/exceptions.py +165 -9
- edsl/surveys/memory/__init__.py +5 -3
- edsl/surveys/memory/memory_management.py +1 -0
- edsl/surveys/memory/memory_plan.py +6 -15
- edsl/surveys/rules/__init__.py +5 -3
- edsl/surveys/rules/rule.py +1 -2
- edsl/surveys/rules/rule_collection.py +1 -1
- edsl/surveys/survey.py +12 -24
- edsl/surveys/survey_export.py +6 -3
- edsl/surveys/survey_flow_visualization.py +10 -1
- edsl/tasks/__init__.py +2 -0
- edsl/tasks/question_task_creator.py +3 -3
- edsl/tasks/task_creators.py +1 -3
- edsl/tasks/task_history.py +5 -7
- edsl/tasks/task_status_log.py +1 -2
- edsl/tokens/__init__.py +3 -1
- edsl/tokens/token_usage.py +1 -1
- edsl/utilities/__init__.py +21 -1
- edsl/utilities/decorators.py +1 -2
- edsl/utilities/markdown_to_docx.py +2 -2
- edsl/utilities/markdown_to_pdf.py +1 -1
- edsl/utilities/repair_functions.py +0 -1
- edsl/utilities/restricted_python.py +0 -1
- edsl/utilities/template_loader.py +2 -3
- edsl/utilities/utilities.py +8 -29
- {edsl-0.1.48.dist-info → edsl-0.1.50.dist-info}/METADATA +32 -2
- edsl-0.1.50.dist-info/RECORD +363 -0
- edsl-0.1.50.dist-info/entry_points.txt +3 -0
- edsl/dataset/smart_objects.py +0 -96
- edsl/exceptions/BaseException.py +0 -21
- edsl/exceptions/__init__.py +0 -54
- edsl/exceptions/configuration.py +0 -16
- edsl/exceptions/general.py +0 -34
- edsl/study/ObjectEntry.py +0 -173
- edsl/study/ProofOfWork.py +0 -113
- edsl/study/SnapShot.py +0 -80
- edsl/study/Study.py +0 -520
- edsl/study/__init__.py +0 -6
- edsl/utilities/interface.py +0 -135
- edsl-0.1.48.dist-info/RECORD +0 -347
- {edsl-0.1.48.dist-info → edsl-0.1.50.dist-info}/LICENSE +0 -0
- {edsl-0.1.48.dist-info → edsl-0.1.50.dist-info}/WHEEL +0 -0
edsl/coop/utils.py
CHANGED
@@ -7,7 +7,6 @@ from ..notebooks import Notebook
|
|
7
7
|
from ..results import Results
|
8
8
|
from ..scenarios import Scenario, ScenarioList
|
9
9
|
from ..surveys import Survey
|
10
|
-
from ..study import Study
|
11
10
|
|
12
11
|
from ..language_models import LanguageModel
|
13
12
|
from ..questions import QuestionBase
|
@@ -24,7 +23,6 @@ EDSLObject = Union[
|
|
24
23
|
Scenario,
|
25
24
|
ScenarioList,
|
26
25
|
Survey,
|
27
|
-
Study,
|
28
26
|
]
|
29
27
|
|
30
28
|
ObjectType = Literal[
|
@@ -39,7 +37,6 @@ ObjectType = Literal[
|
|
39
37
|
"scenario",
|
40
38
|
"scenario_list",
|
41
39
|
"survey",
|
42
|
-
"study",
|
43
40
|
]
|
44
41
|
|
45
42
|
|
@@ -90,7 +87,6 @@ class ObjectRegistry:
|
|
90
87
|
{"object_type": "scenario", "edsl_class": Scenario},
|
91
88
|
{"object_type": "scenario_list", "edsl_class": ScenarioList},
|
92
89
|
{"object_type": "survey", "edsl_class": Survey},
|
93
|
-
{"object_type": "study", "edsl_class": Study},
|
94
90
|
]
|
95
91
|
|
96
92
|
# Create mappings for efficient lookups
|
@@ -133,7 +129,8 @@ class ObjectRegistry:
|
|
133
129
|
# Look up the object type
|
134
130
|
object_type = cls.edsl_class_to_object_type.get(edsl_class_name)
|
135
131
|
if object_type is None:
|
136
|
-
|
132
|
+
from edsl.coop.exceptions import CoopValueError
|
133
|
+
raise CoopValueError(f"Object type not found for {edsl_object=}")
|
137
134
|
return object_type
|
138
135
|
|
139
136
|
@classmethod
|
@@ -155,7 +152,8 @@ class ObjectRegistry:
|
|
155
152
|
"""
|
156
153
|
EDSL_class = cls.object_type_to_edsl_class.get(object_type)
|
157
154
|
if EDSL_class is None:
|
158
|
-
|
155
|
+
from edsl.coop.exceptions import CoopValueError
|
156
|
+
raise CoopValueError(f"EDSL class not found for {object_type=}")
|
159
157
|
return EDSL_class
|
160
158
|
|
161
159
|
@classmethod
|
edsl/dataset/__init__.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
from .dataset import Dataset
|
2
2
|
|
3
|
-
|
4
|
-
from .dataset_operations_mixin import
|
5
|
-
from .dataset_operations_mixin import
|
6
|
-
from .dataset_operations_mixin import
|
3
|
+
# These imports are used by other packages in the repo
|
4
|
+
from .dataset_operations_mixin import AgentListOperationsMixin # noqa: F401
|
5
|
+
from .dataset_operations_mixin import ScenarioListOperationsMixin # noqa: F401
|
6
|
+
from .dataset_operations_mixin import DatasetOperationsMixin # noqa: F401
|
7
|
+
from .dataset_operations_mixin import ResultsOperationsMixin # noqa: F401
|
7
8
|
|
8
9
|
__all__ = [
|
9
10
|
"Dataset",
|
edsl/dataset/dataset.py
CHANGED
@@ -5,20 +5,22 @@ import sys
|
|
5
5
|
import json
|
6
6
|
import random
|
7
7
|
from collections import UserList
|
8
|
-
from typing import Any, Union, Optional, TYPE_CHECKING
|
8
|
+
from typing import Any, Union, Optional, TYPE_CHECKING, Callable
|
9
9
|
|
10
10
|
from ..base import PersistenceMixin, HashingMixin
|
11
11
|
|
12
12
|
from .dataset_tree import Tree
|
13
|
+
from .exceptions import DatasetKeyError, DatasetValueError
|
13
14
|
|
14
15
|
from .display.table_display import TableDisplay
|
15
|
-
from .smart_objects import FirstObject
|
16
|
-
from .r.ggplot import GGPlotMethod
|
16
|
+
#from .smart_objects import FirstObject
|
17
17
|
from .dataset_operations_mixin import DatasetOperationsMixin
|
18
18
|
|
19
19
|
if TYPE_CHECKING:
|
20
20
|
from ..surveys import Survey
|
21
|
-
from ..questions
|
21
|
+
from ..questions import QuestionBase
|
22
|
+
from ..jobs import Job # noqa: F401
|
23
|
+
|
22
24
|
|
23
25
|
class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
24
26
|
"""
|
@@ -76,6 +78,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
76
78
|
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}])
|
77
79
|
"""
|
78
80
|
super().__init__(data)
|
81
|
+
#self.data = data
|
79
82
|
self.print_parameters = print_parameters
|
80
83
|
|
81
84
|
|
@@ -121,16 +124,16 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
121
124
|
def expand(self, field):
|
122
125
|
return self.to_scenario_list().expand(field)
|
123
126
|
|
124
|
-
def view(self):
|
125
|
-
|
127
|
+
# def view(self):
|
128
|
+
# from perspective.widget import PerspectiveWidget
|
126
129
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
130
|
+
# w = PerspectiveWidget(
|
131
|
+
# self.to_pandas(),
|
132
|
+
# plugin="Datagrid",
|
133
|
+
# aggregates={"datetime": "any"},
|
134
|
+
# sort=[["date", "desc"]],
|
135
|
+
# )
|
136
|
+
# return w
|
134
137
|
|
135
138
|
def keys(self) -> list[str]:
|
136
139
|
"""Return the keys of the dataset.
|
@@ -212,7 +215,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
212
215
|
values = value_dict["value"]
|
213
216
|
|
214
217
|
if not (len(rows) == len(keys) == len(values)):
|
215
|
-
raise
|
218
|
+
raise DatasetValueError("All input arrays must have the same length")
|
216
219
|
|
217
220
|
# Get unique keys and row indices
|
218
221
|
unique_keys = sorted(set(keys))
|
@@ -272,12 +275,6 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
272
275
|
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
273
276
|
>>> d._key_to_value('a.b')
|
274
277
|
[1, 2, 3, 4]
|
275
|
-
|
276
|
-
>>> d._key_to_value('a')
|
277
|
-
Traceback (most recent call last):
|
278
|
-
...
|
279
|
-
KeyError: "Key 'a' not found in any of the dictionaries."
|
280
|
-
|
281
278
|
"""
|
282
279
|
potential_matches = []
|
283
280
|
for data_dict in self.data:
|
@@ -290,11 +287,13 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
290
287
|
if len(potential_matches) == 1:
|
291
288
|
return potential_matches[0][1]
|
292
289
|
elif len(potential_matches) > 1:
|
293
|
-
|
290
|
+
from edsl.dataset.exceptions import DatasetKeyError
|
291
|
+
raise DatasetKeyError(
|
294
292
|
f"Key '{key}' found in more than one location: {[m[0] for m in potential_matches]}"
|
295
293
|
)
|
296
294
|
|
297
|
-
|
295
|
+
from edsl.dataset.exceptions import DatasetKeyError
|
296
|
+
raise DatasetKeyError(f"Key '{key}' not found in any of the dictionaries.")
|
298
297
|
|
299
298
|
def first(self) -> dict[str, Any]:
|
300
299
|
"""Get the first value of the first key in the first dictionary.
|
@@ -308,7 +307,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
308
307
|
"""Get the values of the first key in the dictionary."""
|
309
308
|
return list(d.values())[0]
|
310
309
|
|
311
|
-
return
|
310
|
+
return get_values(self.data[0])[0]
|
312
311
|
|
313
312
|
def latex(self, **kwargs):
|
314
313
|
return self.table().latex()
|
@@ -338,7 +337,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
338
337
|
"""
|
339
338
|
if "format" in kwargs:
|
340
339
|
if kwargs["format"] not in ["html", "markdown", "rich", "latex"]:
|
341
|
-
raise
|
340
|
+
raise DatasetValueError(f"Format '{kwargs['format']}' not supported.")
|
342
341
|
|
343
342
|
# If rich format is requested, set tablefmt accordingly
|
344
343
|
if kwargs["format"] == "rich":
|
@@ -371,10 +370,17 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
371
370
|
merged_df = df1.merge(df2, how="left", left_on=by_x, right_on=by_y)
|
372
371
|
return Dataset.from_pandas_dataframe(merged_df)
|
373
372
|
|
374
|
-
def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "
|
375
|
-
"""Return a new dataset with the observations transformed by the given survey or question.
|
373
|
+
def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Job":
|
374
|
+
"""Return a new dataset with the observations transformed by the given survey or question.
|
375
|
+
|
376
|
+
>>> d = Dataset([{'person_name':["John"]}])
|
377
|
+
>>> from edsl import QuestionFreeText
|
378
|
+
>>> q = QuestionFreeText(question_text = "How are you, {{ person_name ?}}?", question_name = "how_feeling")
|
379
|
+
>>> d.to(q)
|
380
|
+
Jobs(...)
|
381
|
+
"""
|
376
382
|
from edsl.surveys import Survey
|
377
|
-
from edsl.questions
|
383
|
+
from edsl.questions import QuestionBase
|
378
384
|
|
379
385
|
if isinstance(survey_or_question, Survey):
|
380
386
|
return survey_or_question.by(self.to_scenario_list())
|
@@ -396,9 +402,10 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
396
402
|
"""
|
397
403
|
for key in keys:
|
398
404
|
if key not in self.keys():
|
399
|
-
|
400
|
-
|
401
|
-
|
405
|
+
from edsl.dataset.exceptions import DatasetValueError
|
406
|
+
raise DatasetValueError(f"Key '{key}' not found in the dataset. "
|
407
|
+
f"Available keys: {self.keys()}"
|
408
|
+
)
|
402
409
|
|
403
410
|
if isinstance(keys, str):
|
404
411
|
keys = [keys]
|
@@ -442,7 +449,11 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
442
449
|
|
443
450
|
return self
|
444
451
|
|
445
|
-
def
|
452
|
+
def expand_field(self, field):
|
453
|
+
"""Expand a field in the dataset.
|
454
|
+
|
455
|
+
Renamed to avoid conflict with the expand method defined earlier.
|
456
|
+
"""
|
446
457
|
return self.to_scenario_list().expand(field).to_dataset()
|
447
458
|
|
448
459
|
def sample(
|
@@ -462,21 +473,18 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
462
473
|
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
463
474
|
>>> d.sample(n=2, seed=0, with_replacement=True)
|
464
475
|
Dataset([{'a.b': [4, 4]}])
|
465
|
-
|
466
|
-
>>> d.sample(n = 10, seed=0, with_replacement=False)
|
467
|
-
Traceback (most recent call last):
|
468
|
-
...
|
469
|
-
ValueError: Sample size cannot be greater than the number of available elements when sampling without replacement.
|
470
476
|
"""
|
471
477
|
if seed is not None:
|
472
478
|
random.seed(seed)
|
473
479
|
|
474
480
|
# Validate the input for sampling parameters
|
475
481
|
if n is None and frac is None:
|
476
|
-
|
482
|
+
from edsl.dataset.exceptions import DatasetValueError
|
483
|
+
raise DatasetValueError("Either 'n' or 'frac' must be provided for sampling.")
|
477
484
|
|
478
485
|
if n is not None and frac is not None:
|
479
|
-
|
486
|
+
from edsl.dataset.exceptions import DatasetValueError
|
487
|
+
raise DatasetValueError("Only one of 'n' or 'frac' should be specified.")
|
480
488
|
|
481
489
|
# Get the length of the lists from the first entry
|
482
490
|
first_key, first_values = list(self[0].items())[0]
|
@@ -487,7 +495,8 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
487
495
|
n = int(total_length * frac)
|
488
496
|
|
489
497
|
if not with_replacement and n > total_length:
|
490
|
-
|
498
|
+
from edsl.dataset.exceptions import DatasetValueError
|
499
|
+
raise DatasetValueError(
|
491
500
|
"Sample size cannot be greater than the number of available elements when sampling without replacement."
|
492
501
|
)
|
493
502
|
|
@@ -549,9 +558,9 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
549
558
|
number_found += 1
|
550
559
|
|
551
560
|
if number_found == 0:
|
552
|
-
raise
|
561
|
+
raise DatasetKeyError(f"Key '{sort_key}' not found in any of the dictionaries.")
|
553
562
|
elif number_found > 1:
|
554
|
-
raise
|
563
|
+
raise DatasetKeyError(f"Key '{sort_key}' found in more than one dictionary.")
|
555
564
|
|
556
565
|
# relevant_values = self._key_to_value(sort_key)
|
557
566
|
sort_indices_list = sort_indices(relevant_values)
|
@@ -578,7 +587,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
578
587
|
def table(
|
579
588
|
self,
|
580
589
|
*fields,
|
581
|
-
tablefmt: Optional[str] =
|
590
|
+
tablefmt: Optional[str] = "rich",
|
582
591
|
max_rows: Optional[int] = None,
|
583
592
|
pretty_labels=None,
|
584
593
|
print_parameters: Optional[dict] = None,
|
@@ -637,7 +646,8 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
|
637
646
|
|
638
647
|
if max_rows is not None:
|
639
648
|
if max_rows > len(data):
|
640
|
-
|
649
|
+
from edsl.dataset.exceptions import DatasetValueError
|
650
|
+
raise DatasetValueError(
|
641
651
|
"max_rows cannot be greater than the number of rows in the dataset."
|
642
652
|
)
|
643
653
|
last_line = data[-1]
|