edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +124 -53
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +21 -21
- edsl/agents/agent_list.py +2 -5
- edsl/agents/exceptions.py +119 -5
- edsl/base/__init__.py +10 -35
- edsl/base/base_class.py +71 -36
- edsl/base/base_exception.py +204 -0
- edsl/base/data_transfer_models.py +1 -1
- edsl/base/exceptions.py +94 -0
- edsl/buckets/__init__.py +15 -1
- edsl/buckets/bucket_collection.py +3 -4
- edsl/buckets/exceptions.py +107 -0
- edsl/buckets/model_buckets.py +1 -2
- edsl/buckets/token_bucket.py +11 -6
- edsl/buckets/token_bucket_api.py +27 -12
- edsl/buckets/token_bucket_client.py +9 -7
- edsl/caching/cache.py +12 -4
- edsl/caching/cache_entry.py +10 -9
- edsl/caching/exceptions.py +113 -7
- edsl/caching/remote_cache_sync.py +6 -7
- edsl/caching/sql_dict.py +20 -14
- edsl/cli.py +43 -0
- edsl/config/__init__.py +1 -1
- edsl/config/config_class.py +32 -6
- edsl/conversation/Conversation.py +8 -4
- edsl/conversation/car_buying.py +1 -3
- edsl/conversation/exceptions.py +58 -0
- edsl/conversation/mug_negotiation.py +2 -8
- edsl/coop/__init__.py +28 -6
- edsl/coop/coop.py +120 -29
- edsl/coop/coop_functions.py +1 -1
- edsl/coop/ep_key_handling.py +1 -1
- edsl/coop/exceptions.py +188 -9
- edsl/coop/price_fetcher.py +5 -8
- edsl/coop/utils.py +4 -6
- edsl/dataset/__init__.py +5 -4
- edsl/dataset/dataset.py +177 -86
- edsl/dataset/dataset_operations_mixin.py +98 -76
- edsl/dataset/dataset_tree.py +11 -7
- edsl/dataset/display/table_display.py +0 -2
- edsl/dataset/display/table_renderers.py +6 -4
- edsl/dataset/exceptions.py +125 -0
- edsl/dataset/file_exports.py +18 -11
- edsl/dataset/r/ggplot.py +13 -6
- edsl/display/__init__.py +27 -0
- edsl/display/core.py +147 -0
- edsl/display/plugin.py +189 -0
- edsl/display/utils.py +52 -0
- edsl/inference_services/__init__.py +9 -1
- edsl/inference_services/available_model_cache_handler.py +1 -1
- edsl/inference_services/available_model_fetcher.py +5 -6
- edsl/inference_services/data_structures.py +10 -7
- edsl/inference_services/exceptions.py +132 -1
- edsl/inference_services/inference_service_abc.py +2 -2
- edsl/inference_services/inference_services_collection.py +2 -6
- edsl/inference_services/registry.py +4 -3
- edsl/inference_services/service_availability.py +4 -3
- edsl/inference_services/services/anthropic_service.py +4 -1
- edsl/inference_services/services/aws_bedrock.py +13 -12
- edsl/inference_services/services/azure_ai.py +12 -10
- edsl/inference_services/services/deep_infra_service.py +1 -4
- edsl/inference_services/services/deep_seek_service.py +1 -5
- edsl/inference_services/services/google_service.py +7 -3
- edsl/inference_services/services/groq_service.py +1 -1
- edsl/inference_services/services/mistral_ai_service.py +4 -2
- edsl/inference_services/services/ollama_service.py +1 -1
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +8 -7
- edsl/inference_services/services/together_ai_service.py +2 -3
- edsl/inference_services/services/xai_service.py +1 -1
- edsl/instructions/__init__.py +1 -1
- edsl/instructions/change_instruction.py +7 -5
- edsl/instructions/exceptions.py +61 -0
- edsl/instructions/instruction.py +6 -2
- edsl/instructions/instruction_collection.py +6 -4
- edsl/instructions/instruction_handler.py +12 -15
- edsl/interviews/ReportErrors.py +0 -3
- edsl/interviews/__init__.py +9 -2
- edsl/interviews/answering_function.py +11 -13
- edsl/interviews/exception_tracking.py +15 -8
- edsl/interviews/exceptions.py +79 -0
- edsl/interviews/interview.py +33 -30
- edsl/interviews/interview_status_dictionary.py +4 -2
- edsl/interviews/interview_status_log.py +2 -1
- edsl/interviews/interview_task_manager.py +5 -5
- edsl/interviews/request_token_estimator.py +5 -2
- edsl/interviews/statistics.py +3 -4
- edsl/invigilators/__init__.py +7 -1
- edsl/invigilators/exceptions.py +79 -0
- edsl/invigilators/invigilator_base.py +0 -1
- edsl/invigilators/invigilators.py +9 -13
- edsl/invigilators/prompt_constructor.py +1 -5
- edsl/invigilators/prompt_helpers.py +8 -4
- edsl/invigilators/question_instructions_prompt_builder.py +1 -1
- edsl/invigilators/question_option_processor.py +9 -5
- edsl/invigilators/question_template_replacements_builder.py +3 -2
- edsl/jobs/__init__.py +42 -5
- edsl/jobs/async_interview_runner.py +25 -23
- edsl/jobs/check_survey_scenario_compatibility.py +11 -10
- edsl/jobs/data_structures.py +8 -5
- edsl/jobs/exceptions.py +177 -8
- edsl/jobs/fetch_invigilator.py +1 -1
- edsl/jobs/jobs.py +74 -69
- edsl/jobs/jobs_checks.py +6 -7
- edsl/jobs/jobs_component_constructor.py +4 -4
- edsl/jobs/jobs_pricing_estimation.py +4 -3
- edsl/jobs/jobs_remote_inference_logger.py +5 -4
- edsl/jobs/jobs_runner_asyncio.py +3 -4
- edsl/jobs/jobs_runner_status.py +8 -9
- edsl/jobs/remote_inference.py +27 -24
- edsl/jobs/results_exceptions_handler.py +10 -7
- edsl/key_management/__init__.py +3 -1
- edsl/key_management/exceptions.py +62 -0
- edsl/key_management/key_lookup.py +1 -1
- edsl/key_management/key_lookup_builder.py +37 -14
- edsl/key_management/key_lookup_collection.py +2 -0
- edsl/language_models/__init__.py +1 -1
- edsl/language_models/exceptions.py +302 -14
- edsl/language_models/language_model.py +9 -8
- edsl/language_models/model.py +4 -4
- edsl/language_models/model_list.py +1 -1
- edsl/language_models/price_manager.py +1 -1
- edsl/language_models/raw_response_handler.py +14 -9
- edsl/language_models/registry.py +17 -21
- edsl/language_models/repair.py +0 -6
- edsl/language_models/unused/fake_openai_service.py +0 -1
- edsl/load_plugins.py +69 -0
- edsl/logger.py +146 -0
- edsl/notebooks/__init__.py +24 -1
- edsl/notebooks/exceptions.py +82 -0
- edsl/notebooks/notebook.py +7 -3
- edsl/notebooks/notebook_to_latex.py +1 -2
- edsl/plugins/__init__.py +63 -0
- edsl/plugins/built_in/export_example.py +50 -0
- edsl/plugins/built_in/pig_latin.py +67 -0
- edsl/plugins/cli.py +372 -0
- edsl/plugins/cli_typer.py +283 -0
- edsl/plugins/exceptions.py +31 -0
- edsl/plugins/hookspec.py +51 -0
- edsl/plugins/plugin_host.py +128 -0
- edsl/plugins/plugin_manager.py +633 -0
- edsl/plugins/plugins_registry.py +168 -0
- edsl/prompts/__init__.py +24 -1
- edsl/prompts/exceptions.py +107 -5
- edsl/prompts/prompt.py +15 -7
- edsl/questions/HTMLQuestion.py +5 -11
- edsl/questions/Quick.py +0 -1
- edsl/questions/__init__.py +6 -4
- edsl/questions/answer_validator_mixin.py +318 -323
- edsl/questions/compose_questions.py +3 -3
- edsl/questions/descriptors.py +11 -50
- edsl/questions/exceptions.py +278 -22
- edsl/questions/loop_processor.py +7 -5
- edsl/questions/prompt_templates/question_list.jinja +3 -0
- edsl/questions/question_base.py +46 -19
- edsl/questions/question_base_gen_mixin.py +2 -2
- edsl/questions/question_base_prompts_mixin.py +13 -7
- edsl/questions/question_budget.py +503 -98
- edsl/questions/question_check_box.py +660 -160
- edsl/questions/question_dict.py +345 -194
- edsl/questions/question_extract.py +401 -61
- edsl/questions/question_free_text.py +80 -14
- edsl/questions/question_functional.py +119 -9
- edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
- edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
- edsl/questions/question_list.py +275 -28
- edsl/questions/question_matrix.py +643 -96
- edsl/questions/question_multiple_choice.py +219 -51
- edsl/questions/question_numerical.py +361 -32
- edsl/questions/question_rank.py +401 -124
- edsl/questions/question_registry.py +7 -5
- edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
- edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
- edsl/questions/register_questions_meta.py +2 -2
- edsl/questions/response_validator_abc.py +13 -15
- edsl/questions/response_validator_factory.py +10 -12
- edsl/questions/templates/dict/answering_instructions.jinja +1 -0
- edsl/questions/templates/rank/question_presentation.jinja +1 -1
- edsl/results/__init__.py +1 -1
- edsl/results/exceptions.py +141 -7
- edsl/results/report.py +1 -2
- edsl/results/result.py +11 -9
- edsl/results/results.py +480 -321
- edsl/results/results_selector.py +8 -4
- edsl/scenarios/PdfExtractor.py +2 -2
- edsl/scenarios/construct_download_link.py +69 -35
- edsl/scenarios/directory_scanner.py +33 -14
- edsl/scenarios/document_chunker.py +1 -1
- edsl/scenarios/exceptions.py +238 -14
- edsl/scenarios/file_methods.py +1 -1
- edsl/scenarios/file_store.py +7 -3
- edsl/scenarios/handlers/__init__.py +17 -0
- edsl/scenarios/handlers/docx_file_store.py +0 -5
- edsl/scenarios/handlers/pdf_file_store.py +0 -1
- edsl/scenarios/handlers/pptx_file_store.py +0 -5
- edsl/scenarios/handlers/py_file_store.py +0 -1
- edsl/scenarios/handlers/sql_file_store.py +1 -4
- edsl/scenarios/handlers/sqlite_file_store.py +0 -1
- edsl/scenarios/handlers/txt_file_store.py +1 -1
- edsl/scenarios/scenario.py +1 -3
- edsl/scenarios/scenario_list.py +179 -27
- edsl/scenarios/scenario_list_pdf_tools.py +1 -0
- edsl/scenarios/scenario_selector.py +0 -1
- edsl/surveys/__init__.py +3 -4
- edsl/surveys/dag/__init__.py +4 -2
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/edit_survey.py +1 -0
- edsl/surveys/exceptions.py +165 -9
- edsl/surveys/memory/__init__.py +5 -3
- edsl/surveys/memory/memory_management.py +1 -0
- edsl/surveys/memory/memory_plan.py +6 -15
- edsl/surveys/rules/__init__.py +5 -3
- edsl/surveys/rules/rule.py +1 -2
- edsl/surveys/rules/rule_collection.py +1 -1
- edsl/surveys/survey.py +12 -24
- edsl/surveys/survey_css.py +3 -3
- edsl/surveys/survey_export.py +6 -3
- edsl/surveys/survey_flow_visualization.py +10 -1
- edsl/surveys/survey_simulator.py +2 -1
- edsl/tasks/__init__.py +23 -1
- edsl/tasks/exceptions.py +72 -0
- edsl/tasks/question_task_creator.py +3 -3
- edsl/tasks/task_creators.py +1 -3
- edsl/tasks/task_history.py +8 -10
- edsl/tasks/task_status_log.py +1 -2
- edsl/tokens/__init__.py +29 -1
- edsl/tokens/exceptions.py +37 -0
- edsl/tokens/interview_token_usage.py +3 -2
- edsl/tokens/token_usage.py +4 -3
- edsl/utilities/__init__.py +21 -1
- edsl/utilities/decorators.py +1 -2
- edsl/utilities/markdown_to_docx.py +2 -2
- edsl/utilities/markdown_to_pdf.py +1 -1
- edsl/utilities/repair_functions.py +0 -1
- edsl/utilities/restricted_python.py +0 -1
- edsl/utilities/template_loader.py +2 -3
- edsl/utilities/utilities.py +8 -29
- {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/METADATA +32 -2
- edsl-0.1.51.dist-info/RECORD +365 -0
- edsl-0.1.51.dist-info/entry_points.txt +3 -0
- edsl/dataset/smart_objects.py +0 -96
- edsl/exceptions/BaseException.py +0 -21
- edsl/exceptions/__init__.py +0 -54
- edsl/exceptions/configuration.py +0 -16
- edsl/exceptions/general.py +0 -34
- edsl/questions/derived/__init__.py +0 -0
- edsl/study/ObjectEntry.py +0 -173
- edsl/study/ProofOfWork.py +0 -113
- edsl/study/SnapShot.py +0 -80
- edsl/study/Study.py +0 -520
- edsl/study/__init__.py +0 -6
- edsl/utilities/interface.py +0 -135
- edsl-0.1.49.dist-info/RECORD +0 -347
- {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/LICENSE +0 -0
- {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/WHEEL +0 -0
@@ -12,16 +12,18 @@ ScenarioList, AgentList) to share the same data manipulation interface, enabling
|
|
12
12
|
fluid operations across different parts of the EDSL ecosystem.
|
13
13
|
"""
|
14
14
|
|
15
|
-
from abc import ABC, abstractmethod
|
16
15
|
import io
|
17
16
|
import warnings
|
18
17
|
import textwrap
|
19
|
-
from typing import Optional, Tuple, Union, List, TYPE_CHECKING
|
18
|
+
from typing import Optional, Tuple, Union, List, TYPE_CHECKING # Callable not used
|
19
|
+
from functools import wraps
|
20
20
|
from .r.ggplot import GGPlotMethod
|
21
|
+
from .exceptions import DatasetKeyError, DatasetValueError, DatasetTypeError, DatasetExportError
|
21
22
|
|
22
23
|
if TYPE_CHECKING:
|
23
24
|
from docx import Document
|
24
25
|
from .dataset import Dataset
|
26
|
+
from ..jobs import Job # noqa: F401
|
25
27
|
|
26
28
|
class DataOperationsBase:
|
27
29
|
"""
|
@@ -135,10 +137,7 @@ class DataOperationsBase:
|
|
135
137
|
>>> sorted(Results.example().select().relevant_columns(data_type = "model"))
|
136
138
|
['model.frequency_penalty', ...]
|
137
139
|
|
138
|
-
>>>
|
139
|
-
Traceback (most recent call last):
|
140
|
-
...
|
141
|
-
ValueError: No columns found for data type: flimflam. Available data types are: ...
|
140
|
+
>>> # Testing relevant_columns with invalid data_type raises DatasetValueError - tested in unit tests
|
142
141
|
"""
|
143
142
|
columns = [list(x.keys())[0] for x in self]
|
144
143
|
if remove_prefix:
|
@@ -159,7 +158,7 @@ class DataOperationsBase:
|
|
159
158
|
all_data_types = sorted(
|
160
159
|
list(set(get_data_type(column) for column in all_columns))
|
161
160
|
)
|
162
|
-
raise
|
161
|
+
raise DatasetValueError(
|
163
162
|
f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
|
164
163
|
)
|
165
164
|
|
@@ -179,12 +178,19 @@ class DataOperationsBase:
|
|
179
178
|
_num_observations = len(values)
|
180
179
|
else:
|
181
180
|
if len(values) != _num_observations:
|
182
|
-
raise
|
181
|
+
raise DatasetValueError(
|
183
182
|
f"The number of observations is not consistent across columns. "
|
184
183
|
f"Column '{key}' has {len(values)} observations, but previous columns had {_num_observations} observations."
|
185
184
|
)
|
186
185
|
|
187
186
|
return _num_observations
|
187
|
+
|
188
|
+
def chart(self):
|
189
|
+
"""
|
190
|
+
Create a chart from the results.
|
191
|
+
"""
|
192
|
+
import altair as alt
|
193
|
+
return alt.Chart(self.to_pandas(remove_prefix=True))
|
188
194
|
|
189
195
|
def make_tabular(
|
190
196
|
self, remove_prefix: bool, pretty_labels: Optional[dict] = None
|
@@ -262,8 +268,9 @@ class DataOperationsBase:
|
|
262
268
|
remove_prefix=remove_prefix, pretty_labels=pretty_labels
|
263
269
|
)
|
264
270
|
|
265
|
-
def to_jsonl(self, filename: Optional[str] = None)
|
271
|
+
def to_jsonl(self, filename: Optional[str] = None):
|
266
272
|
"""Export the results to a FileStore instance containing JSONL data."""
|
273
|
+
from .file_exports import JSONLExport
|
267
274
|
exporter = JSONLExport(data=self, filename=filename)
|
268
275
|
return exporter.export()
|
269
276
|
|
@@ -274,8 +281,9 @@ class DataOperationsBase:
|
|
274
281
|
pretty_labels: Optional[dict] = None,
|
275
282
|
table_name: str = "results",
|
276
283
|
if_exists: str = "replace",
|
277
|
-
)
|
284
|
+
):
|
278
285
|
"""Export the results to a SQLite database file."""
|
286
|
+
from .file_exports import SQLiteExport
|
279
287
|
exporter = SQLiteExport(
|
280
288
|
data=self,
|
281
289
|
filename=filename,
|
@@ -291,7 +299,7 @@ class DataOperationsBase:
|
|
291
299
|
filename: Optional[str] = None,
|
292
300
|
remove_prefix: bool = False,
|
293
301
|
pretty_labels: Optional[dict] = None,
|
294
|
-
)
|
302
|
+
):
|
295
303
|
"""Export the results to a FileStore instance containing CSV data."""
|
296
304
|
from .file_exports import CSVExport
|
297
305
|
|
@@ -309,9 +317,9 @@ class DataOperationsBase:
|
|
309
317
|
remove_prefix: bool = False,
|
310
318
|
pretty_labels: Optional[dict] = None,
|
311
319
|
sheet_name: Optional[str] = None,
|
312
|
-
)
|
320
|
+
):
|
313
321
|
"""Export the results to a FileStore instance containing Excel data."""
|
314
|
-
from .file_exports import
|
322
|
+
from .file_exports import ExcelExport
|
315
323
|
|
316
324
|
exporter = ExcelExport(
|
317
325
|
data=self,
|
@@ -324,25 +332,28 @@ class DataOperationsBase:
|
|
324
332
|
|
325
333
|
def _db(
|
326
334
|
self, remove_prefix: bool = True, shape: str = "wide"
|
327
|
-
)
|
335
|
+
):
|
328
336
|
"""Create a SQLite database in memory and return the connection.
|
329
337
|
|
330
338
|
Args:
|
331
339
|
remove_prefix: Whether to remove the prefix from the column names
|
332
340
|
shape: The shape of the data in the database ("wide" or "long")
|
333
|
-
|
341
|
+
|
334
342
|
Returns:
|
335
343
|
A database connection
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
+
|
345
|
+
Examples:
|
346
|
+
>>> from sqlalchemy import text
|
347
|
+
>>> from edsl import Results
|
348
|
+
>>> engine = Results.example()._db()
|
349
|
+
>>> len(engine.execute(text("SELECT * FROM self")).fetchall())
|
350
|
+
4
|
351
|
+
>>> engine = Results.example()._db(shape = "long")
|
352
|
+
>>> len(engine.execute(text("SELECT * FROM self")).fetchall())
|
353
|
+
172
|
344
354
|
"""
|
345
|
-
|
355
|
+
# Import needed for database connection
|
356
|
+
from sqlalchemy import create_engine
|
346
357
|
|
347
358
|
engine = create_engine("sqlite:///:memory:")
|
348
359
|
if remove_prefix and shape == "wide":
|
@@ -445,29 +456,35 @@ class DataOperationsBase:
|
|
445
456
|
|
446
457
|
def to_pandas(
|
447
458
|
self, remove_prefix: bool = False, lists_as_strings=False
|
448
|
-
)
|
459
|
+
):
|
449
460
|
"""Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
|
450
461
|
|
451
|
-
:
|
452
|
-
|
462
|
+
Args:
|
463
|
+
remove_prefix: Whether to remove the prefix from the column names.
|
464
|
+
lists_as_strings: Whether to convert lists to strings.
|
465
|
+
|
466
|
+
Returns:
|
467
|
+
A pandas DataFrame.
|
453
468
|
"""
|
469
|
+
# pandas is imported in _to_pandas_strings
|
454
470
|
return self._to_pandas_strings(remove_prefix)
|
455
471
|
|
456
|
-
def _to_pandas_strings(self, remove_prefix: bool = False)
|
472
|
+
def _to_pandas_strings(self, remove_prefix: bool = False):
|
457
473
|
"""Convert the results to a pandas DataFrame.
|
458
474
|
|
459
|
-
:
|
475
|
+
Args:
|
476
|
+
remove_prefix: Whether to remove the prefix from the column names.
|
460
477
|
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
478
|
+
Examples:
|
479
|
+
>>> from edsl.results import Results
|
480
|
+
>>> r = Results.example()
|
481
|
+
>>> r.select('how_feeling').to_pandas()
|
482
|
+
answer.how_feeling
|
483
|
+
0 OK
|
484
|
+
1 Great
|
485
|
+
2 Terrible
|
486
|
+
3 OK
|
469
487
|
"""
|
470
|
-
|
471
488
|
import pandas as pd
|
472
489
|
|
473
490
|
csv_string = self.to_csv(remove_prefix=remove_prefix).text
|
@@ -478,17 +495,27 @@ class DataOperationsBase:
|
|
478
495
|
|
479
496
|
def to_polars(
|
480
497
|
self, remove_prefix: bool = False, lists_as_strings=False
|
481
|
-
)
|
498
|
+
):
|
482
499
|
"""Convert the results to a Polars DataFrame.
|
483
500
|
|
484
|
-
:
|
501
|
+
Args:
|
502
|
+
remove_prefix: Whether to remove the prefix from the column names.
|
503
|
+
lists_as_strings: Whether to convert lists to strings.
|
504
|
+
|
505
|
+
Returns:
|
506
|
+
A Polars DataFrame.
|
485
507
|
"""
|
508
|
+
# polars is imported in _to_polars_strings
|
486
509
|
return self._to_polars_strings(remove_prefix)
|
487
510
|
|
488
|
-
def _to_polars_strings(self, remove_prefix: bool = False)
|
511
|
+
def _to_polars_strings(self, remove_prefix: bool = False):
|
489
512
|
"""Convert the results to a Polars DataFrame.
|
490
513
|
|
491
|
-
:
|
514
|
+
Args:
|
515
|
+
remove_prefix: Whether to remove the prefix from the column names.
|
516
|
+
|
517
|
+
Returns:
|
518
|
+
A Polars DataFrame.
|
492
519
|
"""
|
493
520
|
import polars as pl
|
494
521
|
|
@@ -496,10 +523,14 @@ class DataOperationsBase:
|
|
496
523
|
df = pl.read_csv(io.StringIO(csv_string))
|
497
524
|
return df
|
498
525
|
|
499
|
-
def tree(self, node_order: Optional[List[str]] = None)
|
526
|
+
def tree(self, node_order: Optional[List[str]] = None):
|
500
527
|
"""Convert the results to a Tree.
|
501
528
|
|
502
|
-
:
|
529
|
+
Args:
|
530
|
+
node_order: The order of the nodes.
|
531
|
+
|
532
|
+
Returns:
|
533
|
+
A Tree object.
|
503
534
|
"""
|
504
535
|
from .dataset_tree import Tree
|
505
536
|
return Tree(self, node_order=node_order)
|
@@ -514,13 +545,14 @@ class DataOperationsBase:
|
|
514
545
|
>>> r.select('how_feeling').to_scenario_list()
|
515
546
|
ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
|
516
547
|
"""
|
517
|
-
from
|
548
|
+
from ..scenarios import ScenarioList, Scenario
|
518
549
|
|
519
550
|
list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
|
520
551
|
scenarios = []
|
521
552
|
for d in list_of_dicts:
|
522
553
|
scenarios.append(Scenario(d))
|
523
554
|
return ScenarioList(scenarios)
|
555
|
+
|
524
556
|
|
525
557
|
def to_agent_list(self, remove_prefix: bool = True):
|
526
558
|
"""Convert the results to a list of dictionaries, one per agent.
|
@@ -532,7 +564,7 @@ class DataOperationsBase:
|
|
532
564
|
>>> r.select('how_feeling').to_agent_list()
|
533
565
|
AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
|
534
566
|
"""
|
535
|
-
from
|
567
|
+
from ..agents import Agent, AgentList
|
536
568
|
|
537
569
|
list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
|
538
570
|
agents = []
|
@@ -598,15 +630,12 @@ class DataOperationsBase:
|
|
598
630
|
[1, 9, 2, 3, 4]
|
599
631
|
|
600
632
|
>>> from edsl.dataset import Dataset
|
601
|
-
>>>
|
602
|
-
Traceback (most recent call last):
|
603
|
-
...
|
604
|
-
ValueError: Cannot flatten a list of lists when there are multiple columns selected.
|
633
|
+
>>> # Testing to_list flatten with multiple columns raises DatasetValueError - tested in unit tests
|
605
634
|
|
606
635
|
|
607
636
|
"""
|
608
637
|
if len(self.relevant_columns()) > 1 and flatten:
|
609
|
-
raise
|
638
|
+
raise DatasetValueError(
|
610
639
|
"Cannot flatten a list of lists when there are multiple columns selected."
|
611
640
|
)
|
612
641
|
|
@@ -632,7 +661,6 @@ class DataOperationsBase:
|
|
632
661
|
new_list.append(item)
|
633
662
|
list_to_return = new_list
|
634
663
|
|
635
|
-
from edsl.utilities.PrettyList import PrettyList
|
636
664
|
|
637
665
|
#return PrettyList(list_to_return)
|
638
666
|
return list_to_return
|
@@ -645,9 +673,8 @@ class DataOperationsBase:
|
|
645
673
|
):
|
646
674
|
import os
|
647
675
|
import tempfile
|
648
|
-
from
|
676
|
+
from ..utilities.utilities import is_notebook
|
649
677
|
from IPython.display import HTML, display
|
650
|
-
from edsl.utilities.utilities import is_notebook
|
651
678
|
|
652
679
|
df = self.to_pandas()
|
653
680
|
|
@@ -698,7 +725,7 @@ class DataOperationsBase:
|
|
698
725
|
all_fields = list(fields) + [f for f in header_fields if f not in fields]
|
699
726
|
for field in all_fields:
|
700
727
|
if field not in self.relevant_columns():
|
701
|
-
raise
|
728
|
+
raise DatasetKeyError(f"Field '{field}' not found in dataset")
|
702
729
|
|
703
730
|
# Get data for each field
|
704
731
|
field_data = {}
|
@@ -780,7 +807,8 @@ class DataOperationsBase:
|
|
780
807
|
from docx.shared import Pt
|
781
808
|
import json
|
782
809
|
except ImportError:
|
783
|
-
|
810
|
+
from .exceptions import DatasetImportError
|
811
|
+
raise DatasetImportError("The python-docx package is required for DOCX export. Install it with 'pip install python-docx'.")
|
784
812
|
|
785
813
|
doc = Document()
|
786
814
|
|
@@ -797,7 +825,7 @@ class DataOperationsBase:
|
|
797
825
|
if header_parts:
|
798
826
|
header_text += f" ({', '.join(header_parts)})"
|
799
827
|
|
800
|
-
|
828
|
+
doc.add_heading(header_text, level=1)
|
801
829
|
|
802
830
|
# Add the remaining fields
|
803
831
|
for field in fields:
|
@@ -823,7 +851,7 @@ class DataOperationsBase:
|
|
823
851
|
def report(self, *fields: Optional[str], top_n: Optional[int] = None,
|
824
852
|
header_fields: Optional[List[str]] = None, divider: bool = True,
|
825
853
|
return_string: bool = False, format: str = "markdown",
|
826
|
-
filename: Optional[str] = None) -> Optional[Union[str, "
|
854
|
+
filename: Optional[str] = None) -> Optional[Union[str, "Document"]]:
|
827
855
|
"""Generates a report of the results by iterating through rows.
|
828
856
|
|
829
857
|
Args:
|
@@ -851,7 +879,7 @@ class DataOperationsBase:
|
|
851
879
|
>>> isinstance(doc, object)
|
852
880
|
True
|
853
881
|
"""
|
854
|
-
from
|
882
|
+
from ..utilities.utilities import is_notebook
|
855
883
|
|
856
884
|
# Prepare the data for the report
|
857
885
|
field_data, num_obs, fields, header_fields = self._prepare_report_data(
|
@@ -886,7 +914,7 @@ class DataOperationsBase:
|
|
886
914
|
return doc
|
887
915
|
|
888
916
|
else:
|
889
|
-
raise
|
917
|
+
raise DatasetExportError(f"Unsupported format: {format}. Use 'markdown' or 'docx'.")
|
890
918
|
|
891
919
|
def tally(
|
892
920
|
self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
|
@@ -945,7 +973,7 @@ class DataOperationsBase:
|
|
945
973
|
f in self.relevant_columns() or f in relevant_columns_without_prefix
|
946
974
|
for f in fields
|
947
975
|
):
|
948
|
-
raise
|
976
|
+
raise DatasetKeyError("One or more specified fields are not in the dataset."
|
949
977
|
f"The available fields are: {self.relevant_columns()}"
|
950
978
|
)
|
951
979
|
|
@@ -963,7 +991,7 @@ class DataOperationsBase:
|
|
963
991
|
except TypeError:
|
964
992
|
tally = dict(Counter([str(v) for v in values]))
|
965
993
|
except Exception as e:
|
966
|
-
raise
|
994
|
+
raise DatasetValueError(f"Error tallying values: {e}")
|
967
995
|
|
968
996
|
sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
|
969
997
|
if top_n is not None:
|
@@ -1056,7 +1084,8 @@ class DataOperationsBase:
|
|
1056
1084
|
# Check if the field is ambiguous
|
1057
1085
|
if len(matching_entries) > 1:
|
1058
1086
|
matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
|
1059
|
-
|
1087
|
+
from .exceptions import DatasetValueError
|
1088
|
+
raise DatasetValueError(
|
1060
1089
|
f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
|
1061
1090
|
f"Please specify the full column name to flatten."
|
1062
1091
|
)
|
@@ -1159,13 +1188,13 @@ class DataOperationsBase:
|
|
1159
1188
|
break
|
1160
1189
|
|
1161
1190
|
if field_index is None:
|
1162
|
-
raise
|
1191
|
+
raise DatasetKeyError(f"Field '{field}' not found in dataset")
|
1163
1192
|
|
1164
1193
|
field_data = result.data[field_index][field]
|
1165
1194
|
|
1166
1195
|
# Check if values are lists
|
1167
1196
|
if not all(isinstance(v, list) for v in field_data):
|
1168
|
-
raise
|
1197
|
+
raise DatasetTypeError(f"Field '{field}' does not contain lists in all entries")
|
1169
1198
|
|
1170
1199
|
# Get the maximum length of lists
|
1171
1200
|
max_len = max(len(v) for v in field_data)
|
@@ -1209,16 +1238,13 @@ class DataOperationsBase:
|
|
1209
1238
|
>>> d.drop('a')
|
1210
1239
|
Dataset([{'b': [4, 5, 6]}])
|
1211
1240
|
|
1212
|
-
>>>
|
1213
|
-
Traceback (most recent call last):
|
1214
|
-
...
|
1215
|
-
KeyError: "Field 'c' not found in dataset"
|
1241
|
+
>>> # Testing drop with nonexistent field raises DatasetKeyError - tested in unit tests
|
1216
1242
|
"""
|
1217
1243
|
from .dataset import Dataset
|
1218
1244
|
|
1219
1245
|
# Check if field exists in the dataset
|
1220
1246
|
if field_name not in self.relevant_columns():
|
1221
|
-
raise
|
1247
|
+
raise DatasetKeyError(f"Field '{field_name}' not found in dataset")
|
1222
1248
|
|
1223
1249
|
# Create a new dataset without the specified field
|
1224
1250
|
new_data = [entry for entry in self.data if field_name not in entry]
|
@@ -1248,9 +1274,7 @@ class DataOperationsBase:
|
|
1248
1274
|
>>> d = Dataset([{'a.x': [1, 2, 3]}, {'b.x': [4, 5, 6]}])
|
1249
1275
|
>>> # d.remove_prefix()
|
1250
1276
|
|
1251
|
-
|
1252
|
-
...
|
1253
|
-
ValueError: Removing prefixes would result in duplicate column names: ['x']
|
1277
|
+
# Testing remove_prefix with duplicate column names raises DatasetValueError - tested in unit tests
|
1254
1278
|
"""
|
1255
1279
|
from .dataset import Dataset
|
1256
1280
|
|
@@ -1273,7 +1297,7 @@ class DataOperationsBase:
|
|
1273
1297
|
|
1274
1298
|
# Check for duplicates
|
1275
1299
|
if duplicates:
|
1276
|
-
raise
|
1300
|
+
raise DatasetValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
|
1277
1301
|
|
1278
1302
|
# Create a new dataset with unprefixed column names
|
1279
1303
|
new_data = []
|
@@ -1288,8 +1312,6 @@ class DataOperationsBase:
|
|
1288
1312
|
return Dataset(new_data)
|
1289
1313
|
|
1290
1314
|
|
1291
|
-
from functools import wraps
|
1292
|
-
|
1293
1315
|
def to_dataset(func):
|
1294
1316
|
"""
|
1295
1317
|
Decorator that ensures functions receive a Dataset object as their first argument.
|
edsl/dataset/dataset_tree.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
from typing import
|
1
|
+
from typing import Optional, List, TYPE_CHECKING
|
2
|
+
|
3
|
+
if TYPE_CHECKING:
|
4
|
+
from .dataset import Dataset
|
2
5
|
|
3
6
|
|
4
7
|
def is_hashable(v):
|
@@ -16,8 +19,10 @@ class TreeNode:
|
|
16
19
|
self.children = {}
|
17
20
|
|
18
21
|
|
22
|
+
|
19
23
|
class Tree:
|
20
24
|
def __init__(self, data: "Dataset", node_order: Optional[List[str]] = None):
|
25
|
+
"""Initialize the tree with a Dataset."""
|
21
26
|
d = {}
|
22
27
|
for entry in data:
|
23
28
|
d.update(entry)
|
@@ -46,7 +51,8 @@ class Tree:
|
|
46
51
|
else:
|
47
52
|
if not set(node_order).issubset(set(self.data.keys())):
|
48
53
|
invalid_keys = set(node_order) - set(self.data.keys())
|
49
|
-
|
54
|
+
from .exceptions import DatasetValueError
|
55
|
+
raise DatasetValueError(f"Invalid keys in node_order: {invalid_keys}")
|
50
56
|
|
51
57
|
self.root = TreeNode()
|
52
58
|
|
@@ -95,8 +101,7 @@ class Tree:
|
|
95
101
|
filename = "tree_structure.docx"
|
96
102
|
|
97
103
|
from docx import Document
|
98
|
-
from docx.shared import
|
99
|
-
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
104
|
+
from docx.shared import Pt
|
100
105
|
from docx.enum.style import WD_STYLE_TYPE
|
101
106
|
|
102
107
|
doc = Document()
|
@@ -118,7 +123,6 @@ class Tree:
|
|
118
123
|
self._add_to_docx(doc, self.root, 0)
|
119
124
|
import base64
|
120
125
|
from io import BytesIO
|
121
|
-
import base64
|
122
126
|
|
123
127
|
# Save document to bytes buffer
|
124
128
|
doc_buffer = BytesIO()
|
@@ -126,7 +130,7 @@ class Tree:
|
|
126
130
|
doc_buffer.seek(0)
|
127
131
|
|
128
132
|
base64_string = base64.b64encode(doc_buffer.getvalue()).decode("utf-8")
|
129
|
-
from
|
133
|
+
from ..scenarios.file_store import FileStore
|
130
134
|
|
131
135
|
# Create and return FileStore instance
|
132
136
|
return FileStore(
|
@@ -331,7 +335,7 @@ class Tree:
|
|
331
335
|
Returns:
|
332
336
|
A string containing the markdown document, or renders markdown in notebooks.
|
333
337
|
"""
|
334
|
-
from
|
338
|
+
from ..utilities.utilities import is_notebook
|
335
339
|
from IPython.display import Markdown, display
|
336
340
|
|
337
341
|
if node is None:
|
@@ -1,5 +1,4 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
|
-
import os
|
3
2
|
from pathlib import Path
|
4
3
|
from .table_data_class import TableData
|
5
4
|
|
@@ -104,9 +103,12 @@ class PandasStyleRenderer(DataTablesRendererABC):
|
|
104
103
|
else:
|
105
104
|
df = pd.DataFrame(self.table_data.data, columns=self.table_data.headers)
|
106
105
|
|
107
|
-
styled_df = df.style.set_properties(
|
108
|
-
|
109
|
-
|
106
|
+
styled_df = df.style.set_properties(**{
|
107
|
+
"text-align": "left",
|
108
|
+
"white-space": "pre-wrap", # Allows text wrapping
|
109
|
+
"max-width": "300px", # Maximum width before wrapping
|
110
|
+
"word-wrap": "break-word" # Breaks words that exceed max-width
|
111
|
+
}).background_gradient()
|
110
112
|
|
111
113
|
return f"""
|
112
114
|
<div style="max-height: 500px; overflow-y: auto;">
|
@@ -0,0 +1,125 @@
|
|
1
|
+
"""
|
2
|
+
Exceptions module for dataset-related operations.
|
3
|
+
|
4
|
+
This module defines custom exception classes for all dataset-related error conditions
|
5
|
+
in the EDSL framework, ensuring consistent error handling for data manipulation,
|
6
|
+
transformation, and analysis operations.
|
7
|
+
"""
|
8
|
+
|
9
|
+
from ..base import BaseException
|
10
|
+
|
11
|
+
|
12
|
+
class DatasetError(BaseException):
|
13
|
+
"""
|
14
|
+
Base exception class for all dataset-related errors.
|
15
|
+
|
16
|
+
This is the parent class for exceptions related to Dataset operations
|
17
|
+
in the EDSL framework, including data creation, manipulation, validation,
|
18
|
+
and serialization.
|
19
|
+
|
20
|
+
Examples:
|
21
|
+
```python
|
22
|
+
# Usually not raised directly, but through subclasses:
|
23
|
+
dataset = Dataset([])
|
24
|
+
dataset["missing_key"] # Would raise DatasetKeyError
|
25
|
+
```
|
26
|
+
"""
|
27
|
+
relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
|
28
|
+
|
29
|
+
|
30
|
+
class DatasetKeyError(DatasetError):
|
31
|
+
"""
|
32
|
+
Exception raised when a key is not found in a dataset.
|
33
|
+
|
34
|
+
This exception occurs when attempting to access a field or column
|
35
|
+
that doesn't exist in the dataset.
|
36
|
+
|
37
|
+
Examples:
|
38
|
+
```python
|
39
|
+
dataset = Dataset([{"a": 1}])
|
40
|
+
dataset["b"] # Raises DatasetKeyError
|
41
|
+
```
|
42
|
+
"""
|
43
|
+
relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
|
44
|
+
|
45
|
+
|
46
|
+
class DatasetValueError(DatasetError):
|
47
|
+
"""
|
48
|
+
Exception raised when there's an issue with dataset values.
|
49
|
+
|
50
|
+
This exception occurs when dataset values are invalid, incompatible
|
51
|
+
with an operation, or otherwise problematic.
|
52
|
+
|
53
|
+
Examples:
|
54
|
+
```python
|
55
|
+
dataset = Dataset([{"a": 1}, {"b": 2}])
|
56
|
+
dataset.select(["c"]) # Raises DatasetValueError for missing field
|
57
|
+
```
|
58
|
+
"""
|
59
|
+
relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
|
60
|
+
|
61
|
+
|
62
|
+
class DatasetTypeError(DatasetError):
|
63
|
+
"""
|
64
|
+
Exception raised when there's a type mismatch in dataset operations.
|
65
|
+
|
66
|
+
This exception occurs when trying to perform operations with
|
67
|
+
incompatible data types.
|
68
|
+
|
69
|
+
Examples:
|
70
|
+
```python
|
71
|
+
dataset = Dataset([{"a": 1}])
|
72
|
+
dataset + "not a dataset" # Raises DatasetTypeError
|
73
|
+
```
|
74
|
+
"""
|
75
|
+
relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
|
76
|
+
|
77
|
+
|
78
|
+
class DatasetExportError(DatasetError):
|
79
|
+
"""
|
80
|
+
Exception raised when exporting a dataset to a different format fails.
|
81
|
+
|
82
|
+
This exception occurs when trying to export a dataset to a file format
|
83
|
+
(like CSV, SQLite, etc.) and the operation fails.
|
84
|
+
|
85
|
+
Examples:
|
86
|
+
```python
|
87
|
+
dataset = Dataset([{"a": complex(1, 2)}])
|
88
|
+
dataset.to_csv("file.csv") # Raises DatasetExportError (complex not serializable)
|
89
|
+
```
|
90
|
+
"""
|
91
|
+
relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
|
92
|
+
|
93
|
+
|
94
|
+
class DatasetImportError(DatasetError):
|
95
|
+
"""
|
96
|
+
Exception raised when importing data from an external source fails.
|
97
|
+
|
98
|
+
This exception occurs when trying to import data from an external source or format
|
99
|
+
(like CSV, JSON, etc.) and the operation fails, often due to missing dependencies
|
100
|
+
or format issues.
|
101
|
+
|
102
|
+
Examples:
|
103
|
+
```python
|
104
|
+
# Trying to export to DOCX without python-docx package
|
105
|
+
dataset.to_docx("file.docx") # Raises DatasetImportError
|
106
|
+
```
|
107
|
+
"""
|
108
|
+
relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
|
109
|
+
|
110
|
+
|
111
|
+
class DatasetRuntimeError(DatasetError):
|
112
|
+
"""
|
113
|
+
Exception raised when an operation fails during runtime.
|
114
|
+
|
115
|
+
This exception is used for runtime errors in dataset operations,
|
116
|
+
typically for operations that depend on external systems or libraries
|
117
|
+
like R integration.
|
118
|
+
|
119
|
+
Examples:
|
120
|
+
```python
|
121
|
+
# Plotting with ggplot when R is not installed
|
122
|
+
dataset.ggplot() # Raises DatasetRuntimeError
|
123
|
+
```
|
124
|
+
"""
|
125
|
+
relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
|