edsl 0.1.48__py3-none-any.whl → 0.1.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +124 -53
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +21 -21
- edsl/agents/agent_list.py +2 -5
- edsl/agents/exceptions.py +119 -5
- edsl/base/__init__.py +10 -35
- edsl/base/base_class.py +71 -36
- edsl/base/base_exception.py +204 -0
- edsl/base/data_transfer_models.py +1 -1
- edsl/base/exceptions.py +94 -0
- edsl/buckets/__init__.py +15 -1
- edsl/buckets/bucket_collection.py +3 -4
- edsl/buckets/exceptions.py +75 -0
- edsl/buckets/model_buckets.py +1 -2
- edsl/buckets/token_bucket.py +11 -6
- edsl/buckets/token_bucket_api.py +1 -2
- edsl/buckets/token_bucket_client.py +9 -7
- edsl/caching/cache.py +7 -2
- edsl/caching/cache_entry.py +10 -9
- edsl/caching/exceptions.py +113 -7
- edsl/caching/remote_cache_sync.py +1 -2
- edsl/caching/sql_dict.py +17 -12
- edsl/cli.py +43 -0
- edsl/config/config_class.py +30 -6
- edsl/conversation/Conversation.py +3 -2
- edsl/conversation/exceptions.py +58 -0
- edsl/conversation/mug_negotiation.py +0 -2
- edsl/coop/__init__.py +20 -1
- edsl/coop/coop.py +129 -38
- edsl/coop/exceptions.py +188 -9
- edsl/coop/price_fetcher.py +3 -6
- edsl/coop/utils.py +4 -6
- edsl/dataset/__init__.py +5 -4
- edsl/dataset/dataset.py +53 -43
- edsl/dataset/dataset_operations_mixin.py +86 -72
- edsl/dataset/dataset_tree.py +9 -5
- edsl/dataset/display/table_display.py +0 -2
- edsl/dataset/display/table_renderers.py +0 -1
- edsl/dataset/exceptions.py +125 -0
- edsl/dataset/file_exports.py +18 -11
- edsl/dataset/r/ggplot.py +13 -6
- edsl/display/__init__.py +27 -0
- edsl/display/core.py +147 -0
- edsl/display/plugin.py +189 -0
- edsl/display/utils.py +52 -0
- edsl/inference_services/__init__.py +9 -1
- edsl/inference_services/available_model_cache_handler.py +1 -1
- edsl/inference_services/available_model_fetcher.py +4 -5
- edsl/inference_services/data_structures.py +9 -6
- edsl/inference_services/exceptions.py +132 -1
- edsl/inference_services/inference_service_abc.py +2 -2
- edsl/inference_services/inference_services_collection.py +2 -6
- edsl/inference_services/registry.py +4 -3
- edsl/inference_services/service_availability.py +2 -1
- edsl/inference_services/services/anthropic_service.py +4 -1
- edsl/inference_services/services/aws_bedrock.py +13 -12
- edsl/inference_services/services/azure_ai.py +12 -10
- edsl/inference_services/services/deep_infra_service.py +1 -4
- edsl/inference_services/services/deep_seek_service.py +1 -5
- edsl/inference_services/services/google_service.py +6 -2
- edsl/inference_services/services/groq_service.py +1 -1
- edsl/inference_services/services/mistral_ai_service.py +4 -2
- edsl/inference_services/services/ollama_service.py +1 -1
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +8 -7
- edsl/inference_services/services/together_ai_service.py +2 -3
- edsl/inference_services/services/xai_service.py +1 -1
- edsl/instructions/__init__.py +1 -1
- edsl/instructions/change_instruction.py +3 -2
- edsl/instructions/exceptions.py +61 -0
- edsl/instructions/instruction.py +5 -2
- edsl/instructions/instruction_collection.py +2 -1
- edsl/instructions/instruction_handler.py +4 -9
- edsl/interviews/ReportErrors.py +0 -3
- edsl/interviews/__init__.py +9 -2
- edsl/interviews/answering_function.py +11 -13
- edsl/interviews/exception_tracking.py +14 -7
- edsl/interviews/exceptions.py +79 -0
- edsl/interviews/interview.py +32 -29
- edsl/interviews/interview_status_dictionary.py +4 -2
- edsl/interviews/interview_status_log.py +2 -1
- edsl/interviews/interview_task_manager.py +3 -3
- edsl/interviews/request_token_estimator.py +3 -1
- edsl/interviews/statistics.py +2 -3
- edsl/invigilators/__init__.py +7 -1
- edsl/invigilators/exceptions.py +79 -0
- edsl/invigilators/invigilator_base.py +0 -1
- edsl/invigilators/invigilators.py +8 -12
- edsl/invigilators/prompt_constructor.py +1 -5
- edsl/invigilators/prompt_helpers.py +8 -4
- edsl/invigilators/question_instructions_prompt_builder.py +1 -1
- edsl/invigilators/question_option_processor.py +9 -5
- edsl/invigilators/question_template_replacements_builder.py +3 -2
- edsl/jobs/__init__.py +3 -3
- edsl/jobs/async_interview_runner.py +24 -22
- edsl/jobs/check_survey_scenario_compatibility.py +7 -6
- edsl/jobs/data_structures.py +7 -4
- edsl/jobs/exceptions.py +177 -8
- edsl/jobs/fetch_invigilator.py +1 -1
- edsl/jobs/jobs.py +72 -67
- edsl/jobs/jobs_checks.py +2 -3
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_pricing_estimation.py +3 -2
- edsl/jobs/jobs_remote_inference_logger.py +5 -4
- edsl/jobs/jobs_runner_asyncio.py +1 -2
- edsl/jobs/jobs_runner_status.py +8 -9
- edsl/jobs/remote_inference.py +26 -23
- edsl/jobs/results_exceptions_handler.py +8 -5
- edsl/key_management/__init__.py +3 -1
- edsl/key_management/exceptions.py +62 -0
- edsl/key_management/key_lookup.py +1 -1
- edsl/key_management/key_lookup_builder.py +37 -14
- edsl/key_management/key_lookup_collection.py +2 -0
- edsl/language_models/__init__.py +1 -1
- edsl/language_models/exceptions.py +302 -14
- edsl/language_models/language_model.py +4 -7
- edsl/language_models/model.py +4 -4
- edsl/language_models/model_list.py +1 -1
- edsl/language_models/price_manager.py +1 -1
- edsl/language_models/raw_response_handler.py +14 -9
- edsl/language_models/registry.py +17 -21
- edsl/language_models/repair.py +0 -6
- edsl/language_models/unused/fake_openai_service.py +0 -1
- edsl/load_plugins.py +69 -0
- edsl/logger.py +146 -0
- edsl/notebooks/notebook.py +1 -1
- edsl/notebooks/notebook_to_latex.py +0 -1
- edsl/plugins/__init__.py +63 -0
- edsl/plugins/built_in/export_example.py +50 -0
- edsl/plugins/built_in/pig_latin.py +67 -0
- edsl/plugins/cli.py +372 -0
- edsl/plugins/cli_typer.py +283 -0
- edsl/plugins/exceptions.py +31 -0
- edsl/plugins/hookspec.py +51 -0
- edsl/plugins/plugin_host.py +128 -0
- edsl/plugins/plugin_manager.py +633 -0
- edsl/plugins/plugins_registry.py +168 -0
- edsl/prompts/__init__.py +2 -0
- edsl/prompts/exceptions.py +107 -5
- edsl/prompts/prompt.py +14 -6
- edsl/questions/HTMLQuestion.py +5 -11
- edsl/questions/Quick.py +0 -1
- edsl/questions/__init__.py +2 -0
- edsl/questions/answer_validator_mixin.py +318 -318
- edsl/questions/compose_questions.py +2 -2
- edsl/questions/descriptors.py +10 -49
- edsl/questions/exceptions.py +278 -22
- edsl/questions/loop_processor.py +7 -5
- edsl/questions/prompt_templates/question_list.jinja +3 -0
- edsl/questions/question_base.py +14 -16
- edsl/questions/question_base_gen_mixin.py +2 -2
- edsl/questions/question_base_prompts_mixin.py +9 -3
- edsl/questions/question_budget.py +9 -5
- edsl/questions/question_check_box.py +3 -5
- edsl/questions/question_dict.py +171 -194
- edsl/questions/question_extract.py +1 -1
- edsl/questions/question_free_text.py +4 -6
- edsl/questions/question_functional.py +4 -3
- edsl/questions/question_list.py +36 -9
- edsl/questions/question_matrix.py +95 -61
- edsl/questions/question_multiple_choice.py +6 -4
- edsl/questions/question_numerical.py +2 -4
- edsl/questions/question_registry.py +4 -2
- edsl/questions/register_questions_meta.py +0 -1
- edsl/questions/response_validator_abc.py +7 -13
- edsl/questions/templates/dict/answering_instructions.jinja +1 -0
- edsl/questions/templates/rank/question_presentation.jinja +1 -1
- edsl/results/__init__.py +1 -1
- edsl/results/exceptions.py +141 -7
- edsl/results/report.py +0 -1
- edsl/results/result.py +4 -5
- edsl/results/results.py +10 -51
- edsl/results/results_selector.py +8 -4
- edsl/scenarios/PdfExtractor.py +2 -2
- edsl/scenarios/construct_download_link.py +69 -35
- edsl/scenarios/directory_scanner.py +33 -14
- edsl/scenarios/document_chunker.py +1 -1
- edsl/scenarios/exceptions.py +238 -14
- edsl/scenarios/file_methods.py +1 -1
- edsl/scenarios/file_store.py +7 -3
- edsl/scenarios/handlers/__init__.py +17 -0
- edsl/scenarios/handlers/docx_file_store.py +0 -5
- edsl/scenarios/handlers/pdf_file_store.py +0 -1
- edsl/scenarios/handlers/pptx_file_store.py +0 -5
- edsl/scenarios/handlers/py_file_store.py +0 -1
- edsl/scenarios/handlers/sql_file_store.py +1 -4
- edsl/scenarios/handlers/sqlite_file_store.py +0 -1
- edsl/scenarios/handlers/txt_file_store.py +1 -1
- edsl/scenarios/scenario.py +0 -1
- edsl/scenarios/scenario_list.py +152 -18
- edsl/scenarios/scenario_list_pdf_tools.py +1 -0
- edsl/scenarios/scenario_selector.py +0 -1
- edsl/surveys/__init__.py +3 -4
- edsl/surveys/dag/__init__.py +4 -2
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/edit_survey.py +1 -0
- edsl/surveys/exceptions.py +165 -9
- edsl/surveys/memory/__init__.py +5 -3
- edsl/surveys/memory/memory_management.py +1 -0
- edsl/surveys/memory/memory_plan.py +6 -15
- edsl/surveys/rules/__init__.py +5 -3
- edsl/surveys/rules/rule.py +1 -2
- edsl/surveys/rules/rule_collection.py +1 -1
- edsl/surveys/survey.py +12 -24
- edsl/surveys/survey_export.py +6 -3
- edsl/surveys/survey_flow_visualization.py +10 -1
- edsl/tasks/__init__.py +2 -0
- edsl/tasks/question_task_creator.py +3 -3
- edsl/tasks/task_creators.py +1 -3
- edsl/tasks/task_history.py +5 -7
- edsl/tasks/task_status_log.py +1 -2
- edsl/tokens/__init__.py +3 -1
- edsl/tokens/token_usage.py +1 -1
- edsl/utilities/__init__.py +21 -1
- edsl/utilities/decorators.py +1 -2
- edsl/utilities/markdown_to_docx.py +2 -2
- edsl/utilities/markdown_to_pdf.py +1 -1
- edsl/utilities/repair_functions.py +0 -1
- edsl/utilities/restricted_python.py +0 -1
- edsl/utilities/template_loader.py +2 -3
- edsl/utilities/utilities.py +8 -29
- {edsl-0.1.48.dist-info → edsl-0.1.50.dist-info}/METADATA +32 -2
- edsl-0.1.50.dist-info/RECORD +363 -0
- edsl-0.1.50.dist-info/entry_points.txt +3 -0
- edsl/dataset/smart_objects.py +0 -96
- edsl/exceptions/BaseException.py +0 -21
- edsl/exceptions/__init__.py +0 -54
- edsl/exceptions/configuration.py +0 -16
- edsl/exceptions/general.py +0 -34
- edsl/study/ObjectEntry.py +0 -173
- edsl/study/ProofOfWork.py +0 -113
- edsl/study/SnapShot.py +0 -80
- edsl/study/Study.py +0 -520
- edsl/study/__init__.py +0 -6
- edsl/utilities/interface.py +0 -135
- edsl-0.1.48.dist-info/RECORD +0 -347
- {edsl-0.1.48.dist-info → edsl-0.1.50.dist-info}/LICENSE +0 -0
- {edsl-0.1.48.dist-info → edsl-0.1.50.dist-info}/WHEEL +0 -0
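
The dominant theme of this release, visible in the file list above, is the replacement of bare `raise` statements and builtin exceptions with a typed exception hierarchy: one `exceptions.py` per subpackage, all descending from the shared `edsl.base` base exception. The hunks below cover the `edsl/dataset` subpackage. As a minimal sketch of what this means for calling code (assuming edsl 0.1.50 is installed; the class and module names are taken from the diff below, everything else is illustrative):

```python
# Minimal sketch: catching the typed dataset exceptions introduced in 0.1.50.
# Names come from edsl/dataset/exceptions.py in the diff below.
from edsl.dataset import Dataset
from edsl.dataset.exceptions import DatasetError, DatasetKeyError

d = Dataset([{"a": [1, 2, 3]}])
try:
    d.drop("missing_field")  # 0.1.50 raises DatasetKeyError instead of a bare KeyError
except DatasetKeyError as e:
    print(e)
except DatasetError:
    pass  # all dataset exceptions share this base class, so callers can catch broadly
```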
edsl/dataset/dataset_operations_mixin.py
CHANGED
@@ -12,16 +12,18 @@ ScenarioList, AgentList) to share the same data manipulation interface, enabling
 fluid operations across different parts of the EDSL ecosystem.
 """
 
-from abc import ABC, abstractmethod
 import io
 import warnings
 import textwrap
-from typing import Optional, Tuple, Union, List, TYPE_CHECKING
+from typing import Optional, Tuple, Union, List, TYPE_CHECKING  # Callable not used
+from functools import wraps
 from .r.ggplot import GGPlotMethod
+from .exceptions import DatasetKeyError, DatasetValueError, DatasetTypeError, DatasetExportError
 
 if TYPE_CHECKING:
     from docx import Document
     from .dataset import Dataset
+    from ..jobs import Job  # noqa: F401
 
 class DataOperationsBase:
     """
@@ -135,10 +137,7 @@ class DataOperationsBase:
         >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
         ['model.frequency_penalty', ...]
 
-        >>> Results.example().select().relevant_columns(data_type = "flimflam")
-        Traceback (most recent call last):
-        ...
-        ValueError: No columns found for data type: flimflam. Available data types are: ...
+        >>> # Testing relevant_columns with invalid data_type raises DatasetValueError - tested in unit tests
         """
         columns = [list(x.keys())[0] for x in self]
         if remove_prefix:
@@ -159,7 +158,7 @@
             all_data_types = sorted(
                 list(set(get_data_type(column) for column in all_columns))
             )
-            raise ValueError(
+            raise DatasetValueError(
                 f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
             )
 
@@ -179,7 +178,7 @@
                 _num_observations = len(values)
             else:
                 if len(values) != _num_observations:
-                    raise ValueError(
+                    raise DatasetValueError(
                         f"The number of observations is not consistent across columns. "
                         f"Column '{key}' has {len(values)} observations, but previous columns had {_num_observations} observations."
                     )
@@ -262,8 +261,9 @@
             remove_prefix=remove_prefix, pretty_labels=pretty_labels
         )
 
-    def to_jsonl(self, filename: Optional[str] = None)
+    def to_jsonl(self, filename: Optional[str] = None):
         """Export the results to a FileStore instance containing JSONL data."""
+        from .file_exports import JSONLExport
         exporter = JSONLExport(data=self, filename=filename)
         return exporter.export()
 
@@ -274,8 +274,9 @@
         pretty_labels: Optional[dict] = None,
         table_name: str = "results",
         if_exists: str = "replace",
-    )
+    ):
         """Export the results to a SQLite database file."""
+        from .file_exports import SQLiteExport
         exporter = SQLiteExport(
             data=self,
             filename=filename,
@@ -291,7 +292,7 @@
         filename: Optional[str] = None,
         remove_prefix: bool = False,
         pretty_labels: Optional[dict] = None,
-    )
+    ):
         """Export the results to a FileStore instance containing CSV data."""
         from .file_exports import CSVExport
 
@@ -309,9 +310,9 @@
         remove_prefix: bool = False,
         pretty_labels: Optional[dict] = None,
         sheet_name: Optional[str] = None,
-    )
+    ):
         """Export the results to a FileStore instance containing Excel data."""
-        from .file_exports import
+        from .file_exports import ExcelExport
 
         exporter = ExcelExport(
             data=self,
@@ -324,25 +325,28 @@
 
     def _db(
         self, remove_prefix: bool = True, shape: str = "wide"
-    )
+    ):
         """Create a SQLite database in memory and return the connection.
 
         Args:
             remove_prefix: Whether to remove the prefix from the column names
             shape: The shape of the data in the database ("wide" or "long")
-
+
         Returns:
             A database connection
-
-
-
-
-
-
-
-
+
+        Examples:
+            >>> from sqlalchemy import text
+            >>> from edsl import Results
+            >>> engine = Results.example()._db()
+            >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
+            4
+            >>> engine = Results.example()._db(shape = "long")
+            >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
+            172
         """
-        from sqlalchemy import create_engine
+        # Import needed for database connection
+        from sqlalchemy import create_engine
 
         engine = create_engine("sqlite:///:memory:")
         if remove_prefix and shape == "wide":
@@ -445,29 +449,35 @@
 
     def to_pandas(
         self, remove_prefix: bool = False, lists_as_strings=False
-    )
+    ):
         """Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
 
-        :
-
+        Args:
+            remove_prefix: Whether to remove the prefix from the column names.
+            lists_as_strings: Whether to convert lists to strings.
+
+        Returns:
+            A pandas DataFrame.
         """
+        # pandas is imported in _to_pandas_strings
        return self._to_pandas_strings(remove_prefix)
 
-    def _to_pandas_strings(self, remove_prefix: bool = False)
+    def _to_pandas_strings(self, remove_prefix: bool = False):
         """Convert the results to a pandas DataFrame.
 
-        :
+        Args:
+            remove_prefix: Whether to remove the prefix from the column names.
 
-
-
-
-
-
-
-
-
+        Examples:
+            >>> from edsl.results import Results
+            >>> r = Results.example()
+            >>> r.select('how_feeling').to_pandas()
+            answer.how_feeling
+            0 OK
+            1 Great
+            2 Terrible
+            3 OK
         """
-
         import pandas as pd
 
         csv_string = self.to_csv(remove_prefix=remove_prefix).text
@@ -478,17 +488,27 @@
 
     def to_polars(
         self, remove_prefix: bool = False, lists_as_strings=False
-    )
+    ):
         """Convert the results to a Polars DataFrame.
 
-        :
+        Args:
+            remove_prefix: Whether to remove the prefix from the column names.
+            lists_as_strings: Whether to convert lists to strings.
+
+        Returns:
+            A Polars DataFrame.
         """
+        # polars is imported in _to_polars_strings
         return self._to_polars_strings(remove_prefix)
 
-    def _to_polars_strings(self, remove_prefix: bool = False)
+    def _to_polars_strings(self, remove_prefix: bool = False):
         """Convert the results to a Polars DataFrame.
 
-        :
+        Args:
+            remove_prefix: Whether to remove the prefix from the column names.
+
+        Returns:
+            A Polars DataFrame.
         """
         import polars as pl
 
@@ -496,10 +516,14 @@
         df = pl.read_csv(io.StringIO(csv_string))
         return df
 
-    def tree(self, node_order: Optional[List[str]] = None)
+    def tree(self, node_order: Optional[List[str]] = None):
         """Convert the results to a Tree.
 
-        :
+        Args:
+            node_order: The order of the nodes.
+
+        Returns:
+            A Tree object.
         """
         from .dataset_tree import Tree
         return Tree(self, node_order=node_order)
@@ -598,15 +622,12 @@
         [1, 9, 2, 3, 4]
 
         >>> from edsl.dataset import Dataset
-        >>>
-        Traceback (most recent call last):
-        ...
-        ValueError: Cannot flatten a list of lists when there are multiple columns selected.
+        >>> # Testing to_list flatten with multiple columns raises DatasetValueError - tested in unit tests
 
 
         """
         if len(self.relevant_columns()) > 1 and flatten:
-            raise ValueError(
+            raise DatasetValueError(
                 "Cannot flatten a list of lists when there are multiple columns selected."
             )
 
@@ -632,7 +653,6 @@
                 new_list.append(item)
             list_to_return = new_list
 
-        from edsl.utilities.PrettyList import PrettyList
 
         #return PrettyList(list_to_return)
         return list_to_return
@@ -647,7 +667,6 @@
         import tempfile
         from edsl.utilities.utilities import is_notebook
         from IPython.display import HTML, display
-        from edsl.utilities.utilities import is_notebook
 
         df = self.to_pandas()
 
@@ -698,7 +717,7 @@
         all_fields = list(fields) + [f for f in header_fields if f not in fields]
         for field in all_fields:
             if field not in self.relevant_columns():
-                raise
+                raise DatasetKeyError(f"Field '{field}' not found in dataset")
 
         # Get data for each field
         field_data = {}
@@ -780,7 +799,8 @@
             from docx.shared import Pt
             import json
         except ImportError:
-
+            from edsl.dataset.exceptions import DatasetImportError
+            raise DatasetImportError("The python-docx package is required for DOCX export. Install it with 'pip install python-docx'.")
 
         doc = Document()
 
@@ -797,7 +817,7 @@
         if header_parts:
             header_text += f" ({', '.join(header_parts)})"
 
-
+        doc.add_heading(header_text, level=1)
 
         # Add the remaining fields
         for field in fields:
@@ -823,7 +843,7 @@
     def report(self, *fields: Optional[str], top_n: Optional[int] = None,
                header_fields: Optional[List[str]] = None, divider: bool = True,
                return_string: bool = False, format: str = "markdown",
-               filename: Optional[str] = None) -> Optional[Union[str, "
+               filename: Optional[str] = None) -> Optional[Union[str, "Document"]]:
         """Generates a report of the results by iterating through rows.
 
         Args:
@@ -886,7 +906,7 @@
             return doc
 
         else:
-            raise
+            raise DatasetExportError(f"Unsupported format: {format}. Use 'markdown' or 'docx'.")
 
     def tally(
         self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
@@ -945,7 +965,7 @@
             f in self.relevant_columns() or f in relevant_columns_without_prefix
             for f in fields
         ):
-            raise ValueError("One or more specified fields are not in the dataset."
+            raise DatasetKeyError("One or more specified fields are not in the dataset."
                              f"The available fields are: {self.relevant_columns()}"
             )
 
@@ -963,7 +983,7 @@
         except TypeError:
             tally = dict(Counter([str(v) for v in values]))
         except Exception as e:
-            raise
+            raise DatasetValueError(f"Error tallying values: {e}")
 
         sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
         if top_n is not None:
@@ -1056,7 +1076,8 @@
         # Check if the field is ambiguous
         if len(matching_entries) > 1:
             matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
-            raise ValueError(
+            from edsl.dataset.exceptions import DatasetValueError
+            raise DatasetValueError(
                 f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
                 f"Please specify the full column name to flatten."
             )
@@ -1159,13 +1180,13 @@
                 break
 
         if field_index is None:
-            raise
+            raise DatasetKeyError(f"Field '{field}' not found in dataset")
 
         field_data = result.data[field_index][field]
 
         # Check if values are lists
         if not all(isinstance(v, list) for v in field_data):
-            raise
+            raise DatasetTypeError(f"Field '{field}' does not contain lists in all entries")
 
         # Get the maximum length of lists
         max_len = max(len(v) for v in field_data)
@@ -1209,16 +1230,13 @@
         >>> d.drop('a')
         Dataset([{'b': [4, 5, 6]}])
 
-        >>> d.drop('c')
-        Traceback (most recent call last):
-        ...
-        KeyError: "Field 'c' not found in dataset"
+        >>> # Testing drop with nonexistent field raises DatasetKeyError - tested in unit tests
         """
         from .dataset import Dataset
 
         # Check if field exists in the dataset
         if field_name not in self.relevant_columns():
-            raise KeyError(f"Field '{field_name}' not found in dataset")
+            raise DatasetKeyError(f"Field '{field_name}' not found in dataset")
 
         # Create a new dataset without the specified field
         new_data = [entry for entry in self.data if field_name not in entry]
@@ -1248,9 +1266,7 @@
        >>> d = Dataset([{'a.x': [1, 2, 3]}, {'b.x': [4, 5, 6]}])
        >>> # d.remove_prefix()
 
-
-        ...
-        ValueError: Removing prefixes would result in duplicate column names: ['x']
+        # Testing remove_prefix with duplicate column names raises DatasetValueError - tested in unit tests
        """
        from .dataset import Dataset
 
@@ -1273,7 +1289,7 @@
 
         # Check for duplicates
         if duplicates:
-            raise ValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
+            raise DatasetValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
 
         # Create a new dataset with unprefixed column names
         new_data = []
@@ -1288,8 +1304,6 @@
         return Dataset(new_data)
 
 
-from functools import wraps
-
 def to_dataset(func):
     """
     Decorator that ensures functions receive a Dataset object as their first argument.
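
The hunks above also tighten `report()`: an unsupported format now raises `DatasetExportError` rather than an untyped error. A hedged sketch of the new surface (the `Results.example()` and `select()` calls appear in the doctests above; the specific field name and the `return_string` behavior are assumptions from the signature shown):

```python
# Sketch of the stricter report() behavior patched above.
from edsl import Results
from edsl.dataset.exceptions import DatasetExportError

r = Results.example().select("how_feeling")
print(r.report("answer.how_feeling", format="markdown", return_string=True))

try:
    r.report("answer.how_feeling", format="pdf")
except DatasetExportError as e:
    print(e)  # Unsupported format: pdf. Use 'markdown' or 'docx'.
```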
edsl/dataset/dataset_tree.py
CHANGED
@@ -1,4 +1,7 @@
-from typing import
+from typing import Optional, List, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .dataset import Dataset
 
 
 def is_hashable(v):
@@ -16,8 +19,10 @@ class TreeNode:
         self.children = {}
 
 
+
 class Tree:
     def __init__(self, data: "Dataset", node_order: Optional[List[str]] = None):
+        """Initialize the tree with a Dataset."""
         d = {}
         for entry in data:
             d.update(entry)
@@ -46,7 +51,8 @@
         else:
             if not set(node_order).issubset(set(self.data.keys())):
                 invalid_keys = set(node_order) - set(self.data.keys())
-
+                from edsl.dataset.exceptions import DatasetValueError
+                raise DatasetValueError(f"Invalid keys in node_order: {invalid_keys}")
 
         self.root = TreeNode()
 
@@ -95,8 +101,7 @@
             filename = "tree_structure.docx"
 
         from docx import Document
-        from docx.shared import
-        from docx.enum.text import WD_ALIGN_PARAGRAPH
+        from docx.shared import Pt
         from docx.enum.style import WD_STYLE_TYPE
 
         doc = Document()
@@ -118,7 +123,6 @@
         self._add_to_docx(doc, self.root, 0)
         import base64
         from io import BytesIO
-        import base64
 
         # Save document to bytes buffer
         doc_buffer = BytesIO()
edsl/dataset/exceptions.py
ADDED
@@ -0,0 +1,125 @@
+"""
+Exceptions module for dataset-related operations.
+
+This module defines custom exception classes for all dataset-related error conditions
+in the EDSL framework, ensuring consistent error handling for data manipulation,
+transformation, and analysis operations.
+"""
+
+from ..base import BaseException
+
+
+class DatasetError(BaseException):
+    """
+    Base exception class for all dataset-related errors.
+
+    This is the parent class for exceptions related to Dataset operations
+    in the EDSL framework, including data creation, manipulation, validation,
+    and serialization.
+
+    Examples:
+        ```python
+        # Usually not raised directly, but through subclasses:
+        dataset = Dataset([])
+        dataset["missing_key"]  # Would raise DatasetKeyError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+class DatasetKeyError(DatasetError):
+    """
+    Exception raised when a key is not found in a dataset.
+
+    This exception occurs when attempting to access a field or column
+    that doesn't exist in the dataset.
+
+    Examples:
+        ```python
+        dataset = Dataset([{"a": 1}])
+        dataset["b"]  # Raises DatasetKeyError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+class DatasetValueError(DatasetError):
+    """
+    Exception raised when there's an issue with dataset values.
+
+    This exception occurs when dataset values are invalid, incompatible
+    with an operation, or otherwise problematic.
+
+    Examples:
+        ```python
+        dataset = Dataset([{"a": 1}, {"b": 2}])
+        dataset.select(["c"])  # Raises DatasetValueError for missing field
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+class DatasetTypeError(DatasetError):
+    """
+    Exception raised when there's a type mismatch in dataset operations.
+
+    This exception occurs when trying to perform operations with
+    incompatible data types.
+
+    Examples:
+        ```python
+        dataset = Dataset([{"a": 1}])
+        dataset + "not a dataset"  # Raises DatasetTypeError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+class DatasetExportError(DatasetError):
+    """
+    Exception raised when exporting a dataset to a different format fails.
+
+    This exception occurs when trying to export a dataset to a file format
+    (like CSV, SQLite, etc.) and the operation fails.
+
+    Examples:
+        ```python
+        dataset = Dataset([{"a": complex(1, 2)}])
+        dataset.to_csv("file.csv")  # Raises DatasetExportError (complex not serializable)
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+class DatasetImportError(DatasetError):
+    """
+    Exception raised when importing data from an external source fails.
+
+    This exception occurs when trying to import data from an external source or format
+    (like CSV, JSON, etc.) and the operation fails, often due to missing dependencies
+    or format issues.
+
+    Examples:
+        ```python
+        # Trying to export to DOCX without python-docx package
+        dataset.to_docx("file.docx")  # Raises DatasetImportError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+class DatasetRuntimeError(DatasetError):
+    """
+    Exception raised when an operation fails during runtime.
+
+    This exception is used for runtime errors in dataset operations,
+    typically for operations that depend on external systems or libraries
+    like R integration.
+
+    Examples:
+        ```python
+        # Plotting with ggplot when R is not installed
+        dataset.ggplot()  # Raises DatasetRuntimeError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
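
Each class above carries a `relevant_doc` URL, presumably consumed by the shared base class added in this release (`edsl/base/base_exception.py`, +204 lines). A small sketch of extending the hierarchy; `MyPipelineError` is hypothetical:

```python
from edsl.dataset.exceptions import DatasetError

class MyPipelineError(DatasetError):
    """Hypothetical subclass: raised when a downstream step rejects a dataset."""
    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"

try:
    raise MyPipelineError("rejected: empty dataset")
except DatasetError as e:
    print(type(e).__name__, e)  # MyPipelineError rejected: empty dataset
```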
edsl/dataset/file_exports.py
CHANGED
@@ -2,7 +2,8 @@ from abc import ABC, abstractmethod
 import io
 import csv
 import base64
-
+import sqlite3
+from typing import Optional, Union, Any, Dict
 
 
 class FileExport(ABC):
@@ -37,14 +38,15 @@ class FileExport(ABC):
         """Generate default filename for this format."""
         return f"results.{self.suffix}"
 
-    def _create_filestore(self, data: Union[str, bytes])
+    def _create_filestore(self, data: Union[str, bytes]):
         """Create a FileStore instance with encoded data."""
+        from ..scenarios import FileStore
         if isinstance(data, str):
             base64_string = base64.b64encode(data.encode()).decode()
         else:
             base64_string = base64.b64encode(data).decode()
 
-
+        # FileStore already imported
 
         path = self.filename or self._get_default_filename()
 
@@ -66,8 +68,12 @@
         """Convert the input data to the target format."""
         pass
 
-    def export(self) -> Optional
-        """Export the data to a FileStore instance.
+    def export(self) -> Optional:
+        """Export the data to a FileStore instance.
+
+        Returns:
+            A FileStore instance or None if the file was written directly.
+        """
         formatted_data = self.format_data()
         return self._create_filestore(formatted_data)
 
@@ -140,8 +146,6 @@ class ExcelExport(TabularExport):
         return buffer.getvalue()
 
 
-import sqlite3
-from typing import Any
 
 
 class SQLiteExport(TabularExport):
@@ -195,11 +199,12 @@
             cursor.execute(f"DROP TABLE IF EXISTS {self.table_name}")
         elif self.if_exists == "fail":
             cursor.execute(
-
+                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                 (self.table_name,),
             )
             if cursor.fetchone():
-
+                from edsl.dataset.exceptions import DatasetValueError
+                raise DatasetValueError(f"Table {self.table_name} already exists")
 
         # Create table
         columns = ", ".join(f'"{col}" {dtype}' for col, dtype in column_types)
@@ -240,12 +245,14 @@
         """Validate initialization parameters."""
         valid_if_exists = {"fail", "replace", "append"}
         if self.if_exists not in valid_if_exists:
-            raise ValueError(
+            from edsl.dataset.exceptions import DatasetValueError
+            raise DatasetValueError(
                 f"if_exists must be one of {valid_if_exists}, got {self.if_exists}"
            )
 
         # Validate table name (basic SQLite identifier validation)
         if not self.table_name.isalnum() and not all(c in "_" for c in self.table_name):
-            raise ValueError(
+            from edsl.dataset.exceptions import DatasetValueError
+            raise DatasetValueError(
                 f"Invalid table name: {self.table_name}. Must contain only alphanumeric characters and underscores."
             )