PyPI - edsl - Versions diffs - 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl - Mend

edsl 0.1.49-py3-none-any.whl → 0.1.51-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (257)

edsl/__init__.py +124 -53
edsl/__version__.py +1 -1
edsl/agents/agent.py +21 -21
edsl/agents/agent_list.py +2 -5
edsl/agents/exceptions.py +119 -5
edsl/base/__init__.py +10 -35
edsl/base/base_class.py +71 -36
edsl/base/base_exception.py +204 -0
edsl/base/data_transfer_models.py +1 -1
edsl/base/exceptions.py +94 -0
edsl/buckets/__init__.py +15 -1
edsl/buckets/bucket_collection.py +3 -4
edsl/buckets/exceptions.py +107 -0
edsl/buckets/model_buckets.py +1 -2
edsl/buckets/token_bucket.py +11 -6
edsl/buckets/token_bucket_api.py +27 -12
edsl/buckets/token_bucket_client.py +9 -7
edsl/caching/cache.py +12 -4
edsl/caching/cache_entry.py +10 -9
edsl/caching/exceptions.py +113 -7
edsl/caching/remote_cache_sync.py +6 -7
edsl/caching/sql_dict.py +20 -14
edsl/cli.py +43 -0
edsl/config/__init__.py +1 -1
edsl/config/config_class.py +32 -6
edsl/conversation/Conversation.py +8 -4
edsl/conversation/car_buying.py +1 -3
edsl/conversation/exceptions.py +58 -0
edsl/conversation/mug_negotiation.py +2 -8
edsl/coop/__init__.py +28 -6
edsl/coop/coop.py +120 -29
edsl/coop/coop_functions.py +1 -1
edsl/coop/ep_key_handling.py +1 -1
edsl/coop/exceptions.py +188 -9
edsl/coop/price_fetcher.py +5 -8
edsl/coop/utils.py +4 -6
edsl/dataset/__init__.py +5 -4
edsl/dataset/dataset.py +177 -86
edsl/dataset/dataset_operations_mixin.py +98 -76
edsl/dataset/dataset_tree.py +11 -7
edsl/dataset/display/table_display.py +0 -2
edsl/dataset/display/table_renderers.py +6 -4
edsl/dataset/exceptions.py +125 -0
edsl/dataset/file_exports.py +18 -11
edsl/dataset/r/ggplot.py +13 -6
edsl/display/__init__.py +27 -0
edsl/display/core.py +147 -0
edsl/display/plugin.py +189 -0
edsl/display/utils.py +52 -0
edsl/inference_services/__init__.py +9 -1
edsl/inference_services/available_model_cache_handler.py +1 -1
edsl/inference_services/available_model_fetcher.py +5 -6
edsl/inference_services/data_structures.py +10 -7
edsl/inference_services/exceptions.py +132 -1
edsl/inference_services/inference_service_abc.py +2 -2
edsl/inference_services/inference_services_collection.py +2 -6
edsl/inference_services/registry.py +4 -3
edsl/inference_services/service_availability.py +4 -3
edsl/inference_services/services/anthropic_service.py +4 -1
edsl/inference_services/services/aws_bedrock.py +13 -12
edsl/inference_services/services/azure_ai.py +12 -10
edsl/inference_services/services/deep_infra_service.py +1 -4
edsl/inference_services/services/deep_seek_service.py +1 -5
edsl/inference_services/services/google_service.py +7 -3
edsl/inference_services/services/groq_service.py +1 -1
edsl/inference_services/services/mistral_ai_service.py +4 -2
edsl/inference_services/services/ollama_service.py +1 -1
edsl/inference_services/services/open_ai_service.py +7 -5
edsl/inference_services/services/perplexity_service.py +6 -2
edsl/inference_services/services/test_service.py +8 -7
edsl/inference_services/services/together_ai_service.py +2 -3
edsl/inference_services/services/xai_service.py +1 -1
edsl/instructions/__init__.py +1 -1
edsl/instructions/change_instruction.py +7 -5
edsl/instructions/exceptions.py +61 -0
edsl/instructions/instruction.py +6 -2
edsl/instructions/instruction_collection.py +6 -4
edsl/instructions/instruction_handler.py +12 -15
edsl/interviews/ReportErrors.py +0 -3
edsl/interviews/__init__.py +9 -2
edsl/interviews/answering_function.py +11 -13
edsl/interviews/exception_tracking.py +15 -8
edsl/interviews/exceptions.py +79 -0
edsl/interviews/interview.py +33 -30
edsl/interviews/interview_status_dictionary.py +4 -2
edsl/interviews/interview_status_log.py +2 -1
edsl/interviews/interview_task_manager.py +5 -5
edsl/interviews/request_token_estimator.py +5 -2
edsl/interviews/statistics.py +3 -4
edsl/invigilators/__init__.py +7 -1
edsl/invigilators/exceptions.py +79 -0
edsl/invigilators/invigilator_base.py +0 -1
edsl/invigilators/invigilators.py +9 -13
edsl/invigilators/prompt_constructor.py +1 -5
edsl/invigilators/prompt_helpers.py +8 -4
edsl/invigilators/question_instructions_prompt_builder.py +1 -1
edsl/invigilators/question_option_processor.py +9 -5
edsl/invigilators/question_template_replacements_builder.py +3 -2
edsl/jobs/__init__.py +42 -5
edsl/jobs/async_interview_runner.py +25 -23
edsl/jobs/check_survey_scenario_compatibility.py +11 -10
edsl/jobs/data_structures.py +8 -5
edsl/jobs/exceptions.py +177 -8
edsl/jobs/fetch_invigilator.py +1 -1
edsl/jobs/jobs.py +74 -69
edsl/jobs/jobs_checks.py +6 -7
edsl/jobs/jobs_component_constructor.py +4 -4
edsl/jobs/jobs_pricing_estimation.py +4 -3
edsl/jobs/jobs_remote_inference_logger.py +5 -4
edsl/jobs/jobs_runner_asyncio.py +3 -4
edsl/jobs/jobs_runner_status.py +8 -9
edsl/jobs/remote_inference.py +27 -24
edsl/jobs/results_exceptions_handler.py +10 -7
edsl/key_management/__init__.py +3 -1
edsl/key_management/exceptions.py +62 -0
edsl/key_management/key_lookup.py +1 -1
edsl/key_management/key_lookup_builder.py +37 -14
edsl/key_management/key_lookup_collection.py +2 -0
edsl/language_models/__init__.py +1 -1
edsl/language_models/exceptions.py +302 -14
edsl/language_models/language_model.py +9 -8
edsl/language_models/model.py +4 -4
edsl/language_models/model_list.py +1 -1
edsl/language_models/price_manager.py +1 -1
edsl/language_models/raw_response_handler.py +14 -9
edsl/language_models/registry.py +17 -21
edsl/language_models/repair.py +0 -6
edsl/language_models/unused/fake_openai_service.py +0 -1
edsl/load_plugins.py +69 -0
edsl/logger.py +146 -0
edsl/notebooks/__init__.py +24 -1
edsl/notebooks/exceptions.py +82 -0
edsl/notebooks/notebook.py +7 -3
edsl/notebooks/notebook_to_latex.py +1 -2
edsl/plugins/__init__.py +63 -0
edsl/plugins/built_in/export_example.py +50 -0
edsl/plugins/built_in/pig_latin.py +67 -0
edsl/plugins/cli.py +372 -0
edsl/plugins/cli_typer.py +283 -0
edsl/plugins/exceptions.py +31 -0
edsl/plugins/hookspec.py +51 -0
edsl/plugins/plugin_host.py +128 -0
edsl/plugins/plugin_manager.py +633 -0
edsl/plugins/plugins_registry.py +168 -0
edsl/prompts/__init__.py +24 -1
edsl/prompts/exceptions.py +107 -5
edsl/prompts/prompt.py +15 -7
edsl/questions/HTMLQuestion.py +5 -11
edsl/questions/Quick.py +0 -1
edsl/questions/__init__.py +6 -4
edsl/questions/answer_validator_mixin.py +318 -323
edsl/questions/compose_questions.py +3 -3
edsl/questions/descriptors.py +11 -50
edsl/questions/exceptions.py +278 -22
edsl/questions/loop_processor.py +7 -5
edsl/questions/prompt_templates/question_list.jinja +3 -0
edsl/questions/question_base.py +46 -19
edsl/questions/question_base_gen_mixin.py +2 -2
edsl/questions/question_base_prompts_mixin.py +13 -7
edsl/questions/question_budget.py +503 -98
edsl/questions/question_check_box.py +660 -160
edsl/questions/question_dict.py +345 -194
edsl/questions/question_extract.py +401 -61
edsl/questions/question_free_text.py +80 -14
edsl/questions/question_functional.py +119 -9
edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
edsl/questions/question_list.py +275 -28
edsl/questions/question_matrix.py +643 -96
edsl/questions/question_multiple_choice.py +219 -51
edsl/questions/question_numerical.py +361 -32
edsl/questions/question_rank.py +401 -124
edsl/questions/question_registry.py +7 -5
edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
edsl/questions/register_questions_meta.py +2 -2
edsl/questions/response_validator_abc.py +13 -15
edsl/questions/response_validator_factory.py +10 -12
edsl/questions/templates/dict/answering_instructions.jinja +1 -0
edsl/questions/templates/rank/question_presentation.jinja +1 -1
edsl/results/__init__.py +1 -1
edsl/results/exceptions.py +141 -7
edsl/results/report.py +1 -2
edsl/results/result.py +11 -9
edsl/results/results.py +480 -321
edsl/results/results_selector.py +8 -4
edsl/scenarios/PdfExtractor.py +2 -2
edsl/scenarios/construct_download_link.py +69 -35
edsl/scenarios/directory_scanner.py +33 -14
edsl/scenarios/document_chunker.py +1 -1
edsl/scenarios/exceptions.py +238 -14
edsl/scenarios/file_methods.py +1 -1
edsl/scenarios/file_store.py +7 -3
edsl/scenarios/handlers/__init__.py +17 -0
edsl/scenarios/handlers/docx_file_store.py +0 -5
edsl/scenarios/handlers/pdf_file_store.py +0 -1
edsl/scenarios/handlers/pptx_file_store.py +0 -5
edsl/scenarios/handlers/py_file_store.py +0 -1
edsl/scenarios/handlers/sql_file_store.py +1 -4
edsl/scenarios/handlers/sqlite_file_store.py +0 -1
edsl/scenarios/handlers/txt_file_store.py +1 -1
edsl/scenarios/scenario.py +1 -3
edsl/scenarios/scenario_list.py +179 -27
edsl/scenarios/scenario_list_pdf_tools.py +1 -0
edsl/scenarios/scenario_selector.py +0 -1
edsl/surveys/__init__.py +3 -4
edsl/surveys/dag/__init__.py +4 -2
edsl/surveys/descriptors.py +1 -1
edsl/surveys/edit_survey.py +1 -0
edsl/surveys/exceptions.py +165 -9
edsl/surveys/memory/__init__.py +5 -3
edsl/surveys/memory/memory_management.py +1 -0
edsl/surveys/memory/memory_plan.py +6 -15
edsl/surveys/rules/__init__.py +5 -3
edsl/surveys/rules/rule.py +1 -2
edsl/surveys/rules/rule_collection.py +1 -1
edsl/surveys/survey.py +12 -24
edsl/surveys/survey_css.py +3 -3
edsl/surveys/survey_export.py +6 -3
edsl/surveys/survey_flow_visualization.py +10 -1
edsl/surveys/survey_simulator.py +2 -1
edsl/tasks/__init__.py +23 -1
edsl/tasks/exceptions.py +72 -0
edsl/tasks/question_task_creator.py +3 -3
edsl/tasks/task_creators.py +1 -3
edsl/tasks/task_history.py +8 -10
edsl/tasks/task_status_log.py +1 -2
edsl/tokens/__init__.py +29 -1
edsl/tokens/exceptions.py +37 -0
edsl/tokens/interview_token_usage.py +3 -2
edsl/tokens/token_usage.py +4 -3
edsl/utilities/__init__.py +21 -1
edsl/utilities/decorators.py +1 -2
edsl/utilities/markdown_to_docx.py +2 -2
edsl/utilities/markdown_to_pdf.py +1 -1
edsl/utilities/repair_functions.py +0 -1
edsl/utilities/restricted_python.py +0 -1
edsl/utilities/template_loader.py +2 -3
edsl/utilities/utilities.py +8 -29
{edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/METADATA +32 -2
edsl-0.1.51.dist-info/RECORD +365 -0
edsl-0.1.51.dist-info/entry_points.txt +3 -0
edsl/dataset/smart_objects.py +0 -96
edsl/exceptions/BaseException.py +0 -21
edsl/exceptions/__init__.py +0 -54
edsl/exceptions/configuration.py +0 -16
edsl/exceptions/general.py +0 -34
edsl/questions/derived/__init__.py +0 -0
edsl/study/ObjectEntry.py +0 -173
edsl/study/ProofOfWork.py +0 -113
edsl/study/SnapShot.py +0 -80
edsl/study/Study.py +0 -520
edsl/study/__init__.py +0 -6
edsl/utilities/interface.py +0 -135
edsl-0.1.49.dist-info/RECORD +0 -347
{edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/LICENSE +0 -0
{edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/WHEEL +0 -0

edsl/dataset/dataset_operations_mixin.py CHANGED Viewed

@@ -12,16 +12,18 @@ ScenarioList, AgentList) to share the same data manipulation interface, enabling
 fluid operations across different parts of the EDSL ecosystem.
 """
-from abc import ABC, abstractmethod
 import io
 import warnings
 import textwrap
-from typing import Optional, Tuple, Union, List, TYPE_CHECKING
+from typing import Optional, Tuple, Union, List, TYPE_CHECKING  # Callable not used
+from functools import wraps
 from .r.ggplot import GGPlotMethod
+from .exceptions import DatasetKeyError, DatasetValueError, DatasetTypeError, DatasetExportError
 if TYPE_CHECKING:
     from docx import Document
     from .dataset import Dataset
+    from ..jobs import Job  # noqa: F401
 class DataOperationsBase:
     """
@@ -135,10 +137,7 @@ class DataOperationsBase:
         >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
         ['model.frequency_penalty', ...]
-        >>> Results.example().relevant_columns(data_type = "flimflam")
-        Traceback (most recent call last):
-        ...
-        ValueError: No columns found for data type: flimflam. Available data types are: ...
+        >>> # Testing relevant_columns with invalid data_type raises DatasetValueError - tested in unit tests
         """
         columns = [list(x.keys())[0] for x in self]
         if remove_prefix:
@@ -159,7 +158,7 @@ class DataOperationsBase:
                 all_data_types = sorted(
                     list(set(get_data_type(column) for column in all_columns))
                 )
-                raise ValueError(
+                raise DatasetValueError(
                     f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
                 )
@@ -179,12 +178,19 @@ class DataOperationsBase:
                 _num_observations = len(values)
             else:
                 if len(values) != _num_observations:
-                    raise ValueError(
+                    raise DatasetValueError(
                         f"The number of observations is not consistent across columns. "
                         f"Column '{key}' has {len(values)} observations, but previous columns had {_num_observations} observations."
                     )
         return _num_observations
+    def chart(self):
+        """
+        Create a chart from the results.
+        """
+        import altair as alt
+        return alt.Chart(self.to_pandas(remove_prefix=True))
     def make_tabular(
         self, remove_prefix: bool, pretty_labels: Optional[dict] = None
@@ -262,8 +268,9 @@ class DataOperationsBase:
             remove_prefix=remove_prefix, pretty_labels=pretty_labels
         )
-    def to_jsonl(self, filename: Optional[str] = None) -> Optional["FileStore"]:
+    def to_jsonl(self, filename: Optional[str] = None):
         """Export the results to a FileStore instance containing JSONL data."""
+        from .file_exports import JSONLExport
         exporter = JSONLExport(data=self, filename=filename)
         return exporter.export()
@@ -274,8 +281,9 @@ class DataOperationsBase:
         pretty_labels: Optional[dict] = None,
         table_name: str = "results",
         if_exists: str = "replace",
-    ) -> Optional["FileStore"]:
+    ):
         """Export the results to a SQLite database file."""
+        from .file_exports import SQLiteExport
         exporter = SQLiteExport(
             data=self,
             filename=filename,
@@ -291,7 +299,7 @@ class DataOperationsBase:
         filename: Optional[str] = None,
         remove_prefix: bool = False,
         pretty_labels: Optional[dict] = None,
-    ) -> Optional["FileStore"]:
+    ):
         """Export the results to a FileStore instance containing CSV data."""
         from .file_exports import CSVExport
@@ -309,9 +317,9 @@ class DataOperationsBase:
         remove_prefix: bool = False,
         pretty_labels: Optional[dict] = None,
         sheet_name: Optional[str] = None,
-    ) -> Optional["FileStore"]:
+    ):
         """Export the results to a FileStore instance containing Excel data."""
-        from .file_exports import  ExcelExport
+        from .file_exports import ExcelExport
         exporter = ExcelExport(
             data=self,
@@ -324,25 +332,28 @@ class DataOperationsBase:
     def _db(
         self, remove_prefix: bool = True, shape: str = "wide"
-    ) -> "sqlalchemy.engine.Engine":
+    ):
         """Create a SQLite database in memory and return the connection.
         Args:
             remove_prefix: Whether to remove the prefix from the column names
             shape: The shape of the data in the database ("wide" or "long")
         Returns:
             A database connection
-        >>> from sqlalchemy import text
-        >>> from edsl import Results
-        >>> engine = Results.example()._db()
-        >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
-        4
-        >>> engine = Results.example()._db(shape = "long")
-        >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
-        172
+        Examples:
+            >>> from sqlalchemy import text
+            >>> from edsl import Results
+            >>> engine = Results.example()._db()
+            >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
+            4
+            >>> engine = Results.example()._db(shape = "long")
+            >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
+            172
         """
-        from sqlalchemy import create_engine, text
+        # Import needed for database connection
+        from sqlalchemy import create_engine
         engine = create_engine("sqlite:///:memory:")
         if remove_prefix and shape == "wide":
@@ -445,29 +456,35 @@ class DataOperationsBase:
     def to_pandas(
         self, remove_prefix: bool = False, lists_as_strings=False
-    ) -> "DataFrame":
+    ):
         """Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
-        :param remove_prefix: Whether to remove the prefix from the column names.
+        Args:
+            remove_prefix: Whether to remove the prefix from the column names.
+            lists_as_strings: Whether to convert lists to strings.
+        Returns:
+            A pandas DataFrame.
         """
+        # pandas is imported in _to_pandas_strings
         return self._to_pandas_strings(remove_prefix)
-    def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
+    def _to_pandas_strings(self, remove_prefix: bool = False):
         """Convert the results to a pandas DataFrame.
-        :param remove_prefix: Whether to remove the prefix from the column names.
+        Args:
+            remove_prefix: Whether to remove the prefix from the column names.
-        >>> from edsl.results import Results
-        >>> r = Results.example()
-        >>> r.select('how_feeling').to_pandas()
-          answer.how_feeling
-        0                 OK
-        1              Great
-        2           Terrible
-        3                 OK
+        Examples:
+            >>> from edsl.results import Results
+            >>> r = Results.example()
+            >>> r.select('how_feeling').to_pandas()
+              answer.how_feeling
+            0                 OK
+            1              Great
+            2           Terrible
+            3                 OK
         """
         import pandas as pd
         csv_string = self.to_csv(remove_prefix=remove_prefix).text
@@ -478,17 +495,27 @@ class DataOperationsBase:
     def to_polars(
         self, remove_prefix: bool = False, lists_as_strings=False
-    ) -> "pl.DataFrame":
+    ):
         """Convert the results to a Polars DataFrame.
-        :param remove_prefix: Whether to remove the prefix from the column names.
+        Args:
+            remove_prefix: Whether to remove the prefix from the column names.
+            lists_as_strings: Whether to convert lists to strings.
+        Returns:
+            A Polars DataFrame.
         """
+        # polars is imported in _to_polars_strings
         return self._to_polars_strings(remove_prefix)
-    def _to_polars_strings(self, remove_prefix: bool = False) -> "pl.DataFrame":
+    def _to_polars_strings(self, remove_prefix: bool = False):
         """Convert the results to a Polars DataFrame.
-        :param remove_prefix: Whether to remove the prefix from the column names.
+        Args:
+            remove_prefix: Whether to remove the prefix from the column names.
+        Returns:
+            A Polars DataFrame.
         """
         import polars as pl
@@ -496,10 +523,14 @@ class DataOperationsBase:
         df = pl.read_csv(io.StringIO(csv_string))
         return df
-    def tree(self, node_order: Optional[List[str]] = None) -> "Tree":
+    def tree(self, node_order: Optional[List[str]] = None):
         """Convert the results to a Tree.
-        :param node_order: The order of the nodes.
+        Args:
+            node_order: The order of the nodes.
+        Returns:
+            A Tree object.
         """
         from .dataset_tree import Tree
         return Tree(self, node_order=node_order)
@@ -514,13 +545,14 @@ class DataOperationsBase:
         >>> r.select('how_feeling').to_scenario_list()
         ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
         """
-        from edsl.scenarios import ScenarioList, Scenario
+        from ..scenarios import ScenarioList, Scenario
         list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
         scenarios = []
         for d in list_of_dicts:
             scenarios.append(Scenario(d))
         return ScenarioList(scenarios)
     def to_agent_list(self, remove_prefix: bool = True):
         """Convert the results to a list of dictionaries, one per agent.
@@ -532,7 +564,7 @@ class DataOperationsBase:
         >>> r.select('how_feeling').to_agent_list()
         AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
         """
-        from edsl.agents import Agent, AgentList
+        from ..agents import Agent, AgentList
         list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
         agents = []
@@ -598,15 +630,12 @@ class DataOperationsBase:
         [1, 9, 2, 3, 4]
         >>> from edsl.dataset import Dataset
-        >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
-        Traceback (most recent call last):
-        ...
-        ValueError: Cannot flatten a list of lists when there are multiple columns selected.
+        >>> # Testing to_list flatten with multiple columns raises DatasetValueError - tested in unit tests
         """
         if len(self.relevant_columns()) > 1 and flatten:
-            raise ValueError(
+            raise DatasetValueError(
                 "Cannot flatten a list of lists when there are multiple columns selected."
             )
@@ -632,7 +661,6 @@ class DataOperationsBase:
                     new_list.append(item)
             list_to_return = new_list
-        from edsl.utilities.PrettyList import PrettyList
         #return PrettyList(list_to_return)
         return list_to_return
@@ -645,9 +673,8 @@ class DataOperationsBase:
     ):
         import os
         import tempfile
-        from edsl.utilities.utilities import is_notebook
+        from ..utilities.utilities import is_notebook
         from IPython.display import HTML, display
-        from edsl.utilities.utilities import is_notebook
         df = self.to_pandas()
@@ -698,7 +725,7 @@ class DataOperationsBase:
         all_fields = list(fields) + [f for f in header_fields if f not in fields]
         for field in all_fields:
             if field not in self.relevant_columns():
-                raise ValueError(f"Field '{field}' not found in dataset")
+                raise DatasetKeyError(f"Field '{field}' not found in dataset")
         # Get data for each field
         field_data = {}
@@ -780,7 +807,8 @@ class DataOperationsBase:
             from docx.shared import Pt
             import json
         except ImportError:
-            raise ImportError("The python-docx package is required for DOCX export. Install it with 'pip install python-docx'.")
+            from .exceptions import DatasetImportError
+            raise DatasetImportError("The python-docx package is required for DOCX export. Install it with 'pip install python-docx'.")
         doc = Document()
@@ -797,7 +825,7 @@ class DataOperationsBase:
                 if header_parts:
                     header_text += f" ({', '.join(header_parts)})"
-            heading = doc.add_heading(header_text, level=1)
+            doc.add_heading(header_text, level=1)
             # Add the remaining fields
             for field in fields:
@@ -823,7 +851,7 @@ class DataOperationsBase:
     def report(self, *fields: Optional[str], top_n: Optional[int] = None,
                header_fields: Optional[List[str]] = None, divider: bool = True,
                return_string: bool = False, format: str = "markdown",
-               filename: Optional[str] = None) -> Optional[Union[str, "docx.Document"]]:
+               filename: Optional[str] = None) -> Optional[Union[str, "Document"]]:
         """Generates a report of the results by iterating through rows.
         Args:
@@ -851,7 +879,7 @@ class DataOperationsBase:
             >>> isinstance(doc, object)
             True
         """
-        from edsl.utilities.utilities import is_notebook
+        from ..utilities.utilities import is_notebook
         # Prepare the data for the report
         field_data, num_obs, fields, header_fields = self._prepare_report_data(
@@ -886,7 +914,7 @@ class DataOperationsBase:
             return doc
         else:
-            raise ValueError(f"Unsupported format: {format}. Use 'markdown' or 'docx'.")
+            raise DatasetExportError(f"Unsupported format: {format}. Use 'markdown' or 'docx'.")
     def tally(
         self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
@@ -945,7 +973,7 @@ class DataOperationsBase:
             f in self.relevant_columns() or f in relevant_columns_without_prefix
             for f in fields
         ):
-            raise ValueError("One or more specified fields are not in the dataset."
+            raise DatasetKeyError("One or more specified fields are not in the dataset."
                              f"The available fields are: {self.relevant_columns()}"
                              )
@@ -963,7 +991,7 @@ class DataOperationsBase:
         except TypeError:
             tally = dict(Counter([str(v) for v in values]))
         except Exception as e:
-            raise ValueError(f"Error tallying values: {e}")
+            raise DatasetValueError(f"Error tallying values: {e}")
         sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
         if top_n is not None:
@@ -1056,7 +1084,8 @@ class DataOperationsBase:
         # Check if the field is ambiguous
         if len(matching_entries) > 1:
             matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
-            raise ValueError(
+            from .exceptions import DatasetValueError
+            raise DatasetValueError(
                 f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
                 f"Please specify the full column name to flatten."
             )
@@ -1159,13 +1188,13 @@ class DataOperationsBase:
                 break
         if field_index is None:
-            raise ValueError(f"Field '{field}' not found in dataset")
+            raise DatasetKeyError(f"Field '{field}' not found in dataset")
         field_data = result.data[field_index][field]
         # Check if values are lists
         if not all(isinstance(v, list) for v in field_data):
-            raise ValueError(f"Field '{field}' does not contain lists in all entries")
+            raise DatasetTypeError(f"Field '{field}' does not contain lists in all entries")
         # Get the maximum length of lists
         max_len = max(len(v) for v in field_data)
@@ -1209,16 +1238,13 @@ class DataOperationsBase:
             >>> d.drop('a')
             Dataset([{'b': [4, 5, 6]}])
-            >>> d.drop('c')
-            Traceback (most recent call last):
-            ...
-            KeyError: "Field 'c' not found in dataset"
+            >>> # Testing drop with nonexistent field raises DatasetKeyError - tested in unit tests
         """
         from .dataset import Dataset
         # Check if field exists in the dataset
         if field_name not in self.relevant_columns():
-            raise KeyError(f"Field '{field_name}' not found in dataset")
+            raise DatasetKeyError(f"Field '{field_name}' not found in dataset")
         # Create a new dataset without the specified field
         new_data = [entry for entry in self.data if field_name not in entry]
@@ -1248,9 +1274,7 @@ class DataOperationsBase:
             >>> d = Dataset([{'a.x': [1, 2, 3]}, {'b.x': [4, 5, 6]}])
             >>> # d.remove_prefix()
-        Traceback (most recent call last):
-        ...
-        ValueError: Removing prefixes would result in duplicate column names: ['x']
+        # Testing remove_prefix with duplicate column names raises DatasetValueError - tested in unit tests
         """
         from .dataset import Dataset
@@ -1273,7 +1297,7 @@ class DataOperationsBase:
         # Check for duplicates
         if duplicates:
-            raise ValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
+            raise DatasetValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
         # Create a new dataset with unprefixed column names
         new_data = []
@@ -1288,8 +1312,6 @@ class DataOperationsBase:
         return Dataset(new_data)
-from functools import wraps
 def to_dataset(func):
     """
     Decorator that ensures functions receive a Dataset object as their first argument.

edsl/dataset/dataset_tree.py CHANGED Viewed

@@ -1,4 +1,7 @@
-from typing import Dict, List, Any, Optional, List
+from typing import Optional, List, TYPE_CHECKING
+if TYPE_CHECKING:
+    from .dataset import Dataset
 def is_hashable(v):
@@ -16,8 +19,10 @@ class TreeNode:
         self.children = {}
 class Tree:
     def __init__(self, data: "Dataset", node_order: Optional[List[str]] = None):
+        """Initialize the tree with a Dataset."""
         d = {}
         for entry in data:
             d.update(entry)
@@ -46,7 +51,8 @@ class Tree:
         else:
             if not set(node_order).issubset(set(self.data.keys())):
                 invalid_keys = set(node_order) - set(self.data.keys())
-                raise ValueError(f"Invalid keys in node_order: {invalid_keys}")
+                from .exceptions import DatasetValueError
+                raise DatasetValueError(f"Invalid keys in node_order: {invalid_keys}")
         self.root = TreeNode()
@@ -95,8 +101,7 @@ class Tree:
             filename = "tree_structure.docx"
         from docx import Document
-        from docx.shared import Inches, Pt
-        from docx.enum.text import WD_ALIGN_PARAGRAPH
+        from docx.shared import Pt
         from docx.enum.style import WD_STYLE_TYPE
         doc = Document()
@@ -118,7 +123,6 @@ class Tree:
         self._add_to_docx(doc, self.root, 0)
         import base64
         from io import BytesIO
-        import base64
         # Save document to bytes buffer
         doc_buffer = BytesIO()
@@ -126,7 +130,7 @@ class Tree:
         doc_buffer.seek(0)
         base64_string = base64.b64encode(doc_buffer.getvalue()).decode("utf-8")
-        from edsl.scenarios.FileStore import FileStore
+        from ..scenarios.file_store import FileStore
         # Create and return FileStore instance
         return FileStore(
@@ -331,7 +335,7 @@ class Tree:
         Returns:
             A string containing the markdown document, or renders markdown in notebooks.
         """
-        from edsl.utilities.utilities import is_notebook
+        from ..utilities.utilities import is_notebook
         from IPython.display import Markdown, display
         if node is None:

edsl/dataset/display/table_display.py CHANGED Viewed

@@ -1,7 +1,5 @@
 from typing import (
     Protocol,
-    List,
-    Any,
     Optional,
     TYPE_CHECKING,
     Sequence,

edsl/dataset/display/table_renderers.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from abc import ABC, abstractmethod
-import os
 from pathlib import Path
 from .table_data_class import TableData
@@ -104,9 +103,12 @@ class PandasStyleRenderer(DataTablesRendererABC):
             else:
                 df = pd.DataFrame(self.table_data.data, columns=self.table_data.headers)
-            styled_df = df.style.set_properties(
-                **{"text-align": "left"}
-            ).background_gradient()
+            styled_df = df.style.set_properties(**{
+                "text-align": "left",
+                "white-space": "pre-wrap",  # Allows text wrapping
+                "max-width": "300px",       # Maximum width before wrapping
+                "word-wrap": "break-word"   # Breaks words that exceed max-width
+            }).background_gradient()
             return f"""
             <div style="max-height: 500px; overflow-y: auto;">

edsl/dataset/exceptions.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""
+Exceptions module for dataset-related operations.
+This module defines custom exception classes for all dataset-related error conditions
+in the EDSL framework, ensuring consistent error handling for data manipulation,
+transformation, and analysis operations.
+"""
+from ..base import BaseException
+class DatasetError(BaseException):
+    """
+    Base exception class for all dataset-related errors.
+    This is the parent class for exceptions related to Dataset operations
+    in the EDSL framework, including data creation, manipulation, validation,
+    and serialization.
+    Examples:
+        ```python
+        # Usually not raised directly, but through subclasses:
+        dataset = Dataset([])
+        dataset["missing_key"]  # Would raise DatasetKeyError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+class DatasetKeyError(DatasetError):
+    """
+    Exception raised when a key is not found in a dataset.
+    This exception occurs when attempting to access a field or column
+    that doesn't exist in the dataset.
+    Examples:
+        ```python
+        dataset = Dataset([{"a": 1}])
+        dataset["b"]  # Raises DatasetKeyError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+class DatasetValueError(DatasetError):
+    """
+    Exception raised when there's an issue with dataset values.
+    This exception occurs when dataset values are invalid, incompatible
+    with an operation, or otherwise problematic.
+    Examples:
+        ```python
+        dataset = Dataset([{"a": 1}, {"b": 2}])
+        dataset.select(["c"])  # Raises DatasetValueError for missing field
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+class DatasetTypeError(DatasetError):
+    """
+    Exception raised when there's a type mismatch in dataset operations.
+    This exception occurs when trying to perform operations with
+    incompatible data types.
+    Examples:
+        ```python
+        dataset = Dataset([{"a": 1}])
+        dataset + "not a dataset"  # Raises DatasetTypeError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+class DatasetExportError(DatasetError):
+    """
+    Exception raised when exporting a dataset to a different format fails.
+    This exception occurs when trying to export a dataset to a file format
+    (like CSV, SQLite, etc.) and the operation fails.
+    Examples:
+        ```python
+        dataset = Dataset([{"a": complex(1, 2)}])
+        dataset.to_csv("file.csv")  # Raises DatasetExportError (complex not serializable)
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+class DatasetImportError(DatasetError):
+    """
+    Exception raised when importing data from an external source fails.
+    This exception occurs when trying to import data from an external source or format
+    (like CSV, JSON, etc.) and the operation fails, often due to missing dependencies
+    or format issues.
+    Examples:
+        ```python
+        # A missing dependency triggers this even on export: writing DOCX without the python-docx package
+        dataset.to_docx("file.docx")  # Raises DatasetImportError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+class DatasetRuntimeError(DatasetError):
+    """
+    Exception raised when an operation fails during runtime.
+    This exception is used for runtime errors in dataset operations,
+    typically for operations that depend on external systems or libraries
+    like R integration.
+    Examples:
+        ```python
+        # Plotting with ggplot when R is not installed
+        dataset.ggplot()  # Raises DatasetRuntimeError
+        ```
+    """
+    relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"

edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl

edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl