PyPI - edsl - Versions diffs - 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl - Mend

edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (257)

edsl/__init__.py +124 -53
edsl/__version__.py +1 -1
edsl/agents/agent.py +21 -21
edsl/agents/agent_list.py +2 -5
edsl/agents/exceptions.py +119 -5
edsl/base/__init__.py +10 -35
edsl/base/base_class.py +71 -36
edsl/base/base_exception.py +204 -0
edsl/base/data_transfer_models.py +1 -1
edsl/base/exceptions.py +94 -0
edsl/buckets/__init__.py +15 -1
edsl/buckets/bucket_collection.py +3 -4
edsl/buckets/exceptions.py +107 -0
edsl/buckets/model_buckets.py +1 -2
edsl/buckets/token_bucket.py +11 -6
edsl/buckets/token_bucket_api.py +27 -12
edsl/buckets/token_bucket_client.py +9 -7
edsl/caching/cache.py +12 -4
edsl/caching/cache_entry.py +10 -9
edsl/caching/exceptions.py +113 -7
edsl/caching/remote_cache_sync.py +6 -7
edsl/caching/sql_dict.py +20 -14
edsl/cli.py +43 -0
edsl/config/__init__.py +1 -1
edsl/config/config_class.py +32 -6
edsl/conversation/Conversation.py +8 -4
edsl/conversation/car_buying.py +1 -3
edsl/conversation/exceptions.py +58 -0
edsl/conversation/mug_negotiation.py +2 -8
edsl/coop/__init__.py +28 -6
edsl/coop/coop.py +120 -29
edsl/coop/coop_functions.py +1 -1
edsl/coop/ep_key_handling.py +1 -1
edsl/coop/exceptions.py +188 -9
edsl/coop/price_fetcher.py +5 -8
edsl/coop/utils.py +4 -6
edsl/dataset/__init__.py +5 -4
edsl/dataset/dataset.py +177 -86
edsl/dataset/dataset_operations_mixin.py +98 -76
edsl/dataset/dataset_tree.py +11 -7
edsl/dataset/display/table_display.py +0 -2
edsl/dataset/display/table_renderers.py +6 -4
edsl/dataset/exceptions.py +125 -0
edsl/dataset/file_exports.py +18 -11
edsl/dataset/r/ggplot.py +13 -6
edsl/display/__init__.py +27 -0
edsl/display/core.py +147 -0
edsl/display/plugin.py +189 -0
edsl/display/utils.py +52 -0
edsl/inference_services/__init__.py +9 -1
edsl/inference_services/available_model_cache_handler.py +1 -1
edsl/inference_services/available_model_fetcher.py +5 -6
edsl/inference_services/data_structures.py +10 -7
edsl/inference_services/exceptions.py +132 -1
edsl/inference_services/inference_service_abc.py +2 -2
edsl/inference_services/inference_services_collection.py +2 -6
edsl/inference_services/registry.py +4 -3
edsl/inference_services/service_availability.py +4 -3
edsl/inference_services/services/anthropic_service.py +4 -1
edsl/inference_services/services/aws_bedrock.py +13 -12
edsl/inference_services/services/azure_ai.py +12 -10
edsl/inference_services/services/deep_infra_service.py +1 -4
edsl/inference_services/services/deep_seek_service.py +1 -5
edsl/inference_services/services/google_service.py +7 -3
edsl/inference_services/services/groq_service.py +1 -1
edsl/inference_services/services/mistral_ai_service.py +4 -2
edsl/inference_services/services/ollama_service.py +1 -1
edsl/inference_services/services/open_ai_service.py +7 -5
edsl/inference_services/services/perplexity_service.py +6 -2
edsl/inference_services/services/test_service.py +8 -7
edsl/inference_services/services/together_ai_service.py +2 -3
edsl/inference_services/services/xai_service.py +1 -1
edsl/instructions/__init__.py +1 -1
edsl/instructions/change_instruction.py +7 -5
edsl/instructions/exceptions.py +61 -0
edsl/instructions/instruction.py +6 -2
edsl/instructions/instruction_collection.py +6 -4
edsl/instructions/instruction_handler.py +12 -15
edsl/interviews/ReportErrors.py +0 -3
edsl/interviews/__init__.py +9 -2
edsl/interviews/answering_function.py +11 -13
edsl/interviews/exception_tracking.py +15 -8
edsl/interviews/exceptions.py +79 -0
edsl/interviews/interview.py +33 -30
edsl/interviews/interview_status_dictionary.py +4 -2
edsl/interviews/interview_status_log.py +2 -1
edsl/interviews/interview_task_manager.py +5 -5
edsl/interviews/request_token_estimator.py +5 -2
edsl/interviews/statistics.py +3 -4
edsl/invigilators/__init__.py +7 -1
edsl/invigilators/exceptions.py +79 -0
edsl/invigilators/invigilator_base.py +0 -1
edsl/invigilators/invigilators.py +9 -13
edsl/invigilators/prompt_constructor.py +1 -5
edsl/invigilators/prompt_helpers.py +8 -4
edsl/invigilators/question_instructions_prompt_builder.py +1 -1
edsl/invigilators/question_option_processor.py +9 -5
edsl/invigilators/question_template_replacements_builder.py +3 -2
edsl/jobs/__init__.py +42 -5
edsl/jobs/async_interview_runner.py +25 -23
edsl/jobs/check_survey_scenario_compatibility.py +11 -10
edsl/jobs/data_structures.py +8 -5
edsl/jobs/exceptions.py +177 -8
edsl/jobs/fetch_invigilator.py +1 -1
edsl/jobs/jobs.py +74 -69
edsl/jobs/jobs_checks.py +6 -7
edsl/jobs/jobs_component_constructor.py +4 -4
edsl/jobs/jobs_pricing_estimation.py +4 -3
edsl/jobs/jobs_remote_inference_logger.py +5 -4
edsl/jobs/jobs_runner_asyncio.py +3 -4
edsl/jobs/jobs_runner_status.py +8 -9
edsl/jobs/remote_inference.py +27 -24
edsl/jobs/results_exceptions_handler.py +10 -7
edsl/key_management/__init__.py +3 -1
edsl/key_management/exceptions.py +62 -0
edsl/key_management/key_lookup.py +1 -1
edsl/key_management/key_lookup_builder.py +37 -14
edsl/key_management/key_lookup_collection.py +2 -0
edsl/language_models/__init__.py +1 -1
edsl/language_models/exceptions.py +302 -14
edsl/language_models/language_model.py +9 -8
edsl/language_models/model.py +4 -4
edsl/language_models/model_list.py +1 -1
edsl/language_models/price_manager.py +1 -1
edsl/language_models/raw_response_handler.py +14 -9
edsl/language_models/registry.py +17 -21
edsl/language_models/repair.py +0 -6
edsl/language_models/unused/fake_openai_service.py +0 -1
edsl/load_plugins.py +69 -0
edsl/logger.py +146 -0
edsl/notebooks/__init__.py +24 -1
edsl/notebooks/exceptions.py +82 -0
edsl/notebooks/notebook.py +7 -3
edsl/notebooks/notebook_to_latex.py +1 -2
edsl/plugins/__init__.py +63 -0
edsl/plugins/built_in/export_example.py +50 -0
edsl/plugins/built_in/pig_latin.py +67 -0
edsl/plugins/cli.py +372 -0
edsl/plugins/cli_typer.py +283 -0
edsl/plugins/exceptions.py +31 -0
edsl/plugins/hookspec.py +51 -0
edsl/plugins/plugin_host.py +128 -0
edsl/plugins/plugin_manager.py +633 -0
edsl/plugins/plugins_registry.py +168 -0
edsl/prompts/__init__.py +24 -1
edsl/prompts/exceptions.py +107 -5
edsl/prompts/prompt.py +15 -7
edsl/questions/HTMLQuestion.py +5 -11
edsl/questions/Quick.py +0 -1
edsl/questions/__init__.py +6 -4
edsl/questions/answer_validator_mixin.py +318 -323
edsl/questions/compose_questions.py +3 -3
edsl/questions/descriptors.py +11 -50
edsl/questions/exceptions.py +278 -22
edsl/questions/loop_processor.py +7 -5
edsl/questions/prompt_templates/question_list.jinja +3 -0
edsl/questions/question_base.py +46 -19
edsl/questions/question_base_gen_mixin.py +2 -2
edsl/questions/question_base_prompts_mixin.py +13 -7
edsl/questions/question_budget.py +503 -98
edsl/questions/question_check_box.py +660 -160
edsl/questions/question_dict.py +345 -194
edsl/questions/question_extract.py +401 -61
edsl/questions/question_free_text.py +80 -14
edsl/questions/question_functional.py +119 -9
edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
edsl/questions/question_list.py +275 -28
edsl/questions/question_matrix.py +643 -96
edsl/questions/question_multiple_choice.py +219 -51
edsl/questions/question_numerical.py +361 -32
edsl/questions/question_rank.py +401 -124
edsl/questions/question_registry.py +7 -5
edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
edsl/questions/register_questions_meta.py +2 -2
edsl/questions/response_validator_abc.py +13 -15
edsl/questions/response_validator_factory.py +10 -12
edsl/questions/templates/dict/answering_instructions.jinja +1 -0
edsl/questions/templates/rank/question_presentation.jinja +1 -1
edsl/results/__init__.py +1 -1
edsl/results/exceptions.py +141 -7
edsl/results/report.py +1 -2
edsl/results/result.py +11 -9
edsl/results/results.py +480 -321
edsl/results/results_selector.py +8 -4
edsl/scenarios/PdfExtractor.py +2 -2
edsl/scenarios/construct_download_link.py +69 -35
edsl/scenarios/directory_scanner.py +33 -14
edsl/scenarios/document_chunker.py +1 -1
edsl/scenarios/exceptions.py +238 -14
edsl/scenarios/file_methods.py +1 -1
edsl/scenarios/file_store.py +7 -3
edsl/scenarios/handlers/__init__.py +17 -0
edsl/scenarios/handlers/docx_file_store.py +0 -5
edsl/scenarios/handlers/pdf_file_store.py +0 -1
edsl/scenarios/handlers/pptx_file_store.py +0 -5
edsl/scenarios/handlers/py_file_store.py +0 -1
edsl/scenarios/handlers/sql_file_store.py +1 -4
edsl/scenarios/handlers/sqlite_file_store.py +0 -1
edsl/scenarios/handlers/txt_file_store.py +1 -1
edsl/scenarios/scenario.py +1 -3
edsl/scenarios/scenario_list.py +179 -27
edsl/scenarios/scenario_list_pdf_tools.py +1 -0
edsl/scenarios/scenario_selector.py +0 -1
edsl/surveys/__init__.py +3 -4
edsl/surveys/dag/__init__.py +4 -2
edsl/surveys/descriptors.py +1 -1
edsl/surveys/edit_survey.py +1 -0
edsl/surveys/exceptions.py +165 -9
edsl/surveys/memory/__init__.py +5 -3
edsl/surveys/memory/memory_management.py +1 -0
edsl/surveys/memory/memory_plan.py +6 -15
edsl/surveys/rules/__init__.py +5 -3
edsl/surveys/rules/rule.py +1 -2
edsl/surveys/rules/rule_collection.py +1 -1
edsl/surveys/survey.py +12 -24
edsl/surveys/survey_css.py +3 -3
edsl/surveys/survey_export.py +6 -3
edsl/surveys/survey_flow_visualization.py +10 -1
edsl/surveys/survey_simulator.py +2 -1
edsl/tasks/__init__.py +23 -1
edsl/tasks/exceptions.py +72 -0
edsl/tasks/question_task_creator.py +3 -3
edsl/tasks/task_creators.py +1 -3
edsl/tasks/task_history.py +8 -10
edsl/tasks/task_status_log.py +1 -2
edsl/tokens/__init__.py +29 -1
edsl/tokens/exceptions.py +37 -0
edsl/tokens/interview_token_usage.py +3 -2
edsl/tokens/token_usage.py +4 -3
edsl/utilities/__init__.py +21 -1
edsl/utilities/decorators.py +1 -2
edsl/utilities/markdown_to_docx.py +2 -2
edsl/utilities/markdown_to_pdf.py +1 -1
edsl/utilities/repair_functions.py +0 -1
edsl/utilities/restricted_python.py +0 -1
edsl/utilities/template_loader.py +2 -3
edsl/utilities/utilities.py +8 -29
{edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/METADATA +32 -2
edsl-0.1.51.dist-info/RECORD +365 -0
edsl-0.1.51.dist-info/entry_points.txt +3 -0
edsl/dataset/smart_objects.py +0 -96
edsl/exceptions/BaseException.py +0 -21
edsl/exceptions/__init__.py +0 -54
edsl/exceptions/configuration.py +0 -16
edsl/exceptions/general.py +0 -34
edsl/questions/derived/__init__.py +0 -0
edsl/study/ObjectEntry.py +0 -173
edsl/study/ProofOfWork.py +0 -113
edsl/study/SnapShot.py +0 -80
edsl/study/Study.py +0 -520
edsl/study/__init__.py +0 -6
edsl/utilities/interface.py +0 -135
edsl-0.1.49.dist-info/RECORD +0 -347
{edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/LICENSE +0 -0
{edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/WHEEL +0 -0

edsl/dataset/dataset.py CHANGED Viewed

@@ -1,24 +1,25 @@
 from __future__ import annotations
 import sys
 import json
 import random
 from collections import UserList
-from typing import Any, Union, Optional, TYPE_CHECKING
+from typing import Any, Union, Optional, TYPE_CHECKING, Callable
 from ..base import PersistenceMixin, HashingMixin
 from .dataset_tree import Tree
+from .exceptions import DatasetKeyError, DatasetValueError, DatasetTypeError
 from .display.table_display import TableDisplay
-from .smart_objects import FirstObject
-from .r.ggplot import GGPlotMethod
+#from .smart_objects import FirstObject
 from .dataset_operations_mixin import DatasetOperationsMixin
 if TYPE_CHECKING:
     from ..surveys import Survey
-    from ..questions.QuestionBase import QuestionBase
+    from ..questions import QuestionBase
+    from ..jobs import Job  # noqa: F401
 class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
     """
@@ -76,6 +77,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
             Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}])
         """
         super().__init__(data)
+        #self.data = data
         self.print_parameters = print_parameters
@@ -118,19 +120,9 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
             new_data.append({key: values[:n]})
         return Dataset(new_data)
-    def expand(self, field):
-        return self.to_scenario_list().expand(field)
+    # def expand(self, field):
+    #     return self.to_scenario_list().expand(field)
-    def view(self):
-        from perspective.widget import PerspectiveWidget
-        w = PerspectiveWidget(
-            self.to_pandas(),
-            plugin="Datagrid",
-            aggregates={"datetime": "any"},
-            sort=[["date", "desc"]],
-        )
-        return w
     def keys(self) -> list[str]:
         """Return the keys of the dataset.
@@ -212,7 +204,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         values = value_dict["value"]
         if not (len(rows) == len(keys) == len(values)):
-            raise ValueError("All input arrays must have the same length")
+            raise DatasetValueError("All input arrays must have the same length")
         # Get unique keys and row indices
         unique_keys = sorted(set(keys))
@@ -272,12 +264,6 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         >>> d = Dataset([{'a.b':[1,2,3,4]}])
         >>> d._key_to_value('a.b')
         [1, 2, 3, 4]
-        >>> d._key_to_value('a')
-        Traceback (most recent call last):
-        ...
-        KeyError: "Key 'a' not found in any of the dictionaries."
         """
         potential_matches = []
         for data_dict in self.data:
@@ -290,11 +276,13 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         if len(potential_matches) == 1:
             return potential_matches[0][1]
         elif len(potential_matches) > 1:
-            raise KeyError(
+            from .exceptions import DatasetKeyError
+            raise DatasetKeyError(
                 f"Key '{key}' found in more than one location: {[m[0] for m in potential_matches]}"
             )
-        raise KeyError(f"Key '{key}' not found in any of the dictionaries.")
+        from .exceptions import DatasetKeyError
+        raise DatasetKeyError(f"Key '{key}' not found in any of the dictionaries.")
     def first(self) -> dict[str, Any]:
         """Get the first value of the first key in the first dictionary.
@@ -308,7 +296,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
             """Get the values of the first key in the dictionary."""
             return list(d.values())[0]
-        return FirstObject(get_values(self.data[0])[0])
+        return get_values(self.data[0])[0]
     def latex(self, **kwargs):
         return self.table().latex()
@@ -338,7 +326,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         """
         if "format" in kwargs:
             if kwargs["format"] not in ["html", "markdown", "rich", "latex"]:
-                raise ValueError(f"Format '{kwargs['format']}' not supported.")
+                raise DatasetValueError(f"Format '{kwargs['format']}' not supported.")
             # If rich format is requested, set tablefmt accordingly
             if kwargs["format"] == "rich":
@@ -371,10 +359,18 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         merged_df = df1.merge(df2, how="left", left_on=by_x, right_on=by_y)
         return Dataset.from_pandas_dataframe(merged_df)
-    def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
-        """Return a new dataset with the observations transformed by the given survey or question."""
-        from edsl.surveys import Survey
-        from edsl.questions.QuestionBase import QuestionBase
+    def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Job":
+        """Return a new dataset with the observations transformed by the given survey or question.
+        >>> d = Dataset([{'person_name':["John"]}])
+        >>> from edsl import QuestionFreeText
+        >>> q = QuestionFreeText(question_text = "How are you, {{ person_name ?}}?", question_name = "how_feeling")
+        >>> jobs = d.to(q)
+        >>> isinstance(jobs, object)
+        True
+        """
+        from ..surveys import Survey
+        from ..questions import QuestionBase
         if isinstance(survey_or_question, Survey):
             return survey_or_question.by(self.to_scenario_list())
@@ -396,9 +392,10 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         """
         for key in keys:
             if key not in self.keys():
-                raise ValueError(f"Key '{key}' not found in the dataset."
-                                 f"Available keys: {self.keys()}"
-                                 )
+                from .exceptions import DatasetValueError
+                raise DatasetValueError(f"Key '{key}' not found in the dataset. "
+                                        f"Available keys: {self.keys()}"
+                                       )
         if isinstance(keys, str):
             keys = [keys]
@@ -442,7 +439,11 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         return self
-    def expand(self, field):
+    def expand_field(self, field):
+        """Expand a field in the dataset.
+        Renamed to avoid conflict with the expand method defined earlier.
+        """
         return self.to_scenario_list().expand(field).to_dataset()
     def sample(
@@ -462,21 +463,18 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         >>> d = Dataset([{'a.b':[1,2,3,4]}])
         >>> d.sample(n=2, seed=0, with_replacement=True)
         Dataset([{'a.b': [4, 4]}])
-        >>> d.sample(n = 10, seed=0, with_replacement=False)
-        Traceback (most recent call last):
-        ...
-        ValueError: Sample size cannot be greater than the number of available elements when sampling without replacement.
         """
         if seed is not None:
             random.seed(seed)
         # Validate the input for sampling parameters
         if n is None and frac is None:
-            raise ValueError("Either 'n' or 'frac' must be provided for sampling.")
+            from .exceptions import DatasetValueError
+            raise DatasetValueError("Either 'n' or 'frac' must be provided for sampling.")
         if n is not None and frac is not None:
-            raise ValueError("Only one of 'n' or 'frac' should be specified.")
+            from .exceptions import DatasetValueError
+            raise DatasetValueError("Only one of 'n' or 'frac' should be specified.")
         # Get the length of the lists from the first entry
         first_key, first_values = list(self[0].items())[0]
@@ -487,7 +485,8 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
             n = int(total_length * frac)
         if not with_replacement and n > total_length:
-            raise ValueError(
+            from .exceptions import DatasetValueError
+            raise DatasetValueError(
                 "Sample size cannot be greater than the number of available elements when sampling without replacement."
             )
@@ -504,60 +503,72 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         return self
-    def order_by(self, sort_key: str, reverse: bool = False) -> Dataset:
-        """Return a new dataset with the observations sorted by the given key.
-        :param sort_key: The key to sort the observations by.
-        :param reverse: Whether to sort in reverse order.
-        >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[4,3,2,1]}])
-        >>> d.order_by('a')
-        Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
-        >>> d.order_by('a', reverse=True)
-        Dataset([{'a': [4, 3, 2, 1]}, {'b': [1, 2, 3, 4]}])
-        >>> d = Dataset([{'X.a':[1,2,3,4]}, {'X.b':[4,3,2,1]}])
-        >>> d.order_by('a')
-        Dataset([{'X.a': [1, 2, 3, 4]}, {'X.b': [4, 3, 2, 1]}])
+    def get_sort_indices(self, lst: list[Any], reverse: bool = False, use_numpy: bool = True) -> list[int]:
         """
-        import numpy as np
+        Return the indices that would sort the list, using either numpy or pure Python.
+        None values are placed at the end of the sorted list.
-        def sort_indices(lst: list[Any]) -> list[int]:
-            """
-            Return the indices that would sort the list.
+        Args:
+            lst: The list to be sorted
+            reverse: Whether to sort in descending order
+            use_numpy: Whether to use numpy implementation (falls back to pure Python if numpy is unavailable)
-            :param lst: The list to be sorted.
-            :return: A list of indices that would sort the list.
-            """
-            indices = np.argsort(lst).tolist()
-            if reverse:
-                indices.reverse()
-            return indices
+        Returns:
+            A list of indices that would sort the list
+        """
+        if use_numpy:
+            try:
+                import numpy as np
+                # Convert list to numpy array
+                arr = np.array(lst, dtype=object)
+                # Get mask of non-None values
+                mask = ~(arr is None)
+                # Get indices of non-None and None values
+                non_none_indices = np.where(mask)[0]
+                none_indices = np.where(~mask)[0]
+                # Sort non-None values
+                sorted_indices = non_none_indices[np.argsort(arr[mask])]
+                # Combine sorted non-None indices with None indices
+                indices = np.concatenate([sorted_indices, none_indices]).tolist()
+                if reverse:
+                    # When reversing, keep None values at end
+                    indices = sorted_indices[::-1].tolist() + none_indices.tolist()
+                return indices
+            except ImportError:
+                # Fallback to pure Python if numpy is not available
+                pass
+        # Pure Python implementation
+        enumerated = list(enumerate(lst))
+        # Sort None values to end by using (is_none, value) as sort key
+        sorted_pairs = sorted(enumerated,
+                            key=lambda x: (x[1] is None, x[1]),
+                            reverse=reverse)
+        return [index for index, _ in sorted_pairs]
+    def order_by(self, sort_key: str, reverse: bool = False, use_numpy: bool = True) -> Dataset:
+        """Return a new dataset with the observations sorted by the given key.
+        Args:
+            sort_key: The key to sort the observations by
+            reverse: Whether to sort in reverse order
+            use_numpy: Whether to use numpy for sorting (faster for large lists)
+        """
         number_found = 0
         for obs in self.data:
             key, values = list(obs.items())[0]
-            # an obseration is {'a':[1,2,3,4]}
-            # key = list(obs.keys())[0]
-            if (
-                sort_key == key or sort_key == key.split(".")[-1]
-            ):  # e.g., "age" in "scenario.age"
+            if sort_key == key or sort_key == key.split(".")[-1]:
                 relevant_values = values
                 number_found += 1
         if number_found == 0:
-            raise ValueError(f"Key '{sort_key}' not found in any of the dictionaries.")
+            raise DatasetKeyError(f"Key '{sort_key}' not found in any of the dictionaries.")
         elif number_found > 1:
-            raise ValueError(f"Key '{sort_key}' found in more than one dictionary.")
+            raise DatasetKeyError(f"Key '{sort_key}' found in more than one dictionary.")
-        # relevant_values = self._key_to_value(sort_key)
-        sort_indices_list = sort_indices(relevant_values)
+        sort_indices_list = self.get_sort_indices(relevant_values, reverse=reverse, use_numpy=use_numpy)
         new_data = []
         for observation in self.data:
-            # print(observation)
             key, values = list(observation.items())[0]
             new_values = [values[i] for i in sort_indices_list]
             new_data.append({key: new_values})
@@ -578,7 +589,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
     def table(
         self,
         *fields,
-        tablefmt: Optional[str] = None,
+        tablefmt: Optional[str] = "rich",
         max_rows: Optional[int] = None,
         pretty_labels=None,
         print_parameters: Optional[dict] = None,
@@ -637,7 +648,8 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         if max_rows is not None:
             if max_rows > len(data):
-                raise ValueError(
+                from .exceptions import DatasetValueError
+                raise DatasetValueError(
                     "max_rows cannot be greater than the number of rows in the dataset."
                 )
             last_line = data[-1]
@@ -675,6 +687,19 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
     def from_pandas_dataframe(cls, df):
         result = cls([{col: df[col].tolist()} for col in df.columns])
         return result
+    def to_dict(self) -> dict:
+        """
+        Convert the dataset to a dictionary.
+        """
+        return {'data': self.data}
+    @classmethod
+    def from_dict(cls, data: dict) -> 'Dataset':
+        """
+        Convert a dictionary to a dataset.
+        """
+        return cls(data['data'])
     def to_docx(self, output_file: str, title: str = None) -> None:
         """
@@ -726,6 +751,72 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         # Save the document
         doc.save(output_file)
+    def expand(self, field: str, number_field: bool = False) -> "Dataset":
+        """
+        Expand a field containing lists into multiple rows.
+        Args:
+            field: The field containing lists to expand
+            number_field: If True, adds a number field indicating the position in the original list
+        Returns:
+            A new Dataset with the expanded rows
+        Example:
+            >>> from edsl.dataset import Dataset
+            >>> d = Dataset([{'a': [[1, 2, 3], [4, 5, 6]]}, {'b': ['x', 'y']}])
+            >>> d.expand('a')
+            Dataset([{'a': [1, 2, 3, 4, 5, 6]}, {'b': ['x', 'x', 'x', 'y', 'y', 'y']}])
+        """
+        from collections.abc import Iterable
+        # Find the field in the dataset
+        field_data = None
+        for entry in self.data:
+            key = list(entry.keys())[0]
+            if key == field:
+                field_data = entry[key]
+                break
+        if field_data is None:
+            raise DatasetKeyError(f"Field '{field}' not found in dataset. Available fields are: {self.keys()}")
+        # Validate that the field contains lists
+        if not all(isinstance(v, list) for v in field_data):
+            raise DatasetTypeError(f"Field '{field}' must contain lists in all entries")
+        # Create new expanded data structure
+        new_data = []
+        # Process each field
+        for entry in self.data:
+            key, values = list(entry.items())[0]
+            new_values = []
+            if key == field:
+                # This is the field to expand - flatten all sublists
+                for row_values in values:
+                    if not isinstance(row_values, Iterable) or isinstance(row_values, str):
+                        row_values = [row_values]
+                    new_values.extend(row_values)
+            else:
+                # For other fields, repeat each value the appropriate number of times
+                for i, row_value in enumerate(values):
+                    expand_length = len(field_data[i]) if i < len(field_data) else 0
+                    new_values.extend([row_value] * expand_length)
+            new_data.append({key: new_values})
+        # Add number field if requested
+        if number_field:
+            number_values = []
+            for i, lst in enumerate(field_data):
+                number_values.extend(range(1, len(lst) + 1))
+            new_data.append({f"{field}_number": number_values})
+        return Dataset(new_data)
 if __name__ == "__main__":
     import doctest

edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl

edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl