PyPI - edsl - Versions diffs - 0.1.50__py3-none-any.whl → 0.1.52__py3-none-any.whl - Mend

edsl 0.1.50__py3-none-any.whl → 0.1.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (119) hide show

edsl/__init__.py +45 -34
edsl/__version__.py +1 -1
edsl/base/base_exception.py +2 -2
edsl/buckets/bucket_collection.py +1 -1
edsl/buckets/exceptions.py +32 -0
edsl/buckets/token_bucket_api.py +26 -10
edsl/caching/cache.py +5 -2
edsl/caching/remote_cache_sync.py +5 -5
edsl/caching/sql_dict.py +12 -11
edsl/config/__init__.py +1 -1
edsl/config/config_class.py +4 -2
edsl/conversation/Conversation.py +9 -5
edsl/conversation/car_buying.py +1 -3
edsl/conversation/mug_negotiation.py +2 -6
edsl/coop/__init__.py +11 -8
edsl/coop/coop.py +15 -13
edsl/coop/coop_functions.py +1 -1
edsl/coop/ep_key_handling.py +1 -1
edsl/coop/price_fetcher.py +2 -2
edsl/coop/utils.py +2 -2
edsl/dataset/dataset.py +144 -63
edsl/dataset/dataset_operations_mixin.py +14 -6
edsl/dataset/dataset_tree.py +3 -3
edsl/dataset/display/table_renderers.py +6 -3
edsl/dataset/file_exports.py +4 -4
edsl/dataset/r/ggplot.py +3 -3
edsl/inference_services/available_model_fetcher.py +2 -2
edsl/inference_services/data_structures.py +5 -5
edsl/inference_services/inference_service_abc.py +1 -1
edsl/inference_services/inference_services_collection.py +1 -1
edsl/inference_services/service_availability.py +3 -3
edsl/inference_services/services/azure_ai.py +3 -3
edsl/inference_services/services/google_service.py +1 -1
edsl/inference_services/services/test_service.py +1 -1
edsl/instructions/change_instruction.py +5 -4
edsl/instructions/instruction.py +1 -0
edsl/instructions/instruction_collection.py +5 -4
edsl/instructions/instruction_handler.py +10 -8
edsl/interviews/answering_function.py +20 -21
edsl/interviews/exception_tracking.py +3 -2
edsl/interviews/interview.py +1 -1
edsl/interviews/interview_status_dictionary.py +1 -1
edsl/interviews/interview_task_manager.py +7 -4
edsl/interviews/request_token_estimator.py +3 -2
edsl/interviews/statistics.py +2 -2
edsl/invigilators/invigilators.py +34 -6
edsl/jobs/__init__.py +39 -2
edsl/jobs/async_interview_runner.py +1 -1
edsl/jobs/check_survey_scenario_compatibility.py +5 -5
edsl/jobs/data_structures.py +2 -2
edsl/jobs/html_table_job_logger.py +494 -257
edsl/jobs/jobs.py +2 -2
edsl/jobs/jobs_checks.py +5 -5
edsl/jobs/jobs_component_constructor.py +2 -2
edsl/jobs/jobs_pricing_estimation.py +1 -1
edsl/jobs/jobs_runner_asyncio.py +2 -2
edsl/jobs/jobs_status_enums.py +1 -0
edsl/jobs/remote_inference.py +47 -13
edsl/jobs/results_exceptions_handler.py +2 -2
edsl/language_models/language_model.py +151 -145
edsl/notebooks/__init__.py +24 -1
edsl/notebooks/exceptions.py +82 -0
edsl/notebooks/notebook.py +7 -3
edsl/notebooks/notebook_to_latex.py +1 -1
edsl/prompts/__init__.py +23 -2
edsl/prompts/prompt.py +1 -1
edsl/questions/__init__.py +4 -4
edsl/questions/answer_validator_mixin.py +0 -5
edsl/questions/compose_questions.py +2 -2
edsl/questions/descriptors.py +1 -1
edsl/questions/question_base.py +32 -3
edsl/questions/question_base_prompts_mixin.py +4 -4
edsl/questions/question_budget.py +503 -102
edsl/questions/question_check_box.py +658 -156
edsl/questions/question_dict.py +176 -2
edsl/questions/question_extract.py +401 -61
edsl/questions/question_free_text.py +77 -9
edsl/questions/question_functional.py +118 -9
edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
edsl/questions/question_list.py +246 -26
edsl/questions/question_matrix.py +586 -73
edsl/questions/question_multiple_choice.py +213 -47
edsl/questions/question_numerical.py +360 -29
edsl/questions/question_rank.py +401 -124
edsl/questions/question_registry.py +3 -3
edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
edsl/questions/register_questions_meta.py +2 -1
edsl/questions/response_validator_abc.py +6 -2
edsl/questions/response_validator_factory.py +10 -12
edsl/results/report.py +1 -1
edsl/results/result.py +7 -4
edsl/results/results.py +500 -271
edsl/results/results_selector.py +2 -2
edsl/scenarios/construct_download_link.py +3 -3
edsl/scenarios/scenario.py +1 -2
edsl/scenarios/scenario_list.py +41 -23
edsl/surveys/survey_css.py +3 -3
edsl/surveys/survey_simulator.py +2 -1
edsl/tasks/__init__.py +22 -2
edsl/tasks/exceptions.py +72 -0
edsl/tasks/task_history.py +48 -11
edsl/templates/error_reporting/base.html +37 -4
edsl/templates/error_reporting/exceptions_table.html +105 -33
edsl/templates/error_reporting/interview_details.html +130 -126
edsl/templates/error_reporting/overview.html +21 -25
edsl/templates/error_reporting/report.css +215 -46
edsl/templates/error_reporting/report.js +122 -20
edsl/tokens/__init__.py +27 -1
edsl/tokens/exceptions.py +37 -0
edsl/tokens/interview_token_usage.py +3 -2
edsl/tokens/token_usage.py +4 -3
{edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/METADATA +1 -1
{edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/RECORD +118 -116
edsl/questions/derived/__init__.py +0 -0
{edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/LICENSE +0 -0
{edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/WHEEL +0 -0
{edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/entry_points.txt +0 -0

edsl/coop/coop.py CHANGED Viewed

@@ -180,7 +180,7 @@ class Coop(CoopFunctionsMixin):
                     timeout=timeout,
                 )
             else:
-                from edsl.coop.exceptions import CoopInvalidMethodError
+                from .exceptions import CoopInvalidMethodError
                 raise CoopInvalidMethodError(f"Invalid {method=}.")
         except requests.ConnectionError:
@@ -303,7 +303,7 @@ class Coop(CoopFunctionsMixin):
                 message = root.find("Message").text
                 details = root.find("Details").text
             except Exception:
-                from edsl.coop.exceptions import CoopServerResponseError
+                from .exceptions import CoopServerResponseError
                 raise CoopServerResponseError(
                     f"Server returned status code {response.status_code}. "
@@ -311,7 +311,7 @@ class Coop(CoopFunctionsMixin):
                     f"The server response was: {response.text}"
                 )
-            from edsl.coop.exceptions import CoopServerResponseError
+            from .exceptions import CoopServerResponseError
             raise CoopServerResponseError(
                 f"An error occurred: {code} - {message} - {details}"
@@ -538,7 +538,7 @@ class Coop(CoopFunctionsMixin):
             if response_json.get("upload_signed_url"):
                 signed_url = response_json.get("upload_signed_url")
             else:
-                from edsl.coop.exceptions import CoopResponseError
+                from .exceptions import CoopResponseError
                 raise CoopResponseError("No signed url was provided received")
@@ -551,7 +551,7 @@ class Coop(CoopFunctionsMixin):
                 "file_store_upload_signed_url"
             )
             if file_store_metadata and not file_store_upload_signed_url:
-                from edsl.coop.exceptions import CoopResponseError
+                from .exceptions import CoopResponseError
                 raise CoopResponseError("No file store signed url provided.")
             elif file_store_metadata:
@@ -659,13 +659,15 @@ class Coop(CoopFunctionsMixin):
             json_string = object_data.text
         object_type = response.json().get("object_type")
         if expected_object_type and object_type != expected_object_type:
-            from edsl.coop.exceptions import CoopObjectTypeError
+            from .exceptions import CoopObjectTypeError
             raise CoopObjectTypeError(
                 f"Expected {expected_object_type=} but got {object_type=}"
             )
         edsl_class = ObjectRegistry.object_type_to_edsl_class.get(object_type)
         object = edsl_class.from_dict(json.loads(json_string))
+        if object_type == "results":
+            object.initialize_cache_from_results()
         return object
     def get_all(self, object_type: ObjectType) -> list[dict[str, Any]]:
@@ -754,7 +756,7 @@ class Coop(CoopFunctionsMixin):
             and value is None
             and alias is None
         ):
-            from edsl.coop.exceptions import CoopPatchError
+            from .exceptions import CoopPatchError
             raise CoopPatchError("Nothing to patch.")
@@ -887,7 +889,7 @@ class Coop(CoopFunctionsMixin):
         [CacheEntry(...), CacheEntry(...), ...]
         """
         if job_uuid is None:
-            from edsl.coop.exceptions import CoopValueError
+            from .exceptions import CoopValueError
             raise CoopValueError("Must provide a job_uuid.")
         response = self._send_server_request(
@@ -917,7 +919,7 @@ class Coop(CoopFunctionsMixin):
         [CacheEntry(...), CacheEntry(...), ...]
         """
         if select_keys is None or len(select_keys) == 0:
-            from edsl.coop.exceptions import CoopValueError
+            from .exceptions import CoopValueError
             raise CoopValueError("Must provide a non-empty list of select_keys.")
         response = self._send_server_request(
@@ -1182,7 +1184,7 @@ class Coop(CoopFunctionsMixin):
             ...     print(f"Results available at: {job_status['results_url']}")
         """
         if job_uuid is None and results_uuid is None:
-            from edsl.coop.exceptions import CoopValueError
+            from .exceptions import CoopValueError
             raise CoopValueError("Either job_uuid or results_uuid must be provided.")
         elif job_uuid is not None:
@@ -1258,7 +1260,7 @@ class Coop(CoopFunctionsMixin):
         elif isinstance(input, Survey):
             job = Jobs(survey=input)
         else:
-            from edsl.coop.exceptions import CoopTypeError
+            from .exceptions import CoopTypeError
             raise CoopTypeError("Input must be either a Job or a Survey.")
@@ -1395,7 +1397,7 @@ class Coop(CoopFunctionsMixin):
         elif CONFIG.get("EDSL_FETCH_TOKEN_PRICES") == "False":
             return {}
         else:
-            from edsl.coop.exceptions import CoopValueError
+            from .exceptions import CoopValueError
             raise CoopValueError(
                 "Invalid EDSL_FETCH_TOKEN_PRICES value---should be 'True' or 'False'."
@@ -1553,7 +1555,7 @@ class Coop(CoopFunctionsMixin):
         api_key = self._poll_for_api_key(edsl_auth_token)
         if api_key is None:
-            from edsl.coop.exceptions import CoopTimeoutError
+            from .exceptions import CoopTimeoutError
             raise CoopTimeoutError("Timed out waiting for login. Please try again.")

edsl/coop/coop_functions.py CHANGED Viewed

@@ -1,6 +1,6 @@
 class CoopFunctionsMixin:
     def better_names(self, existing_names):
-        from edsl import QuestionList, Scenario
+        from .. import QuestionList, Scenario
         s = Scenario({"existing_names": existing_names})
         q = QuestionList(

edsl/coop/ep_key_handling.py CHANGED Viewed

@@ -70,7 +70,7 @@ class ExpectedParrotKeyHandler:
     def ok_to_ask_to_store(self):
         """Check if it's okay to ask the user to store the key."""
-        from edsl.config import CONFIG
+        from ..config import CONFIG
         if CONFIG.get("EDSL_RUN_MODE") != "production":
             return False

edsl/coop/price_fetcher.py CHANGED Viewed

@@ -7,6 +7,7 @@ that price information is only fetched once and then cached for efficiency.
 """
 import requests
+import os
 from typing import Dict, Tuple, Any
@@ -82,7 +83,6 @@ class PriceFetcher:
         if self._cached_prices is not None:
             return self._cached_prices
-        import os
         from ..config import CONFIG
         try:
@@ -120,4 +120,4 @@ class PriceFetcher:
         except requests.RequestException:
             # Silently handle errors and return empty dict
             # print(f"An error occurred: {e}")
-            return {}
+            return {}

edsl/coop/utils.py CHANGED Viewed

@@ -129,7 +129,7 @@ class ObjectRegistry:
         # Look up the object type
         object_type = cls.edsl_class_to_object_type.get(edsl_class_name)
         if object_type is None:
-            from edsl.coop.exceptions import CoopValueError
+            from .exceptions import CoopValueError
             raise CoopValueError(f"Object type not found for {edsl_object=}")
         return object_type
@@ -152,7 +152,7 @@ class ObjectRegistry:
         """
         EDSL_class = cls.object_type_to_edsl_class.get(object_type)
         if EDSL_class is None:
-            from edsl.coop.exceptions import CoopValueError
+            from .exceptions import CoopValueError
             raise CoopValueError(f"EDSL class not found for {object_type=}")
         return EDSL_class

edsl/dataset/dataset.py CHANGED Viewed

@@ -1,5 +1,3 @@
 from __future__ import annotations
 import sys
 import json
@@ -10,7 +8,8 @@ from typing import Any, Union, Optional, TYPE_CHECKING, Callable
 from ..base import PersistenceMixin, HashingMixin
 from .dataset_tree import Tree
-from .exceptions import DatasetKeyError, DatasetValueError
+from .exceptions import DatasetKeyError, DatasetValueError, DatasetTypeError
 from .display.table_display import TableDisplay
 #from .smart_objects import FirstObject
@@ -121,19 +120,9 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
             new_data.append({key: values[:n]})
         return Dataset(new_data)
-    def expand(self, field):
-        return self.to_scenario_list().expand(field)
-    # def view(self):
-    #     from perspective.widget import PerspectiveWidget
+    # def expand(self, field):
+    #     return self.to_scenario_list().expand(field)
-    #     w = PerspectiveWidget(
-    #         self.to_pandas(),
-    #         plugin="Datagrid",
-    #         aggregates={"datetime": "any"},
-    #         sort=[["date", "desc"]],
-    #     )
-    #     return w
     def keys(self) -> list[str]:
         """Return the keys of the dataset.
@@ -287,12 +276,12 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         if len(potential_matches) == 1:
             return potential_matches[0][1]
         elif len(potential_matches) > 1:
-            from edsl.dataset.exceptions import DatasetKeyError
+            from .exceptions import DatasetKeyError
             raise DatasetKeyError(
                 f"Key '{key}' found in more than one location: {[m[0] for m in potential_matches]}"
             )
-        from edsl.dataset.exceptions import DatasetKeyError
+        from .exceptions import DatasetKeyError
         raise DatasetKeyError(f"Key '{key}' not found in any of the dictionaries.")
     def first(self) -> dict[str, Any]:
@@ -376,11 +365,12 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         >>> d = Dataset([{'person_name':["John"]}])
         >>> from edsl import QuestionFreeText
         >>> q = QuestionFreeText(question_text = "How are you, {{ person_name ?}}?", question_name = "how_feeling")
-        >>> d.to(q)
-        Jobs(...)
+        >>> jobs = d.to(q)
+        >>> isinstance(jobs, object)
+        True
         """
-        from edsl.surveys import Survey
-        from edsl.questions import QuestionBase
+        from ..surveys import Survey
+        from ..questions import QuestionBase
         if isinstance(survey_or_question, Survey):
             return survey_or_question.by(self.to_scenario_list())
@@ -402,7 +392,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         """
         for key in keys:
             if key not in self.keys():
-                from edsl.dataset.exceptions import DatasetValueError
+                from .exceptions import DatasetValueError
                 raise DatasetValueError(f"Key '{key}' not found in the dataset. "
                                         f"Available keys: {self.keys()}"
                                        )
@@ -479,11 +469,11 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         # Validate the input for sampling parameters
         if n is None and frac is None:
-            from edsl.dataset.exceptions import DatasetValueError
+            from .exceptions import DatasetValueError
             raise DatasetValueError("Either 'n' or 'frac' must be provided for sampling.")
         if n is not None and frac is not None:
-            from edsl.dataset.exceptions import DatasetValueError
+            from .exceptions import DatasetValueError
             raise DatasetValueError("Only one of 'n' or 'frac' should be specified.")
         # Get the length of the lists from the first entry
@@ -495,7 +485,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
             n = int(total_length * frac)
         if not with_replacement and n > total_length:
-            from edsl.dataset.exceptions import DatasetValueError
+            from .exceptions import DatasetValueError
             raise DatasetValueError(
                 "Sample size cannot be greater than the number of available elements when sampling without replacement."
             )
@@ -513,47 +503,61 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         return self
-    def order_by(self, sort_key: str, reverse: bool = False) -> Dataset:
-        """Return a new dataset with the observations sorted by the given key.
-        :param sort_key: The key to sort the observations by.
-        :param reverse: Whether to sort in reverse order.
-        >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[4,3,2,1]}])
-        >>> d.order_by('a')
-        Dataset([{'a': [1, 2, 3, 4]}, {'b': [4, 3, 2, 1]}])
-        >>> d.order_by('a', reverse=True)
-        Dataset([{'a': [4, 3, 2, 1]}, {'b': [1, 2, 3, 4]}])
-        >>> d = Dataset([{'X.a':[1,2,3,4]}, {'X.b':[4,3,2,1]}])
-        >>> d.order_by('a')
-        Dataset([{'X.a': [1, 2, 3, 4]}, {'X.b': [4, 3, 2, 1]}])
+    def get_sort_indices(self, lst: list[Any], reverse: bool = False, use_numpy: bool = True) -> list[int]:
         """
-        import numpy as np
+        Return the indices that would sort the list, using either numpy or pure Python.
+        None values are placed at the end of the sorted list.
-        def sort_indices(lst: list[Any]) -> list[int]:
-            """
-            Return the indices that would sort the list.
+        Args:
+            lst: The list to be sorted
+            reverse: Whether to sort in descending order
+            use_numpy: Whether to use numpy implementation (falls back to pure Python if numpy is unavailable)
-            :param lst: The list to be sorted.
-            :return: A list of indices that would sort the list.
-            """
-            indices = np.argsort(lst).tolist()
-            if reverse:
-                indices.reverse()
-            return indices
+        Returns:
+            A list of indices that would sort the list
+        """
+        if use_numpy:
+            try:
+                import numpy as np
+                # Convert list to numpy array
+                arr = np.array(lst, dtype=object)
+                # Get mask of non-None values
+                mask = ~(arr is None)
+                # Get indices of non-None and None values
+                non_none_indices = np.where(mask)[0]
+                none_indices = np.where(~mask)[0]
+                # Sort non-None values
+                sorted_indices = non_none_indices[np.argsort(arr[mask])]
+                # Combine sorted non-None indices with None indices
+                indices = np.concatenate([sorted_indices, none_indices]).tolist()
+                if reverse:
+                    # When reversing, keep None values at end
+                    indices = sorted_indices[::-1].tolist() + none_indices.tolist()
+                return indices
+            except ImportError:
+                # Fallback to pure Python if numpy is not available
+                pass
+        # Pure Python implementation
+        enumerated = list(enumerate(lst))
+        # Sort None values to end by using (is_none, value) as sort key
+        sorted_pairs = sorted(enumerated,
+                            key=lambda x: (x[1] is None, x[1]),
+                            reverse=reverse)
+        return [index for index, _ in sorted_pairs]
+    def order_by(self, sort_key: str, reverse: bool = False, use_numpy: bool = True) -> Dataset:
+        """Return a new dataset with the observations sorted by the given key.
+        Args:
+            sort_key: The key to sort the observations by
+            reverse: Whether to sort in reverse order
+            use_numpy: Whether to use numpy for sorting (faster for large lists)
+        """
         number_found = 0
         for obs in self.data:
             key, values = list(obs.items())[0]
-            # an obseration is {'a':[1,2,3,4]}
-            # key = list(obs.keys())[0]
-            if (
-                sort_key == key or sort_key == key.split(".")[-1]
-            ):  # e.g., "age" in "scenario.age"
+            if sort_key == key or sort_key == key.split(".")[-1]:
                 relevant_values = values
                 number_found += 1
@@ -562,11 +566,9 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         elif number_found > 1:
             raise DatasetKeyError(f"Key '{sort_key}' found in more than one dictionary.")
-        # relevant_values = self._key_to_value(sort_key)
-        sort_indices_list = sort_indices(relevant_values)
+        sort_indices_list = self.get_sort_indices(relevant_values, reverse=reverse, use_numpy=use_numpy)
         new_data = []
         for observation in self.data:
-            # print(observation)
             key, values = list(observation.items())[0]
             new_values = [values[i] for i in sort_indices_list]
             new_data.append({key: new_values})
@@ -646,7 +648,7 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         if max_rows is not None:
             if max_rows > len(data):
-                from edsl.dataset.exceptions import DatasetValueError
+                from .exceptions import DatasetValueError
                 raise DatasetValueError(
                     "max_rows cannot be greater than the number of rows in the dataset."
                 )
@@ -685,6 +687,19 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
     def from_pandas_dataframe(cls, df):
         result = cls([{col: df[col].tolist()} for col in df.columns])
         return result
+    def to_dict(self) -> dict:
+        """
+        Convert the dataset to a dictionary.
+        """
+        return {'data': self.data}
+    @classmethod
+    def from_dict(cls, data: dict) -> 'Dataset':
+        """
+        Convert a dictionary to a dataset.
+        """
+        return cls(data['data'])
     def to_docx(self, output_file: str, title: str = None) -> None:
         """
@@ -736,6 +751,72 @@ class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
         # Save the document
         doc.save(output_file)
+    def expand(self, field: str, number_field: bool = False) -> "Dataset":
+        """
+        Expand a field containing lists into multiple rows.
+        Args:
+            field: The field containing lists to expand
+            number_field: If True, adds a number field indicating the position in the original list
+        Returns:
+            A new Dataset with the expanded rows
+        Example:
+            >>> from edsl.dataset import Dataset
+            >>> d = Dataset([{'a': [[1, 2, 3], [4, 5, 6]]}, {'b': ['x', 'y']}])
+            >>> d.expand('a')
+            Dataset([{'a': [1, 2, 3, 4, 5, 6]}, {'b': ['x', 'x', 'x', 'y', 'y', 'y']}])
+        """
+        from collections.abc import Iterable
+        # Find the field in the dataset
+        field_data = None
+        for entry in self.data:
+            key = list(entry.keys())[0]
+            if key == field:
+                field_data = entry[key]
+                break
+        if field_data is None:
+            raise DatasetKeyError(f"Field '{field}' not found in dataset. Available fields are: {self.keys()}")
+        # Validate that the field contains lists
+        if not all(isinstance(v, list) for v in field_data):
+            raise DatasetTypeError(f"Field '{field}' must contain lists in all entries")
+        # Create new expanded data structure
+        new_data = []
+        # Process each field
+        for entry in self.data:
+            key, values = list(entry.items())[0]
+            new_values = []
+            if key == field:
+                # This is the field to expand - flatten all sublists
+                for row_values in values:
+                    if not isinstance(row_values, Iterable) or isinstance(row_values, str):
+                        row_values = [row_values]
+                    new_values.extend(row_values)
+            else:
+                # For other fields, repeat each value the appropriate number of times
+                for i, row_value in enumerate(values):
+                    expand_length = len(field_data[i]) if i < len(field_data) else 0
+                    new_values.extend([row_value] * expand_length)
+            new_data.append({key: new_values})
+        # Add number field if requested
+        if number_field:
+            number_values = []
+            for i, lst in enumerate(field_data):
+                number_values.extend(range(1, len(lst) + 1))
+            new_data.append({f"{field}_number": number_values})
+        return Dataset(new_data)
 if __name__ == "__main__":
     import doctest

edsl/dataset/dataset_operations_mixin.py CHANGED Viewed

@@ -184,6 +184,13 @@ class DataOperationsBase:
                     )
         return _num_observations
+    def chart(self):
+        """
+        Create a chart from the results.
+        """
+        import altair as alt
+        return alt.Chart(self.to_pandas(remove_prefix=True))
     def make_tabular(
         self, remove_prefix: bool, pretty_labels: Optional[dict] = None
@@ -538,13 +545,14 @@ class DataOperationsBase:
         >>> r.select('how_feeling').to_scenario_list()
         ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
         """
-        from edsl.scenarios import ScenarioList, Scenario
+        from ..scenarios import ScenarioList, Scenario
         list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
         scenarios = []
         for d in list_of_dicts:
             scenarios.append(Scenario(d))
         return ScenarioList(scenarios)
     def to_agent_list(self, remove_prefix: bool = True):
         """Convert the results to a list of dictionaries, one per agent.
@@ -556,7 +564,7 @@ class DataOperationsBase:
         >>> r.select('how_feeling').to_agent_list()
         AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
         """
-        from edsl.agents import Agent, AgentList
+        from ..agents import Agent, AgentList
         list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
         agents = []
@@ -665,7 +673,7 @@ class DataOperationsBase:
     ):
         import os
         import tempfile
-        from edsl.utilities.utilities import is_notebook
+        from ..utilities.utilities import is_notebook
         from IPython.display import HTML, display
         df = self.to_pandas()
@@ -799,7 +807,7 @@ class DataOperationsBase:
             from docx.shared import Pt
             import json
         except ImportError:
-            from edsl.dataset.exceptions import DatasetImportError
+            from .exceptions import DatasetImportError
             raise DatasetImportError("The python-docx package is required for DOCX export. Install it with 'pip install python-docx'.")
         doc = Document()
@@ -871,7 +879,7 @@ class DataOperationsBase:
             >>> isinstance(doc, object)
             True
         """
-        from edsl.utilities.utilities import is_notebook
+        from ..utilities.utilities import is_notebook
         # Prepare the data for the report
         field_data, num_obs, fields, header_fields = self._prepare_report_data(
@@ -1076,7 +1084,7 @@ class DataOperationsBase:
         # Check if the field is ambiguous
         if len(matching_entries) > 1:
             matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
-            from edsl.dataset.exceptions import DatasetValueError
+            from .exceptions import DatasetValueError
             raise DatasetValueError(
                 f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
                 f"Please specify the full column name to flatten."

edsl/dataset/dataset_tree.py CHANGED Viewed

@@ -51,7 +51,7 @@ class Tree:
         else:
             if not set(node_order).issubset(set(self.data.keys())):
                 invalid_keys = set(node_order) - set(self.data.keys())
-                from edsl.dataset.exceptions import DatasetValueError
+                from .exceptions import DatasetValueError
                 raise DatasetValueError(f"Invalid keys in node_order: {invalid_keys}")
         self.root = TreeNode()
@@ -130,7 +130,7 @@ class Tree:
         doc_buffer.seek(0)
         base64_string = base64.b64encode(doc_buffer.getvalue()).decode("utf-8")
-        from edsl.scenarios.FileStore import FileStore
+        from ..scenarios.file_store import FileStore
         # Create and return FileStore instance
         return FileStore(
@@ -335,7 +335,7 @@ class Tree:
         Returns:
             A string containing the markdown document, or renders markdown in notebooks.
         """
-        from edsl.utilities.utilities import is_notebook
+        from ..utilities.utilities import is_notebook
         from IPython.display import Markdown, display
         if node is None:

edsl/dataset/display/table_renderers.py CHANGED Viewed

@@ -103,9 +103,12 @@ class PandasStyleRenderer(DataTablesRendererABC):
             else:
                 df = pd.DataFrame(self.table_data.data, columns=self.table_data.headers)
-            styled_df = df.style.set_properties(
-                **{"text-align": "left"}
-            ).background_gradient()
+            styled_df = df.style.set_properties(**{
+                "text-align": "left",
+                "white-space": "pre-wrap",  # Allows text wrapping
+                "max-width": "300px",       # Maximum width before wrapping
+                "word-wrap": "break-word"   # Breaks words that exceed max-width
+            }).background_gradient()
             return f"""
             <div style="max-height: 500px; overflow-y: auto;">

edsl/dataset/file_exports.py CHANGED Viewed

@@ -40,7 +40,7 @@ class FileExport(ABC):
     def _create_filestore(self, data: Union[str, bytes]):
         """Create a FileStore instance with encoded data."""
-        from ..scenarios import FileStore
+        from ..scenarios.file_store import FileStore
         if isinstance(data, str):
             base64_string = base64.b64encode(data.encode()).decode()
         else:
@@ -203,7 +203,7 @@ class SQLiteExport(TabularExport):
                 (self.table_name,),
             )
             if cursor.fetchone():
-                from edsl.dataset.exceptions import DatasetValueError
+                from .exceptions import DatasetValueError
                 raise DatasetValueError(f"Table {self.table_name} already exists")
         # Create table
@@ -245,14 +245,14 @@ class SQLiteExport(TabularExport):
         """Validate initialization parameters."""
         valid_if_exists = {"fail", "replace", "append"}
         if self.if_exists not in valid_if_exists:
-            from edsl.dataset.exceptions import DatasetValueError
+            from .exceptions import DatasetValueError
             raise DatasetValueError(
                 f"if_exists must be one of {valid_if_exists}, got {self.if_exists}"
             )
         # Validate table name (basic SQLite identifier validation)
         if not self.table_name.isalnum() and not all(c in "_" for c in self.table_name):
-            from edsl.dataset.exceptions import DatasetValueError
+            from .exceptions import DatasetValueError
             raise DatasetValueError(
                 f"Invalid table name: {self.table_name}. Must contain only alphanumeric characters and underscores."
             )

edsl/dataset/r/ggplot.py CHANGED Viewed

@@ -30,12 +30,12 @@ class GGPlot:
         if result.returncode != 0:
             if result.returncode == 127:
-                from edsl.dataset.exceptions import DatasetRuntimeError
+                from ..exceptions import DatasetRuntimeError
                 raise DatasetRuntimeError(
                     "Rscript is probably not installed. Please install R from https://cran.r-project.org/"
                 )
             else:
-                from edsl.dataset.exceptions import DatasetRuntimeError
+                from ..exceptions import DatasetRuntimeError
                 raise DatasetRuntimeError(
                     f"An error occurred while running Rscript: {result.stderr}"
                 )
@@ -49,7 +49,7 @@ class GGPlot:
         """Save the plot to a file."""
         format = filename.split('.')[-1].lower()
         if format not in ['svg', 'png']:
-            from edsl.dataset.exceptions import DatasetValueError
+            from ..exceptions import DatasetValueError
             raise DatasetValueError("Only 'svg' and 'png' formats are supported")
         save_command = f'\nggsave("{filename}", plot = last_plot(), width = {self.width}, height = {self.height}, device = "{format}")'

edsl 0.1.50__py3-none-any.whl → 0.1.52__py3-none-any.whl

edsl 0.1.50__py3-none-any.whl → 0.1.52__py3-none-any.whl