PyPI - edsl - Versions diffs - 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl - Mend

edsl 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (314) hide show

edsl/__init__.py +44 -39
edsl/__version__.py +1 -1
edsl/agents/__init__.py +4 -2
edsl/agents/{Agent.py → agent.py} +442 -152
edsl/agents/{AgentList.py → agent_list.py} +220 -162
edsl/agents/descriptors.py +46 -7
edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
edsl/base/__init__.py +75 -0
edsl/base/base_class.py +1303 -0
edsl/base/data_transfer_models.py +114 -0
edsl/base/enums.py +215 -0
edsl/base.py +8 -0
edsl/buckets/__init__.py +25 -0
edsl/buckets/bucket_collection.py +324 -0
edsl/buckets/model_buckets.py +206 -0
edsl/buckets/token_bucket.py +502 -0
edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
edsl/buckets/token_bucket_client.py +509 -0
edsl/caching/__init__.py +20 -0
edsl/caching/cache.py +814 -0
edsl/caching/cache_entry.py +427 -0
edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
edsl/caching/exceptions.py +24 -0
edsl/caching/orm.py +30 -0
edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
edsl/caching/sql_dict.py +441 -0
edsl/config/__init__.py +8 -0
edsl/config/config_class.py +177 -0
edsl/config.py +4 -176
edsl/conversation/Conversation.py +7 -7
edsl/conversation/car_buying.py +4 -4
edsl/conversation/chips.py +6 -6
edsl/coop/__init__.py +25 -2
edsl/coop/coop.py +311 -75
edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
edsl/coop/exceptions.py +62 -0
edsl/coop/price_fetcher.py +126 -0
edsl/coop/utils.py +89 -24
edsl/data_transfer_models.py +5 -72
edsl/dataset/__init__.py +10 -0
edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
edsl/{results → dataset/display}/table_renderers.py +58 -2
edsl/{results → dataset}/file_exports.py +4 -5
edsl/{results → dataset}/smart_objects.py +2 -2
edsl/enums.py +5 -205
edsl/inference_services/__init__.py +5 -0
edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
edsl/inference_services/data_structures.py +3 -2
edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
edsl/inference_services/registry.py +4 -41
edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
edsl/inference_services/services/__init__.py +31 -0
edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
edsl/inference_services/write_available.py +1 -2
edsl/instructions/__init__.py +6 -0
edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
edsl/interviews/__init__.py +4 -0
edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
edsl/interviews/interview.py +638 -0
edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
edsl/invigilators/__init__.py +38 -0
edsl/invigilators/invigilator_base.py +477 -0
edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
edsl/invigilators/prompt_constructor.py +476 -0
edsl/{agents → invigilators}/prompt_helpers.py +2 -1
edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
edsl/{agents → invigilators}/question_option_processor.py +96 -21
edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
edsl/jobs/__init__.py +7 -1
edsl/jobs/async_interview_runner.py +99 -35
edsl/jobs/check_survey_scenario_compatibility.py +7 -5
edsl/jobs/data_structures.py +153 -22
edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
edsl/jobs/{Jobs.py → jobs.py} +313 -167
edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
edsl/jobs/jobs_pricing_estimation.py +347 -0
edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
edsl/jobs/jobs_runner_asyncio.py +282 -0
edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
edsl/jobs/results_exceptions_handler.py +2 -2
edsl/key_management/__init__.py +28 -0
edsl/key_management/key_lookup.py +161 -0
edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
edsl/key_management/key_lookup_collection.py +82 -0
edsl/key_management/models.py +218 -0
edsl/language_models/__init__.py +7 -2
edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
edsl/language_models/language_model.py +1080 -0
edsl/language_models/model.py +10 -25
edsl/language_models/{ModelList.py → model_list.py} +9 -14
edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
edsl/language_models/repair.py +4 -4
edsl/language_models/utilities.py +4 -4
edsl/notebooks/__init__.py +3 -1
edsl/notebooks/{Notebook.py → notebook.py} +7 -8
edsl/prompts/__init__.py +1 -1
edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
edsl/prompts/{Prompt.py → prompt.py} +101 -95
edsl/questions/HTMLQuestion.py +1 -1
edsl/questions/__init__.py +154 -25
edsl/questions/answer_validator_mixin.py +1 -1
edsl/questions/compose_questions.py +4 -3
edsl/questions/derived/question_likert_five.py +166 -0
edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
edsl/questions/descriptors.py +24 -30
edsl/questions/loop_processor.py +65 -19
edsl/questions/question_base.py +881 -0
edsl/questions/question_base_gen_mixin.py +15 -16
edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
edsl/questions/question_free_text.py +282 -0
edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
edsl/questions/{QuestionList.py → question_list.py} +6 -7
edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
edsl/questions/question_registry.py +4 -9
edsl/questions/register_questions_meta.py +8 -4
edsl/questions/response_validator_abc.py +17 -16
edsl/results/__init__.py +4 -1
edsl/{exceptions/results.py → results/exceptions.py} +1 -1
edsl/results/report.py +197 -0
edsl/results/{Result.py → result.py} +131 -45
edsl/results/{Results.py → results.py} +365 -220
edsl/results/results_selector.py +344 -25
edsl/scenarios/__init__.py +30 -3
edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
edsl/scenarios/directory_scanner.py +156 -13
edsl/scenarios/document_chunker.py +186 -0
edsl/scenarios/exceptions.py +101 -0
edsl/scenarios/file_methods.py +2 -3
edsl/scenarios/{FileStore.py → file_store.py} +275 -189
edsl/scenarios/handlers/__init__.py +14 -14
edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
edsl/scenarios/handlers/latex_file_store.py +5 -0
edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
edsl/scenarios/scenario.py +928 -0
edsl/scenarios/scenario_join.py +18 -5
edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
edsl/scenarios/scenario_selector.py +5 -1
edsl/study/ObjectEntry.py +2 -2
edsl/study/SnapShot.py +5 -5
edsl/study/Study.py +18 -19
edsl/study/__init__.py +6 -4
edsl/surveys/__init__.py +7 -4
edsl/surveys/dag/__init__.py +2 -0
edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
edsl/surveys/{DAG.py → dag/dag.py} +13 -10
edsl/surveys/descriptors.py +1 -1
edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
edsl/surveys/memory/__init__.py +3 -0
edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
edsl/surveys/rules/__init__.py +3 -0
edsl/surveys/{Rule.py → rules/rule.py} +103 -43
edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
edsl/surveys/survey.py +1743 -0
edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
edsl/tasks/__init__.py +32 -0
edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
edsl/tasks/task_creators.py +135 -0
edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
edsl/tasks/task_status_log.py +85 -0
edsl/tokens/__init__.py +2 -0
edsl/tokens/interview_token_usage.py +53 -0
edsl/utilities/PrettyList.py +1 -1
edsl/utilities/SystemInfo.py +25 -22
edsl/utilities/__init__.py +29 -21
edsl/utilities/gcp_bucket/__init__.py +2 -0
edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
edsl/utilities/interface.py +44 -536
edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
edsl/utilities/repair_functions.py +1 -1
{edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/METADATA +1 -1
edsl-0.1.49.dist-info/RECORD +347 -0
edsl/Base.py +0 -493
edsl/BaseDiff.py +0 -260
edsl/agents/InvigilatorBase.py +0 -260
edsl/agents/PromptConstructor.py +0 -318
edsl/coop/PriceFetcher.py +0 -54
edsl/data/Cache.py +0 -582
edsl/data/CacheEntry.py +0 -238
edsl/data/SQLiteDict.py +0 -292
edsl/data/__init__.py +0 -5
edsl/data/orm.py +0 -10
edsl/exceptions/cache.py +0 -5
edsl/exceptions/coop.py +0 -14
edsl/exceptions/data.py +0 -14
edsl/exceptions/scenarios.py +0 -29
edsl/jobs/Answers.py +0 -43
edsl/jobs/JobsPrompts.py +0 -354
edsl/jobs/buckets/BucketCollection.py +0 -134
edsl/jobs/buckets/ModelBuckets.py +0 -65
edsl/jobs/buckets/TokenBucket.py +0 -283
edsl/jobs/buckets/TokenBucketClient.py +0 -191
edsl/jobs/interviews/Interview.py +0 -395
edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
edsl/jobs/tasks/TaskCreators.py +0 -64
edsl/jobs/tasks/TaskStatusLog.py +0 -23
edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
edsl/language_models/LanguageModel.py +0 -635
edsl/language_models/ServiceDataSources.py +0 -0
edsl/language_models/key_management/KeyLookup.py +0 -63
edsl/language_models/key_management/KeyLookupCollection.py +0 -38
edsl/language_models/key_management/models.py +0 -137
edsl/questions/QuestionBase.py +0 -544
edsl/questions/QuestionFreeText.py +0 -130
edsl/questions/derived/QuestionLikertFive.py +0 -76
edsl/results/ResultsExportMixin.py +0 -45
edsl/results/TextEditor.py +0 -50
edsl/results/results_fetch_mixin.py +0 -33
edsl/results/results_tools_mixin.py +0 -98
edsl/scenarios/DocumentChunker.py +0 -104
edsl/scenarios/Scenario.py +0 -548
edsl/scenarios/ScenarioHtmlMixin.py +0 -65
edsl/scenarios/ScenarioListExportMixin.py +0 -45
edsl/scenarios/handlers/latex.py +0 -5
edsl/shared.py +0 -1
edsl/surveys/Survey.py +0 -1301
edsl/surveys/SurveyQualtricsImport.py +0 -284
edsl/surveys/SurveyToApp.py +0 -141
edsl/surveys/instructions/__init__.py +0 -0
edsl/tools/__init__.py +0 -1
edsl/tools/clusters.py +0 -192
edsl/tools/embeddings.py +0 -27
edsl/tools/embeddings_plotting.py +0 -118
edsl/tools/plotting.py +0 -112
edsl/tools/summarize.py +0 -18
edsl/utilities/data/Registry.py +0 -6
edsl/utilities/data/__init__.py +0 -1
edsl/utilities/data/scooter_results.json +0 -1
edsl-0.1.47.dist-info/RECORD +0 -354
/edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
/edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
/edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
/edsl/{results → dataset/display}/table_data_class.py +0 -0
/edsl/{results → dataset/display}/table_display.css +0 -0
/edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
/edsl/{results → dataset}/tree_explore.py +0 -0
/edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
/edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
/edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
/edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
/edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
/edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
/edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
/edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
/edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
/edsl/surveys/{Memory.py → memory/memory.py} +0 -0
/edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
/edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
/edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
/edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
/edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
{edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/LICENSE +0 -0
{edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/WHEEL +0 -0

edsl/{results/Dataset.py → dataset/dataset.py} RENAMED Viewed

@@ -1,46 +1,84 @@
-"""A module to represent a dataset of observations."""
 from __future__ import annotations
 import sys
 import json
 import random
 from collections import UserList
-from typing import Any, Union, Optional
-from edsl.results.ResultsExportMixin import ResultsExportMixin
-from edsl.results.DatasetTree import Tree
-from edsl.results.TableDisplay import TableDisplay
-from edsl.Base import PersistenceMixin, HashingMixin
-from edsl.results.smart_objects import FirstObject
-from edsl.results.ResultsGGMixin import GGPlotMethod
-class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
-    """A class to represent a dataset of observations."""
+from typing import Any, Union, Optional, TYPE_CHECKING
+from ..base import PersistenceMixin, HashingMixin
+from .dataset_tree import Tree
+from .display.table_display import TableDisplay
+from .smart_objects import FirstObject
+from .r.ggplot import GGPlotMethod
+from .dataset_operations_mixin import DatasetOperationsMixin
+if TYPE_CHECKING:
+    from ..surveys import Survey
+    from ..questions.QuestionBase import QuestionBase
+class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
+    """
+    A versatile data container for tabular data with powerful manipulation capabilities.
+    The Dataset class is a fundamental data structure in EDSL that represents tabular data
+    in a column-oriented format. It provides a rich set of methods for data manipulation,
+    transformation, analysis, visualization, and export through the DatasetOperationsMixin.
+    Key features:
+    1. Column-oriented data structure optimized for LLM experiment results
+    2. Rich data manipulation API similar to dplyr/pandas (filter, select, mutate, etc.)
+    3. Visualization capabilities including tables, plots, and reports
+    4. Export to various formats (CSV, Excel, SQLite, pandas, etc.)
+    5. Serialization for storage and transport
+    6. Tree-based data exploration
+    A Dataset typically contains multiple columns, each represented as a dictionary
+    with a single key-value pair. The key is the column name and the value is a list
+    of values for that column. All columns must have the same length.
+    The Dataset class inherits from:
+    - UserList: Provides list-like behavior for storing column data
+    - DatasetOperationsMixin: Provides data manipulation methods
+    - PersistenceMixin: Provides serialization capabilities
+    - HashingMixin: Provides hashing functionality for comparison and storage
+    Datasets are typically created by transforming other EDSL container types like
+    Results, AgentList, or ScenarioList, but can also be created directly from data.
+    """
     def __init__(
         self, data: list[dict[str, Any]] = None, print_parameters: Optional[dict] = None
     ):
-        """Initialize the dataset with the given data."""
+        """
+        Initialize a new Dataset instance.
+        Parameters:
+            data: A list of dictionaries, where each dictionary represents a column
+                 in the dataset. Each dictionary should have a single key-value pair,
+                 where the key is the column name and the value is a list of values.
+                 All value lists must have the same length.
+            print_parameters: Optional dictionary of parameters controlling how the
+                             dataset is displayed when printed.
+        Examples:
+            >>> # Create a dataset with two columns
+            >>> d = Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
+            >>> len(d)
+            3
+            >>> # Dataset with a single column
+            >>> Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}])
+            Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}])
+        """
         super().__init__(data)
         self.print_parameters = print_parameters
-    def ggplot2(
-        self,
-        ggplot_code: str,
-        shape="wide",
-        sql: str = None,
-        remove_prefix: bool = True,
-        debug: bool = False,
-        height=4,
-        width=6,
-        factor_orders: Optional[dict] = None,
-    ):
-        return GGPlotMethod(self).ggplot2(ggplot_code, shape, sql, remove_prefix, debug, height, width, factor_orders)
     def __len__(self) -> int:
         """Return the number of observations in the dataset.
@@ -95,16 +133,29 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         return w
     def keys(self) -> list[str]:
-        """Return the keys of the first observation in the dataset.
+        """Return the keys of the dataset.
         >>> d = Dataset([{'a.b':[1,2,3,4]}])
         >>> d.keys()
+        ['a.b']
+        >>> d = Dataset([{'a.b':[1,2,3,4]}, {'c.d':[5,6,7,8]}])
+        >>> d.keys()
+        ['a.b', 'c.d']
         ['a.b']
         """
         return [list(o.keys())[0] for o in self]
     def filter(self, expression):
         return self.to_scenario_list().filter(expression).to_dataset()
+    def mutate(self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None) -> "Dataset":
+        return self.to_scenario_list().mutate(new_var_string, functions_dict).to_dataset()
+    def collapse(self, field:str, separator: Optional[str] = None) -> "Dataset":
+        return self.to_scenario_list().collapse(field, separator).to_dataset()
     def long(self, exclude_fields: list[str] = None) -> Dataset:
         headers, data = self._tabular()
@@ -274,14 +325,33 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         return Dataset(new_data)
     def print(self, pretty_labels=None, **kwargs):
+        """
+        Print the dataset in a formatted way.
+        Args:
+            pretty_labels: A dictionary mapping column names to their display names
+            **kwargs: Additional arguments
+                format: The output format ("html", "markdown", "rich", "latex")
+        Returns:
+            TableDisplay object
+        """
         if "format" in kwargs:
             if kwargs["format"] not in ["html", "markdown", "rich", "latex"]:
                 raise ValueError(f"Format '{kwargs['format']}' not supported.")
+            # If rich format is requested, set tablefmt accordingly
+            if kwargs["format"] == "rich":
+                kwargs["tablefmt"] = "rich"
         if pretty_labels is None:
             pretty_labels = {}
         else:
             return self.rename(pretty_labels).print(**kwargs)
-        return self.table()
+        # Pass through any tablefmt parameter
+        tablefmt = kwargs.get("tablefmt", None)
+        return self.table(tablefmt=tablefmt)
     def rename(self, rename_dic) -> Dataset:
         new_data = []
@@ -302,7 +372,8 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         return Dataset.from_pandas_dataframe(merged_df)
     def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
-        from edsl.surveys.Survey import Survey
+        """Return a new dataset with the observations transformed by the given survey or question."""
+        from edsl.surveys import Survey
         from edsl.questions.QuestionBase import QuestionBase
         if isinstance(survey_or_question, Survey):
@@ -321,7 +392,14 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         >>> d.select('a.b', 'c.d')
         Dataset([{'a.b': [1, 2, 3, 4]}, {'c.d': [5, 6, 7, 8]}])
         """
+        for key in keys:
+            if key not in self.keys():
+                raise ValueError(f"Key '{key}' not found in the dataset."
+                                 f"Available keys: {self.keys()}"
+                                 )
         if isinstance(keys, str):
             keys = [keys]
@@ -491,8 +569,10 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[4,3,2,1]}])
         >>> d.tree()
-        Tree(Dataset({'a': [1, 2, 3, 4], 'b': [4, 3, 2, 1]}))
+        Tree(Dataset({'a': [1, 2, 3, 4], 'b': [4, 3, 2, 1]}), node_order=['a', 'b'])
         """
+        if node_order is None:
+            node_order = self.keys()
         return Tree(self, node_order=node_order)
     def table(
@@ -515,7 +595,8 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
         headers, data = self._tabular()
-        if tablefmt is not None:
+        if tablefmt is not None and tablefmt != "rich":
+            # Rich format is handled separately, so we don't validate it against tabulate_formats
             from tabulate import tabulate_formats
             if tablefmt not in tabulate_formats:
@@ -523,7 +604,7 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
                     f"Error: The following table format is not supported: {tablefmt}",
                     file=sys.stderr,
                 )
-                print(f"\nAvailable formats are: {tabulate_formats}", file=sys.stderr)
+                print(f"\nAvailable formats are: {tabulate_formats} and 'rich'", file=sys.stderr)
                 return None
         if max_rows:
@@ -648,5 +729,4 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
 if __name__ == "__main__":
     import doctest
     doctest.testmod(optionflags=doctest.ELLIPSIS)

edsl 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

edsl 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl