edsl 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +311 -75
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +313 -167
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +4 -9
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +365 -220
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/{FileStore.py → file_store.py} +275 -189
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +18 -19
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/METADATA +1 -1
- edsl-0.1.49.dist-info/RECORD +347 -0
- edsl/Base.py +0 -493
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -582
- edsl/data/CacheEntry.py +0 -238
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -544
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1301
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.47.dist-info/RECORD +0 -354
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/LICENSE +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/WHEEL +0 -0
@@ -1,46 +1,84 @@
|
|
1
|
-
|
1
|
+
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
import sys
|
5
5
|
import json
|
6
6
|
import random
|
7
7
|
from collections import UserList
|
8
|
-
from typing import Any, Union, Optional
|
9
|
-
|
10
|
-
from
|
11
|
-
|
12
|
-
from
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
from
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
8
|
+
from typing import Any, Union, Optional, TYPE_CHECKING
|
9
|
+
|
10
|
+
from ..base import PersistenceMixin, HashingMixin
|
11
|
+
|
12
|
+
from .dataset_tree import Tree
|
13
|
+
|
14
|
+
from .display.table_display import TableDisplay
|
15
|
+
from .smart_objects import FirstObject
|
16
|
+
from .r.ggplot import GGPlotMethod
|
17
|
+
from .dataset_operations_mixin import DatasetOperationsMixin
|
18
|
+
|
19
|
+
if TYPE_CHECKING:
|
20
|
+
from ..surveys import Survey
|
21
|
+
from ..questions.QuestionBase import QuestionBase
|
22
|
+
|
23
|
+
class Dataset(UserList, DatasetOperationsMixin, PersistenceMixin, HashingMixin):
|
24
|
+
"""
|
25
|
+
A versatile data container for tabular data with powerful manipulation capabilities.
|
26
|
+
|
27
|
+
The Dataset class is a fundamental data structure in EDSL that represents tabular data
|
28
|
+
in a column-oriented format. It provides a rich set of methods for data manipulation,
|
29
|
+
transformation, analysis, visualization, and export through the DatasetOperationsMixin.
|
30
|
+
|
31
|
+
Key features:
|
32
|
+
|
33
|
+
1. Column-oriented data structure optimized for LLM experiment results
|
34
|
+
2. Rich data manipulation API similar to dplyr/pandas (filter, select, mutate, etc.)
|
35
|
+
3. Visualization capabilities including tables, plots, and reports
|
36
|
+
4. Export to various formats (CSV, Excel, SQLite, pandas, etc.)
|
37
|
+
5. Serialization for storage and transport
|
38
|
+
6. Tree-based data exploration
|
39
|
+
|
40
|
+
A Dataset typically contains multiple columns, each represented as a dictionary
|
41
|
+
with a single key-value pair. The key is the column name and the value is a list
|
42
|
+
of values for that column. All columns must have the same length.
|
43
|
+
|
44
|
+
The Dataset class inherits from:
|
45
|
+
- UserList: Provides list-like behavior for storing column data
|
46
|
+
- DatasetOperationsMixin: Provides data manipulation methods
|
47
|
+
- PersistenceMixin: Provides serialization capabilities
|
48
|
+
- HashingMixin: Provides hashing functionality for comparison and storage
|
49
|
+
|
50
|
+
Datasets are typically created by transforming other EDSL container types like
|
51
|
+
Results, AgentList, or ScenarioList, but can also be created directly from data.
|
52
|
+
"""
|
22
53
|
|
23
54
|
def __init__(
|
24
55
|
self, data: list[dict[str, Any]] = None, print_parameters: Optional[dict] = None
|
25
56
|
):
|
26
|
-
"""
|
57
|
+
"""
|
58
|
+
Initialize a new Dataset instance.
|
59
|
+
|
60
|
+
Parameters:
|
61
|
+
data: A list of dictionaries, where each dictionary represents a column
|
62
|
+
in the dataset. Each dictionary should have a single key-value pair,
|
63
|
+
where the key is the column name and the value is a list of values.
|
64
|
+
All value lists must have the same length.
|
65
|
+
print_parameters: Optional dictionary of parameters controlling how the
|
66
|
+
dataset is displayed when printed.
|
67
|
+
|
68
|
+
Examples:
|
69
|
+
>>> # Create a dataset with two columns
|
70
|
+
>>> d = Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
|
71
|
+
>>> len(d)
|
72
|
+
3
|
73
|
+
|
74
|
+
>>> # Dataset with a single column
|
75
|
+
>>> Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}])
|
76
|
+
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}])
|
77
|
+
"""
|
27
78
|
super().__init__(data)
|
28
79
|
self.print_parameters = print_parameters
|
29
80
|
|
30
81
|
|
31
|
-
def ggplot2(
|
32
|
-
self,
|
33
|
-
ggplot_code: str,
|
34
|
-
shape="wide",
|
35
|
-
sql: str = None,
|
36
|
-
remove_prefix: bool = True,
|
37
|
-
debug: bool = False,
|
38
|
-
height=4,
|
39
|
-
width=6,
|
40
|
-
factor_orders: Optional[dict] = None,
|
41
|
-
):
|
42
|
-
return GGPlotMethod(self).ggplot2(ggplot_code, shape, sql, remove_prefix, debug, height, width, factor_orders)
|
43
|
-
|
44
82
|
def __len__(self) -> int:
|
45
83
|
"""Return the number of observations in the dataset.
|
46
84
|
|
@@ -95,16 +133,29 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
95
133
|
return w
|
96
134
|
|
97
135
|
def keys(self) -> list[str]:
|
98
|
-
"""Return the keys of the
|
136
|
+
"""Return the keys of the dataset.
|
99
137
|
|
100
138
|
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
101
139
|
>>> d.keys()
|
140
|
+
['a.b']
|
141
|
+
|
142
|
+
>>> d = Dataset([{'a.b':[1,2,3,4]}, {'c.d':[5,6,7,8]}])
|
143
|
+
>>> d.keys()
|
144
|
+
['a.b', 'c.d']
|
145
|
+
|
146
|
+
|
102
147
|
['a.b']
|
103
148
|
"""
|
104
149
|
return [list(o.keys())[0] for o in self]
|
105
150
|
|
106
151
|
def filter(self, expression):
|
107
152
|
return self.to_scenario_list().filter(expression).to_dataset()
|
153
|
+
|
154
|
+
def mutate(self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None) -> "Dataset":
|
155
|
+
return self.to_scenario_list().mutate(new_var_string, functions_dict).to_dataset()
|
156
|
+
|
157
|
+
def collapse(self, field:str, separator: Optional[str] = None) -> "Dataset":
|
158
|
+
return self.to_scenario_list().collapse(field, separator).to_dataset()
|
108
159
|
|
109
160
|
def long(self, exclude_fields: list[str] = None) -> Dataset:
|
110
161
|
headers, data = self._tabular()
|
@@ -274,14 +325,33 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
274
325
|
return Dataset(new_data)
|
275
326
|
|
276
327
|
def print(self, pretty_labels=None, **kwargs):
|
328
|
+
"""
|
329
|
+
Print the dataset in a formatted way.
|
330
|
+
|
331
|
+
Args:
|
332
|
+
pretty_labels: A dictionary mapping column names to their display names
|
333
|
+
**kwargs: Additional arguments
|
334
|
+
format: The output format ("html", "markdown", "rich", "latex")
|
335
|
+
|
336
|
+
Returns:
|
337
|
+
TableDisplay object
|
338
|
+
"""
|
277
339
|
if "format" in kwargs:
|
278
340
|
if kwargs["format"] not in ["html", "markdown", "rich", "latex"]:
|
279
341
|
raise ValueError(f"Format '{kwargs['format']}' not supported.")
|
342
|
+
|
343
|
+
# If rich format is requested, set tablefmt accordingly
|
344
|
+
if kwargs["format"] == "rich":
|
345
|
+
kwargs["tablefmt"] = "rich"
|
346
|
+
|
280
347
|
if pretty_labels is None:
|
281
348
|
pretty_labels = {}
|
282
349
|
else:
|
283
350
|
return self.rename(pretty_labels).print(**kwargs)
|
284
|
-
|
351
|
+
|
352
|
+
# Pass through any tablefmt parameter
|
353
|
+
tablefmt = kwargs.get("tablefmt", None)
|
354
|
+
return self.table(tablefmt=tablefmt)
|
285
355
|
|
286
356
|
def rename(self, rename_dic) -> Dataset:
|
287
357
|
new_data = []
|
@@ -302,7 +372,8 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
302
372
|
return Dataset.from_pandas_dataframe(merged_df)
|
303
373
|
|
304
374
|
def to(self, survey_or_question: Union["Survey", "QuestionBase"]) -> "Jobs":
|
305
|
-
|
375
|
+
"""Return a new dataset with the observations transformed by the given survey or question."""
|
376
|
+
from edsl.surveys import Survey
|
306
377
|
from edsl.questions.QuestionBase import QuestionBase
|
307
378
|
|
308
379
|
if isinstance(survey_or_question, Survey):
|
@@ -321,7 +392,14 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
321
392
|
|
322
393
|
>>> d.select('a.b', 'c.d')
|
323
394
|
Dataset([{'a.b': [1, 2, 3, 4]}, {'c.d': [5, 6, 7, 8]}])
|
395
|
+
|
324
396
|
"""
|
397
|
+
for key in keys:
|
398
|
+
if key not in self.keys():
|
399
|
+
raise ValueError(f"Key '{key}' not found in the dataset."
|
400
|
+
f"Available keys: {self.keys()}"
|
401
|
+
)
|
402
|
+
|
325
403
|
if isinstance(keys, str):
|
326
404
|
keys = [keys]
|
327
405
|
|
@@ -491,8 +569,10 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
491
569
|
|
492
570
|
>>> d = Dataset([{'a':[1,2,3,4]}, {'b':[4,3,2,1]}])
|
493
571
|
>>> d.tree()
|
494
|
-
Tree(Dataset({'a': [1, 2, 3, 4], 'b': [4, 3, 2, 1]}))
|
572
|
+
Tree(Dataset({'a': [1, 2, 3, 4], 'b': [4, 3, 2, 1]}), node_order=['a', 'b'])
|
495
573
|
"""
|
574
|
+
if node_order is None:
|
575
|
+
node_order = self.keys()
|
496
576
|
return Tree(self, node_order=node_order)
|
497
577
|
|
498
578
|
def table(
|
@@ -515,7 +595,8 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
515
595
|
|
516
596
|
headers, data = self._tabular()
|
517
597
|
|
518
|
-
if tablefmt is not None:
|
598
|
+
if tablefmt is not None and tablefmt != "rich":
|
599
|
+
# Rich format is handled separately, so we don't validate it against tabulate_formats
|
519
600
|
from tabulate import tabulate_formats
|
520
601
|
|
521
602
|
if tablefmt not in tabulate_formats:
|
@@ -523,7 +604,7 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
523
604
|
f"Error: The following table format is not supported: {tablefmt}",
|
524
605
|
file=sys.stderr,
|
525
606
|
)
|
526
|
-
print(f"\nAvailable formats are: {tabulate_formats}", file=sys.stderr)
|
607
|
+
print(f"\nAvailable formats are: {tabulate_formats} and 'rich'", file=sys.stderr)
|
527
608
|
return None
|
528
609
|
|
529
610
|
if max_rows:
|
@@ -648,5 +729,4 @@ class Dataset(UserList, ResultsExportMixin, PersistenceMixin, HashingMixin):
|
|
648
729
|
|
649
730
|
if __name__ == "__main__":
|
650
731
|
import doctest
|
651
|
-
|
652
732
|
doctest.testmod(optionflags=doctest.ELLIPSIS)
|