edsl-0.1.47-py3-none-any.whl → edsl-0.1.48-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +303 -67
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +313 -167
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +4 -9
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +365 -220
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/{FileStore.py → file_store.py} +275 -189
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +18 -19
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/METADATA +1 -1
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -493
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -582
- edsl/data/CacheEntry.py +0 -238
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -544
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1301
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.47.dist-info/RECORD +0 -354
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
edsl/results/results_selector.py
CHANGED
@@ -1,30 +1,106 @@
|
|
1
|
-
|
1
|
+
"""
|
2
|
+
Column selection and data extraction module for Results objects.
|
3
|
+
|
4
|
+
This module provides the Selector class that implements the column selection
|
5
|
+
functionality for the Results object's select() method. It handles column name
|
6
|
+
normalization, matching, and data extraction, supporting both direct column references
|
7
|
+
and wildcard patterns.
|
8
|
+
"""
|
9
|
+
|
10
|
+
from typing import Union, List, Dict, Any, Optional, Tuple, Callable
|
2
11
|
import sys
|
3
12
|
from collections import defaultdict
|
4
|
-
from edsl.results.Dataset import Dataset
|
5
13
|
|
6
|
-
from
|
14
|
+
from ..dataset import Dataset
|
15
|
+
from ..utilities import is_notebook
|
7
16
|
|
8
|
-
from
|
17
|
+
from .exceptions import ResultsColumnNotFoundError
|
9
18
|
|
10
19
|
|
11
20
|
class Selector:
|
21
|
+
"""
|
22
|
+
Selects and extracts columns from a Results object to create a Dataset.
|
23
|
+
|
24
|
+
The Selector class provides the functionality to extract specific data columns
|
25
|
+
from Results objects, handling column name resolution, disambiguation,
|
26
|
+
and wildcard matching. It transforms hierarchical Result data into a columnar
|
27
|
+
Dataset format optimized for analysis operations.
|
28
|
+
|
29
|
+
Attributes:
|
30
|
+
known_data_types: List of valid data types (e.g., "answer", "agent", "model")
|
31
|
+
columns: List of available column names in dot notation (e.g., "answer.how_feeling")
|
32
|
+
"""
|
33
|
+
|
12
34
|
def __init__(
|
13
35
|
self,
|
14
36
|
known_data_types: List[str],
|
15
37
|
data_type_to_keys: Dict[str, List[str]],
|
16
38
|
key_to_data_type: Dict[str, str],
|
17
|
-
fetch_list_func,
|
39
|
+
fetch_list_func: Callable[[str, str], List[Any]],
|
18
40
|
columns: List[str],
|
19
41
|
):
|
20
|
-
"""
|
42
|
+
"""
|
43
|
+
Initialize a Selector object.
|
44
|
+
|
45
|
+
Args:
|
46
|
+
known_data_types: List of valid data types (e.g., "answer", "agent", "model")
|
47
|
+
data_type_to_keys: Mapping from data types to lists of keys available in that type
|
48
|
+
key_to_data_type: Mapping from keys to their corresponding data types
|
49
|
+
fetch_list_func: Function that retrieves values for a given data type and key
|
50
|
+
columns: List of available column names in dot notation
|
51
|
+
|
52
|
+
Examples:
|
53
|
+
>>> s = Selector(
|
54
|
+
... known_data_types=["answer", "agent"],
|
55
|
+
... data_type_to_keys={"answer": ["q1", "q2"], "agent": ["name"]},
|
56
|
+
... key_to_data_type={"q1": "answer", "q2": "answer", "name": "agent"},
|
57
|
+
... fetch_list_func=lambda dt, k: [f"{dt}.{k}_val"],
|
58
|
+
... columns=["answer.q1", "answer.q2", "agent.name"]
|
59
|
+
... )
|
60
|
+
>>> isinstance(s, Selector)
|
61
|
+
True
|
62
|
+
"""
|
21
63
|
self.known_data_types = known_data_types
|
22
64
|
self._data_type_to_keys = data_type_to_keys
|
23
65
|
self._key_to_data_type = key_to_data_type
|
24
66
|
self._fetch_list = fetch_list_func
|
25
67
|
self.columns = columns
|
68
|
+
self.items_in_order = [] # Tracks column order for consistent output
|
26
69
|
|
27
70
|
def select(self, *columns: Union[str, List[str]]) -> Optional[Dataset]:
|
71
|
+
"""
|
72
|
+
Select specific columns from the data and return as a Dataset.
|
73
|
+
|
74
|
+
This method processes column specifications, fetches the corresponding data,
|
75
|
+
and constructs a Dataset with the selected columns. It handles error cases
|
76
|
+
differently in notebook vs non-notebook environments.
|
77
|
+
|
78
|
+
Args:
|
79
|
+
*columns: Column names to select. Each name can be a simple attribute
|
80
|
+
name (e.g., "how_feeling"), a fully qualified name with type
|
81
|
+
(e.g., "answer.how_feeling"), or a wildcard pattern
|
82
|
+
(e.g., "answer.*"). If no columns provided, selects all data.
|
83
|
+
|
84
|
+
Returns:
|
85
|
+
A Dataset object containing the selected data, or None if an error occurs
|
86
|
+
in a notebook environment.
|
87
|
+
|
88
|
+
Raises:
|
89
|
+
ResultsColumnNotFoundError: If a specified column cannot be found (non-notebook only)
|
90
|
+
|
91
|
+
Examples:
|
92
|
+
>>> import unittest.mock as mock
|
93
|
+
>>> mock_selector = Selector(
|
94
|
+
... known_data_types=["answer", "agent"],
|
95
|
+
... data_type_to_keys={"answer": ["q1"], "agent": ["name"]},
|
96
|
+
... key_to_data_type={"q1": "answer", "name": "agent"},
|
97
|
+
... fetch_list_func=lambda dt, k: [f"{dt}-{k}1", f"{dt}-{k}2"],
|
98
|
+
... columns=["answer.q1", "agent.name"]
|
99
|
+
... )
|
100
|
+
>>> ds = mock_selector.select("q1")
|
101
|
+
>>> list(ds[0].values())[0][0]
|
102
|
+
'answer-q11'
|
103
|
+
"""
|
28
104
|
try:
|
29
105
|
columns = self._normalize_columns(columns)
|
30
106
|
to_fetch = self._get_columns_to_fetch(columns)
|
@@ -37,14 +113,30 @@ class Selector:
|
|
37
113
|
raise e
|
38
114
|
return Dataset(new_data)
|
39
115
|
|
40
|
-
def _normalize_columns(self, columns: Union[str, List[str]]) ->
|
41
|
-
"""
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
116
|
+
def _normalize_columns(self, columns: Union[str, List[str]]) -> Tuple[str, ...]:
|
117
|
+
"""
|
118
|
+
Normalize column specifications to a standard format.
|
119
|
+
|
120
|
+
This method handles various forms of column specifications, including
|
121
|
+
converting lists to tuples, handling None values, and applying default
|
122
|
+
wildcards when no columns are specified.
|
123
|
+
|
124
|
+
Args:
|
125
|
+
columns: Column specifications as strings or lists
|
126
|
+
|
127
|
+
Returns:
|
128
|
+
A tuple of normalized column name strings
|
129
|
+
|
130
|
+
Examples:
|
131
|
+
>>> s = Selector([], {}, {}, lambda x, y: [], [])
|
132
|
+
>>> s._normalize_columns([["a", "b"]])
|
133
|
+
('a', 'b')
|
134
|
+
>>> s._normalize_columns(None)
|
135
|
+
('*.*',)
|
136
|
+
>>> s._normalize_columns(("a", "b"))
|
137
|
+
('a', 'b')
|
138
|
+
>>> s._normalize_columns(("*",))
|
139
|
+
('*.*',)
|
48
140
|
"""
|
49
141
|
if not columns or columns == ("*",) or columns == (None,):
|
50
142
|
return ("*.*",)
|
@@ -52,13 +144,41 @@ class Selector:
|
|
52
144
|
return tuple(columns[0])
|
53
145
|
return columns
|
54
146
|
|
55
|
-
def _get_columns_to_fetch(self, columns:
|
147
|
+
def _get_columns_to_fetch(self, columns: Tuple[str, ...]) -> Dict[str, List[str]]:
|
148
|
+
"""
|
149
|
+
Process column specifications and determine what data to fetch.
|
150
|
+
|
151
|
+
This method iterates through each column specification, finds matching
|
152
|
+
columns, validates the matches, and builds a structure that organizes
|
153
|
+
which keys to fetch for each data type.
|
154
|
+
|
155
|
+
Args:
|
156
|
+
columns: Tuple of normalized column specifications
|
157
|
+
|
158
|
+
Returns:
|
159
|
+
Dictionary mapping data types to lists of keys to fetch
|
160
|
+
|
161
|
+
Raises:
|
162
|
+
ResultsColumnNotFoundError: If columns are ambiguous or not found
|
163
|
+
|
164
|
+
Examples:
|
165
|
+
>>> import unittest.mock as mock
|
166
|
+
>>> mock_selector = Selector(
|
167
|
+
... known_data_types=["answer"],
|
168
|
+
... data_type_to_keys={"answer": ["q1", "q2"]},
|
169
|
+
... key_to_data_type={"q1": "answer", "q2": "answer"},
|
170
|
+
... fetch_list_func=lambda dt, k: [],
|
171
|
+
... columns=["answer.q1", "answer.q2"]
|
172
|
+
... )
|
173
|
+
>>> to_fetch = mock_selector._get_columns_to_fetch(("q1",))
|
174
|
+
>>> to_fetch["answer"]
|
175
|
+
['q1']
|
176
|
+
"""
|
56
177
|
to_fetch = defaultdict(list)
|
57
178
|
self.items_in_order = []
|
58
179
|
|
59
180
|
for column in columns:
|
60
181
|
matches = self._find_matching_columns(column)
|
61
|
-
# breakpoint()
|
62
182
|
self._validate_matches(column, matches)
|
63
183
|
|
64
184
|
if len(matches) == 1:
|
@@ -69,7 +189,33 @@ class Selector:
|
|
69
189
|
|
70
190
|
return to_fetch
|
71
191
|
|
72
|
-
def _find_matching_columns(self, partial_name: str) ->
|
192
|
+
def _find_matching_columns(self, partial_name: str) -> List[str]:
|
193
|
+
"""
|
194
|
+
Find columns that match a partial column name.
|
195
|
+
|
196
|
+
This method supports both fully qualified column names with data types
|
197
|
+
(containing a dot) and simple column names, handling each case appropriately.
|
198
|
+
It finds all columns that start with the provided partial name.
|
199
|
+
|
200
|
+
Args:
|
201
|
+
partial_name: A full or partial column name to match
|
202
|
+
|
203
|
+
Returns:
|
204
|
+
List of matching column names
|
205
|
+
|
206
|
+
Examples:
|
207
|
+
>>> s = Selector(
|
208
|
+
... known_data_types=["answer", "agent"],
|
209
|
+
... data_type_to_keys={},
|
210
|
+
... key_to_data_type={},
|
211
|
+
... fetch_list_func=lambda dt, k: [],
|
212
|
+
... columns=["answer.q1", "answer.q2", "agent.name"]
|
213
|
+
... )
|
214
|
+
>>> s._find_matching_columns("answer.q")
|
215
|
+
['answer.q1', 'answer.q2']
|
216
|
+
>>> s._find_matching_columns("q")
|
217
|
+
['q1', 'q2']
|
218
|
+
"""
|
73
219
|
if "." in partial_name:
|
74
220
|
search_in_list = self.columns
|
75
221
|
else:
|
@@ -77,7 +223,35 @@ class Selector:
|
|
77
223
|
matches = [s for s in search_in_list if s.startswith(partial_name)]
|
78
224
|
return [partial_name] if partial_name in matches else matches
|
79
225
|
|
80
|
-
def _validate_matches(self, column: str, matches: List[str]):
|
226
|
+
def _validate_matches(self, column: str, matches: List[str]) -> None:
|
227
|
+
"""
|
228
|
+
Validate that matched columns are unambiguous and exist.
|
229
|
+
|
230
|
+
This method checks that the column specification resolves to exactly
|
231
|
+
one column or a wildcard pattern. It raises appropriate exceptions
|
232
|
+
for ambiguous matches or when no matches are found.
|
233
|
+
|
234
|
+
Args:
|
235
|
+
column: The original column specification
|
236
|
+
matches: List of matching column names
|
237
|
+
|
238
|
+
Raises:
|
239
|
+
ResultsColumnNotFoundError: If matches are ambiguous or no matches found
|
240
|
+
|
241
|
+
Examples:
|
242
|
+
>>> s = Selector([], {}, {}, lambda dt, k: [], [])
|
243
|
+
>>> s._validate_matches("col", ["col"]) # No exception
|
244
|
+
>>> try:
|
245
|
+
... s._validate_matches("c", ["col1", "col2"])
|
246
|
+
... except ResultsColumnNotFoundError as e:
|
247
|
+
... "ambiguous" in str(e).lower()
|
248
|
+
True
|
249
|
+
>>> try:
|
250
|
+
... s._validate_matches("xyz", [])
|
251
|
+
... except ResultsColumnNotFoundError as e:
|
252
|
+
... "not found" in str(e).lower()
|
253
|
+
True
|
254
|
+
"""
|
81
255
|
if len(matches) > 1:
|
82
256
|
raise ResultsColumnNotFoundError(
|
83
257
|
f"Column '{column}' is ambiguous. Did you mean one of {matches}?"
|
@@ -85,15 +259,71 @@ class Selector:
|
|
85
259
|
if len(matches) == 0 and ".*" not in column:
|
86
260
|
raise ResultsColumnNotFoundError(f"Column '{column}' not found in data.")
|
87
261
|
|
88
|
-
def _parse_column(self, column: str) ->
|
262
|
+
def _parse_column(self, column: str) -> Tuple[str, str]:
|
263
|
+
"""
|
264
|
+
Parse a column name into data type and key components.
|
265
|
+
|
266
|
+
This method handles both fully qualified column names (containing a dot)
|
267
|
+
and simple column names, looking up the appropriate data type when needed.
|
268
|
+
|
269
|
+
Args:
|
270
|
+
column: Column name to parse
|
271
|
+
|
272
|
+
Returns:
|
273
|
+
Tuple of (data_type, key)
|
274
|
+
|
275
|
+
Raises:
|
276
|
+
ResultsColumnNotFoundError: When key cannot be found in data
|
277
|
+
|
278
|
+
Examples:
|
279
|
+
>>> s = Selector(
|
280
|
+
... [],
|
281
|
+
... {},
|
282
|
+
... {"col1": "type1"},
|
283
|
+
... lambda dt, k: [],
|
284
|
+
... []
|
285
|
+
... )
|
286
|
+
>>> s._parse_column("type2.col2")
|
287
|
+
('type2', 'col2')
|
288
|
+
>>> s._parse_column("col1")
|
289
|
+
('type1', 'col1')
|
290
|
+
"""
|
89
291
|
if "." in column:
|
90
|
-
|
292
|
+
parts = column.split(".")
|
293
|
+
return (parts[0], parts[1]) # Return as tuple instead of list
|
91
294
|
try:
|
92
295
|
return self._key_to_data_type[column], column
|
93
296
|
except KeyError:
|
94
297
|
self._raise_key_error(column)
|
95
298
|
|
96
|
-
def _raise_key_error(self, column: str):
|
299
|
+
def _raise_key_error(self, column: str) -> None:
|
300
|
+
"""
|
301
|
+
Raise an error with helpful suggestions when a column is not found.
|
302
|
+
|
303
|
+
This method uses difflib to find close matches to the specified column,
|
304
|
+
providing helpful suggestions in the error message when possible.
|
305
|
+
|
306
|
+
Args:
|
307
|
+
column: The column name that wasn't found
|
308
|
+
|
309
|
+
Raises:
|
310
|
+
ResultsColumnNotFoundError: Always raised with a descriptive message
|
311
|
+
|
312
|
+
Examples:
|
313
|
+
>>> import unittest.mock as mock
|
314
|
+
>>> s = Selector(
|
315
|
+
... [],
|
316
|
+
... {},
|
317
|
+
... {"column1": "type1", "column2": "type1"},
|
318
|
+
... lambda dt, k: [],
|
319
|
+
... []
|
320
|
+
... )
|
321
|
+
>>> try:
|
322
|
+
... s._raise_key_error("colum1")
|
323
|
+
... except ResultsColumnNotFoundError as e:
|
324
|
+
... "did you mean: column1" in str(e).lower()
|
325
|
+
True
|
326
|
+
"""
|
97
327
|
import difflib
|
98
328
|
|
99
329
|
close_matches = difflib.get_close_matches(column, self._key_to_data_type.keys())
|
@@ -103,9 +333,38 @@ class Selector:
|
|
103
333
|
f"Column '{column}' not found in data. Did you mean: {suggestions}?"
|
104
334
|
)
|
105
335
|
else:
|
106
|
-
raise ResultsColumnNotFoundError(f"Column {column} not found in data")
|
336
|
+
raise ResultsColumnNotFoundError(f"Column '{column}' not found in data")
|
107
337
|
|
108
|
-
def _process_column(self, data_type: str, key: str, to_fetch: Dict[str, List[str]]):
|
338
|
+
def _process_column(self, data_type: str, key: str, to_fetch: Dict[str, List[str]]) -> None:
|
339
|
+
"""
|
340
|
+
Process a parsed column and add it to the list of data to fetch.
|
341
|
+
|
342
|
+
This method handles wildcards in both data types and keys, expands them
|
343
|
+
appropriately, and tracks the order of items for consistent output.
|
344
|
+
|
345
|
+
Args:
|
346
|
+
data_type: The data type component (e.g., "answer", "agent")
|
347
|
+
key: The key component (e.g., "how_feeling", "status")
|
348
|
+
to_fetch: Dictionary to update with data to fetch
|
349
|
+
|
350
|
+
Raises:
|
351
|
+
ResultsColumnNotFoundError: If the key is not found in any relevant data type
|
352
|
+
|
353
|
+
Examples:
|
354
|
+
>>> s = Selector(
|
355
|
+
... ["answer", "agent"],
|
356
|
+
... {"answer": ["q1", "q2"], "agent": ["name"]},
|
357
|
+
... {},
|
358
|
+
... lambda dt, k: [],
|
359
|
+
... []
|
360
|
+
... )
|
361
|
+
>>> to_fetch = defaultdict(list)
|
362
|
+
>>> s._process_column("answer", "q1", to_fetch)
|
363
|
+
>>> to_fetch["answer"]
|
364
|
+
['q1']
|
365
|
+
>>> s.items_in_order
|
366
|
+
['answer.q1']
|
367
|
+
"""
|
109
368
|
data_types = self._get_data_types_to_return(data_type)
|
110
369
|
found_once = False
|
111
370
|
|
@@ -118,24 +377,84 @@ class Selector:
|
|
118
377
|
self.items_in_order.append(f"{dt}.{k}")
|
119
378
|
|
120
379
|
if not found_once:
|
121
|
-
raise ResultsColumnNotFoundError(f"Key {key} not found in data.")
|
380
|
+
raise ResultsColumnNotFoundError(f"Key '{key}' not found in data.")
|
122
381
|
|
123
382
|
def _get_data_types_to_return(self, parsed_data_type: str) -> List[str]:
|
383
|
+
"""
|
384
|
+
Determine which data types to include based on the parsed data type.
|
385
|
+
|
386
|
+
This method handles wildcards in data types, returning either all known
|
387
|
+
data types or validating that a specific data type exists.
|
388
|
+
|
389
|
+
Args:
|
390
|
+
parsed_data_type: Data type string or wildcard (*)
|
391
|
+
|
392
|
+
Returns:
|
393
|
+
List of data types to include
|
394
|
+
|
395
|
+
Raises:
|
396
|
+
ResultsColumnNotFoundError: If the data type is not known
|
397
|
+
|
398
|
+
Examples:
|
399
|
+
>>> s = Selector(
|
400
|
+
... ["answer", "agent", "model"],
|
401
|
+
... {},
|
402
|
+
... {},
|
403
|
+
... lambda dt, k: [],
|
404
|
+
... []
|
405
|
+
... )
|
406
|
+
>>> s._get_data_types_to_return("*")
|
407
|
+
['answer', 'agent', 'model']
|
408
|
+
>>> s._get_data_types_to_return("answer")
|
409
|
+
['answer']
|
410
|
+
>>> try:
|
411
|
+
... s._get_data_types_to_return("unknown")
|
412
|
+
... except ResultsColumnNotFoundError:
|
413
|
+
... True
|
414
|
+
True
|
415
|
+
"""
|
124
416
|
if parsed_data_type == "*":
|
125
417
|
return self.known_data_types
|
126
418
|
if parsed_data_type not in self.known_data_types:
|
127
419
|
raise ResultsColumnNotFoundError(
|
128
|
-
f"Data type {parsed_data_type} not found in data. Did you mean one of {self.known_data_types}"
|
420
|
+
f"Data type '{parsed_data_type}' not found in data. Did you mean one of {self.known_data_types}?"
|
129
421
|
)
|
130
422
|
return [parsed_data_type]
|
131
423
|
|
132
424
|
def _fetch_data(self, to_fetch: Dict[str, List[str]]) -> List[Dict[str, Any]]:
|
425
|
+
"""
|
426
|
+
Fetch the actual data for the specified columns.
|
427
|
+
|
428
|
+
This method retrieves values for each data type and key combination
|
429
|
+
and structures the results for conversion to a Dataset.
|
430
|
+
|
431
|
+
Args:
|
432
|
+
to_fetch: Dictionary mapping data types to lists of keys to fetch
|
433
|
+
|
434
|
+
Returns:
|
435
|
+
List of dictionaries containing the fetched data
|
436
|
+
|
437
|
+
Examples:
|
438
|
+
>>> fetch_mock = lambda dt, k: [f"{dt}-{k}-val1", f"{dt}-{k}-val2"]
|
439
|
+
>>> s = Selector(
|
440
|
+
... ["answer"],
|
441
|
+
... {"answer": ["q1"]},
|
442
|
+
... {},
|
443
|
+
... fetch_mock,
|
444
|
+
... []
|
445
|
+
... )
|
446
|
+
>>> s.items_in_order = ["answer.q1"]
|
447
|
+
>>> data = s._fetch_data({"answer": ["q1"]})
|
448
|
+
>>> data[0]["answer.q1"]
|
449
|
+
['answer-q1-val1', 'answer-q1-val2']
|
450
|
+
"""
|
133
451
|
new_data = []
|
134
452
|
for data_type, keys in to_fetch.items():
|
135
453
|
for key in keys:
|
136
454
|
entries = self._fetch_list(data_type, key)
|
137
455
|
new_data.append({f"{data_type}.{key}": entries})
|
138
456
|
|
457
|
+
# Ensure items are returned in the order they were requested
|
139
458
|
return [d for key in self.items_in_order for d in new_data if key in d]
|
140
459
|
|
141
460
|
|
edsl/scenarios/__init__.py
CHANGED
@@ -1,3 +1,30 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
"""
|
2
|
+
The scenarios package provides tools for creating and managing parameterized templates.
|
3
|
+
|
4
|
+
This package is a core component of EDSL that enables parameterized content through
|
5
|
+
key-value dictionaries called Scenarios. These Scenarios can be used to provide variable
|
6
|
+
content to questions, surveys, and other components within EDSL.
|
7
|
+
|
8
|
+
Key components:
|
9
|
+
- Scenario: A dictionary-like object for storing key-value pairs to parameterize questions
|
10
|
+
- ScenarioList: A collection of Scenario objects with powerful data manipulation capabilities
|
11
|
+
- FileStore: A specialized Scenario subclass for handling files of various formats
|
12
|
+
|
13
|
+
The scenarios package supports various file formats, data sources, and transformations,
|
14
|
+
enabling complex experimental designs and data-driven surveys.
|
15
|
+
|
16
|
+
Example:
|
17
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
18
|
+
>>> # Create a simple scenario
|
19
|
+
>>> s1 = Scenario({"product": "coffee", "price": 4.99})
|
20
|
+
>>> s2 = Scenario({"product": "tea", "price": 3.50})
|
21
|
+
>>> # Create a scenario list
|
22
|
+
>>> sl = ScenarioList([s1, s2])
|
23
|
+
>>> # Use scenarios to parameterize questions and surveys
|
24
|
+
"""
|
25
|
+
|
26
|
+
from .scenario import Scenario
|
27
|
+
from .scenario_list import ScenarioList
|
28
|
+
from .file_store import FileStore
|
29
|
+
|
30
|
+
__all__ = ["Scenario", "ScenarioList", "FileStore"]
|
@@ -6,6 +6,12 @@ class ConstructDownloadLink:
|
|
6
6
|
"""
|
7
7
|
A class to create HTML download links for FileStore objects.
|
8
8
|
The links can be displayed in Jupyter notebooks or other web interfaces.
|
9
|
+
|
10
|
+
>>> from edsl import FileStore
|
11
|
+
>>> fs = FileStore.example("txt")
|
12
|
+
>>> link = ConstructDownloadLink(fs)
|
13
|
+
>>> link.create_link()
|
14
|
+
<IPython.core.display.HTML object>
|
9
15
|
"""
|
10
16
|
|
11
17
|
def __init__(self, filestore):
|
@@ -98,6 +104,7 @@ class ConstructDownloadLink:
|
|
98
104
|
)._repr_html_()
|
99
105
|
)
|
100
106
|
|
107
|
+
from IPython.display import HTML
|
101
108
|
return HTML(
|
102
109
|
'<div style="display: flex; gap: 10px;">' + "".join(html_parts) + "</div>"
|
103
110
|
)
|