PyPI - edsl - Versions diffs - 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl - Mend

edsl-0.1.46-py3-none-any.whl → edsl-0.1.48-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (328)

edsl/__init__.py +44 -39
edsl/__version__.py +1 -1
edsl/agents/__init__.py +4 -2
edsl/agents/{Agent.py → agent.py} +442 -152
edsl/agents/{AgentList.py → agent_list.py} +220 -162
edsl/agents/descriptors.py +46 -7
edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
edsl/base/__init__.py +75 -0
edsl/base/base_class.py +1303 -0
edsl/base/data_transfer_models.py +114 -0
edsl/base/enums.py +215 -0
edsl/base.py +8 -0
edsl/buckets/__init__.py +25 -0
edsl/buckets/bucket_collection.py +324 -0
edsl/buckets/model_buckets.py +206 -0
edsl/buckets/token_bucket.py +502 -0
edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
edsl/buckets/token_bucket_client.py +509 -0
edsl/caching/__init__.py +20 -0
edsl/caching/cache.py +814 -0
edsl/caching/cache_entry.py +427 -0
edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
edsl/caching/exceptions.py +24 -0
edsl/caching/orm.py +30 -0
edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
edsl/caching/sql_dict.py +441 -0
edsl/config/__init__.py +8 -0
edsl/config/config_class.py +177 -0
edsl/config.py +4 -176
edsl/conversation/Conversation.py +7 -7
edsl/conversation/car_buying.py +4 -4
edsl/conversation/chips.py +6 -6
edsl/coop/__init__.py +25 -2
edsl/coop/coop.py +430 -113
edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
edsl/coop/exceptions.py +62 -0
edsl/coop/price_fetcher.py +126 -0
edsl/coop/utils.py +89 -24
edsl/data_transfer_models.py +5 -72
edsl/dataset/__init__.py +10 -0
edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
edsl/dataset/dataset_operations_mixin.py +1492 -0
edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
edsl/{results → dataset/display}/table_renderers.py +58 -2
edsl/{results → dataset}/file_exports.py +4 -5
edsl/{results → dataset}/smart_objects.py +2 -2
edsl/enums.py +5 -205
edsl/inference_services/__init__.py +5 -0
edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
edsl/inference_services/data_structures.py +3 -2
edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
edsl/inference_services/registry.py +4 -41
edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
edsl/inference_services/services/__init__.py +31 -0
edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
edsl/inference_services/write_available.py +1 -2
edsl/instructions/__init__.py +6 -0
edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
edsl/interviews/__init__.py +4 -0
edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
edsl/interviews/interview.py +638 -0
edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
edsl/invigilators/__init__.py +38 -0
edsl/invigilators/invigilator_base.py +477 -0
edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
edsl/invigilators/prompt_constructor.py +476 -0
edsl/{agents → invigilators}/prompt_helpers.py +2 -1
edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
edsl/{agents → invigilators}/question_option_processor.py +96 -21
edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
edsl/jobs/__init__.py +7 -1
edsl/jobs/async_interview_runner.py +99 -35
edsl/jobs/check_survey_scenario_compatibility.py +7 -5
edsl/jobs/data_structures.py +153 -22
edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
edsl/jobs/{Jobs.py → jobs.py} +321 -155
edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
edsl/jobs/jobs_pricing_estimation.py +347 -0
edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
edsl/jobs/jobs_runner_asyncio.py +282 -0
edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
edsl/jobs/results_exceptions_handler.py +2 -2
edsl/key_management/__init__.py +28 -0
edsl/key_management/key_lookup.py +161 -0
edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
edsl/key_management/key_lookup_collection.py +82 -0
edsl/key_management/models.py +218 -0
edsl/language_models/__init__.py +7 -2
edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
edsl/language_models/language_model.py +1080 -0
edsl/language_models/model.py +10 -25
edsl/language_models/{ModelList.py → model_list.py} +9 -14
edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
edsl/language_models/repair.py +4 -4
edsl/language_models/utilities.py +4 -4
edsl/notebooks/__init__.py +3 -1
edsl/notebooks/{Notebook.py → notebook.py} +7 -8
edsl/prompts/__init__.py +1 -1
edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
edsl/prompts/{Prompt.py → prompt.py} +101 -95
edsl/questions/HTMLQuestion.py +1 -1
edsl/questions/__init__.py +154 -25
edsl/questions/answer_validator_mixin.py +1 -1
edsl/questions/compose_questions.py +4 -3
edsl/questions/derived/question_likert_five.py +166 -0
edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
edsl/questions/descriptors.py +24 -30
edsl/questions/loop_processor.py +65 -19
edsl/questions/question_base.py +881 -0
edsl/questions/question_base_gen_mixin.py +15 -16
edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
edsl/questions/question_free_text.py +282 -0
edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
edsl/questions/{QuestionList.py → question_list.py} +6 -7
edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
edsl/questions/question_registry.py +10 -16
edsl/questions/register_questions_meta.py +8 -4
edsl/questions/response_validator_abc.py +17 -16
edsl/results/__init__.py +4 -1
edsl/{exceptions/results.py → results/exceptions.py} +1 -1
edsl/results/report.py +197 -0
edsl/results/{Result.py → result.py} +131 -45
edsl/results/{Results.py → results.py} +420 -216
edsl/results/results_selector.py +344 -25
edsl/scenarios/__init__.py +30 -3
edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
edsl/scenarios/directory_scanner.py +156 -13
edsl/scenarios/document_chunker.py +186 -0
edsl/scenarios/exceptions.py +101 -0
edsl/scenarios/file_methods.py +2 -3
edsl/scenarios/file_store.py +755 -0
edsl/scenarios/handlers/__init__.py +14 -14
edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
edsl/scenarios/handlers/latex_file_store.py +5 -0
edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
edsl/scenarios/scenario.py +928 -0
edsl/scenarios/scenario_join.py +18 -5
edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
edsl/scenarios/scenario_selector.py +5 -1
edsl/study/ObjectEntry.py +2 -2
edsl/study/SnapShot.py +5 -5
edsl/study/Study.py +20 -21
edsl/study/__init__.py +6 -4
edsl/surveys/__init__.py +7 -4
edsl/surveys/dag/__init__.py +2 -0
edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
edsl/surveys/{DAG.py → dag/dag.py} +13 -10
edsl/surveys/descriptors.py +1 -1
edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
edsl/surveys/memory/__init__.py +3 -0
edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
edsl/surveys/rules/__init__.py +3 -0
edsl/surveys/{Rule.py → rules/rule.py} +103 -43
edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
edsl/surveys/survey.py +1743 -0
edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
edsl/tasks/__init__.py +32 -0
edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
edsl/tasks/task_creators.py +135 -0
edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
edsl/tasks/task_status_log.py +85 -0
edsl/tokens/__init__.py +2 -0
edsl/tokens/interview_token_usage.py +53 -0
edsl/utilities/PrettyList.py +1 -1
edsl/utilities/SystemInfo.py +25 -22
edsl/utilities/__init__.py +29 -21
edsl/utilities/gcp_bucket/__init__.py +2 -0
edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
edsl/utilities/interface.py +44 -536
edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
edsl/utilities/repair_functions.py +1 -1
{edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
edsl-0.1.48.dist-info/RECORD +347 -0
edsl/Base.py +0 -426
edsl/BaseDiff.py +0 -260
edsl/agents/InvigilatorBase.py +0 -260
edsl/agents/PromptConstructor.py +0 -318
edsl/auto/AutoStudy.py +0 -130
edsl/auto/StageBase.py +0 -243
edsl/auto/StageGenerateSurvey.py +0 -178
edsl/auto/StageLabelQuestions.py +0 -125
edsl/auto/StagePersona.py +0 -61
edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
edsl/auto/StagePersonaDimensionValues.py +0 -74
edsl/auto/StagePersonaDimensions.py +0 -69
edsl/auto/StageQuestions.py +0 -74
edsl/auto/SurveyCreatorPipeline.py +0 -21
edsl/auto/utilities.py +0 -218
edsl/base/Base.py +0 -279
edsl/coop/PriceFetcher.py +0 -54
edsl/data/Cache.py +0 -580
edsl/data/CacheEntry.py +0 -230
edsl/data/SQLiteDict.py +0 -292
edsl/data/__init__.py +0 -5
edsl/data/orm.py +0 -10
edsl/exceptions/cache.py +0 -5
edsl/exceptions/coop.py +0 -14
edsl/exceptions/data.py +0 -14
edsl/exceptions/scenarios.py +0 -29
edsl/jobs/Answers.py +0 -43
edsl/jobs/JobsPrompts.py +0 -354
edsl/jobs/buckets/BucketCollection.py +0 -134
edsl/jobs/buckets/ModelBuckets.py +0 -65
edsl/jobs/buckets/TokenBucket.py +0 -283
edsl/jobs/buckets/TokenBucketClient.py +0 -191
edsl/jobs/interviews/Interview.py +0 -395
edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
edsl/jobs/tasks/TaskCreators.py +0 -64
edsl/jobs/tasks/TaskStatusLog.py +0 -23
edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
edsl/language_models/LanguageModel.py +0 -635
edsl/language_models/ServiceDataSources.py +0 -0
edsl/language_models/key_management/KeyLookup.py +0 -63
edsl/language_models/key_management/KeyLookupCollection.py +0 -38
edsl/language_models/key_management/models.py +0 -137
edsl/questions/QuestionBase.py +0 -539
edsl/questions/QuestionFreeText.py +0 -130
edsl/questions/derived/QuestionLikertFive.py +0 -76
edsl/results/DatasetExportMixin.py +0 -911
edsl/results/ResultsExportMixin.py +0 -45
edsl/results/TextEditor.py +0 -50
edsl/results/results_fetch_mixin.py +0 -33
edsl/results/results_tools_mixin.py +0 -98
edsl/scenarios/DocumentChunker.py +0 -104
edsl/scenarios/FileStore.py +0 -564
edsl/scenarios/Scenario.py +0 -548
edsl/scenarios/ScenarioHtmlMixin.py +0 -65
edsl/scenarios/ScenarioListExportMixin.py +0 -45
edsl/scenarios/handlers/latex.py +0 -5
edsl/shared.py +0 -1
edsl/surveys/Survey.py +0 -1306
edsl/surveys/SurveyQualtricsImport.py +0 -284
edsl/surveys/SurveyToApp.py +0 -141
edsl/surveys/instructions/__init__.py +0 -0
edsl/tools/__init__.py +0 -1
edsl/tools/clusters.py +0 -192
edsl/tools/embeddings.py +0 -27
edsl/tools/embeddings_plotting.py +0 -118
edsl/tools/plotting.py +0 -112
edsl/tools/summarize.py +0 -18
edsl/utilities/data/Registry.py +0 -6
edsl/utilities/data/__init__.py +0 -1
edsl/utilities/data/scooter_results.json +0 -1
edsl-0.1.46.dist-info/RECORD +0 -366
/edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
/edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
/edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
/edsl/{results → dataset/display}/table_data_class.py +0 -0
/edsl/{results → dataset/display}/table_display.css +0 -0
/edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
/edsl/{results → dataset}/tree_explore.py +0 -0
/edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
/edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
/edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
/edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
/edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
/edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
/edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
/edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
/edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
/edsl/surveys/{Memory.py → memory/memory.py} +0 -0
/edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
/edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
/edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
/edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
/edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
{edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
{edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0

edsl/results/results_selector.py (changed)

@@ -1,30 +1,106 @@
-from typing import Union, List, Dict, Any, Optional
+"""
+Column selection and data extraction module for Results objects.
+This module provides the Selector class that implements the column selection
+functionality for the Results object's select() method. It handles column name
+normalization, matching, and data extraction, supporting both direct column references
+and wildcard patterns.
+"""
+from typing import Union, List, Dict, Any, Optional, Tuple, Callable
 import sys
 from collections import defaultdict
-from edsl.results.Dataset import Dataset
-from edsl.exceptions.results import ResultsColumnNotFoundError
+from ..dataset import Dataset
+from ..utilities import is_notebook
-from edsl.utilities.is_notebook import is_notebook
+from .exceptions import ResultsColumnNotFoundError
 class Selector:
+    """
+    Selects and extracts columns from a Results object to create a Dataset.
+    The Selector class provides the functionality to extract specific data columns
+    from Results objects, handling column name resolution, disambiguation,
+    and wildcard matching. It transforms hierarchical Result data into a columnar
+    Dataset format optimized for analysis operations.
+    Attributes:
+        known_data_types: List of valid data types (e.g., "answer", "agent", "model")
+        columns: List of available column names in dot notation (e.g., "answer.how_feeling")
+    """
     def __init__(
         self,
         known_data_types: List[str],
         data_type_to_keys: Dict[str, List[str]],
         key_to_data_type: Dict[str, str],
-        fetch_list_func,
+        fetch_list_func: Callable[[str, str], List[Any]],
         columns: List[str],
     ):
-        """Selects columns from a Results object"""
+        """
+        Initialize a Selector object.
+        Args:
+            known_data_types: List of valid data types (e.g., "answer", "agent", "model")
+            data_type_to_keys: Mapping from data types to lists of keys available in that type
+            key_to_data_type: Mapping from keys to their corresponding data types
+            fetch_list_func: Function that retrieves values for a given data type and key
+            columns: List of available column names in dot notation
+        Examples:
+            >>> s = Selector(
+            ...     known_data_types=["answer", "agent"],
+            ...     data_type_to_keys={"answer": ["q1", "q2"], "agent": ["name"]},
+            ...     key_to_data_type={"q1": "answer", "q2": "answer", "name": "agent"},
+            ...     fetch_list_func=lambda dt, k: [f"{dt}.{k}_val"],
+            ...     columns=["answer.q1", "answer.q2", "agent.name"]
+            ... )
+            >>> isinstance(s, Selector)
+            True
+        """
         self.known_data_types = known_data_types
         self._data_type_to_keys = data_type_to_keys
         self._key_to_data_type = key_to_data_type
         self._fetch_list = fetch_list_func
         self.columns = columns
+        self.items_in_order = []  # Tracks column order for consistent output
     def select(self, *columns: Union[str, List[str]]) -> Optional[Dataset]:
+        """
+        Select specific columns from the data and return as a Dataset.
+        This method processes column specifications, fetches the corresponding data,
+        and constructs a Dataset with the selected columns. It handles error cases
+        differently in notebook vs non-notebook environments.
+        Args:
+            *columns: Column names to select. Each name can be a simple attribute
+                     name (e.g., "how_feeling"), a fully qualified name with type
+                     (e.g., "answer.how_feeling"), or a wildcard pattern
+                     (e.g., "answer.*"). If no columns provided, selects all data.
+        Returns:
+            A Dataset object containing the selected data, or None if an error occurs
+            in a notebook environment.
+        Raises:
+            ResultsColumnNotFoundError: If a specified column cannot be found (non-notebook only)
+        Examples:
+            >>> import unittest.mock as mock
+            >>> mock_selector = Selector(
+            ...     known_data_types=["answer", "agent"],
+            ...     data_type_to_keys={"answer": ["q1"], "agent": ["name"]},
+            ...     key_to_data_type={"q1": "answer", "name": "agent"},
+            ...     fetch_list_func=lambda dt, k: [f"{dt}-{k}1", f"{dt}-{k}2"],
+            ...     columns=["answer.q1", "agent.name"]
+            ... )
+            >>> ds = mock_selector.select("q1")
+            >>> list(ds[0].values())[0][0]
+            'answer-q11'
+        """
         try:
             columns = self._normalize_columns(columns)
             to_fetch = self._get_columns_to_fetch(columns)
@@ -37,14 +113,30 @@ class Selector:
                 raise e
         return Dataset(new_data)
-    def _normalize_columns(self, columns: Union[str, List[str]]) -> tuple:
-        """Normalize the columns to a tuple of strings
-        >>> s = Selector([], {}, {}, lambda x, y: x, [])
-        >>> s._normalize_columns([["a", "b"], ])
-        ('a', 'b')
-        >>> s._normalize_columns(None)
-        ('*.*',)
+    def _normalize_columns(self, columns: Union[str, List[str]]) -> Tuple[str, ...]:
+        """
+        Normalize column specifications to a standard format.
+        This method handles various forms of column specifications, including
+        converting lists to tuples, handling None values, and applying default
+        wildcards when no columns are specified.
+        Args:
+            columns: Column specifications as strings or lists
+        Returns:
+            A tuple of normalized column name strings
+        Examples:
+            >>> s = Selector([], {}, {}, lambda x, y: [], [])
+            >>> s._normalize_columns([["a", "b"]])
+            ('a', 'b')
+            >>> s._normalize_columns(None)
+            ('*.*',)
+            >>> s._normalize_columns(("a", "b"))
+            ('a', 'b')
+            >>> s._normalize_columns(("*",))
+            ('*.*',)
         """
         if not columns or columns == ("*",) or columns == (None,):
             return ("*.*",)
@@ -52,13 +144,41 @@ class Selector:
             return tuple(columns[0])
         return columns
-    def _get_columns_to_fetch(self, columns: tuple) -> Dict[str, List[str]]:
+    def _get_columns_to_fetch(self, columns: Tuple[str, ...]) -> Dict[str, List[str]]:
+        """
+        Process column specifications and determine what data to fetch.
+        This method iterates through each column specification, finds matching
+        columns, validates the matches, and builds a structure that organizes
+        which keys to fetch for each data type.
+        Args:
+            columns: Tuple of normalized column specifications
+        Returns:
+            Dictionary mapping data types to lists of keys to fetch
+        Raises:
+            ResultsColumnNotFoundError: If columns are ambiguous or not found
+        Examples:
+            >>> import unittest.mock as mock
+            >>> mock_selector = Selector(
+            ...     known_data_types=["answer"],
+            ...     data_type_to_keys={"answer": ["q1", "q2"]},
+            ...     key_to_data_type={"q1": "answer", "q2": "answer"},
+            ...     fetch_list_func=lambda dt, k: [],
+            ...     columns=["answer.q1", "answer.q2"]
+            ... )
+            >>> to_fetch = mock_selector._get_columns_to_fetch(("q1",))
+            >>> to_fetch["answer"]
+            ['q1']
+        """
         to_fetch = defaultdict(list)
         self.items_in_order = []
         for column in columns:
             matches = self._find_matching_columns(column)
-            # breakpoint()
             self._validate_matches(column, matches)
             if len(matches) == 1:
@@ -69,7 +189,33 @@ class Selector:
         return to_fetch
-    def _find_matching_columns(self, partial_name: str) -> list[str]:
+    def _find_matching_columns(self, partial_name: str) -> List[str]:
+        """
+        Find columns that match a partial column name.
+        This method supports both fully qualified column names with data types
+        (containing a dot) and simple column names, handling each case appropriately.
+        It finds all columns that start with the provided partial name.
+        Args:
+            partial_name: A full or partial column name to match
+        Returns:
+            List of matching column names
+        Examples:
+            >>> s = Selector(
+            ...     known_data_types=["answer", "agent"],
+            ...     data_type_to_keys={},
+            ...     key_to_data_type={},
+            ...     fetch_list_func=lambda dt, k: [],
+            ...     columns=["answer.q1", "answer.q2", "agent.name"]
+            ... )
+            >>> s._find_matching_columns("answer.q")
+            ['answer.q1', 'answer.q2']
+            >>> s._find_matching_columns("q")
+            ['q1', 'q2']
+        """
         if "." in partial_name:
             search_in_list = self.columns
         else:
@@ -77,7 +223,35 @@ class Selector:
         matches = [s for s in search_in_list if s.startswith(partial_name)]
         return [partial_name] if partial_name in matches else matches
-    def _validate_matches(self, column: str, matches: List[str]):
+    def _validate_matches(self, column: str, matches: List[str]) -> None:
+        """
+        Validate that matched columns are unambiguous and exist.
+        This method checks that the column specification resolves to exactly
+        one column or a wildcard pattern. It raises appropriate exceptions
+        for ambiguous matches or when no matches are found.
+        Args:
+            column: The original column specification
+            matches: List of matching column names
+        Raises:
+            ResultsColumnNotFoundError: If matches are ambiguous or no matches found
+        Examples:
+            >>> s = Selector([], {}, {}, lambda dt, k: [], [])
+            >>> s._validate_matches("col", ["col"])  # No exception
+            >>> try:
+            ...     s._validate_matches("c", ["col1", "col2"])
+            ... except ResultsColumnNotFoundError as e:
+            ...     "ambiguous" in str(e).lower()
+            True
+            >>> try:
+            ...     s._validate_matches("xyz", [])
+            ... except ResultsColumnNotFoundError as e:
+            ...     "not found" in str(e).lower()
+            True
+        """
         if len(matches) > 1:
             raise ResultsColumnNotFoundError(
                 f"Column '{column}' is ambiguous. Did you mean one of {matches}?"
@@ -85,15 +259,71 @@ class Selector:
         if len(matches) == 0 and ".*" not in column:
             raise ResultsColumnNotFoundError(f"Column '{column}' not found in data.")
-    def _parse_column(self, column: str) -> tuple[str, str]:
+    def _parse_column(self, column: str) -> Tuple[str, str]:
+        """
+        Parse a column name into data type and key components.
+        This method handles both fully qualified column names (containing a dot)
+        and simple column names, looking up the appropriate data type when needed.
+        Args:
+            column: Column name to parse
+        Returns:
+            Tuple of (data_type, key)
+        Raises:
+            ResultsColumnNotFoundError: When key cannot be found in data
+        Examples:
+            >>> s = Selector(
+            ...     [],
+            ...     {},
+            ...     {"col1": "type1"},
+            ...     lambda dt, k: [],
+            ...     []
+            ... )
+            >>> s._parse_column("type2.col2")
+            ('type2', 'col2')
+            >>> s._parse_column("col1")
+            ('type1', 'col1')
+        """
         if "." in column:
-            return column.split(".")
+            parts = column.split(".")
+            return (parts[0], parts[1])  # Return as tuple instead of list
         try:
             return self._key_to_data_type[column], column
         except KeyError:
             self._raise_key_error(column)
-    def _raise_key_error(self, column: str):
+    def _raise_key_error(self, column: str) -> None:
+        """
+        Raise an error with helpful suggestions when a column is not found.
+        This method uses difflib to find close matches to the specified column,
+        providing helpful suggestions in the error message when possible.
+        Args:
+            column: The column name that wasn't found
+        Raises:
+            ResultsColumnNotFoundError: Always raised with a descriptive message
+        Examples:
+            >>> import unittest.mock as mock
+            >>> s = Selector(
+            ...     [],
+            ...     {},
+            ...     {"column1": "type1", "column2": "type1"},
+            ...     lambda dt, k: [],
+            ...     []
+            ... )
+            >>> try:
+            ...     s._raise_key_error("colum1")
+            ... except ResultsColumnNotFoundError as e:
+            ...     "did you mean: column1" in str(e).lower()
+            True
+        """
         import difflib
         close_matches = difflib.get_close_matches(column, self._key_to_data_type.keys())
@@ -103,9 +333,38 @@ class Selector:
                 f"Column '{column}' not found in data. Did you mean: {suggestions}?"
             )
         else:
-            raise ResultsColumnNotFoundError(f"Column {column} not found in data")
+            raise ResultsColumnNotFoundError(f"Column '{column}' not found in data")
-    def _process_column(self, data_type: str, key: str, to_fetch: Dict[str, List[str]]):
+    def _process_column(self, data_type: str, key: str, to_fetch: Dict[str, List[str]]) -> None:
+        """
+        Process a parsed column and add it to the list of data to fetch.
+        This method handles wildcards in both data types and keys, expands them
+        appropriately, and tracks the order of items for consistent output.
+        Args:
+            data_type: The data type component (e.g., "answer", "agent")
+            key: The key component (e.g., "how_feeling", "status")
+            to_fetch: Dictionary to update with data to fetch
+        Raises:
+            ResultsColumnNotFoundError: If the key is not found in any relevant data type
+        Examples:
+            >>> s = Selector(
+            ...     ["answer", "agent"],
+            ...     {"answer": ["q1", "q2"], "agent": ["name"]},
+            ...     {},
+            ...     lambda dt, k: [],
+            ...     []
+            ... )
+            >>> to_fetch = defaultdict(list)
+            >>> s._process_column("answer", "q1", to_fetch)
+            >>> to_fetch["answer"]
+            ['q1']
+            >>> s.items_in_order
+            ['answer.q1']
+        """
         data_types = self._get_data_types_to_return(data_type)
         found_once = False
@@ -118,24 +377,84 @@ class Selector:
                     self.items_in_order.append(f"{dt}.{k}")
         if not found_once:
-            raise ResultsColumnNotFoundError(f"Key {key} not found in data.")
+            raise ResultsColumnNotFoundError(f"Key '{key}' not found in data.")
     def _get_data_types_to_return(self, parsed_data_type: str) -> List[str]:
+        """
+        Determine which data types to include based on the parsed data type.
+        This method handles wildcards in data types, returning either all known
+        data types or validating that a specific data type exists.
+        Args:
+            parsed_data_type: Data type string or wildcard (*)
+        Returns:
+            List of data types to include
+        Raises:
+            ResultsColumnNotFoundError: If the data type is not known
+        Examples:
+            >>> s = Selector(
+            ...     ["answer", "agent", "model"],
+            ...     {},
+            ...     {},
+            ...     lambda dt, k: [],
+            ...     []
+            ... )
+            >>> s._get_data_types_to_return("*")
+            ['answer', 'agent', 'model']
+            >>> s._get_data_types_to_return("answer")
+            ['answer']
+            >>> try:
+            ...     s._get_data_types_to_return("unknown")
+            ... except ResultsColumnNotFoundError:
+            ...     True
+            True
+        """
         if parsed_data_type == "*":
             return self.known_data_types
         if parsed_data_type not in self.known_data_types:
             raise ResultsColumnNotFoundError(
-                f"Data type {parsed_data_type} not found in data. Did you mean one of {self.known_data_types}"
+                f"Data type '{parsed_data_type}' not found in data. Did you mean one of {self.known_data_types}?"
             )
         return [parsed_data_type]
     def _fetch_data(self, to_fetch: Dict[str, List[str]]) -> List[Dict[str, Any]]:
+        """
+        Fetch the actual data for the specified columns.
+        This method retrieves values for each data type and key combination
+        and structures the results for conversion to a Dataset.
+        Args:
+            to_fetch: Dictionary mapping data types to lists of keys to fetch
+        Returns:
+            List of dictionaries containing the fetched data
+        Examples:
+            >>> fetch_mock = lambda dt, k: [f"{dt}-{k}-val1", f"{dt}-{k}-val2"]
+            >>> s = Selector(
+            ...     ["answer"],
+            ...     {"answer": ["q1"]},
+            ...     {},
+            ...     fetch_mock,
+            ...     []
+            ... )
+            >>> s.items_in_order = ["answer.q1"]
+            >>> data = s._fetch_data({"answer": ["q1"]})
+            >>> data[0]["answer.q1"]
+            ['answer-q1-val1', 'answer-q1-val2']
+        """
         new_data = []
         for data_type, keys in to_fetch.items():
             for key in keys:
                 entries = self._fetch_list(data_type, key)
                 new_data.append({f"{data_type}.{key}": entries})
+        # Ensure items are returned in the order they were requested
         return [d for key in self.items_in_order for d in new_data if key in d]

edsl/scenarios/__init__.py CHANGED Viewed

@@ -1,3 +1,30 @@
-from edsl.scenarios.Scenario import Scenario
-from edsl.scenarios.ScenarioList import ScenarioList
-from edsl.scenarios.FileStore import FileStore
+"""
+The scenarios package provides tools for creating and managing parameterized templates.
+This package is a core component of EDSL that enables parameterized content through
+key-value dictionaries called Scenarios. These Scenarios can be used to provide variable
+content to questions, surveys, and other components within EDSL.
+Key components:
+- Scenario: A dictionary-like object for storing key-value pairs to parameterize questions
+- ScenarioList: A collection of Scenario objects with powerful data manipulation capabilities
+- FileStore: A specialized Scenario subclass for handling files of various formats
+The scenarios package supports various file formats, data sources, and transformations,
+enabling complex experimental designs and data-driven surveys.
+Example:
+    >>> from edsl.scenarios import Scenario, ScenarioList
+    >>> # Create a simple scenario
+    >>> s1 = Scenario({"product": "coffee", "price": 4.99})
+    >>> s2 = Scenario({"product": "tea", "price": 3.50})
+    >>> # Create a scenario list
+    >>> sl = ScenarioList([s1, s2])
+    >>> # Use scenarios to parameterize questions and surveys
+"""
+from .scenario import Scenario
+from .scenario_list import ScenarioList
+from .file_store import FileStore
+__all__ = ["Scenario", "ScenarioList", "FileStore"]

edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} RENAMED Viewed

@@ -6,6 +6,12 @@ class ConstructDownloadLink:
     """
     A class to create HTML download links for FileStore objects.
     The links can be displayed in Jupyter notebooks or other web interfaces.
+    >>> from edsl import FileStore
+    >>> fs = FileStore.example("txt")
+    >>> link = ConstructDownloadLink(fs)
+    >>> link.create_link()
+    <IPython.core.display.HTML object>
     """
     def __init__(self, filestore):
@@ -98,6 +104,7 @@ class ConstructDownloadLink:
                 )._repr_html_()
             )
+        from IPython.display import HTML
         return HTML(
             '<div style="display: flex; gap: 10px;">' + "".join(html_parts) + "</div>"
         )

edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl

edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl