edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +430 -113
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/dataset/dataset_operations_mixin.py +1492 -0
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +321 -155
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +10 -16
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +420 -216
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/file_store.py +755 -0
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +20 -21
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
  230. edsl-0.1.48.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -426
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/auto/AutoStudy.py +0 -130
  236. edsl/auto/StageBase.py +0 -243
  237. edsl/auto/StageGenerateSurvey.py +0 -178
  238. edsl/auto/StageLabelQuestions.py +0 -125
  239. edsl/auto/StagePersona.py +0 -61
  240. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  241. edsl/auto/StagePersonaDimensionValues.py +0 -74
  242. edsl/auto/StagePersonaDimensions.py +0 -69
  243. edsl/auto/StageQuestions.py +0 -74
  244. edsl/auto/SurveyCreatorPipeline.py +0 -21
  245. edsl/auto/utilities.py +0 -218
  246. edsl/base/Base.py +0 -279
  247. edsl/coop/PriceFetcher.py +0 -54
  248. edsl/data/Cache.py +0 -580
  249. edsl/data/CacheEntry.py +0 -230
  250. edsl/data/SQLiteDict.py +0 -292
  251. edsl/data/__init__.py +0 -5
  252. edsl/data/orm.py +0 -10
  253. edsl/exceptions/cache.py +0 -5
  254. edsl/exceptions/coop.py +0 -14
  255. edsl/exceptions/data.py +0 -14
  256. edsl/exceptions/scenarios.py +0 -29
  257. edsl/jobs/Answers.py +0 -43
  258. edsl/jobs/JobsPrompts.py +0 -354
  259. edsl/jobs/buckets/BucketCollection.py +0 -134
  260. edsl/jobs/buckets/ModelBuckets.py +0 -65
  261. edsl/jobs/buckets/TokenBucket.py +0 -283
  262. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  263. edsl/jobs/interviews/Interview.py +0 -395
  264. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  265. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  266. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  267. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  268. edsl/jobs/tasks/TaskCreators.py +0 -64
  269. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  270. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  271. edsl/language_models/LanguageModel.py +0 -635
  272. edsl/language_models/ServiceDataSources.py +0 -0
  273. edsl/language_models/key_management/KeyLookup.py +0 -63
  274. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  275. edsl/language_models/key_management/models.py +0 -137
  276. edsl/questions/QuestionBase.py +0 -539
  277. edsl/questions/QuestionFreeText.py +0 -130
  278. edsl/questions/derived/QuestionLikertFive.py +0 -76
  279. edsl/results/DatasetExportMixin.py +0 -911
  280. edsl/results/ResultsExportMixin.py +0 -45
  281. edsl/results/TextEditor.py +0 -50
  282. edsl/results/results_fetch_mixin.py +0 -33
  283. edsl/results/results_tools_mixin.py +0 -98
  284. edsl/scenarios/DocumentChunker.py +0 -104
  285. edsl/scenarios/FileStore.py +0 -564
  286. edsl/scenarios/Scenario.py +0 -548
  287. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  288. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  289. edsl/scenarios/handlers/latex.py +0 -5
  290. edsl/shared.py +0 -1
  291. edsl/surveys/Survey.py +0 -1306
  292. edsl/surveys/SurveyQualtricsImport.py +0 -284
  293. edsl/surveys/SurveyToApp.py +0 -141
  294. edsl/surveys/instructions/__init__.py +0 -0
  295. edsl/tools/__init__.py +0 -1
  296. edsl/tools/clusters.py +0 -192
  297. edsl/tools/embeddings.py +0 -27
  298. edsl/tools/embeddings_plotting.py +0 -118
  299. edsl/tools/plotting.py +0 -112
  300. edsl/tools/summarize.py +0 -18
  301. edsl/utilities/data/Registry.py +0 -6
  302. edsl/utilities/data/__init__.py +0 -1
  303. edsl/utilities/data/scooter_results.json +0 -1
  304. edsl-0.1.46.dist-info/RECORD +0 -366
  305. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  306. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  307. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  308. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  309. /edsl/{results → dataset/display}/table_display.css +0 -0
  310. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  311. /edsl/{results → dataset}/tree_explore.py +0 -0
  312. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  313. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  314. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  315. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  316. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  317. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  318. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  319. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  320. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  321. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  322. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  323. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  324. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  325. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  326. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  327. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
  328. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -1,30 +1,106 @@
1
- from typing import Union, List, Dict, Any, Optional
1
+ """
2
+ Column selection and data extraction module for Results objects.
3
+
4
+ This module provides the Selector class that implements the column selection
5
+ functionality for the Results object's select() method. It handles column name
6
+ normalization, matching, and data extraction, supporting both direct column references
7
+ and wildcard patterns.
8
+ """
9
+
10
+ from typing import Union, List, Dict, Any, Optional, Tuple, Callable
2
11
  import sys
3
12
  from collections import defaultdict
4
- from edsl.results.Dataset import Dataset
5
13
 
6
- from edsl.exceptions.results import ResultsColumnNotFoundError
14
+ from ..dataset import Dataset
15
+ from ..utilities import is_notebook
7
16
 
8
- from edsl.utilities.is_notebook import is_notebook
17
+ from .exceptions import ResultsColumnNotFoundError
9
18
 
10
19
 
11
20
  class Selector:
21
+ """
22
+ Selects and extracts columns from a Results object to create a Dataset.
23
+
24
+ The Selector class provides the functionality to extract specific data columns
25
+ from Results objects, handling column name resolution, disambiguation,
26
+ and wildcard matching. It transforms hierarchical Result data into a columnar
27
+ Dataset format optimized for analysis operations.
28
+
29
+ Attributes:
30
+ known_data_types: List of valid data types (e.g., "answer", "agent", "model")
31
+ columns: List of available column names in dot notation (e.g., "answer.how_feeling")
32
+ """
33
+
12
34
  def __init__(
13
35
  self,
14
36
  known_data_types: List[str],
15
37
  data_type_to_keys: Dict[str, List[str]],
16
38
  key_to_data_type: Dict[str, str],
17
- fetch_list_func,
39
+ fetch_list_func: Callable[[str, str], List[Any]],
18
40
  columns: List[str],
19
41
  ):
20
- """Selects columns from a Results object"""
42
+ """
43
+ Initialize a Selector object.
44
+
45
+ Args:
46
+ known_data_types: List of valid data types (e.g., "answer", "agent", "model")
47
+ data_type_to_keys: Mapping from data types to lists of keys available in that type
48
+ key_to_data_type: Mapping from keys to their corresponding data types
49
+ fetch_list_func: Function that retrieves values for a given data type and key
50
+ columns: List of available column names in dot notation
51
+
52
+ Examples:
53
+ >>> s = Selector(
54
+ ... known_data_types=["answer", "agent"],
55
+ ... data_type_to_keys={"answer": ["q1", "q2"], "agent": ["name"]},
56
+ ... key_to_data_type={"q1": "answer", "q2": "answer", "name": "agent"},
57
+ ... fetch_list_func=lambda dt, k: [f"{dt}.{k}_val"],
58
+ ... columns=["answer.q1", "answer.q2", "agent.name"]
59
+ ... )
60
+ >>> isinstance(s, Selector)
61
+ True
62
+ """
21
63
  self.known_data_types = known_data_types
22
64
  self._data_type_to_keys = data_type_to_keys
23
65
  self._key_to_data_type = key_to_data_type
24
66
  self._fetch_list = fetch_list_func
25
67
  self.columns = columns
68
+ self.items_in_order = [] # Tracks column order for consistent output
26
69
 
27
70
  def select(self, *columns: Union[str, List[str]]) -> Optional[Dataset]:
71
+ """
72
+ Select specific columns from the data and return as a Dataset.
73
+
74
+ This method processes column specifications, fetches the corresponding data,
75
+ and constructs a Dataset with the selected columns. It handles error cases
76
+ differently in notebook vs non-notebook environments.
77
+
78
+ Args:
79
+ *columns: Column names to select. Each name can be a simple attribute
80
+ name (e.g., "how_feeling"), a fully qualified name with type
81
+ (e.g., "answer.how_feeling"), or a wildcard pattern
82
+ (e.g., "answer.*"). If no columns provided, selects all data.
83
+
84
+ Returns:
85
+ A Dataset object containing the selected data, or None if an error occurs
86
+ in a notebook environment.
87
+
88
+ Raises:
89
+ ResultsColumnNotFoundError: If a specified column cannot be found (non-notebook only)
90
+
91
+ Examples:
92
+ >>> import unittest.mock as mock
93
+ >>> mock_selector = Selector(
94
+ ... known_data_types=["answer", "agent"],
95
+ ... data_type_to_keys={"answer": ["q1"], "agent": ["name"]},
96
+ ... key_to_data_type={"q1": "answer", "name": "agent"},
97
+ ... fetch_list_func=lambda dt, k: [f"{dt}-{k}1", f"{dt}-{k}2"],
98
+ ... columns=["answer.q1", "agent.name"]
99
+ ... )
100
+ >>> ds = mock_selector.select("q1")
101
+ >>> list(ds[0].values())[0][0]
102
+ 'answer-q11'
103
+ """
28
104
  try:
29
105
  columns = self._normalize_columns(columns)
30
106
  to_fetch = self._get_columns_to_fetch(columns)
@@ -37,14 +113,30 @@ class Selector:
37
113
  raise e
38
114
  return Dataset(new_data)
39
115
 
40
- def _normalize_columns(self, columns: Union[str, List[str]]) -> tuple:
41
- """Normalize the columns to a tuple of strings
42
-
43
- >>> s = Selector([], {}, {}, lambda x, y: x, [])
44
- >>> s._normalize_columns([["a", "b"], ])
45
- ('a', 'b')
46
- >>> s._normalize_columns(None)
47
- ('*.*',)
116
+ def _normalize_columns(self, columns: Union[str, List[str]]) -> Tuple[str, ...]:
117
+ """
118
+ Normalize column specifications to a standard format.
119
+
120
+ This method handles various forms of column specifications, including
121
+ converting lists to tuples, handling None values, and applying default
122
+ wildcards when no columns are specified.
123
+
124
+ Args:
125
+ columns: Column specifications as strings or lists
126
+
127
+ Returns:
128
+ A tuple of normalized column name strings
129
+
130
+ Examples:
131
+ >>> s = Selector([], {}, {}, lambda x, y: [], [])
132
+ >>> s._normalize_columns([["a", "b"]])
133
+ ('a', 'b')
134
+ >>> s._normalize_columns(None)
135
+ ('*.*',)
136
+ >>> s._normalize_columns(("a", "b"))
137
+ ('a', 'b')
138
+ >>> s._normalize_columns(("*",))
139
+ ('*.*',)
48
140
  """
49
141
  if not columns or columns == ("*",) or columns == (None,):
50
142
  return ("*.*",)
@@ -52,13 +144,41 @@ class Selector:
52
144
  return tuple(columns[0])
53
145
  return columns
54
146
 
55
- def _get_columns_to_fetch(self, columns: tuple) -> Dict[str, List[str]]:
147
+ def _get_columns_to_fetch(self, columns: Tuple[str, ...]) -> Dict[str, List[str]]:
148
+ """
149
+ Process column specifications and determine what data to fetch.
150
+
151
+ This method iterates through each column specification, finds matching
152
+ columns, validates the matches, and builds a structure that organizes
153
+ which keys to fetch for each data type.
154
+
155
+ Args:
156
+ columns: Tuple of normalized column specifications
157
+
158
+ Returns:
159
+ Dictionary mapping data types to lists of keys to fetch
160
+
161
+ Raises:
162
+ ResultsColumnNotFoundError: If columns are ambiguous or not found
163
+
164
+ Examples:
165
+ >>> import unittest.mock as mock
166
+ >>> mock_selector = Selector(
167
+ ... known_data_types=["answer"],
168
+ ... data_type_to_keys={"answer": ["q1", "q2"]},
169
+ ... key_to_data_type={"q1": "answer", "q2": "answer"},
170
+ ... fetch_list_func=lambda dt, k: [],
171
+ ... columns=["answer.q1", "answer.q2"]
172
+ ... )
173
+ >>> to_fetch = mock_selector._get_columns_to_fetch(("q1",))
174
+ >>> to_fetch["answer"]
175
+ ['q1']
176
+ """
56
177
  to_fetch = defaultdict(list)
57
178
  self.items_in_order = []
58
179
 
59
180
  for column in columns:
60
181
  matches = self._find_matching_columns(column)
61
- # breakpoint()
62
182
  self._validate_matches(column, matches)
63
183
 
64
184
  if len(matches) == 1:
@@ -69,7 +189,33 @@ class Selector:
69
189
 
70
190
  return to_fetch
71
191
 
72
- def _find_matching_columns(self, partial_name: str) -> list[str]:
192
+ def _find_matching_columns(self, partial_name: str) -> List[str]:
193
+ """
194
+ Find columns that match a partial column name.
195
+
196
+ This method supports both fully qualified column names with data types
197
+ (containing a dot) and simple column names, handling each case appropriately.
198
+ It finds all columns that start with the provided partial name.
199
+
200
+ Args:
201
+ partial_name: A full or partial column name to match
202
+
203
+ Returns:
204
+ List of matching column names
205
+
206
+ Examples:
207
+ >>> s = Selector(
208
+ ... known_data_types=["answer", "agent"],
209
+ ... data_type_to_keys={},
210
+ ... key_to_data_type={},
211
+ ... fetch_list_func=lambda dt, k: [],
212
+ ... columns=["answer.q1", "answer.q2", "agent.name"]
213
+ ... )
214
+ >>> s._find_matching_columns("answer.q")
215
+ ['answer.q1', 'answer.q2']
216
+ >>> s._find_matching_columns("q")
217
+ ['q1', 'q2']
218
+ """
73
219
  if "." in partial_name:
74
220
  search_in_list = self.columns
75
221
  else:
@@ -77,7 +223,35 @@ class Selector:
77
223
  matches = [s for s in search_in_list if s.startswith(partial_name)]
78
224
  return [partial_name] if partial_name in matches else matches
79
225
 
80
- def _validate_matches(self, column: str, matches: List[str]):
226
+ def _validate_matches(self, column: str, matches: List[str]) -> None:
227
+ """
228
+ Validate that matched columns are unambiguous and exist.
229
+
230
+ This method checks that the column specification resolves to exactly
231
+ one column or a wildcard pattern. It raises appropriate exceptions
232
+ for ambiguous matches or when no matches are found.
233
+
234
+ Args:
235
+ column: The original column specification
236
+ matches: List of matching column names
237
+
238
+ Raises:
239
+ ResultsColumnNotFoundError: If matches are ambiguous or no matches found
240
+
241
+ Examples:
242
+ >>> s = Selector([], {}, {}, lambda dt, k: [], [])
243
+ >>> s._validate_matches("col", ["col"]) # No exception
244
+ >>> try:
245
+ ... s._validate_matches("c", ["col1", "col2"])
246
+ ... except ResultsColumnNotFoundError as e:
247
+ ... "ambiguous" in str(e).lower()
248
+ True
249
+ >>> try:
250
+ ... s._validate_matches("xyz", [])
251
+ ... except ResultsColumnNotFoundError as e:
252
+ ... "not found" in str(e).lower()
253
+ True
254
+ """
81
255
  if len(matches) > 1:
82
256
  raise ResultsColumnNotFoundError(
83
257
  f"Column '{column}' is ambiguous. Did you mean one of {matches}?"
@@ -85,15 +259,71 @@ class Selector:
85
259
  if len(matches) == 0 and ".*" not in column:
86
260
  raise ResultsColumnNotFoundError(f"Column '{column}' not found in data.")
87
261
 
88
- def _parse_column(self, column: str) -> tuple[str, str]:
262
+ def _parse_column(self, column: str) -> Tuple[str, str]:
263
+ """
264
+ Parse a column name into data type and key components.
265
+
266
+ This method handles both fully qualified column names (containing a dot)
267
+ and simple column names, looking up the appropriate data type when needed.
268
+
269
+ Args:
270
+ column: Column name to parse
271
+
272
+ Returns:
273
+ Tuple of (data_type, key)
274
+
275
+ Raises:
276
+ ResultsColumnNotFoundError: When key cannot be found in data
277
+
278
+ Examples:
279
+ >>> s = Selector(
280
+ ... [],
281
+ ... {},
282
+ ... {"col1": "type1"},
283
+ ... lambda dt, k: [],
284
+ ... []
285
+ ... )
286
+ >>> s._parse_column("type2.col2")
287
+ ('type2', 'col2')
288
+ >>> s._parse_column("col1")
289
+ ('type1', 'col1')
290
+ """
89
291
  if "." in column:
90
- return column.split(".")
292
+ parts = column.split(".")
293
+ return (parts[0], parts[1]) # Return as tuple instead of list
91
294
  try:
92
295
  return self._key_to_data_type[column], column
93
296
  except KeyError:
94
297
  self._raise_key_error(column)
95
298
 
96
- def _raise_key_error(self, column: str):
299
+ def _raise_key_error(self, column: str) -> None:
300
+ """
301
+ Raise an error with helpful suggestions when a column is not found.
302
+
303
+ This method uses difflib to find close matches to the specified column,
304
+ providing helpful suggestions in the error message when possible.
305
+
306
+ Args:
307
+ column: The column name that wasn't found
308
+
309
+ Raises:
310
+ ResultsColumnNotFoundError: Always raised with a descriptive message
311
+
312
+ Examples:
313
+ >>> import unittest.mock as mock
314
+ >>> s = Selector(
315
+ ... [],
316
+ ... {},
317
+ ... {"column1": "type1", "column2": "type1"},
318
+ ... lambda dt, k: [],
319
+ ... []
320
+ ... )
321
+ >>> try:
322
+ ... s._raise_key_error("colum1")
323
+ ... except ResultsColumnNotFoundError as e:
324
+ ... "did you mean: column1" in str(e).lower()
325
+ True
326
+ """
97
327
  import difflib
98
328
 
99
329
  close_matches = difflib.get_close_matches(column, self._key_to_data_type.keys())
@@ -103,9 +333,38 @@ class Selector:
103
333
  f"Column '{column}' not found in data. Did you mean: {suggestions}?"
104
334
  )
105
335
  else:
106
- raise ResultsColumnNotFoundError(f"Column {column} not found in data")
336
+ raise ResultsColumnNotFoundError(f"Column '{column}' not found in data")
107
337
 
108
- def _process_column(self, data_type: str, key: str, to_fetch: Dict[str, List[str]]):
338
+ def _process_column(self, data_type: str, key: str, to_fetch: Dict[str, List[str]]) -> None:
339
+ """
340
+ Process a parsed column and add it to the list of data to fetch.
341
+
342
+ This method handles wildcards in both data types and keys, expands them
343
+ appropriately, and tracks the order of items for consistent output.
344
+
345
+ Args:
346
+ data_type: The data type component (e.g., "answer", "agent")
347
+ key: The key component (e.g., "how_feeling", "status")
348
+ to_fetch: Dictionary to update with data to fetch
349
+
350
+ Raises:
351
+ ResultsColumnNotFoundError: If the key is not found in any relevant data type
352
+
353
+ Examples:
354
+ >>> s = Selector(
355
+ ... ["answer", "agent"],
356
+ ... {"answer": ["q1", "q2"], "agent": ["name"]},
357
+ ... {},
358
+ ... lambda dt, k: [],
359
+ ... []
360
+ ... )
361
+ >>> to_fetch = defaultdict(list)
362
+ >>> s._process_column("answer", "q1", to_fetch)
363
+ >>> to_fetch["answer"]
364
+ ['q1']
365
+ >>> s.items_in_order
366
+ ['answer.q1']
367
+ """
109
368
  data_types = self._get_data_types_to_return(data_type)
110
369
  found_once = False
111
370
 
@@ -118,24 +377,84 @@ class Selector:
118
377
  self.items_in_order.append(f"{dt}.{k}")
119
378
 
120
379
  if not found_once:
121
- raise ResultsColumnNotFoundError(f"Key {key} not found in data.")
380
+ raise ResultsColumnNotFoundError(f"Key '{key}' not found in data.")
122
381
 
123
382
  def _get_data_types_to_return(self, parsed_data_type: str) -> List[str]:
383
+ """
384
+ Determine which data types to include based on the parsed data type.
385
+
386
+ This method handles wildcards in data types, returning either all known
387
+ data types or validating that a specific data type exists.
388
+
389
+ Args:
390
+ parsed_data_type: Data type string or wildcard (*)
391
+
392
+ Returns:
393
+ List of data types to include
394
+
395
+ Raises:
396
+ ResultsColumnNotFoundError: If the data type is not known
397
+
398
+ Examples:
399
+ >>> s = Selector(
400
+ ... ["answer", "agent", "model"],
401
+ ... {},
402
+ ... {},
403
+ ... lambda dt, k: [],
404
+ ... []
405
+ ... )
406
+ >>> s._get_data_types_to_return("*")
407
+ ['answer', 'agent', 'model']
408
+ >>> s._get_data_types_to_return("answer")
409
+ ['answer']
410
+ >>> try:
411
+ ... s._get_data_types_to_return("unknown")
412
+ ... except ResultsColumnNotFoundError:
413
+ ... True
414
+ True
415
+ """
124
416
  if parsed_data_type == "*":
125
417
  return self.known_data_types
126
418
  if parsed_data_type not in self.known_data_types:
127
419
  raise ResultsColumnNotFoundError(
128
- f"Data type {parsed_data_type} not found in data. Did you mean one of {self.known_data_types}"
420
+ f"Data type '{parsed_data_type}' not found in data. Did you mean one of {self.known_data_types}?"
129
421
  )
130
422
  return [parsed_data_type]
131
423
 
132
424
  def _fetch_data(self, to_fetch: Dict[str, List[str]]) -> List[Dict[str, Any]]:
425
+ """
426
+ Fetch the actual data for the specified columns.
427
+
428
+ This method retrieves values for each data type and key combination
429
+ and structures the results for conversion to a Dataset.
430
+
431
+ Args:
432
+ to_fetch: Dictionary mapping data types to lists of keys to fetch
433
+
434
+ Returns:
435
+ List of dictionaries containing the fetched data
436
+
437
+ Examples:
438
+ >>> fetch_mock = lambda dt, k: [f"{dt}-{k}-val1", f"{dt}-{k}-val2"]
439
+ >>> s = Selector(
440
+ ... ["answer"],
441
+ ... {"answer": ["q1"]},
442
+ ... {},
443
+ ... fetch_mock,
444
+ ... []
445
+ ... )
446
+ >>> s.items_in_order = ["answer.q1"]
447
+ >>> data = s._fetch_data({"answer": ["q1"]})
448
+ >>> data[0]["answer.q1"]
449
+ ['answer-q1-val1', 'answer-q1-val2']
450
+ """
133
451
  new_data = []
134
452
  for data_type, keys in to_fetch.items():
135
453
  for key in keys:
136
454
  entries = self._fetch_list(data_type, key)
137
455
  new_data.append({f"{data_type}.{key}": entries})
138
456
 
457
+ # Ensure items are returned in the order they were requested
139
458
  return [d for key in self.items_in_order for d in new_data if key in d]
140
459
 
141
460
 
@@ -1,3 +1,30 @@
1
- from edsl.scenarios.Scenario import Scenario
2
- from edsl.scenarios.ScenarioList import ScenarioList
3
- from edsl.scenarios.FileStore import FileStore
1
+ """
2
+ The scenarios package provides tools for creating and managing parameterized templates.
3
+
4
+ This package is a core component of EDSL that enables parameterized content through
5
+ key-value dictionaries called Scenarios. These Scenarios can be used to provide variable
6
+ content to questions, surveys, and other components within EDSL.
7
+
8
+ Key components:
9
+ - Scenario: A dictionary-like object for storing key-value pairs to parameterize questions
10
+ - ScenarioList: A collection of Scenario objects with powerful data manipulation capabilities
11
+ - FileStore: A specialized Scenario subclass for handling files of various formats
12
+
13
+ The scenarios package supports various file formats, data sources, and transformations,
14
+ enabling complex experimental designs and data-driven surveys.
15
+
16
+ Example:
17
+ >>> from edsl.scenarios import Scenario, ScenarioList
18
+ >>> # Create a simple scenario
19
+ >>> s1 = Scenario({"product": "coffee", "price": 4.99})
20
+ >>> s2 = Scenario({"product": "tea", "price": 3.50})
21
+ >>> # Create a scenario list
22
+ >>> sl = ScenarioList([s1, s2])
23
+ >>> # Use scenarios to parameterize questions and surveys
24
+ """
25
+
26
+ from .scenario import Scenario
27
+ from .scenario_list import ScenarioList
28
+ from .file_store import FileStore
29
+
30
+ __all__ = ["Scenario", "ScenarioList", "FileStore"]
@@ -6,6 +6,12 @@ class ConstructDownloadLink:
6
6
  """
7
7
  A class to create HTML download links for FileStore objects.
8
8
  The links can be displayed in Jupyter notebooks or other web interfaces.
9
+
10
+ >>> from edsl import FileStore
11
+ >>> fs = FileStore.example("txt")
12
+ >>> link = ConstructDownloadLink(fs)
13
+ >>> link.create_link()
14
+ <IPython.core.display.HTML object>
9
15
  """
10
16
 
11
17
  def __init__(self, filestore):
@@ -98,6 +104,7 @@ class ConstructDownloadLink:
98
104
  )._repr_html_()
99
105
  )
100
106
 
107
+ from IPython.display import HTML
101
108
  return HTML(
102
109
  '<div style="display: flex; gap: 10px;">' + "".join(html_parts) + "</div>"
103
110
  )