edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +430 -113
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/dataset/dataset_operations_mixin.py +1492 -0
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +321 -155
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +10 -16
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +420 -216
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/file_store.py +755 -0
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +20 -21
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
  230. edsl-0.1.48.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -426
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/auto/AutoStudy.py +0 -130
  236. edsl/auto/StageBase.py +0 -243
  237. edsl/auto/StageGenerateSurvey.py +0 -178
  238. edsl/auto/StageLabelQuestions.py +0 -125
  239. edsl/auto/StagePersona.py +0 -61
  240. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  241. edsl/auto/StagePersonaDimensionValues.py +0 -74
  242. edsl/auto/StagePersonaDimensions.py +0 -69
  243. edsl/auto/StageQuestions.py +0 -74
  244. edsl/auto/SurveyCreatorPipeline.py +0 -21
  245. edsl/auto/utilities.py +0 -218
  246. edsl/base/Base.py +0 -279
  247. edsl/coop/PriceFetcher.py +0 -54
  248. edsl/data/Cache.py +0 -580
  249. edsl/data/CacheEntry.py +0 -230
  250. edsl/data/SQLiteDict.py +0 -292
  251. edsl/data/__init__.py +0 -5
  252. edsl/data/orm.py +0 -10
  253. edsl/exceptions/cache.py +0 -5
  254. edsl/exceptions/coop.py +0 -14
  255. edsl/exceptions/data.py +0 -14
  256. edsl/exceptions/scenarios.py +0 -29
  257. edsl/jobs/Answers.py +0 -43
  258. edsl/jobs/JobsPrompts.py +0 -354
  259. edsl/jobs/buckets/BucketCollection.py +0 -134
  260. edsl/jobs/buckets/ModelBuckets.py +0 -65
  261. edsl/jobs/buckets/TokenBucket.py +0 -283
  262. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  263. edsl/jobs/interviews/Interview.py +0 -395
  264. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  265. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  266. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  267. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  268. edsl/jobs/tasks/TaskCreators.py +0 -64
  269. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  270. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  271. edsl/language_models/LanguageModel.py +0 -635
  272. edsl/language_models/ServiceDataSources.py +0 -0
  273. edsl/language_models/key_management/KeyLookup.py +0 -63
  274. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  275. edsl/language_models/key_management/models.py +0 -137
  276. edsl/questions/QuestionBase.py +0 -539
  277. edsl/questions/QuestionFreeText.py +0 -130
  278. edsl/questions/derived/QuestionLikertFive.py +0 -76
  279. edsl/results/DatasetExportMixin.py +0 -911
  280. edsl/results/ResultsExportMixin.py +0 -45
  281. edsl/results/TextEditor.py +0 -50
  282. edsl/results/results_fetch_mixin.py +0 -33
  283. edsl/results/results_tools_mixin.py +0 -98
  284. edsl/scenarios/DocumentChunker.py +0 -104
  285. edsl/scenarios/FileStore.py +0 -564
  286. edsl/scenarios/Scenario.py +0 -548
  287. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  288. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  289. edsl/scenarios/handlers/latex.py +0 -5
  290. edsl/shared.py +0 -1
  291. edsl/surveys/Survey.py +0 -1306
  292. edsl/surveys/SurveyQualtricsImport.py +0 -284
  293. edsl/surveys/SurveyToApp.py +0 -141
  294. edsl/surveys/instructions/__init__.py +0 -0
  295. edsl/tools/__init__.py +0 -1
  296. edsl/tools/clusters.py +0 -192
  297. edsl/tools/embeddings.py +0 -27
  298. edsl/tools/embeddings_plotting.py +0 -118
  299. edsl/tools/plotting.py +0 -112
  300. edsl/tools/summarize.py +0 -18
  301. edsl/utilities/data/Registry.py +0 -6
  302. edsl/utilities/data/__init__.py +0 -1
  303. edsl/utilities/data/scooter_results.json +0 -1
  304. edsl-0.1.46.dist-info/RECORD +0 -366
  305. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  306. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  307. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  308. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  309. /edsl/{results → dataset/display}/table_display.css +0 -0
  310. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  311. /edsl/{results → dataset}/tree_explore.py +0 -0
  312. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  313. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  314. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  315. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  316. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  317. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  318. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  319. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  320. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  321. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  322. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  323. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  324. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  325. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  326. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  327. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
  328. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -0,0 +1,441 @@
1
+ """
2
+ SQLite-backed dictionary implementation for persistent storage of cache entries.
3
+
4
+ This module provides a dictionary-like interface to an SQLite database, which allows
5
+ for efficient, persistent storage of cache entries. SQLiteDict implements standard
6
+ dictionary methods like __getitem__, __setitem__, keys(), values(), and items(),
7
+ making it a drop-in replacement for regular dictionaries but with database persistence.
8
+ """
9
+
10
+ from __future__ import annotations
11
+ import json
12
+ from typing import Any, Generator, Optional, Union, Dict, List, Tuple, TypeVar
13
+
14
+ from ..config import CONFIG
15
+ from .cache_entry import CacheEntry
16
+ from .orm import Base, Data
17
+
18
+ T = TypeVar('T')
19
+
20
+
21
class SQLiteDict:
    """
    Dictionary-like interface for SQLite database storage of cache entries.

    SQLiteDict exposes the core mapping protocol (item get/set/delete,
    membership, iteration, ``len``, ``get``, ``update``, ``keys``, ``values``
    and ``items``) on top of an SQLite database accessed through SQLAlchemy.
    Values must be CacheEntry objects; they are stored as JSON text (via
    ``CacheEntry.to_dict``) and rebuilt with ``CacheEntry.from_dict`` on read.

    Attributes:
        db_path (str): SQLAlchemy URL of the database; always starts with
            ``sqlite:///`` after construction
        engine: SQLAlchemy engine instance for database access
        Session: SQLAlchemy sessionmaker for creating database sessions

    Example:
        >>> temp_db_path = SQLiteDict._get_temp_path()
        >>> cache = SQLiteDict(temp_db_path)
        >>> entry = CacheEntry.example()
        >>> cache[entry.key] = entry
        >>> retrieved_entry = cache[entry.key]
        >>> entry == retrieved_entry
        True
        >>> import os; os.unlink(temp_db_path)  # Clean up temp file
    """

    def __init__(self, db_path: Optional[str] = None):
        """
        Initializes a SQLiteDict with the specified database path.

        Args:
            db_path: Path (or ``sqlite:///`` URL) of the SQLite database file.
                If None or empty, ``CONFIG.get("EDSL_DATABASE_PATH")`` is used.
                A missing ``sqlite:///`` prefix is added automatically.

        Raises:
            Exception: If SQLAlchemy fails to initialize the database; the
                original SQLAlchemyError is chained as the cause

        Example:
            >>> temp_db_path = SQLiteDict._get_temp_path()
            >>> SQLiteDict(f"sqlite:///{temp_db_path}")  # doctest: +ELLIPSIS
            SQLiteDict(db_path='...')
            >>> import os; os.unlink(temp_db_path)  # Clean up the temp file after the test
        """
        # SQLAlchemy is imported lazily, only when an instance is created.
        from sqlalchemy.exc import SQLAlchemyError
        from sqlalchemy.orm import sessionmaker
        from sqlalchemy import create_engine

        self.db_path = db_path or CONFIG.get("EDSL_DATABASE_PATH")
        if not self.db_path.startswith("sqlite:///"):
            self.db_path = f"sqlite:///{self.db_path}"
        try:
            self.engine = create_engine(self.db_path, echo=False, future=True)
            # Create the tables declared on Base if they do not exist yet.
            Base.metadata.create_all(self.engine)
            self.Session = sessionmaker(bind=self.engine)
        except SQLAlchemyError as e:
            # Report the resolved URL: the raw argument may be None when the
            # path came from CONFIG.
            raise Exception(
                f"""Database initialization error: {e}. The attempted DB path was {self.db_path}"""
            ) from e

    @classmethod
    def _get_temp_path(cls) -> str:
        """
        Creates a temporary file and returns its path for use as a SQLite database.

        The file is created on disk (empty) and is NOT deleted automatically;
        callers are responsible for removing it. Primarily used for testing
        and examples.

        Returns:
            Path to the newly created temporary ``.db`` file
        """
        import os
        import tempfile

        fd, temp_db_path = tempfile.mkstemp(suffix=".db")
        # mkstemp hands back an open OS-level handle; close it so the
        # descriptor is not leaked (only the path is needed).
        os.close(fd)
        return temp_db_path

    def __setitem__(self, key: str, value: CacheEntry) -> None:
        """
        Stores a CacheEntry object at the specified key.

        The value is serialized to JSON before storage. An existing row with
        the same key is replaced (``Session.merge``).

        Args:
            key: The key to store the value under
            value: The CacheEntry object to store

        Raises:
            ValueError: If the value is not a CacheEntry object

        Example:
            >>> d = SQLiteDict.example()
            >>> d["foo"] = CacheEntry.example()
        """
        if not isinstance(value, CacheEntry):
            raise ValueError(f"Value must be a CacheEntry object (got {type(value)}).")
        with self.Session() as db:
            db.merge(Data(key=key, value=json.dumps(value.to_dict())))
            db.commit()

    def __getitem__(self, key: str) -> CacheEntry:
        """
        Retrieves a CacheEntry object for the specified key.

        Args:
            key: The key to retrieve the value for

        Returns:
            The CacheEntry rebuilt from the JSON stored at the specified key

        Raises:
            KeyError: If the key is not found in the database

        Example:
            >>> d = SQLiteDict.example()
            >>> d["foo"] = CacheEntry.example()
            >>> d["foo"] == CacheEntry.example()
            True
        """
        with self.Session() as db:
            row = db.query(Data).filter_by(key=key).first()
            if row is None:
                raise KeyError(f"Key '{key}' not found.")
            return CacheEntry.from_dict(json.loads(row.value))

    def get(self, key: str, default: Optional[Any] = None) -> Union[CacheEntry, Any]:
        """
        Retrieves a value for the specified key with a default fallback.

        Args:
            key: The key to retrieve the value for
            default: The value to return if the key is not found (default: None)

        Returns:
            The CacheEntry for the key if found, otherwise the default value

        Example:
            >>> d = SQLiteDict.example()
            >>> d.get("foo", "bar")
            'bar'
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __bool__(self) -> bool:
        """
        Always returns True for SQLiteDict instances.

        Without this method, truthiness would fall back to ``__len__`` and an
        empty store would be falsy; being always truthy lets patterns like
        ``cache = cache or SQLiteDict()`` keep an existing (empty) instance.

        Returns:
            Always True for any SQLiteDict instance
        """
        return True

    def update(
        self,
        new_d: Union[Dict[str, CacheEntry], SQLiteDict],
        overwrite: bool = False,
        max_batch_size: int = 100,
    ) -> None:
        """
        Updates the dictionary with values from another dictionary or SQLiteDict.

        Args:
            new_d: The dictionary or SQLiteDict containing entries to add
            overwrite: If True, overwrites existing entries; if False, keeps
                existing entries unchanged (default: False)
            max_batch_size: Number of visited entries after which the pending
                changes are committed (default: 100)

        Raises:
            ValueError: If new_d is not a dict or SQLiteDict

        Example:
            >>> d = SQLiteDict.example()
            >>> d.update({"foo": CacheEntry.example()})
            >>> d["foo"] == CacheEntry.example()
            True
        """
        if not isinstance(new_d, (dict, SQLiteDict)):
            raise ValueError(
                f"new_d must be a dict or SQLiteDict object (got {type(new_d)})"
            )
        current_batch = 0
        with self.Session() as db:
            for key, value in new_d.items():
                # Commit periodically so a large update is split into several
                # smaller transactions.
                if current_batch == max_batch_size:
                    db.commit()
                    current_batch = 0
                current_batch += 1
                # Only merge if overwrite is True or the key doesn't exist;
                # the membership query is skipped entirely when overwriting.
                if overwrite or key not in self:
                    db.merge(Data(key=key, value=json.dumps(value.to_dict())))
            db.commit()

    def values(self) -> Generator[CacheEntry, None, None]:
        """
        Returns a generator that yields the values in the cache.

        >>> d = SQLiteDict.example()
        >>> d["foo"] = CacheEntry.example()
        >>> list(d.values()) == [CacheEntry.example()]
        True
        """
        with self.Session() as db:
            for row in db.query(Data).all():
                yield CacheEntry.from_dict(json.loads(row.value))

    def items(self) -> Generator[tuple[str, CacheEntry], None, None]:
        """
        Returns a generator that yields the (key, CacheEntry) items in the cache.

        >>> d = SQLiteDict.example()
        >>> d["foo"] = CacheEntry.example()
        >>> list(d.items()) == [("foo", CacheEntry.example())]
        True
        """
        with self.Session() as db:
            for row in db.query(Data).all():
                yield (row.key, CacheEntry.from_dict(json.loads(row.value)))

    def to_dict(self) -> Dict[str, CacheEntry]:
        """
        Returns the cache as a plain in-memory dictionary.

        >>> d = SQLiteDict.example()
        >>> d["foo"] = CacheEntry.example()
        >>> d.to_dict() == {"foo": CacheEntry.example()}
        True
        """
        return dict(self.items())

    def __delitem__(self, key: str) -> None:
        """
        Deletes the value for a given key.

        Raises:
            KeyError: If the key is not found in the database

        >>> d = SQLiteDict.example()
        >>> d["foo"] = CacheEntry.example()
        >>> del d["foo"]
        >>> d.get("foo", "missing")
        'missing'
        """
        with self.Session() as db:
            row = db.query(Data).filter_by(key=key).one_or_none()
            if row is None:
                raise KeyError(f"Key '{key}' not found.")
            db.delete(row)
            db.commit()

    def __contains__(self, key: str) -> bool:
        """
        Checks if the dict contains the given key.

        >>> d = SQLiteDict.example()
        >>> d["foo"] = CacheEntry.example()
        >>> "foo" in d
        True
        >>> "bar" in d
        False
        """
        with self.Session() as db:
            return db.query(Data).filter_by(key=key).first() is not None

    def __iter__(self) -> Generator[str, None, None]:
        """
        Returns a generator that yields the keys in the dict.

        >>> d = SQLiteDict.example()
        >>> d["foo"] = CacheEntry.example()
        >>> list(iter(d)) == ["foo"]
        True
        """
        with self.Session() as db:
            for row in db.query(Data).all():
                yield row.key

    def __len__(self) -> int:
        """
        Returns the number of items in the cache.

        >>> d = SQLiteDict.example()
        >>> len(d)
        0
        >>> d["foo"] = CacheEntry.example()
        >>> len(d)
        1
        """
        with self.Session() as db:
            return db.query(Data).count()

    def keys(self) -> Generator[str, None, None]:
        """
        Returns a generator that yields the keys in the cache.

        >>> d = SQLiteDict.example()
        >>> d["foo"] = CacheEntry.example()
        >>> list(d.keys()) == ["foo"]
        True
        """
        return iter(self)

    def __repr__(self) -> str:
        """Returns ``ClassName(db_path=...)`` using the resolved database URL."""
        return f"{self.__class__.__name__}(db_path={self.db_path!r})"

    @classmethod
    def example(cls) -> SQLiteDict:
        """
        Creates an in-memory SQLiteDict for examples and testing.

        Returns:
            A new SQLiteDict instance using the ``sqlite:///:memory:`` URL,
            so no database file is created

        Example:
            >>> SQLiteDict.example()
            SQLiteDict(db_path='sqlite:///:memory:')
        """
        return cls(db_path="sqlite:///:memory:")
378
+
379
+
380
def main() -> None:
    """
    Demonstrates SQLiteDict functionality for interactive testing.

    Walks through storing, retrieving, updating, inspecting and deleting
    entries of an in-memory SQLiteDict, printing each step to stdout.

    Note:
        This function is intended to be run in an interactive Python session
        for exploration and testing, not as part of normal code execution.
        It relies on the module-level ``CacheEntry`` import and the
        ``SQLiteDict`` class defined in this module (no self-import needed).
    """
    # Create an in-memory SQLiteDict for demonstration
    print("Creating an in-memory SQLiteDict...")
    d = SQLiteDict.example()

    # Store and retrieve a value
    print("Storing and retrieving a value...")
    d["foo"] = CacheEntry.example()
    print(f"Retrieved value: {d['foo']}")

    # Demonstrate get() with existing and non-existing keys
    print("Demonstrating get() with existing and non-existing keys...")
    print(f"Get existing key: {d.get('foo')}")
    print(f"Get non-existing key: {d.get('poo')}")
    print(f"Get non-existing key with default: {d.get('poo', 'not found')}")

    # Update the dictionary
    print("Updating the dictionary...")
    d.update({"poo": CacheEntry.example()})
    print(f"After update, retrieved value: {d['poo']}")

    # Dictionary operations
    print("Demonstrating dictionary operations...")
    print(f"Length: {len(d)}")
    print(f"Keys: {list(d.keys())}")
    print(f"Values: {list(d.values())}")
    print(f"Items: {list(d.items())}")

    # Membership testing
    print("Demonstrating membership testing...")
    print(f"'poo' in d: {'poo' in d}")
    print(f"'loo' in d: {'loo' in d}")

    # Deletion
    print("Demonstrating deletion...")
    del d["poo"]
    print(f"After deletion, length: {len(d)}")

    # Representation
    print("Demonstrating string representation...")
    print(f"repr(d): {repr(d)}")
    print(f"d: {d}")
436
+
437
+
438
if __name__ == "__main__":
    import doctest

    # Some doctests in this module elide values with "..." in their expected
    # output (e.g. the temp-file path in a repr), which only matches when the
    # ELLIPSIS option is enabled.
    doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -0,0 +1,8 @@
1
+ """Configuration module for EDSL.
2
+
3
+ This module provides a Config class that loads environment variables from a .env file and sets them as class attributes.
4
+ """
5
+
6
+ from edsl.config.config_class import Config, CONFIG, CONFIG_MAP, EDSL_RUN_MODES, cache_dir
7
+
8
+ __all__ = ["Config", "CONFIG", "CONFIG_MAP", "EDSL_RUN_MODES", "cache_dir"]
@@ -0,0 +1,177 @@
1
+ """This module provides a Config class that loads environment variables from a .env file and sets them as class attributes."""
2
+
3
+ import os
4
+ import platformdirs
5
+ from dotenv import load_dotenv, find_dotenv
6
+ from edsl.exceptions.configuration import (
7
+ InvalidEnvironmentVariableError,
8
+ MissingEnvironmentVariableError,
9
+ )
10
+
11
# Per-user cache directory for EDSL. It is created at import time so that
# later code can rely on the directory already existing.
cache_dir = platformdirs.user_cache_dir(appname="edsl")
os.makedirs(name=cache_dir, exist_ok=True)
13
+
14
+ # valid values for EDSL_RUN_MODE
15
# Config._set_run_mode rejects any EDSL_RUN_MODE value that is not listed here.
EDSL_RUN_MODES = ["development", "development-testrun", "production"]
20
+
21
+ # `default` is used to impute values only in "production" mode
22
+ # `info` gives a brief description of the env var
23
# Registry of every configuration variable EDSL recognises.
# Each entry maps the env var name to:
#   "default" - the value Config falls back to (only in "production" mode)
#   "info"    - a short description, appended to the "not set" error message
# All defaults are strings, mirroring what os.getenv would return.
CONFIG_MAP = {
    "EDSL_RUN_MODE": {
        "default": "production",
        "info": "This config var determines the run mode of the application.",
    },
    "EDSL_API_TIMEOUT": {
        "default": "60",
        "info": "This config var determines the maximum number of seconds to wait for an API call to return.",
    },
    "EDSL_BACKOFF_START_SEC": {
        "default": "1",
        "info": "This config var determines the number of seconds to wait before retrying a failed API call.",
    },
    "EDSL_BACKOFF_MAX_SEC": {
        "default": "60",
        "info": "This config var determines the maximum number of seconds to wait before retrying a failed API call.",
    },
    "EDSL_DATABASE_PATH": {
        # SQLite URL pointing at a database file inside the per-user cache directory.
        "default": f"sqlite:///{os.path.join(platformdirs.user_cache_dir('edsl'), 'lm_model_calls.db')}",
        "info": "This config var determines the path to the cache file.",
    },
    "EDSL_DEFAULT_MODEL": {
        "default": "gpt-4o",
        "info": "This config var holds the default model that will be used if a model is not explicitly passed.",
    },
    "EDSL_FETCH_TOKEN_PRICES": {
        "default": "True",
        "info": "This config var determines whether to fetch prices for tokens used in remote inference",
    },
    "EDSL_MAX_ATTEMPTS": {
        "default": "5",
        "info": "This config var determines the maximum number of times to retry a failed API call.",
    },
    "EDSL_SERVICE_RPM_BASELINE": {
        "default": "100",
        # Fixed: the original text had a stray period ("override this. value").
        "info": "This config var holds the maximum number of requests per minute. Model-specific values provided in env vars such as EDSL_SERVICE_RPM_OPENAI will override this value for the corresponding model.",
    },
    "EDSL_SERVICE_TPM_BASELINE": {
        "default": "2000000",
        "info": "This config var holds the maximum number of tokens per minute for all models. Model-specific values provided in env vars such as EDSL_SERVICE_TPM_OPENAI will override this value for the corresponding model.",
    },
    "EXPECTED_PARROT_URL": {
        "default": "https://www.expectedparrot.com",
        "info": "This config var holds the URL of the Expected Parrot API.",
    },
    "EDSL_MAX_CONCURRENT_TASKS": {
        "default": "500",
        "info": "This config var determines the maximum number of concurrent tasks that can be run by the async job-runner",
    },
    "EDSL_OPEN_EXCEPTION_REPORT_URL": {
        "default": "False",
        "info": "This config var determines whether to open the exception report URL in the browser",
    },
    "EDSL_REMOTE_TOKEN_BUCKET_URL": {
        # Note: the default is the literal string "None", not the None object.
        "default": "None",
        "info": "This config var holds the URL of the remote token bucket server.",
    },
}
82
+
83
+
84
class Config:
    """Expose EDSL configuration values as instance attributes.

    Construction happens in three steps: the run mode is resolved from the
    process environment, the ``.env`` file is loaded into the environment,
    and every variable listed in ``CONFIG_MAP`` is copied onto the instance.
    """

    def __init__(self):
        """Resolve the run mode, load the .env file, and populate the attributes."""
        # Order matters: _load_dotenv and _set_env_vars both read self.EDSL_RUN_MODE.
        self._set_run_mode()
        self._load_dotenv()
        self._set_env_vars()

    def show_path_to_dot_env(self):
        """Print the path returned by ``find_dotenv(usecwd=True)``."""
        print(find_dotenv(usecwd=True))

    def _set_run_mode(self) -> None:
        """Set ``EDSL_RUN_MODE`` as an instance attribute.

        The value comes from the process environment; when the variable is
        unset, the default declared in ``CONFIG_MAP`` is used instead.

        Raises:
            InvalidEnvironmentVariableError: If the resolved value is not
                listed in ``EDSL_RUN_MODES``.
        """
        run_mode = os.getenv("EDSL_RUN_MODE")
        if run_mode is None:
            run_mode = CONFIG_MAP["EDSL_RUN_MODE"]["default"]
        if run_mode not in EDSL_RUN_MODES:
            raise InvalidEnvironmentVariableError(
                f"Value `{run_mode}` is not allowed for EDSL_RUN_MODE."
            )
        self.EDSL_RUN_MODE = run_mode

    def _load_dotenv(self) -> None:
        """Load the .env file into the process environment.

        Values from the .env override existing env vars **unless**
        ``EDSL_RUN_MODE == "development-testrun"``.
        """
        override = self.EDSL_RUN_MODE != "development-testrun"
        load_dotenv(dotenv_path=find_dotenv(usecwd=True), override=override)

    def __contains__(self, env_var: str) -> bool:
        """Return True if ``env_var`` is set as an attribute on this instance."""
        return env_var in self.__dict__

    def _set_env_vars(self) -> None:
        """Copy the env vars named in ``CONFIG_MAP`` onto this instance.

        - ``EDSL_RUN_MODE`` is not set by this method, but by ``_set_run_mode``.
        - A variable with a non-empty value in the environment is stored as is.
        - Otherwise the ``CONFIG_MAP`` default is stored, but only when the
          run mode is "production"; in other modes the attribute stays unset.
        """
        for env_var, config in CONFIG_MAP.items():
            # EDSL_RUN_MODE is already set by _set_run_mode
            if env_var == "EDSL_RUN_MODE":
                continue
            value = os.getenv(env_var)
            # Truthiness check: an empty string is treated the same as unset.
            if value:
                setattr(self, env_var, value)
            elif self.EDSL_RUN_MODE == "production":
                setattr(self, env_var, config.get("default"))

    def get(self, env_var: str) -> str:
        """Return the value stored for ``env_var``.

        Raises:
            InvalidEnvironmentVariableError: If ``env_var`` is not a key of
                ``CONFIG_MAP``.
            MissingEnvironmentVariableError: If ``env_var`` is a known
                variable but has not been set on this instance.
        """
        if env_var not in CONFIG_MAP:
            raise InvalidEnvironmentVariableError(f"{env_var} is not a valid env var. ")
        if env_var not in self.__dict__:
            info = CONFIG_MAP[env_var].get("info")
            raise MissingEnvironmentVariableError(f"{env_var} is not set. {info}")
        return self.__dict__[env_var]

    def __iter__(self):
        """Iterate over the names of the currently set variables."""
        return iter(self.__dict__)

    def items(self):
        """Return a view of ``(name, value)`` pairs for the currently set variables."""
        return self.__dict__.items()

    def show(self) -> None:
        """Print the currently set variables, one aligned ``name : value`` per line."""
        # default=0 keeps max() from raising ValueError when nothing is set.
        name_width = max((len(name) for name in self.__dict__), default=0)
        print("Here are the current configuration settings:")
        for name, value in self.__dict__.items():
            print(f"{name:<{name_width}} : {value}")
173
+
174
+
175
+ # Note: Python executes a module body once and caches the module in
176
+ # sys.modules, so every importer shares this single Config instance.
177
+ CONFIG = Config()