edsl 0.1.47__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +303 -67
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +313 -167
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +4 -9
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +365 -220
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/{FileStore.py → file_store.py} +275 -189
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +18 -19
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/METADATA +1 -1
  230. edsl-0.1.48.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -493
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/coop/PriceFetcher.py +0 -54
  236. edsl/data/Cache.py +0 -582
  237. edsl/data/CacheEntry.py +0 -238
  238. edsl/data/SQLiteDict.py +0 -292
  239. edsl/data/__init__.py +0 -5
  240. edsl/data/orm.py +0 -10
  241. edsl/exceptions/cache.py +0 -5
  242. edsl/exceptions/coop.py +0 -14
  243. edsl/exceptions/data.py +0 -14
  244. edsl/exceptions/scenarios.py +0 -29
  245. edsl/jobs/Answers.py +0 -43
  246. edsl/jobs/JobsPrompts.py +0 -354
  247. edsl/jobs/buckets/BucketCollection.py +0 -134
  248. edsl/jobs/buckets/ModelBuckets.py +0 -65
  249. edsl/jobs/buckets/TokenBucket.py +0 -283
  250. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  251. edsl/jobs/interviews/Interview.py +0 -395
  252. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  253. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  254. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  255. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  256. edsl/jobs/tasks/TaskCreators.py +0 -64
  257. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  258. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  259. edsl/language_models/LanguageModel.py +0 -635
  260. edsl/language_models/ServiceDataSources.py +0 -0
  261. edsl/language_models/key_management/KeyLookup.py +0 -63
  262. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  263. edsl/language_models/key_management/models.py +0 -137
  264. edsl/questions/QuestionBase.py +0 -544
  265. edsl/questions/QuestionFreeText.py +0 -130
  266. edsl/questions/derived/QuestionLikertFive.py +0 -76
  267. edsl/results/ResultsExportMixin.py +0 -45
  268. edsl/results/TextEditor.py +0 -50
  269. edsl/results/results_fetch_mixin.py +0 -33
  270. edsl/results/results_tools_mixin.py +0 -98
  271. edsl/scenarios/DocumentChunker.py +0 -104
  272. edsl/scenarios/Scenario.py +0 -548
  273. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  274. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  275. edsl/scenarios/handlers/latex.py +0 -5
  276. edsl/shared.py +0 -1
  277. edsl/surveys/Survey.py +0 -1301
  278. edsl/surveys/SurveyQualtricsImport.py +0 -284
  279. edsl/surveys/SurveyToApp.py +0 -141
  280. edsl/surveys/instructions/__init__.py +0 -0
  281. edsl/tools/__init__.py +0 -1
  282. edsl/tools/clusters.py +0 -192
  283. edsl/tools/embeddings.py +0 -27
  284. edsl/tools/embeddings_plotting.py +0 -118
  285. edsl/tools/plotting.py +0 -112
  286. edsl/tools/summarize.py +0 -18
  287. edsl/utilities/data/Registry.py +0 -6
  288. edsl/utilities/data/__init__.py +0 -1
  289. edsl/utilities/data/scooter_results.json +0 -1
  290. edsl-0.1.47.dist-info/RECORD +0 -354
  291. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  292. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  293. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  294. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  295. /edsl/{results → dataset/display}/table_display.css +0 -0
  296. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  297. /edsl/{results → dataset}/tree_explore.py +0 -0
  298. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  299. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  300. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  301. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  302. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  303. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  304. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  305. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  306. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  307. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  308. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  309. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  310. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  311. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  312. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  313. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
  314. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
edsl/caching/cache.py ADDED
@@ -0,0 +1,814 @@
1
+ """
2
+ Cache implementation for storing and retrieving language model responses.
3
+
4
+ This module provides the Cache class, which is the core component of EDSL's caching system.
5
+ The caching system stores language model responses to avoid redundant API calls,
6
+ reducing costs and latency while improving reproducibility of results.
7
+
8
+ The Cache class handles:
9
+ - Storage and retrieval of model responses via key-based lookups
10
+ - Persistence to and from disk using various formats (.jsonl, .db)
11
+ - Merging and comparing caches from different sources
12
+ - Integration with remote caching systems
13
+
14
+ The primary workflow involves:
15
+ 1. Fetching responses from cache if they exist
16
+ 2. Storing new responses when they don't
17
+ 3. Persisting cache state to disk when needed
18
+
19
+ Cache objects can be used:
20
+ - Directly by the user for explicit cache management
21
+ - Implicitly by the CacheHandler which manages cache selection and migrations
22
+ - In conjunction with remote caching services
23
+
24
+ Implementation Notes:
25
+ - Cache uses CacheEntry objects as its values
26
+ - Keys are hash-based identifiers of the input parameters
27
+ - Multiple storage backends are supported (dict, SQLiteDict)
28
+ """
29
+
30
+ from __future__ import annotations
31
+ import json
32
+ import os
33
+ import warnings
34
+ from typing import Optional, Union, TYPE_CHECKING
35
+ from ..base import Base
36
+
37
+ from ..utilities import remove_edsl_version, dict_hash
38
+ from .exceptions import CacheError
39
+
40
+ class Cache(Base):
41
+ """Cache for storing and retrieving language model responses.
42
+
43
+ The Cache class manages a collection of CacheEntry objects, providing methods for
44
+ storing, retrieving, and persisting language model responses. It serves as the core
45
+ component of EDSL's caching infrastructure, helping to reduce redundant API calls,
46
+ save costs, and ensure reproducibility.
47
+
48
+ Cache can use different storage backends:
49
+ - In-memory dictionary (default)
50
+ - SQLite database via SQLiteDict
51
+ - JSON lines file (.jsonl)
52
+
53
+ The cache operates by generating deterministic keys based on the model, parameters,
54
+ prompts, and iteration number. This allows for efficient lookup of cached responses
55
+ when identical requests are made.
56
+
57
+ Attributes:
58
+ data (dict or SQLiteDict): The primary storage for cache entries
59
+ new_entries (dict): Entries added in the current session
60
+ fetched_data (dict): Entries retrieved in the current session
61
+ filename (str, optional): Path for persistence if provided
62
+ immediate_write (bool): Whether to update data immediately (True) or defer (False)
63
+
64
+ Technical Notes:
65
+ - Can be used as a context manager to automatically persist changes on exit
66
+ - Supports serialization/deserialization via to_dict/from_dict methods
67
+ - Implements set operations (addition, subtraction) for combining caches
68
+ - Integrates with the broader EDSL caching infrastructure via CacheHandler
69
+ """
70
+
71
+ __documentation__ = "https://docs.expectedparrot.com/en/latest/caching.html"
72
+
73
+ data = {}
74
+
75
def __init__(
    self,
    *,
    filename: Optional[str] = None,
    data: Optional[Union["SQLiteDict", dict]] = None,
    immediate_write: bool = True,
    method=None,
    verbose=False,
):
    """Set up a cache, optionally seeded from ``data`` or loaded from ``filename``.

    Args:
        filename: Path to a ``.jsonl`` or ``.db`` file. When given, the cache
            starts from an empty dict and, if the file exists, its entries
            are loaded into it.
        data: Initial storage (dict or SQLiteDict) used as ``self.data`` when
            no filename is given.
        immediate_write: Stored on the instance; ``store`` consults it to
            decide whether new entries go straight into ``self.data``.
        method: Deprecated. Only stored so ``_perform_checks`` can warn.
        verbose: If True, ``fetch`` prints cache hit/miss messages.

    Raises:
        CacheError: If ``filename`` and ``data`` are both truthy, or if
            ``filename`` ends in neither ``.jsonl`` nor ``.db``.
    """
    # Per-session bookkeeping, independent of where the data comes from.
    self.filename = filename
    self.method = method
    self.verbose = verbose
    self.immediate_write = immediate_write
    self.coop = None
    self.fetched_data = {}
    self.new_entries = {}
    self.new_entries_to_write_later = {}

    if filename and data:
        raise CacheError("Cannot provide both filename and data")

    loading_from_file = filename is not None
    if data is None and not loading_from_file:
        data = {}
    if data is not None:
        self.data = data

    if loading_from_file:
        # File-backed caches always start from a fresh in-memory dict.
        self.data = {}
        is_jsonl = filename.endswith(".jsonl")
        if not is_jsonl and not filename.endswith(".db"):
            raise CacheError("Invalid file extension. Must be .jsonl or .db")

        file_exists = os.path.exists(filename)
        if is_jsonl:
            if file_exists:
                self.add_from_jsonl(filename)
            else:
                print(
                    f"File {filename} not found, but will write to this location."
                )
        elif file_exists:
            # A missing .db file is silently treated as an empty cache.
            self.add_from_sqlite(filename)

    self._perform_checks()
145
+
146
def code(self):
    """Placeholder for a code representation of the cache; does nothing.

    Returns:
        None: No code representation is produced.
    """
    # The NotImplementedError raise was left commented out in the original,
    # so this stays a silent no-op for callers that invoke it.
    return None
149
+
150
def keys(self):
    """Return every key currently held in ``self.data`` as a list.

    Returns:
        list: The keys of the underlying storage, materialized into a new list.

    Examples:
        >>> from edsl import Cache
        >>> Cache.example().keys()
        ['5513286eb6967abc0511211f0402587d']
    """
    stored_keys = self.data.keys()
    return [*stored_keys]
164
+
165
def values(self):
    """Return every value currently held in ``self.data`` as a list.

    Returns:
        list: The values of the underlying storage, materialized into a new list.

    Examples:
        >>> from edsl import Cache
        >>> Cache.example().values()
        [CacheEntry(...)]
    """
    stored_values = self.data.values()
    return [*stored_values]
179
+
180
def items(self):
    """Pair each cache key with its value, in the manner of ``dict.items()``.

    The pairs are built by zipping the results of ``self.keys()`` and
    ``self.values()``.

    Returns:
        zip: A one-shot iterator of (key, value) tuples.
    """
    key_list = self.keys()
    value_list = self.values()
    return zip(key_list, value_list)
190
+
191
def new_entries_cache(self) -> Cache:
    """Build a separate Cache holding this session's new and fetched entries.

    Entries from ``self.new_entries`` are combined with those from
    ``self.fetched_data``; on a key collision the fetched entry wins.
    """
    combined = dict(self.new_entries)
    combined.update(self.fetched_data)
    return Cache(data=combined)
194
+
195
def _perform_checks(self):
    """Validate the stored values and warn about the deprecated ``method`` argument.

    Raises:
        CacheError: If any value in ``self.data`` is not a CacheEntry.
    """
    from .cache_entry import CacheEntry

    for stored in self.data.values():
        if not isinstance(stored, CacheEntry):
            raise CacheError("Not all values are CacheEntry instances")

    if self.method is None:
        return
    warnings.warn("Argument `method` is deprecated", DeprecationWarning)
203
+
204
+ ####################
205
+ # READ/WRITE
206
+ ####################
207
def fetch(
    self,
    *,
    model: str,
    parameters: dict,
    system_prompt: str,
    user_prompt: str,
    iteration: int,
) -> tuple[Union[None, str], str]:
    """Retrieve a cached language model response if available.

    The model, parameters, prompts and iteration are combined into a key via
    ``CacheEntry.gen_key``; the entry stored under that key, if any, is
    looked up in ``self.data``.

    Args:
        model: Language model identifier (e.g., "gpt-3.5-turbo")
        parameters: Model configuration parameters (e.g., temperature, max_tokens)
        system_prompt: The system instructions given to the model
        user_prompt: The user query/prompt given to the model
        iteration: The iteration number for this specific request

    Returns:
        tuple: (response, key) where:
            - response: The ``output`` of the cached entry, or None if not found
            - key: The cache key string generated for this request

    Technical Notes:
        - A hit is recorded in ``self.fetched_data`` to track cache usage
        - Hits and misses are printed when ``verbose`` is True

    Examples:
        >>> c = Cache()
        >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello",
        ...     user_prompt="Hi", iteration=1)[0] is None
        True
    """
    from .cache_entry import CacheEntry

    key = CacheEntry.gen_key(
        model=model,
        parameters=parameters,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        iteration=iteration,
    )
    entry = self.data.get(key, None)

    if entry is None:
        if self.verbose:
            print(f"Cache miss for key: {key}")
        return None, key

    if self.verbose:
        print(f"Cache hit for key: {key}")
    # Remember which entries were served from the cache during this session.
    self.fetched_data[key] = entry
    return entry.output, key
264
+
265
def store(
    self,
    model: str,
    parameters: str,
    system_prompt: str,
    user_prompt: str,
    response: dict,
    iteration: int,
    service: str,
) -> str:
    """Wrap a model response in a CacheEntry and record it in the cache.

    Args:
        model: Language model identifier (e.g., "gpt-3.5-turbo")
        parameters: Model configuration parameters (e.g., temperature, max_tokens)
        system_prompt: The system instructions given to the model
        user_prompt: The user query/prompt given to the model
        response: The model's response; serialized with ``json.dumps`` before storage
        iteration: The iteration number for this specific request
        service: The service provider (e.g., "openai", "anthropic")

    Returns:
        str: The ``key`` of the newly created entry

    Storage Behavior:
        - The entry is always recorded in ``self.new_entries``
        - If ``immediate_write`` is True it is also written to ``self.data``
        - Otherwise it is parked in ``self.new_entries_to_write_later``

    Examples:
        >>> from edsl import Cache, Model, Question
        >>> m = Model("test")
        >>> c = Cache()
        >>> len(c)
        0
        >>> results = Question.example("free_text").by(m).run(cache=c,
        ...     disable_remote_cache=True, disable_remote_inference=True)
        >>> len(c)
        1
    """
    from .cache_entry import CacheEntry

    serialized_response = json.dumps(response)
    entry = CacheEntry(
        model=model,
        parameters=parameters,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        output=serialized_response,
        iteration=iteration,
        service=service,
    )
    key = entry.key

    self.new_entries[key] = entry
    # Pick the destination once: the live store or the deferred-write buffer.
    destination = (
        self.data if self.immediate_write else self.new_entries_to_write_later
    )
    destination[key] = entry
    return key
333
+
334
def add_from_dict(
    self, new_data: dict[str, "CacheEntry"], write_now: Optional[bool] = True
) -> None:
    """
    Merge a dictionary of cache entries into this cache.

    All entries are validated before anything is stored, so a failure leaves
    the cache unchanged.

    :param new_data: Mapping of cache key to CacheEntry.
    :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
    :raises CacheError: If a key already exists with a different value, or a value is not a CacheEntry.
    """
    from .cache_entry import CacheEntry

    for key, value in new_data.items():
        if key in self.data and value != self.data[key]:
            raise CacheError("Mismatch in values")
        if not isinstance(value, CacheEntry):
            raise CacheError(f"Wrong type - the observed type is {type(value)}")

    self.new_entries.update(new_data)
    destination = self.data if write_now else self.new_entries_to_write_later
    destination.update(new_data)
356
+
357
def add_from_jsonl(self, filename: str, write_now: Optional[bool] = True) -> None:
    """
    Add entries to the cache from a JSONL file.

    Each non-blank line must be a JSON object whose first item maps a cache
    key to the keyword arguments of a CacheEntry. Blank or whitespace-only
    lines are skipped instead of aborting the load.

    :param filename: Path of the JSONL file to read.
    :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
    """
    from .cache_entry import CacheEntry

    # "a+" is kept for backward compatibility: it creates the file when it
    # does not exist. The stream starts at the end, hence the rewind.
    with open(filename, "a+") as f:
        f.seek(0)
        lines = f.readlines()

    new_data = {}
    for line in lines:
        if not line.strip():
            # json.loads raises on an empty document; tolerate stray blank lines.
            continue
        record = json.loads(line)
        key, value = list(record.items())[0]
        new_data[key] = CacheEntry(**value)
    self.add_from_dict(new_data=new_data, write_now=write_now)
375
+
376
def add_from_sqlite(self, db_path: str, write_now: Optional[bool] = True):
    """
    Load every row of an SQLite-backed dictionary into this cache.

    Each stored value is expanded as keyword arguments to ``CacheEntry``.

    :param db_path: Path handed to ``SQLiteDict``.
    :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
    """
    from .sql_dict import SQLiteDict
    from .cache_entry import CacheEntry

    source = SQLiteDict(db_path)
    entries = {}
    for entry_key, raw_fields in source.items():
        entries[entry_key] = CacheEntry(**raw_fields)
    self.add_from_dict(new_data=entries, write_now=write_now)
390
+
391
@classmethod
def from_sqlite_db(cls, db_path: str) -> Cache:
    """Build a cache whose data store is the SQLiteDict opened at ``db_path``."""
    from .sql_dict import SQLiteDict

    backing_store = SQLiteDict(db_path)
    return cls(data=backing_store)
397
+
398
@classmethod
def from_local_cache(cls) -> Cache:
    """Build a cache from the database configured under ``EDSL_DATABASE_PATH``.

    The configured value has every ``sqlite:///`` occurrence removed and the
    result is passed to ``from_sqlite_db`` as the database path.
    """
    from ..config import CONFIG

    database_url = CONFIG.get("EDSL_DATABASE_PATH")
    local_path = database_url.replace("sqlite:///", "")
    return cls.from_sqlite_db(local_path)
407
+
408
@classmethod
def from_jsonl(cls, jsonlfile: str, db_path: Optional[str] = None) -> Cache:
    """
    Construct a Cache from a JSONL file.

    :param jsonlfile: The path to the JSONL file of cache entries.
    :param db_path: The path to the SQLite database used to store the cache.
    :raises FileNotFoundError: If no file exists at ``jsonlfile``.

    * If `db_path` is None, the cache will be stored in memory, as a dictionary.
    * If `db_path` is provided, the cache will be stored in an SQLite database.

    The instance is created through ``cls`` so that subclasses get an
    instance of their own type, matching the other alternate constructors.
    """
    # Fail early with a clear error when the JSONL file is missing.
    if not os.path.exists(jsonlfile):
        raise FileNotFoundError(f"File {jsonlfile} not found")

    if db_path is None:
        data = {}
    else:
        # Imported lazily: only needed for the SQLite-backed variant.
        from .sql_dict import SQLiteDict

        data = SQLiteDict(db_path)

    cache = cls(data=data)
    cache.add_from_jsonl(jsonlfile)
    return cache
433
+
434
def write_sqlite_db(self, db_path: str) -> None:
    """
    Copy every entry of this cache into the SQLiteDict opened at ``db_path``.

    Entries are assigned one key at a time.
    """
    ## TODO: Check to make sure not over-writing (?)
    ## Should be added to SQLiteDict constructor (?)
    from .sql_dict import SQLiteDict

    target_store = SQLiteDict(db_path)
    for entry_key, entry in self.data.items():
        target_store[entry_key] = entry
445
+
446
def write(self, filename: Optional[str] = None) -> None:
    """
    Write the cache to a file at the specified location.

    The output format is chosen from the file extension: ``.jsonl`` goes to
    ``write_jsonl`` and ``.db`` goes to ``write_sqlite_db``.

    :param filename: Destination path; falls back to ``self.filename`` when None.
    :raises CacheError: If no filename is available, or the extension is
        neither ``.jsonl`` nor ``.db``.
    """
    target = self.filename if filename is None else filename
    if target is None:
        # Without this guard the extension check below fails with an opaque
        # AttributeError on None.
        raise CacheError("No filename provided and the cache has no filename set")

    if target.endswith(".jsonl"):
        self.write_jsonl(target)
    elif target.endswith(".db"):
        self.write_sqlite_db(target)
    else:
        raise CacheError("Invalid file extension. Must be .jsonl or .db")
458
+
459
def write_jsonl(self, filename: str) -> None:
    """
    Dump the cache to a JSONL file, one ``{key: entry_dict}`` object per line.

    The path is joined onto the current working directory and the file is
    opened in "w" mode.
    """
    destination = os.path.join(os.getcwd(), filename)
    with open(destination, "w") as handle:
        for entry_key, entry in self.data.items():
            record = {entry_key: entry.to_dict()}
            handle.write(json.dumps(record) + "\n")
467
+
468
def to_scenario_list(self):
    """Return a ScenarioList with one Scenario per cache entry.

    Each Scenario is built from the entry's ``to_dict()`` output plus a
    ``cache_key`` field holding the key the entry is stored under.
    """
    from ..scenarios import ScenarioList, Scenario

    def as_scenario(cache_key, entry):
        fields = entry.to_dict()
        fields["cache_key"] = cache_key
        return Scenario(fields)

    return ScenarioList([as_scenario(k, v) for k, v in self.data.items()])
478
+
479
def __floordiv__(self, other: "Cache") -> "Cache":
    """Return the entries of this cache whose keys are absent from ``other``.

    The ``//`` operator acts as a key-based set difference: entry contents
    are never compared, only the cache keys.

    Args:
        other: The Cache whose keys are excluded from the result

    Returns:
        Cache: A new Cache built from the remaining entries, created with this
        cache's immediate_write setting

    Raises:
        CacheError: If the provided object is not a Cache instance

    Examples:
        >>> from edsl.caching import CacheEntry
        >>> ce1 = CacheEntry.example(randomize=True)
        >>> ce2 = CacheEntry.example(randomize=True)
        >>> c1 = Cache(data={ce1.key: ce1, ce2.key: ce2})
        >>> c2 = Cache(data={ce1.key: ce1})
        >>> c3 = c1 // c2  # Get entries in c1 that aren't in c2
        >>> len(c3)
        1
        >>> c3.data[ce2.key] == ce2
        True
    """
    if isinstance(other, Cache):
        remaining = {
            entry_key: entry
            for entry_key, entry in self.data.items()
            if entry_key not in other.data
        }
        return Cache(data=remaining, immediate_write=self.immediate_write)
    raise CacheError("Can only compare two caches")
517
+
518
@classmethod
def from_url(cls, db_path=None) -> Cache:
    """
    Construct a Cache object from a remote.

    Not implemented yet: the argument is ignored and None is returned.
    """
    # ...do something here
    # return Cache(data=db)
    return None
526
+
527
def __enter__(self):
    """Enter a ``with`` block, handing back this very cache.

    Typical use:
    ```python
    with Cache(filename="my_cache.db") as cache:
        # Use cache...
    # Changes automatically saved when exiting the context
    ```

    Returns:
        Cache: The same instance the ``with`` statement was opened on
    """
    return self
541
+
542
def __exit__(self, exc_type, exc_value, traceback):
    """Flush deferred entries and persist the cache when a ``with`` block ends.

    Steps, in order:
        1. Every entry in new_entries_to_write_later is copied into self.data
        2. If self.filename is truthy, the cache is written to that file

    Args:
        exc_type: Exception type if an exception was raised in the with block
        exc_value: Exception value if an exception was raised
        traceback: Traceback if an exception was raised

    Technical Notes:
        - The exception arguments are not inspected
        - Nothing is returned, so an exception raised in the with block is
          not suppressed
        - The file format is decided by ``write`` from the filename
    """
    # Step 1: move deferred entries into the main data store
    for pending_key, pending_entry in self.new_entries_to_write_later.items():
        self.data[pending_key] = pending_entry

    # Step 2: persist only when a filename is configured
    if not self.filename:
        return
    self.write(self.filename)
567
+
568
def __hash__(self):
    """Hash the cache via ``dict_hash`` of its version-free dictionary form."""
    serialized = self.to_dict(add_edsl_version=False)
    return dict_hash(serialized)
572
+
573
def to_dict(self, add_edsl_version=True) -> dict:
    """Return a plain-dictionary form of the cache.

    Every stored entry is converted with its own ``to_dict`` method and kept
    under its cache key.

    Args:
        add_edsl_version: If True, the keys "edsl_version" and
            "edsl_class_name" are added alongside the entries

    Returns:
        dict: A dictionary shaped like:
            {
                "key1": {cache_entry1_dict},
                "key2": {cache_entry2_dict},
                ...
                "edsl_version": "x.x.x",  # if add_edsl_version=True
                "edsl_class_name": "Cache"  # if add_edsl_version=True
            }

    Technical Notes:
        - __hash__ calls this with add_edsl_version=False
        - The package version is imported only when it is needed
    """
    serialized = {}
    for cache_key, entry in self.data.items():
        serialized[cache_key] = entry.to_dict()

    if not add_edsl_version:
        return serialized

    from .. import __version__

    serialized["edsl_version"] = __version__
    serialized["edsl_class_name"] = "Cache"
    return serialized
607
+
608
def _summary(self) -> dict:
    """Return a two-field summary: the class label and the entry count."""
    entry_count = len(self.data)
    return {"EDSL Class": "Cache", "Number of entries": entry_count}
610
+
611
def table(
    self,
    *fields,
    tablefmt: Optional[str] = None,
    pretty_labels: Optional[dict] = None,
) -> str:
    """Render the cache as a table by delegating to the dataset's ``table``.

    All positional fields and both keyword options are forwarded unchanged to
    the object returned by ``to_dataset()``.
    """
    dataset = self.to_dataset()
    return dataset.table(*fields, tablefmt=tablefmt, pretty_labels=pretty_labels)
620
+
621
def select(self, *fields):
    """Forward ``fields`` to ``select`` on the dataset form of the cache."""
    dataset = self.to_dataset()
    return dataset.select(*fields)
623
+
624
def tree(self, node_list: Optional[list[str]] = None):
    """Forward ``node_list`` to ``tree`` on the scenario-list form of the cache."""
    scenario_list = self.to_scenario_list()
    return scenario_list.tree(node_list)
626
+
627
def to_dataset(self):
    """Convert the cache to a dataset by way of its scenario-list form."""
    scenario_list = self.to_scenario_list()
    return scenario_list.to_dataset()
629
+
630
@classmethod
@remove_edsl_version
def from_dict(cls, data) -> Cache:
    """Rebuild a Cache from a mapping of cache keys to entry dictionaries.

    Every value is turned back into an entry with ``CacheEntry.from_dict``.
    """
    from .cache_entry import CacheEntry

    entries = {}
    for cache_key, raw_entry in data.items():
        entries[cache_key] = CacheEntry.from_dict(raw_entry)
    return cls(data=entries)
638
+
639
def __len__(self):
    """Return how many entries the underlying data store holds."""
    entry_count = len(self.data)
    return entry_count
642
+
643
# TODO: Same inputs could give different results and this could be useful
# can't distinguish unless we do the ε trick or vary iterations
def __eq__(self, other_cache: "Cache") -> bool:
    """
    Compare two caches by their key sets.

    Entry values are not compared; two caches count as equal when they hold
    exactly the same keys. Anything that is not a Cache compares unequal.
    """
    if isinstance(other_cache, Cache):
        own_keys = set(self.data.keys())
        their_keys = set(other_cache.data.keys())
        return own_keys == their_keys
    return False
653
+
654
def __add__(self, other: "Cache"):
    """Merge another cache's entries into this one and return this cache.

    The ``+`` operator works like a set union on cache keys, but it mutates
    the left operand instead of building a new cache.

    Args:
        other: The Cache whose entries are copied into this one

    Returns:
        Cache: Self, after the merge

    Raises:
        CacheError: If the provided object is not a Cache instance

    Technical Notes:
        - In-place, unlike __floordiv__ which returns a new cache
        - For keys present in both caches, the entry from ``other`` replaces
          this cache's entry
        - Conflicting entries are not detected or reported

    Examples:
        >>> from edsl.caching import CacheEntry
        >>> ce1 = CacheEntry.example(randomize=True)
        >>> ce2 = CacheEntry.example(randomize=True)
        >>> c1 = Cache(data={ce1.key: ce1})
        >>> initial_len = len(c1)
        >>> c2 = Cache(data={ce2.key: ce2})
        >>> result = c1 + c2  # Add c2's entries to c1
        >>> len(c1) > initial_len  # Should have more entries now
        True
    """
    if isinstance(other, Cache):
        own_store = self.data
        own_store.update(other.data)
        return self
    raise CacheError("Can only add two caches together")
691
+
692
def __repr__(self):
    """
    Return a string showing the repr of the data store and immediate_write.
    """
    data_repr = repr(self.data)
    return f"Cache(data = {data_repr}, immediate_write={self.immediate_write})"
699
+
700
+ ####################
701
+ # EXAMPLES
702
+ ####################
703
def fetch_input_example(self) -> dict:
    """
    Return the example 'fetch' input provided by ``CacheEntry.fetch_input_example``.
    """
    from .cache_entry import CacheEntry

    example_input = CacheEntry.fetch_input_example()
    return example_input
710
+
711
def to_html(self):
    """Return a standalone HTML page that displays the cache as JSON.

    Every entry is converted with ``to_dict``; each field value is turned
    into a string, and dict-valued fields have each of their values turned
    into a string. The result is embedded in a ``<script>`` element and
    rendered into a ``<pre>`` tag in the browser.

    The characters ``<``, ``>`` and ``&`` are emitted as JSON unicode escapes,
    so stored text such as ``</script>`` cannot close the script element or
    inject markup. The browser decodes the escapes back to the original text.

    Returns:
        str: The HTML document
    """

    def stringify(value):
        # Keep one level of dict structure; everything else becomes a string.
        if isinstance(value, dict):
            return {kk: str(vv) for kk, vv in value.items()}
        return str(value)

    printable = {
        key: {field: stringify(v) for field, v in entry.to_dict().items()}
        for key, entry in self.data.items()
    }

    json_str = json.dumps(printable, indent=4)
    # In JSON these characters can only occur inside string literals, where
    # the \uXXXX form is an equivalent spelling that is inert to the HTML parser.
    json_str = (
        json_str.replace("&", "\\u0026")
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
    )

    # HTML template with the JSON string embedded
    html = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Display JSON</title>
    </head>
    <body>
        <pre id="jsonData"></pre>
        <script>
            var json = {json_str};

            // JSON.stringify with spacing to format
            document.getElementById('jsonData').textContent = JSON.stringify(json, null, 4);
        </script>
    </body>
    </html>
    """
    return html
742
+
743
def subset(self, keys: list[str]) -> Cache:
    """
    Return a subset of the Cache with the specified keys.

    Requested keys that are not in the cache are ignored. Entries keep the
    order they have in this cache's data store.

    :param keys: The cache keys to keep.
    """
    # Build the lookup set once so each membership test is O(1) rather than
    # a scan of the whole list for every entry.
    wanted = set(keys)
    new_data = {k: v for k, v in self.data.items() if k in wanted}
    return Cache(data=new_data)
749
+
750
def view(self) -> None:
    """View the Cache in a new browser tab.

    The HTML from ``to_html`` is written to a temporary ``.html`` file, which
    is left on disk (``delete=False``) so the browser can load it after this
    method returns.
    """
    import tempfile
    import webbrowser
    from pathlib import Path

    html_content = self.to_html()
    # Create a temporary file to hold the HTML
    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html") as tmpfile:
        tmpfile.write(html_content)
        # Get the path to the temporary file
        filepath = tmpfile.name

    # Build a proper file URI: plain "file://" + path is malformed for Windows
    # paths and for paths containing characters that need percent-encoding.
    webbrowser.open(Path(filepath).resolve().as_uri())
764
+
765
@classmethod
def example(cls, randomize: bool = False) -> Cache:
    """Create an example Cache instance for testing and demonstration.

    Two example entries are requested from ``CacheEntry.example`` and each is
    stored under its own ``key``, so every stored key matches its entry.

    Args:
        randomize: Passed through to ``CacheEntry.example``. If True, the
            entries are created with randomized content; if False, the
            standard example entry is used.

    Returns:
        Cache: A new Cache object containing example CacheEntry objects

    Technical Notes:
        - When the two example entries share a key they collapse into a
          single stored entry
        - The cache is built from a plain dictionary (no filename is passed)

    Examples:
        >>> cache = Cache.example()
        >>> len(cache) > 0
        True
        >>> from edsl.caching.cache_entry import CacheEntry
        >>> all(isinstance(entry, CacheEntry) for entry in cache.values())
        True

        >>> # Create examples with randomized content
        >>> cache1 = Cache.example(randomize=True)
        >>> cache2 = Cache.example(randomize=True)
        >>> len(cache1) > 0 and len(cache2) > 0
        True
    """
    from .cache_entry import CacheEntry

    # Create each entry once and reuse it for both key and value; taking the
    # key from one example and the value from another stores randomized
    # entries under keys that do not belong to them.
    entries = [CacheEntry.example(randomize) for _ in range(2)]
    return cls(data={entry.key: entry for entry in entries})
809
+
810
+
811
if __name__ == "__main__":
    from doctest import ELLIPSIS, testmod

    # Run this module's docstring examples; ELLIPSIS lets "..." in expected
    # output match any text.
    testmod(optionflags=ELLIPSIS)