edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334)
  1. edsl/Base.py +332 -385
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +49 -57
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +867 -1079
  7. edsl/agents/AgentList.py +413 -551
  8. edsl/agents/Invigilator.py +233 -285
  9. edsl/agents/InvigilatorBase.py +270 -254
  10. edsl/agents/PromptConstructor.py +354 -252
  11. edsl/agents/__init__.py +3 -2
  12. edsl/agents/descriptors.py +99 -99
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +279 -279
  26. edsl/config.py +157 -177
  27. edsl/conversation/Conversation.py +290 -290
  28. edsl/conversation/car_buying.py +58 -59
  29. edsl/conversation/chips.py +95 -95
  30. edsl/conversation/mug_negotiation.py +81 -81
  31. edsl/conversation/next_speaker_utilities.py +93 -93
  32. edsl/coop/PriceFetcher.py +54 -54
  33. edsl/coop/__init__.py +2 -2
  34. edsl/coop/coop.py +1028 -1090
  35. edsl/coop/utils.py +131 -131
  36. edsl/data/Cache.py +555 -562
  37. edsl/data/CacheEntry.py +233 -230
  38. edsl/data/CacheHandler.py +149 -170
  39. edsl/data/RemoteCacheSync.py +78 -78
  40. edsl/data/SQLiteDict.py +292 -292
  41. edsl/data/__init__.py +4 -5
  42. edsl/data/orm.py +10 -10
  43. edsl/data_transfer_models.py +73 -74
  44. edsl/enums.py +175 -195
  45. edsl/exceptions/BaseException.py +21 -21
  46. edsl/exceptions/__init__.py +54 -54
  47. edsl/exceptions/agents.py +42 -54
  48. edsl/exceptions/cache.py +5 -5
  49. edsl/exceptions/configuration.py +16 -16
  50. edsl/exceptions/coop.py +10 -10
  51. edsl/exceptions/data.py +14 -14
  52. edsl/exceptions/general.py +34 -34
  53. edsl/exceptions/jobs.py +33 -33
  54. edsl/exceptions/language_models.py +63 -63
  55. edsl/exceptions/prompts.py +15 -15
  56. edsl/exceptions/questions.py +91 -109
  57. edsl/exceptions/results.py +29 -29
  58. edsl/exceptions/scenarios.py +22 -29
  59. edsl/exceptions/surveys.py +37 -37
  60. edsl/inference_services/AnthropicService.py +87 -84
  61. edsl/inference_services/AwsBedrock.py +120 -118
  62. edsl/inference_services/AzureAI.py +217 -215
  63. edsl/inference_services/DeepInfraService.py +18 -18
  64. edsl/inference_services/GoogleService.py +148 -139
  65. edsl/inference_services/GroqService.py +20 -20
  66. edsl/inference_services/InferenceServiceABC.py +147 -80
  67. edsl/inference_services/InferenceServicesCollection.py +97 -122
  68. edsl/inference_services/MistralAIService.py +123 -120
  69. edsl/inference_services/OllamaService.py +18 -18
  70. edsl/inference_services/OpenAIService.py +224 -221
  71. edsl/inference_services/PerplexityService.py +163 -160
  72. edsl/inference_services/TestService.py +89 -92
  73. edsl/inference_services/TogetherAIService.py +170 -170
  74. edsl/inference_services/models_available_cache.py +118 -118
  75. edsl/inference_services/rate_limits_cache.py +25 -25
  76. edsl/inference_services/registry.py +41 -41
  77. edsl/inference_services/write_available.py +10 -10
  78. edsl/jobs/Answers.py +56 -43
  79. edsl/jobs/Jobs.py +898 -757
  80. edsl/jobs/JobsChecks.py +147 -172
  81. edsl/jobs/JobsPrompts.py +268 -270
  82. edsl/jobs/JobsRemoteInferenceHandler.py +239 -287
  83. edsl/jobs/__init__.py +1 -1
  84. edsl/jobs/buckets/BucketCollection.py +63 -104
  85. edsl/jobs/buckets/ModelBuckets.py +65 -65
  86. edsl/jobs/buckets/TokenBucket.py +251 -283
  87. edsl/jobs/interviews/Interview.py +661 -358
  88. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  89. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  90. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  91. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  92. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  93. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  94. edsl/jobs/interviews/ReportErrors.py +66 -66
  95. edsl/jobs/interviews/interview_status_enum.py +9 -9
  96. edsl/jobs/runners/JobsRunnerAsyncio.py +466 -421
  97. edsl/jobs/runners/JobsRunnerStatus.py +330 -330
  98. edsl/jobs/tasks/QuestionTaskCreator.py +242 -244
  99. edsl/jobs/tasks/TaskCreators.py +64 -64
  100. edsl/jobs/tasks/TaskHistory.py +450 -449
  101. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  102. edsl/jobs/tasks/task_status_enum.py +163 -161
  103. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  104. edsl/jobs/tokens/TokenUsage.py +34 -34
  105. edsl/language_models/KeyLookup.py +30 -0
  106. edsl/language_models/LanguageModel.py +668 -571
  107. edsl/language_models/ModelList.py +155 -153
  108. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  109. edsl/language_models/__init__.py +3 -2
  110. edsl/language_models/fake_openai_call.py +15 -15
  111. edsl/language_models/fake_openai_service.py +61 -61
  112. edsl/language_models/registry.py +190 -180
  113. edsl/language_models/repair.py +156 -156
  114. edsl/language_models/unused/ReplicateBase.py +83 -0
  115. edsl/language_models/utilities.py +64 -65
  116. edsl/notebooks/Notebook.py +258 -263
  117. edsl/notebooks/__init__.py +1 -1
  118. edsl/prompts/Prompt.py +362 -352
  119. edsl/prompts/__init__.py +2 -2
  120. edsl/questions/AnswerValidatorMixin.py +289 -334
  121. edsl/questions/QuestionBase.py +664 -509
  122. edsl/questions/QuestionBaseGenMixin.py +161 -165
  123. edsl/questions/QuestionBasePromptsMixin.py +217 -221
  124. edsl/questions/QuestionBudget.py +227 -227
  125. edsl/questions/QuestionCheckBox.py +359 -359
  126. edsl/questions/QuestionExtract.py +182 -182
  127. edsl/questions/QuestionFreeText.py +114 -113
  128. edsl/questions/QuestionFunctional.py +166 -166
  129. edsl/questions/QuestionList.py +231 -229
  130. edsl/questions/QuestionMultipleChoice.py +286 -330
  131. edsl/questions/QuestionNumerical.py +153 -151
  132. edsl/questions/QuestionRank.py +324 -314
  133. edsl/questions/Quick.py +41 -41
  134. edsl/questions/RegisterQuestionsMeta.py +71 -71
  135. edsl/questions/ResponseValidatorABC.py +174 -200
  136. edsl/questions/SimpleAskMixin.py +73 -74
  137. edsl/questions/__init__.py +26 -27
  138. edsl/questions/compose_questions.py +98 -98
  139. edsl/questions/decorators.py +21 -21
  140. edsl/questions/derived/QuestionLikertFive.py +76 -76
  141. edsl/questions/derived/QuestionLinearScale.py +87 -90
  142. edsl/questions/derived/QuestionTopK.py +93 -93
  143. edsl/questions/derived/QuestionYesNo.py +82 -82
  144. edsl/questions/descriptors.py +413 -427
  145. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  146. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  147. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  148. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  149. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  150. edsl/questions/prompt_templates/question_list.jinja +17 -17
  151. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  152. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  153. edsl/questions/question_registry.py +177 -177
  154. edsl/questions/settings.py +12 -12
  155. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  156. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  157. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  158. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  159. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  160. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  161. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  162. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  163. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  164. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  165. edsl/questions/templates/list/question_presentation.jinja +5 -5
  166. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  167. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  168. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  169. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  170. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  171. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  172. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  173. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  174. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  175. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  176. edsl/results/CSSParameterizer.py +108 -108
  177. edsl/results/Dataset.py +424 -587
  178. edsl/results/DatasetExportMixin.py +731 -653
  179. edsl/results/DatasetTree.py +275 -295
  180. edsl/results/Result.py +465 -451
  181. edsl/results/Results.py +1165 -1172
  182. edsl/results/ResultsDBMixin.py +238 -0
  183. edsl/results/ResultsExportMixin.py +43 -45
  184. edsl/results/ResultsFetchMixin.py +33 -33
  185. edsl/results/ResultsGGMixin.py +121 -121
  186. edsl/results/ResultsToolsMixin.py +98 -98
  187. edsl/results/Selector.py +135 -145
  188. edsl/results/TableDisplay.py +198 -125
  189. edsl/results/__init__.py +2 -2
  190. edsl/results/table_display.css +77 -77
  191. edsl/results/tree_explore.py +115 -115
  192. edsl/scenarios/FileStore.py +632 -511
  193. edsl/scenarios/Scenario.py +601 -498
  194. edsl/scenarios/ScenarioHtmlMixin.py +64 -65
  195. edsl/scenarios/ScenarioJoin.py +127 -131
  196. edsl/scenarios/ScenarioList.py +1287 -1430
  197. edsl/scenarios/ScenarioListExportMixin.py +52 -45
  198. edsl/scenarios/ScenarioListPdfMixin.py +261 -239
  199. edsl/scenarios/__init__.py +4 -3
  200. edsl/shared.py +1 -1
  201. edsl/study/ObjectEntry.py +173 -173
  202. edsl/study/ProofOfWork.py +113 -113
  203. edsl/study/SnapShot.py +80 -80
  204. edsl/study/Study.py +528 -521
  205. edsl/study/__init__.py +4 -4
  206. edsl/surveys/DAG.py +148 -148
  207. edsl/surveys/Memory.py +31 -31
  208. edsl/surveys/MemoryPlan.py +244 -244
  209. edsl/surveys/Rule.py +326 -327
  210. edsl/surveys/RuleCollection.py +387 -385
  211. edsl/surveys/Survey.py +1801 -1229
  212. edsl/surveys/SurveyCSS.py +261 -273
  213. edsl/surveys/SurveyExportMixin.py +259 -259
  214. edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +179 -181
  215. edsl/surveys/SurveyQualtricsImport.py +284 -284
  216. edsl/surveys/__init__.py +3 -5
  217. edsl/surveys/base.py +53 -53
  218. edsl/surveys/descriptors.py +56 -60
  219. edsl/surveys/instructions/ChangeInstruction.py +49 -48
  220. edsl/surveys/instructions/Instruction.py +65 -56
  221. edsl/surveys/instructions/InstructionCollection.py +77 -82
  222. edsl/templates/error_reporting/base.html +23 -23
  223. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  224. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  225. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  226. edsl/templates/error_reporting/interview_details.html +115 -115
  227. edsl/templates/error_reporting/interviews.html +19 -19
  228. edsl/templates/error_reporting/overview.html +4 -4
  229. edsl/templates/error_reporting/performance_plot.html +1 -1
  230. edsl/templates/error_reporting/report.css +73 -73
  231. edsl/templates/error_reporting/report.html +117 -117
  232. edsl/templates/error_reporting/report.js +25 -25
  233. edsl/tools/__init__.py +1 -1
  234. edsl/tools/clusters.py +192 -192
  235. edsl/tools/embeddings.py +27 -27
  236. edsl/tools/embeddings_plotting.py +118 -118
  237. edsl/tools/plotting.py +112 -112
  238. edsl/tools/summarize.py +18 -18
  239. edsl/utilities/SystemInfo.py +28 -28
  240. edsl/utilities/__init__.py +22 -22
  241. edsl/utilities/ast_utilities.py +25 -25
  242. edsl/utilities/data/Registry.py +6 -6
  243. edsl/utilities/data/__init__.py +1 -1
  244. edsl/utilities/data/scooter_results.json +1 -1
  245. edsl/utilities/decorators.py +77 -77
  246. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  247. edsl/utilities/interface.py +627 -627
  248. edsl/utilities/naming_utilities.py +263 -263
  249. edsl/utilities/repair_functions.py +28 -28
  250. edsl/utilities/restricted_python.py +70 -70
  251. edsl/utilities/utilities.py +424 -436
  252. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/LICENSE +21 -21
  253. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/METADATA +10 -12
  254. edsl-0.1.39.dev3.dist-info/RECORD +277 -0
  255. edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
  256. edsl/agents/QuestionOptionProcessor.py +0 -172
  257. edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
  258. edsl/coop/CoopFunctionsMixin.py +0 -15
  259. edsl/coop/ExpectedParrotKeyHandler.py +0 -125
  260. edsl/exceptions/inference_services.py +0 -5
  261. edsl/inference_services/AvailableModelCacheHandler.py +0 -184
  262. edsl/inference_services/AvailableModelFetcher.py +0 -209
  263. edsl/inference_services/ServiceAvailability.py +0 -135
  264. edsl/inference_services/data_structures.py +0 -62
  265. edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -188
  266. edsl/jobs/FetchInvigilator.py +0 -40
  267. edsl/jobs/InterviewTaskManager.py +0 -98
  268. edsl/jobs/InterviewsConstructor.py +0 -48
  269. edsl/jobs/JobsComponentConstructor.py +0 -189
  270. edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
  271. edsl/jobs/RequestTokenEstimator.py +0 -30
  272. edsl/jobs/buckets/TokenBucketAPI.py +0 -211
  273. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  274. edsl/jobs/decorators.py +0 -35
  275. edsl/jobs/jobs_status_enums.py +0 -9
  276. edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
  277. edsl/language_models/ComputeCost.py +0 -63
  278. edsl/language_models/PriceManager.py +0 -127
  279. edsl/language_models/RawResponseHandler.py +0 -106
  280. edsl/language_models/ServiceDataSources.py +0 -0
  281. edsl/language_models/key_management/KeyLookup.py +0 -63
  282. edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
  283. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  284. edsl/language_models/key_management/__init__.py +0 -0
  285. edsl/language_models/key_management/models.py +0 -131
  286. edsl/notebooks/NotebookToLaTeX.py +0 -142
  287. edsl/questions/ExceptionExplainer.py +0 -77
  288. edsl/questions/HTMLQuestion.py +0 -103
  289. edsl/questions/LoopProcessor.py +0 -149
  290. edsl/questions/QuestionMatrix.py +0 -265
  291. edsl/questions/ResponseValidatorFactory.py +0 -28
  292. edsl/questions/templates/matrix/__init__.py +0 -1
  293. edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
  294. edsl/questions/templates/matrix/question_presentation.jinja +0 -20
  295. edsl/results/MarkdownToDocx.py +0 -122
  296. edsl/results/MarkdownToPDF.py +0 -111
  297. edsl/results/TextEditor.py +0 -50
  298. edsl/results/smart_objects.py +0 -96
  299. edsl/results/table_data_class.py +0 -12
  300. edsl/results/table_renderers.py +0 -118
  301. edsl/scenarios/ConstructDownloadLink.py +0 -109
  302. edsl/scenarios/DirectoryScanner.py +0 -96
  303. edsl/scenarios/DocumentChunker.py +0 -102
  304. edsl/scenarios/DocxScenario.py +0 -16
  305. edsl/scenarios/PdfExtractor.py +0 -40
  306. edsl/scenarios/ScenarioSelector.py +0 -156
  307. edsl/scenarios/file_methods.py +0 -85
  308. edsl/scenarios/handlers/__init__.py +0 -13
  309. edsl/scenarios/handlers/csv.py +0 -38
  310. edsl/scenarios/handlers/docx.py +0 -76
  311. edsl/scenarios/handlers/html.py +0 -37
  312. edsl/scenarios/handlers/json.py +0 -111
  313. edsl/scenarios/handlers/latex.py +0 -5
  314. edsl/scenarios/handlers/md.py +0 -51
  315. edsl/scenarios/handlers/pdf.py +0 -68
  316. edsl/scenarios/handlers/png.py +0 -39
  317. edsl/scenarios/handlers/pptx.py +0 -105
  318. edsl/scenarios/handlers/py.py +0 -294
  319. edsl/scenarios/handlers/sql.py +0 -313
  320. edsl/scenarios/handlers/sqlite.py +0 -149
  321. edsl/scenarios/handlers/txt.py +0 -33
  322. edsl/surveys/ConstructDAG.py +0 -92
  323. edsl/surveys/EditSurvey.py +0 -221
  324. edsl/surveys/InstructionHandler.py +0 -100
  325. edsl/surveys/MemoryManagement.py +0 -72
  326. edsl/surveys/RuleManager.py +0 -172
  327. edsl/surveys/Simulator.py +0 -75
  328. edsl/surveys/SurveyToApp.py +0 -141
  329. edsl/utilities/PrettyList.py +0 -56
  330. edsl/utilities/is_notebook.py +0 -18
  331. edsl/utilities/is_valid_variable_name.py +0 -11
  332. edsl/utilities/remove_edsl_version.py +0 -24
  333. edsl-0.1.39.dev2.dist-info/RECORD +0 -352
  334. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/WHEEL +0 -0
edsl/data/Cache.py CHANGED
@@ -1,562 +1,555 @@
1
- """
2
- The `Cache` class is used to store responses from a language model.
3
- """
4
-
5
- from __future__ import annotations
6
- import json
7
- import os
8
- import warnings
9
- from typing import Optional, Union
10
- from edsl.Base import Base
11
-
12
-
13
- # from edsl.utilities.decorators import remove_edsl_version
14
- from edsl.utilities.remove_edsl_version import remove_edsl_version
15
- from edsl.exceptions.cache import CacheError
16
-
17
-
18
- class Cache(Base):
19
- """
20
- A class that represents a cache of responses from a language model.
21
-
22
- :param data: The data to initialize the cache with.
23
- :param immediate_write: Whether to write to the cache immediately after storing a new entry.
24
-
25
- Deprecated:
26
-
27
- :param method: The method of storage to use for the cache.
28
- """
29
-
30
- __documentation__ = "https://docs.expectedparrot.com/en/latest/data.html"
31
-
32
- data = {}
33
-
34
- def __init__(
35
- self,
36
- *,
37
- filename: Optional[str] = None,
38
- data: Optional[Union["SQLiteDict", dict]] = None,
39
- immediate_write: bool = True,
40
- method=None,
41
- verbose=False,
42
- ):
43
- """
44
- Create two dictionaries to store the cache data.
45
-
46
- :param filename: The name of the file to read/write the cache from/to.
47
- :param data: The data to initialize the cache with.
48
- :param immediate_write: Whether to write to the cache immediately after storing a new entry.
49
- :param method: The method of storage to use for the cache.
50
-
51
- """
52
-
53
- # self.data_at_init = data or {}
54
- self.fetched_data = {}
55
- self.immediate_write = immediate_write
56
- self.method = method
57
- self.new_entries = {}
58
- self.new_entries_to_write_later = {}
59
- self.coop = None
60
- self.verbose = verbose
61
-
62
- self.filename = filename
63
- if filename and data:
64
- raise CacheError("Cannot provide both filename and data")
65
- if filename is None and data is None:
66
- data = {}
67
- if data is not None:
68
- self.data = data
69
- if filename is not None:
70
- self.data = {}
71
- if filename.endswith(".jsonl"):
72
- if os.path.exists(filename):
73
- self.add_from_jsonl(filename)
74
- else:
75
- print(
76
- f"File {filename} not found, but will write to this location."
77
- )
78
- elif filename.endswith(".db"):
79
- if os.path.exists(filename):
80
- self.add_from_sqlite(filename)
81
- else:
82
- raise CacheError("Invalid file extension. Must be .jsonl or .db")
83
-
84
- self._perform_checks()
85
-
86
- # def rich_print(sefl):
87
- # pass
88
- # # raise NotImplementedError("This method is not implemented yet.")
89
-
90
- def code(sefl):
91
- pass
92
- # raise NotImplementedError("This method is not implemented yet.")
93
-
94
- def keys(self):
95
- """
96
- >>> from edsl import Cache
97
- >>> Cache.example().keys()
98
- ['5513286eb6967abc0511211f0402587d']
99
- """
100
- return list(self.data.keys())
101
-
102
- def values(self):
103
- """
104
- >>> from edsl import Cache
105
- >>> Cache.example().values()
106
- [CacheEntry(...)]
107
- """
108
- return list(self.data.values())
109
-
110
- def items(self):
111
- return zip(self.keys(), self.values())
112
-
113
- def new_entries_cache(self) -> Cache:
114
- """Return a new Cache object with the new entries."""
115
- return Cache(data={**self.new_entries, **self.fetched_data})
116
-
117
- def _perform_checks(self):
118
- """Perform checks on the cache."""
119
- from edsl.data.CacheEntry import CacheEntry
120
-
121
- if any(not isinstance(value, CacheEntry) for value in self.data.values()):
122
- raise CacheError("Not all values are CacheEntry instances")
123
- if self.method is not None:
124
- warnings.warn("Argument `method` is deprecated", DeprecationWarning)
125
-
126
- ####################
127
- # READ/WRITE
128
- ####################
129
- def fetch(
130
- self,
131
- *,
132
- model: str,
133
- parameters: dict,
134
- system_prompt: str,
135
- user_prompt: str,
136
- iteration: int,
137
- ) -> tuple(Union[None, str], str):
138
- """
139
- Fetch a value (LLM output) from the cache.
140
-
141
- :param model: The name of the language model.
142
- :param parameters: The model parameters.
143
- :param system_prompt: The system prompt.
144
- :param user_prompt: The user prompt.
145
- :param iteration: The iteration number.
146
-
147
- Return None if the response is not found.
148
-
149
- >>> c = Cache()
150
- >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello", user_prompt="Hi", iteration=1)[0] is None
151
- True
152
-
153
-
154
- """
155
- from edsl.data.CacheEntry import CacheEntry
156
-
157
- key = CacheEntry.gen_key(
158
- model=model,
159
- parameters=parameters,
160
- system_prompt=system_prompt,
161
- user_prompt=user_prompt,
162
- iteration=iteration,
163
- )
164
- entry = self.data.get(key, None)
165
- if entry is not None:
166
- if self.verbose:
167
- print(f"Cache hit for key: {key}")
168
- self.fetched_data[key] = entry
169
- else:
170
- if self.verbose:
171
- print(f"Cache miss for key: {key}")
172
- return None if entry is None else entry.output, key
173
-
174
- def store(
175
- self,
176
- model: str,
177
- parameters: str,
178
- system_prompt: str,
179
- user_prompt: str,
180
- response: dict,
181
- iteration: int,
182
- ) -> str:
183
- """
184
- Add a new key-value pair to the cache.
185
-
186
- * Key is a hash of the input parameters.
187
- * Output is the response from the language model.
188
-
189
- How it works:
190
-
191
- * The key-value pair is added to `self.new_entries`
192
- * If `immediate_write` is True , the key-value pair is added to `self.data`
193
- * If `immediate_write` is False, the key-value pair is added to `self.new_entries_to_write_later`
194
-
195
- >>> from edsl import Cache, Model, Question
196
- >>> m = Model("test")
197
- >>> c = Cache()
198
- >>> len(c)
199
- 0
200
- >>> results = Question.example("free_text").by(m).run(cache = c, disable_remote_cache = True, disable_remote_inference = True)
201
- >>> len(c)
202
- 1
203
- """
204
- from edsl.data.CacheEntry import CacheEntry
205
-
206
- entry = CacheEntry(
207
- model=model,
208
- parameters=parameters,
209
- system_prompt=system_prompt,
210
- user_prompt=user_prompt,
211
- output=json.dumps(response),
212
- iteration=iteration,
213
- )
214
- key = entry.key
215
- self.new_entries[key] = entry
216
- if self.immediate_write:
217
- self.data[key] = entry
218
- else:
219
- self.new_entries_to_write_later[key] = entry
220
- return key
221
-
222
- def add_from_dict(
223
- self, new_data: dict[str, "CacheEntry"], write_now: Optional[bool] = True
224
- ) -> None:
225
- """
226
- Add entries to the cache from a dictionary.
227
-
228
- :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
229
- """
230
- from edsl.data.CacheEntry import CacheEntry
231
-
232
- for key, value in new_data.items():
233
- if key in self.data:
234
- if value != self.data[key]:
235
- raise CacheError("Mismatch in values")
236
- if not isinstance(value, CacheEntry):
237
- raise CacheError(f"Wrong type - the observed type is {type(value)}")
238
-
239
- self.new_entries.update(new_data)
240
- if write_now:
241
- self.data.update(new_data)
242
- else:
243
- self.new_entries_to_write_later.update(new_data)
244
-
245
- def add_from_jsonl(self, filename: str, write_now: Optional[bool] = True) -> None:
246
- """
247
- Add entries to the cache from a JSONL.
248
-
249
- :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
250
- """
251
- from edsl.data.CacheEntry import CacheEntry
252
-
253
- with open(filename, "a+") as f:
254
- f.seek(0)
255
- lines = f.readlines()
256
- new_data = {}
257
- for line in lines:
258
- d = json.loads(line)
259
- key = list(d.keys())[0]
260
- value = list(d.values())[0]
261
- new_data[key] = CacheEntry(**value)
262
- self.add_from_dict(new_data=new_data, write_now=write_now)
263
-
264
- def add_from_sqlite(self, db_path: str, write_now: Optional[bool] = True):
265
- """
266
- Add entries to the cache from an SQLite database.
267
-
268
- :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
269
- """
270
- from edsl.data.SQLiteDict import SQLiteDict
271
-
272
- db = SQLiteDict(db_path)
273
- new_data = {}
274
- for key, value in db.items():
275
- new_data[key] = CacheEntry(**value)
276
- self.add_from_dict(new_data=new_data, write_now=write_now)
277
-
278
- @classmethod
279
- def from_sqlite_db(cls, db_path: str) -> Cache:
280
- """
281
- Construct a Cache from a SQLite database.
282
- """
283
- from edsl.data.SQLiteDict import SQLiteDict
284
-
285
- return cls(data=SQLiteDict(db_path))
286
-
287
- @classmethod
288
- def from_local_cache(cls) -> Cache:
289
- """
290
- Construct a Cache from a local cache file.
291
- """
292
- from edsl.config import CONFIG
293
-
294
- CACHE_PATH = CONFIG.get("EDSL_DATABASE_PATH")
295
- path = CACHE_PATH.replace("sqlite:///", "")
296
- db_path = os.path.join(os.path.dirname(path), "data.db")
297
- return cls.from_sqlite_db(db_path=db_path)
298
-
299
- @classmethod
300
- def from_jsonl(cls, jsonlfile: str, db_path: Optional[str] = None) -> Cache:
301
- """
302
- Construct a Cache from a JSONL file.
303
-
304
- :param jsonlfile: The path to the JSONL file of cache entries.
305
- :param db_path: The path to the SQLite database used to store the cache.
306
-
307
- * If `db_path` is None, the cache will be stored in memory, as a dictionary.
308
- * If `db_path` is provided, the cache will be stored in an SQLite database.
309
- """
310
- # if a file doesn't exist at jsonfile, throw an error
311
- from edsl.data.SQLiteDict import SQLiteDict
312
-
313
- if not os.path.exists(jsonlfile):
314
- raise FileNotFoundError(f"File {jsonlfile} not found")
315
-
316
- if db_path is None:
317
- data = {}
318
- else:
319
- data = SQLiteDict(db_path)
320
-
321
- cache = Cache(data=data)
322
- cache.add_from_jsonl(jsonlfile)
323
- return cache
324
-
325
- def write_sqlite_db(self, db_path: str) -> None:
326
- """
327
- Write the cache to an SQLite database.
328
- """
329
- ## TODO: Check to make sure not over-writing (?)
330
- ## Should be added to SQLiteDict constructor (?)
331
- from edsl.data.SQLiteDict import SQLiteDict
332
-
333
- new_data = SQLiteDict(db_path)
334
- for key, value in self.data.items():
335
- new_data[key] = value
336
-
337
- def write(self, filename: Optional[str] = None) -> None:
338
- """
339
- Write the cache to a file at the specified location.
340
- """
341
- if filename is None:
342
- filename = self.filename
343
- if filename.endswith(".jsonl"):
344
- self.write_jsonl(filename)
345
- elif filename.endswith(".db"):
346
- self.write_sqlite_db(filename)
347
- else:
348
- raise CacheError("Invalid file extension. Must be .jsonl or .db")
349
-
350
- def write_jsonl(self, filename: str) -> None:
351
- """
352
- Write the cache to a JSONL file.
353
- """
354
- path = os.path.join(os.getcwd(), filename)
355
- with open(path, "w") as f:
356
- for key, value in self.data.items():
357
- f.write(json.dumps({key: value.to_dict()}) + "\n")
358
-
359
- def to_scenario_list(self):
360
- from edsl.scenarios.ScenarioList import ScenarioList
361
- from edsl.scenarios.Scenario import Scenario
362
-
363
- scenarios = []
364
- for key, value in self.data.items():
365
- new_d = value.to_dict()
366
- new_d["cache_key"] = key
367
- s = Scenario(new_d)
368
- scenarios.append(s)
369
- return ScenarioList(scenarios)
370
-
371
- ####################
372
- # REMOTE
373
- ####################
374
- # TODO: Make this work
375
- # - Need to decide whether the cache belongs to a user and what can be shared
376
- # - I.e., some cache entries? all or nothing?
377
- @classmethod
378
- def from_url(cls, db_path=None) -> Cache:
379
- """
380
- Construct a Cache object from a remote.
381
- """
382
- # ...do something here
383
- # return Cache(data=db)
384
- pass
385
-
386
- def __enter__(self):
387
- """
388
- Run when a context is entered.
389
- """
390
- return self
391
-
392
- def __exit__(self, exc_type, exc_value, traceback):
393
- """
394
- Run when a context is exited.
395
- """
396
- for key, entry in self.new_entries_to_write_later.items():
397
- self.data[key] = entry
398
-
399
- if self.filename:
400
- self.write(self.filename)
401
-
402
- ####################
403
- # DUNDER / USEFUL
404
- ####################
405
- def __hash__(self):
406
- """Return the hash of the Cache."""
407
- from edsl.utilities.utilities import dict_hash
408
-
409
- return dict_hash(self.to_dict(add_edsl_version=False))
410
-
411
- def to_dict(self, add_edsl_version=True) -> dict:
412
- d = {k: v.to_dict() for k, v in self.data.items()}
413
- if add_edsl_version:
414
- from edsl import __version__
415
-
416
- d["edsl_version"] = __version__
417
- d["edsl_class_name"] = "Cache"
418
-
419
- return d
420
-
421
- def _summary(self):
422
- return {"EDSL Class": "Cache", "Number of entries": len(self.data)}
423
-
424
- def table(
425
- self,
426
- *fields,
427
- tablefmt: Optional[str] = None,
428
- pretty_labels: Optional[dict] = None,
429
- ) -> str:
430
- return self.to_dataset().table(
431
- *fields, tablefmt=tablefmt, pretty_labels=pretty_labels
432
- )
433
-
434
- def select(self, *fields):
435
- return self.to_dataset().select(*fields)
436
-
437
- def tree(self, node_list: Optional[list[str]] = None):
438
- return self.to_scenario_list().tree(node_list)
439
-
440
- def to_dataset(self):
441
- return self.to_scenario_list().to_dataset()
442
-
443
- @classmethod
444
- @remove_edsl_version
445
- def from_dict(cls, data) -> Cache:
446
- """Construct a Cache from a dictionary."""
447
- from edsl.data.CacheEntry import CacheEntry
448
-
449
- newdata = {k: CacheEntry.from_dict(v) for k, v in data.items()}
450
- return cls(data=newdata)
451
-
452
- def __len__(self):
453
- """Return the number of CacheEntry objects in the Cache."""
454
- return len(self.data)
455
-
456
- # TODO: Same inputs could give different results and this could be useful
457
- # can't distinguish unless we do the ε trick or vary iterations
458
def __eq__(self, other_cache: "Cache") -> bool:
    """
    Compare two caches by their key sets.

    Only the keys are compared; the stored values are not inspected.
    Anything that is not a Cache compares unequal.
    """
    if isinstance(other_cache, Cache):
        own_keys = set(self.data.keys())
        other_keys = set(other_cache.data.keys())
        return own_keys == other_keys
    return False
466
-
467
def __add__(self, other: "Cache"):
    """
    Merge another cache into this one.

    Note that the merge happens in place: ``self.data`` is updated with the
    entries of ``other`` and ``self`` is returned (no new Cache is created).

    :raises CacheError: If ``other`` is not a Cache.
    """
    if isinstance(other, Cache):
        self.data.update(other.data)
        return self
    raise CacheError("Can only add two caches together")
475
-
476
- def __repr__(self):
477
- """
478
- Return a string representation of the Cache object.
479
- """
480
- return (
481
- f"Cache(data = {repr(self.data)}, immediate_write={self.immediate_write})"
482
- )
483
-
484
- ####################
485
- # EXAMPLES
486
- ####################
487
def fetch_input_example(self) -> dict:
    """Return example keyword arguments for ``fetch``, as supplied by CacheEntry."""
    from edsl.data.CacheEntry import CacheEntry

    example_input = CacheEntry.fetch_input_example()
    return example_input
494
-
495
def to_html(self):
    """
    Return a standalone HTML page that displays the cache as formatted JSON.

    Every entry is serialized with ``to_dict()``. Field values are turned
    into strings (one level deep for dict-valued fields) so that the whole
    structure is JSON-serializable.

    The JSON is embedded inside a ``<script>`` element. Cached text can
    contain arbitrary characters, and a literal ``</script>`` inside a value
    would end the script element early. Every ``<`` in the JSON is therefore
    written as the equivalent ``\\u003c`` escape, which decodes to the same
    character in the browser.
    """
    printable = {}
    for entry_key, entry in self.data.items():
        fields = {}
        for name, val in entry.to_dict().items():
            if isinstance(val, dict):
                fields[name] = {kk: str(vv) for kk, vv in val.items()}
            else:
                fields[name] = str(val)
        printable[entry_key] = fields

    # "<" only ever occurs inside JSON strings, so the escape is lossless.
    json_str = json.dumps(printable, indent=4).replace("<", "\\u003c")

    # HTML template with the JSON string embedded
    html = f"""
    <!DOCTYPE html>
    <html>
    <head>
    <title>Display JSON</title>
    </head>
    <body>
    <pre id="jsonData"></pre>
    <script>
    var json = {json_str};

    // JSON.stringify with spacing to format
    document.getElementById('jsonData').textContent = JSON.stringify(json, null, 4);
    </script>
    </body>
    </html>
    """
    return html
526
-
527
def view(self) -> None:
    """Write the cache's HTML rendering to a temporary file and open it in a browser."""
    import tempfile
    import webbrowser

    page = self.to_html()
    # delete=False keeps the file on disk after the handle is closed, so the
    # browser can still read it.
    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html") as handle:
        handle.write(page)
        filepath = handle.name

    webbrowser.open("file://" + filepath)
541
-
542
@classmethod
def example(cls, randomize: bool = False) -> Cache:
    """
    Returns an example Cache instance.

    Each example entry is stored under its own ``key``, so keys and values
    always agree. Without randomization both example entries share one key
    and the cache therefore holds a single entry.

    :param randomize: Passed through to ``CacheEntry.example``.
    """
    from edsl.data.CacheEntry import CacheEntry

    first = CacheEntry.example(randomize)
    second = CacheEntry.example(randomize)
    return cls(data={first.key: first, second.key: second})
557
-
558
-
559
if __name__ == "__main__":
    # Run the module's doctests; ELLIPSIS lets "..." match elided output.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
1
+ """
2
+ The `Cache` class is used to store responses from a language model.
3
+ """
4
+
5
+ from __future__ import annotations
6
+ import json
7
+ import os
8
+ import warnings
9
+ import copy
10
+ from typing import Optional, Union
11
+ from edsl.Base import Base
12
+ from edsl.data.CacheEntry import CacheEntry
13
+ from edsl.utilities.utilities import dict_hash
14
+ from edsl.utilities.decorators import remove_edsl_version
15
+ from edsl.exceptions.cache import CacheError
16
+
17
+
18
class Cache(Base):
    """
    A class that represents a cache of responses from a language model.

    :param data: The data to initialize the cache with.
    :param immediate_write: Whether to write to the cache immediately after storing a new entry.

    Deprecated:

    :param method: The method of storage to use for the cache.
    """

    __documentation__ = "https://docs.expectedparrot.com/en/latest/data.html"

    # Class-level default only: ``__init__`` always rebinds ``self.data`` on
    # the instance, so this shared dict is never written through an instance.
    data = {}

    def __init__(
        self,
        *,
        filename: Optional[str] = None,
        data: Optional[Union["SQLiteDict", dict]] = None,
        immediate_write: bool = True,
        method=None,
        verbose=False,
    ):
        """
        Set up the cache's bookkeeping dictionaries and load initial data.

        :param filename: The name of the file to read/write the cache from/to.
            Must end in ``.jsonl`` or ``.db``.
        :param data: The data to initialize the cache with.
        :param immediate_write: Whether to write to the cache immediately after storing a new entry.
        :param method: The method of storage to use for the cache (deprecated).
        :param verbose: Whether ``fetch`` prints cache hits and misses.

        :raises CacheError: If both ``filename`` and ``data`` are given, if the
            file extension is not supported, or if a value is not a CacheEntry.
        """
        self.fetched_data = {}
        self.immediate_write = immediate_write
        self.method = method
        self.new_entries = {}
        self.new_entries_to_write_later = {}
        self.coop = None
        self.verbose = verbose

        self.filename = filename
        if filename and data:
            raise CacheError("Cannot provide both filename and data")
        if filename is None and data is None:
            data = {}
        if data is not None:
            self.data = data
        if filename is not None:
            self.data = {}
            if filename.endswith(".jsonl"):
                if os.path.exists(filename):
                    self.add_from_jsonl(filename)
                else:
                    print(
                        f"File {filename} not found, but will write to this location."
                    )
            elif filename.endswith(".db"):
                if os.path.exists(filename):
                    self.add_from_sqlite(filename)
            else:
                raise CacheError("Invalid file extension. Must be .jsonl or .db")

        self._perform_checks()

    def rich_print(self):
        """Placeholder; intentionally does nothing."""
        pass

    def code(self):
        """Placeholder; intentionally does nothing."""
        pass

    def keys(self):
        """
        Return the cache keys as a list.

        >>> from edsl import Cache
        >>> Cache.example().keys()
        ['5513286eb6967abc0511211f0402587d']
        """
        return list(self.data.keys())

    def values(self):
        """
        Return the cache entries as a list.

        >>> from edsl import Cache
        >>> Cache.example().values()
        [CacheEntry(...)]
        """
        return list(self.data.values())

    def items(self):
        """Return an iterator over ``(key, entry)`` pairs."""
        return zip(self.keys(), self.values())

    def new_entries_cache(self) -> Cache:
        """Return a new Cache holding the newly stored and the fetched entries."""
        return Cache(data={**self.new_entries, **self.fetched_data})

    def _perform_checks(self):
        """
        Validate the cache contents and warn about deprecated arguments.

        :raises CacheError: If any stored value is not a CacheEntry.
        """
        if any(not isinstance(value, CacheEntry) for value in self.data.values()):
            raise CacheError("Not all values are CacheEntry instances")
        if self.method is not None:
            warnings.warn("Argument `method` is deprecated", DeprecationWarning)

    ####################
    # READ/WRITE
    ####################
    def fetch(
        self,
        *,
        model: str,
        parameters: dict,
        system_prompt: str,
        user_prompt: str,
        iteration: int,
    ) -> tuple[Union[None, str], str]:
        """
        Fetch a value (LLM output) from the cache.

        :param model: The name of the language model.
        :param parameters: The model parameters.
        :param system_prompt: The system prompt.
        :param user_prompt: The user prompt.
        :param iteration: The iteration number.

        :return: A ``(output, key)`` pair; ``output`` is None if the response
            is not found. Hits are also recorded in ``self.fetched_data``.

        >>> c = Cache()
        >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello", user_prompt="Hi", iteration=1)[0] is None
        True


        """
        key = CacheEntry.gen_key(
            model=model,
            parameters=parameters,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            iteration=iteration,
        )
        entry = self.data.get(key, None)
        if entry is not None:
            if self.verbose:
                print(f"Cache hit for key: {key}")
            self.fetched_data[key] = entry
        else:
            if self.verbose:
                print(f"Cache miss for key: {key}")
        # Parenthesized so it is clear the conditional applies to the output only.
        return (None if entry is None else entry.output), key

    def store(
        self,
        model: str,
        parameters: dict,
        system_prompt: str,
        user_prompt: str,
        response: dict,
        iteration: int,
    ) -> str:
        """
        Add a new key-value pair to the cache.

        * Key is a hash of the input parameters.
        * Output is the response from the language model.

        How it works:

        * The key-value pair is added to `self.new_entries`
        * If `immediate_write` is True , the key-value pair is added to `self.data`
        * If `immediate_write` is False, the key-value pair is added to `self.new_entries_to_write_later`

        :return: The key under which the entry was stored.

        >>> from edsl import Cache, Model, Question
        >>> m = Model("test")
        >>> c = Cache()
        >>> len(c)
        0
        >>> results = Question.example("free_text").by(m).run(cache = c, disable_remote_cache = True, disable_remote_inference = True)
        >>> len(c)
        1
        """

        entry = CacheEntry(
            model=model,
            parameters=parameters,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            output=json.dumps(response),
            iteration=iteration,
        )
        key = entry.key
        self.new_entries[key] = entry
        if self.immediate_write:
            self.data[key] = entry
        else:
            self.new_entries_to_write_later[key] = entry
        return key

    def add_from_dict(
        self, new_data: dict[str, "CacheEntry"], write_now: Optional[bool] = True
    ) -> None:
        """
        Add entries to the cache from a dictionary.

        :param new_data: Mapping of cache key to CacheEntry.
        :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).

        :raises CacheError: If a key already exists with a different value, or
            if a value is not a CacheEntry.
        """

        for key, value in new_data.items():
            if key in self.data:
                if value != self.data[key]:
                    raise CacheError("Mismatch in values")
            if not isinstance(value, CacheEntry):
                raise CacheError(f"Wrong type - the observed type is {type(value)}")

        self.new_entries.update(new_data)
        if write_now:
            self.data.update(new_data)
        else:
            self.new_entries_to_write_later.update(new_data)

    def add_from_jsonl(self, filename: str, write_now: Optional[bool] = True) -> None:
        """
        Add entries to the cache from a JSONL.

        Each non-blank line must be a JSON object whose first item maps a
        cache key to the keyword arguments of a CacheEntry.

        :param filename: Path of the JSONL file to read.
        :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
        """
        # NOTE: "a+" creates the file when it does not exist yet; the read
        # position is moved back to the start before reading.
        with open(filename, "a+") as f:
            f.seek(0)
            lines = f.readlines()
        new_data = {}
        for line in lines:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline added by an editor).
                continue
            d = json.loads(line)
            key = list(d.keys())[0]
            value = list(d.values())[0]
            new_data[key] = CacheEntry(**value)
        self.add_from_dict(new_data=new_data, write_now=write_now)

    def add_from_sqlite(self, db_path: str, write_now: Optional[bool] = True):
        """
        Add entries to the cache from an SQLite database.

        :param db_path: Path of the SQLite database to read.
        :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
        """
        from edsl.data.SQLiteDict import SQLiteDict

        db = SQLiteDict(db_path)
        new_data = {}
        for key, value in db.items():
            new_data[key] = CacheEntry(**value)
        self.add_from_dict(new_data=new_data, write_now=write_now)

    @classmethod
    def from_sqlite_db(cls, db_path: str) -> Cache:
        """
        Construct a Cache from a SQLite database.

        :param db_path: Path of the SQLite database backing the cache.
        """
        from edsl.data.SQLiteDict import SQLiteDict

        return cls(data=SQLiteDict(db_path))

    @classmethod
    def from_local_cache(cls) -> Cache:
        """
        Construct a Cache from a local cache file.

        The location is derived from the ``EDSL_DATABASE_PATH`` setting: the
        ``data.db`` file in the same directory is used.
        """
        from edsl.config import CONFIG

        CACHE_PATH = CONFIG.get("EDSL_DATABASE_PATH")
        path = CACHE_PATH.replace("sqlite:///", "")
        db_path = os.path.join(os.path.dirname(path), "data.db")
        return cls.from_sqlite_db(db_path=db_path)

    @classmethod
    def from_jsonl(cls, jsonlfile: str, db_path: Optional[str] = None) -> Cache:
        """
        Construct a Cache from a JSONL file.

        :param jsonlfile: The path to the JSONL file of cache entries.
        :param db_path: The path to the SQLite database used to store the cache.

        * If `db_path` is None, the cache will be stored in memory, as a dictionary.
        * If `db_path` is provided, the cache will be stored in an SQLite database.

        :raises FileNotFoundError: If ``jsonlfile`` does not exist.
        """
        from edsl.data.SQLiteDict import SQLiteDict

        if not os.path.exists(jsonlfile):
            raise FileNotFoundError(f"File {jsonlfile} not found")

        if db_path is None:
            data = {}
        else:
            data = SQLiteDict(db_path)

        # ``cls`` rather than ``Cache`` so subclasses get an instance of themselves.
        cache = cls(data=data)
        cache.add_from_jsonl(jsonlfile)
        return cache

    def write_sqlite_db(self, db_path: str) -> None:
        """
        Write the cache to an SQLite database.

        :param db_path: Path of the SQLite database to write to.
        """
        ## TODO: Check to make sure not over-writing (?)
        ## Should be added to SQLiteDict constructor (?)
        from edsl.data.SQLiteDict import SQLiteDict

        new_data = SQLiteDict(db_path)
        for key, value in self.data.items():
            new_data[key] = value

    def write(self, filename: Optional[str] = None) -> None:
        """
        Write the cache to a file at the specified location.

        :param filename: Target file (``.jsonl`` or ``.db``). Defaults to the
            filename the cache was created with.

        :raises CacheError: If no filename is available or the extension is
            not supported.
        """
        if filename is None:
            filename = self.filename
        if filename is None:
            # Without this guard the ``endswith`` call below would fail with
            # an unhelpful AttributeError on None.
            raise CacheError(
                "No filename provided and the cache has no default filename"
            )
        if filename.endswith(".jsonl"):
            self.write_jsonl(filename)
        elif filename.endswith(".db"):
            self.write_sqlite_db(filename)
        else:
            raise CacheError("Invalid file extension. Must be .jsonl or .db")

    def write_jsonl(self, filename: str) -> None:
        """
        Write the cache to a JSONL file, one ``{key: entry}`` object per line.

        :param filename: Target path; relative paths are resolved against the
            current working directory. An existing file is overwritten.
        """
        path = os.path.join(os.getcwd(), filename)
        with open(path, "w") as f:
            for key, value in self.data.items():
                f.write(json.dumps({key: value.to_dict()}) + "\n")

    def to_scenario_list(self):
        """Return a ScenarioList with one Scenario per entry, including its ``cache_key``."""
        from edsl import ScenarioList, Scenario

        scenarios = []
        for key, value in self.data.items():
            new_d = value.to_dict()
            new_d["cache_key"] = key
            s = Scenario(new_d)
            scenarios.append(s)
        return ScenarioList(scenarios)

    ####################
    # REMOTE
    ####################
    # TODO: Make this work
    # - Need to decide whether the cache belongs to a user and what can be shared
    # - I.e., some cache entries? all or nothing?
    @classmethod
    def from_url(cls, db_path=None) -> Cache:
        """
        Construct a Cache object from a remote.

        Not implemented yet: currently does nothing and returns None.
        """
        # ...do something here
        # return Cache(data=db)
        pass

    def __enter__(self):
        """
        Run when a context is entered.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Run when a context is exited.

        Moves the entries deferred by ``immediate_write=False`` into
        ``self.data`` and, if the cache has a filename, writes it to disk.
        """
        for key, entry in self.new_entries_to_write_later.items():
            self.data[key] = entry

        if self.filename:
            self.write(self.filename)

    ####################
    # DUNDER / USEFUL
    ####################
    def __hash__(self):
        """Return the hash of the Cache."""
        return dict_hash(self.to_dict(add_edsl_version=False))

    def to_dict(self, add_edsl_version=True) -> dict:
        """
        Serialize the cache to a plain dictionary.

        :param add_edsl_version: When True, also add the "edsl_version" and
            "edsl_class_name" keys.
        """
        d = {k: v.to_dict() for k, v in self.data.items()}
        if add_edsl_version:
            from edsl import __version__

            d["edsl_version"] = __version__
            d["edsl_class_name"] = "Cache"

        return d

    def _summary(self):
        """Return a short description of the cache: its class name and entry count."""
        return {"EDSL Class": "Cache", "Number of entries": len(self.data)}

    def _repr_html_(self):
        """Return the HTML summary of the cache followed by a documentation link."""
        footer = f"<a href={self.__documentation__}>(docs)</a>"
        return str(self.summary(format="html")) + footer

    def table(
        self,
        *fields,
        tablefmt: Optional[str] = None,
        pretty_labels: Optional[dict] = None,
    ) -> str:
        """Render the cache as a table by delegating to the dataset view."""
        return self.to_dataset().table(
            *fields, tablefmt=tablefmt, pretty_labels=pretty_labels
        )

    def select(self, *fields):
        """Select the given fields from the dataset view of the cache."""
        return self.to_dataset().select(*fields)

    def tree(self, node_list: Optional[list[str]] = None):
        """Build a tree view by delegating to the scenario-list form of the cache."""
        return self.to_scenario_list().tree(node_list)

    def to_dataset(self):
        """Convert the cache to a dataset via its scenario-list representation."""
        return self.to_scenario_list().to_dataset()

    @classmethod
    @remove_edsl_version
    def from_dict(cls, data) -> Cache:
        """Construct a Cache from a dictionary."""
        newdata = {k: CacheEntry.from_dict(v) for k, v in data.items()}
        return cls(data=newdata)

    def __len__(self):
        """Return the number of CacheEntry objects in the Cache."""
        return len(self.data)

    # TODO: Same inputs could give different results and this could be useful
    # can't distinguish unless we do the ε trick or vary iterations
    def __eq__(self, other_cache: "Cache") -> bool:
        """
        Check if two Cache objects are equal.
        Does not verify their values are equal, only that they have the same keys.
        """
        if not isinstance(other_cache, Cache):
            return False
        return set(self.data.keys()) == set(other_cache.data.keys())

    def __add__(self, other: "Cache"):
        """
        Combine two caches.

        Note that the merge happens in place: ``self.data`` is updated with the
        entries of ``other`` and ``self`` is returned.

        :raises CacheError: If ``other`` is not a Cache.
        """
        if not isinstance(other, Cache):
            raise CacheError("Can only add two caches together")
        self.data.update(other.data)
        return self

    def __repr__(self):
        """
        Return a string representation of the Cache object.
        """
        return (
            f"Cache(data = {repr(self.data)}, immediate_write={self.immediate_write})"
        )

    ####################
    # EXAMPLES
    ####################
    def fetch_input_example(self) -> dict:
        """
        Create an example input for a 'fetch' operation.
        """
        return CacheEntry.fetch_input_example()

    def to_html(self):
        """
        Return a standalone HTML page that displays the cache as formatted JSON.

        Field values are turned into strings (one level deep for dict-valued
        fields) so that the whole structure is JSON-serializable.

        The JSON is embedded inside a ``<script>`` element. Cached text can
        contain arbitrary characters, and a literal ``</script>`` inside a
        value would end the script element early. Every ``<`` in the JSON is
        therefore written as the equivalent ``\\u003c`` escape, which decodes
        to the same character in the browser.
        """
        printable = {}
        for entry_key, entry in self.data.items():
            fields = {}
            for name, val in entry.to_dict().items():
                if isinstance(val, dict):
                    fields[name] = {kk: str(vv) for kk, vv in val.items()}
                else:
                    fields[name] = str(val)
            printable[entry_key] = fields

        # "<" only ever occurs inside JSON strings, so the escape is lossless.
        json_str = json.dumps(printable, indent=4).replace("<", "\\u003c")

        # HTML template with the JSON string embedded
        html = f"""
        <!DOCTYPE html>
        <html>
        <head>
        <title>Display JSON</title>
        </head>
        <body>
        <pre id="jsonData"></pre>
        <script>
        var json = {json_str};

        // JSON.stringify with spacing to format
        document.getElementById('jsonData').textContent = JSON.stringify(json, null, 4);
        </script>
        </body>
        </html>
        """
        return html

    def view(self) -> None:
        """View the Cache in a new browser tab."""
        import tempfile
        import webbrowser

        html_content = self.to_html()
        # delete=False keeps the file on disk after the handle is closed, so
        # the browser can still read it.
        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html") as tmpfile:
            tmpfile.write(html_content)
            filepath = tmpfile.name

        # Open the HTML file in a new browser tab
        webbrowser.open("file://" + filepath)

    @classmethod
    def example(cls, randomize: bool = False) -> Cache:
        """
        Returns an example Cache instance.

        Each example entry is stored under its own ``key``, so keys and values
        always agree. Without randomization both example entries share one key
        and the cache therefore holds a single entry.

        :param randomize: Passed through to ``CacheEntry.example``.
        """
        first = CacheEntry.example(randomize)
        second = CacheEntry.example(randomize)
        return cls(data={first.key: first, second.key: second})
550
+
551
+
552
if __name__ == "__main__":
    # Run the module's doctests; ELLIPSIS lets "..." match elided output.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)