edsl 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +311 -75
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +313 -167
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +4 -9
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +365 -220
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/{FileStore.py → file_store.py} +275 -189
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +18 -19
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/METADATA +1 -1
  230. edsl-0.1.49.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -493
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/coop/PriceFetcher.py +0 -54
  236. edsl/data/Cache.py +0 -582
  237. edsl/data/CacheEntry.py +0 -238
  238. edsl/data/SQLiteDict.py +0 -292
  239. edsl/data/__init__.py +0 -5
  240. edsl/data/orm.py +0 -10
  241. edsl/exceptions/cache.py +0 -5
  242. edsl/exceptions/coop.py +0 -14
  243. edsl/exceptions/data.py +0 -14
  244. edsl/exceptions/scenarios.py +0 -29
  245. edsl/jobs/Answers.py +0 -43
  246. edsl/jobs/JobsPrompts.py +0 -354
  247. edsl/jobs/buckets/BucketCollection.py +0 -134
  248. edsl/jobs/buckets/ModelBuckets.py +0 -65
  249. edsl/jobs/buckets/TokenBucket.py +0 -283
  250. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  251. edsl/jobs/interviews/Interview.py +0 -395
  252. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  253. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  254. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  255. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  256. edsl/jobs/tasks/TaskCreators.py +0 -64
  257. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  258. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  259. edsl/language_models/LanguageModel.py +0 -635
  260. edsl/language_models/ServiceDataSources.py +0 -0
  261. edsl/language_models/key_management/KeyLookup.py +0 -63
  262. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  263. edsl/language_models/key_management/models.py +0 -137
  264. edsl/questions/QuestionBase.py +0 -544
  265. edsl/questions/QuestionFreeText.py +0 -130
  266. edsl/questions/derived/QuestionLikertFive.py +0 -76
  267. edsl/results/ResultsExportMixin.py +0 -45
  268. edsl/results/TextEditor.py +0 -50
  269. edsl/results/results_fetch_mixin.py +0 -33
  270. edsl/results/results_tools_mixin.py +0 -98
  271. edsl/scenarios/DocumentChunker.py +0 -104
  272. edsl/scenarios/Scenario.py +0 -548
  273. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  274. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  275. edsl/scenarios/handlers/latex.py +0 -5
  276. edsl/shared.py +0 -1
  277. edsl/surveys/Survey.py +0 -1301
  278. edsl/surveys/SurveyQualtricsImport.py +0 -284
  279. edsl/surveys/SurveyToApp.py +0 -141
  280. edsl/surveys/instructions/__init__.py +0 -0
  281. edsl/tools/__init__.py +0 -1
  282. edsl/tools/clusters.py +0 -192
  283. edsl/tools/embeddings.py +0 -27
  284. edsl/tools/embeddings_plotting.py +0 -118
  285. edsl/tools/plotting.py +0 -112
  286. edsl/tools/summarize.py +0 -18
  287. edsl/utilities/data/Registry.py +0 -6
  288. edsl/utilities/data/__init__.py +0 -1
  289. edsl/utilities/data/scooter_results.json +0 -1
  290. edsl-0.1.47.dist-info/RECORD +0 -354
  291. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  292. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  293. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  294. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  295. /edsl/{results → dataset/display}/table_display.css +0 -0
  296. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  297. /edsl/{results → dataset}/tree_explore.py +0 -0
  298. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  299. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  300. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  301. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  302. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  303. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  304. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  305. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  306. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  307. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  308. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  309. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  310. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  311. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  312. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  313. {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/LICENSE +0 -0
  314. {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/WHEEL +0 -0
edsl/data/Cache.py DELETED
@@ -1,582 +0,0 @@
1
- """
2
- The `Cache` class is used to store responses from a language model.
3
- """
4
-
5
- from __future__ import annotations
6
- import json
7
- import os
8
- import warnings
9
- from typing import Optional, Union, TYPE_CHECKING
10
- from edsl.Base import Base
11
-
12
- from edsl.utilities.remove_edsl_version import remove_edsl_version
13
- from edsl.exceptions.cache import CacheError
14
-
15
-
16
- class Cache(Base):
17
- """
18
- A class that represents a cache of responses from a language model.
19
-
20
- :param data: The data to initialize the cache with.
21
- :param immediate_write: Whether to write to the cache immediately after storing a new entry.
22
-
23
- Deprecated:
24
-
25
- :param method: The method of storage to use for the cache.
26
- """
27
-
28
- __documentation__ = "https://docs.expectedparrot.com/en/latest/data.html"
29
-
30
- data = {}
31
-
32
def __init__(
    self,
    *,
    filename: Optional[str] = None,
    data: Optional[Union["SQLiteDict", dict]] = None,
    immediate_write: bool = True,
    method=None,
    verbose=False,
):
    """
    Set up the cache's bookkeeping dictionaries and load any initial entries.

    :param filename: Path of a ``.jsonl`` or ``.db`` file to read the cache from; also the default target of `write`.
    :param data: A mapping of cache keys to entries to start from.
    :param immediate_write: Whether `store` adds new entries to `self.data` right away.
    :param method: Deprecated; a non-None value triggers a warning in `_perform_checks`.
    :param verbose: Whether `fetch` prints cache hits and misses.
    :raises CacheError: If both `filename` and a non-empty `data` are given,
        or if `filename` does not end in ``.jsonl`` or ``.db``.
    """
    self.filename = filename
    self.method = method
    self.verbose = verbose
    self.immediate_write = immediate_write
    self.coop = None

    # Per-session bookkeeping: what was read, what was added, what is pending.
    self.fetched_data = {}
    self.new_entries = {}
    self.new_entries_to_write_later = {}

    if filename and data:
        raise CacheError("Cannot provide both filename and data")

    if data is not None:
        self.data = data
    elif filename is None:
        # Neither source was given: start from an empty in-memory cache.
        self.data = {}

    if filename is not None:
        # A file-backed cache always starts empty and is filled from the file.
        self.data = {}
        if filename.endswith(".jsonl"):
            if not os.path.exists(filename):
                print(f"File {filename} not found, but will write to this location.")
            else:
                self.add_from_jsonl(filename)
        elif not filename.endswith(".db"):
            raise CacheError("Invalid file extension. Must be .jsonl or .db")
        elif os.path.exists(filename):
            self.add_from_sqlite(filename)

    self._perform_checks()
83
-
84
def code(self):
    """Placeholder: code generation is not implemented for a Cache; returns None."""
    # Deliberately a no-op rather than raising NotImplementedError.
    return None
87
-
88
def keys(self):
    """
    Return the cache keys as a list.

    >>> from edsl import Cache
    >>> Cache.example().keys()
    ['5513286eb6967abc0511211f0402587d']
    """
    return [*self.data.keys()]
95
-
96
def values(self):
    """
    Return the cached entries as a list.

    >>> from edsl import Cache
    >>> Cache.example().values()
    [CacheEntry(...)]
    """
    return [*self.data.values()]
103
-
104
def items(self):
    """Pair each cache key with its entry, returned as a lazy `zip` iterator."""
    all_keys = self.keys()
    all_values = self.values()
    return zip(all_keys, all_values)
106
-
107
def new_entries_cache(self) -> Cache:
    """Return a new Cache holding the new entries plus the fetched entries.

    When a key appears in both, the fetched entry takes precedence.
    """
    combined = dict(self.new_entries)
    combined.update(self.fetched_data)
    return Cache(data=combined)
110
-
111
def _perform_checks(self):
    """Validate the cache contents and warn about deprecated arguments.

    :raises CacheError: If any value in `self.data` is not a `CacheEntry`.
    """
    from edsl.data.CacheEntry import CacheEntry

    for stored_value in self.data.values():
        if not isinstance(stored_value, CacheEntry):
            raise CacheError("Not all values are CacheEntry instances")

    if self.method is not None:
        warnings.warn("Argument `method` is deprecated", DeprecationWarning)
119
-
120
- ####################
121
- # READ/WRITE
122
- ####################
123
def fetch(
    self,
    *,
    model: str,
    parameters: dict,
    system_prompt: str,
    user_prompt: str,
    iteration: int,
) -> tuple[Optional[str], str]:
    """
    Fetch a value (LLM output) from the cache.

    :param model: The name of the language model.
    :param parameters: The model parameters.
    :param system_prompt: The system prompt.
    :param user_prompt: The user prompt.
    :param iteration: The iteration number.
    :return: A 2-tuple ``(output, key)``. ``output`` is the stored entry's
        ``output`` on a hit and None on a miss; ``key`` is the cache key
        generated from the inputs.

    A hit is also recorded in `self.fetched_data`.

    >>> c = Cache()
    >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello", user_prompt="Hi", iteration=1)[0] is None
    True


    """
    from edsl.data.CacheEntry import CacheEntry

    key = CacheEntry.gen_key(
        model=model,
        parameters=parameters,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        iteration=iteration,
    )
    entry = self.data.get(key, None)
    if entry is not None:
        if self.verbose:
            print(f"Cache hit for key: {key}")
        self.fetched_data[key] = entry
    else:
        if self.verbose:
            print(f"Cache miss for key: {key}")
    # Parenthesised so it is clear the conditional covers only the first element.
    return (None if entry is None else entry.output), key
167
-
168
def store(
    self,
    model: str,
    parameters: str,
    system_prompt: str,
    user_prompt: str,
    response: dict,
    iteration: int,
    service: str,
) -> str:
    """
    Record a language-model response in the cache and return its key.

    The response is JSON-serialized and wrapped in a `CacheEntry`; the key
    is taken from that entry.

    Where the entry goes:

    * It is always added to `self.new_entries`
    * If `immediate_write` is True , it is also added to `self.data`
    * If `immediate_write` is False, it is added to `self.new_entries_to_write_later` instead

    >>> from edsl import Cache, Model, Question
    >>> m = Model("test")
    >>> c = Cache()
    >>> len(c)
    0
    >>> results = Question.example("free_text").by(m).run(cache = c, disable_remote_cache = True, disable_remote_inference = True)
    >>> len(c)
    1
    """
    from edsl.data.CacheEntry import CacheEntry

    new_entry = CacheEntry(
        model=model,
        parameters=parameters,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        output=json.dumps(response),
        iteration=iteration,
        service=service,
    )
    entry_key = new_entry.key
    self.new_entries[entry_key] = new_entry

    destination = (
        self.data if self.immediate_write else self.new_entries_to_write_later
    )
    destination[entry_key] = new_entry
    return entry_key
217
-
218
def add_from_dict(
    self, new_data: dict[str, "CacheEntry"], write_now: Optional[bool] = True
) -> None:
    """
    Merge a dictionary of entries into the cache.

    Every pair is validated before anything is merged.

    :param new_data: Mapping of cache keys to `CacheEntry` objects.
    :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
    :raises CacheError: If a key already exists with a different value, or a value is not a `CacheEntry`.
    """
    from edsl.data.CacheEntry import CacheEntry

    for entry_key, candidate in new_data.items():
        if entry_key in self.data and candidate != self.data[entry_key]:
            raise CacheError("Mismatch in values")
        if not isinstance(candidate, CacheEntry):
            raise CacheError(f"Wrong type - the observed type is {type(candidate)}")

    self.new_entries.update(new_data)
    target = self.data if write_now else self.new_entries_to_write_later
    target.update(new_data)
240
-
241
def add_from_jsonl(self, filename: str, write_now: Optional[bool] = True) -> None:
    """
    Add entries to the cache from a JSONL file.

    Each non-blank line must be a JSON object; its first key/value pair is
    read as ``{cache_key: entry_fields}`` and the fields are passed to
    `CacheEntry`. Blank lines (for example a trailing newline left by an
    editor) are skipped instead of failing in `json.loads`.

    :param filename: Path of the JSONL file to read.
    :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
    """
    from edsl.data.CacheEntry import CacheEntry

    new_data = {}
    # NOTE: "a+" is kept from the original: a missing file is created empty
    # rather than raising, and the read position must be rewound first.
    with open(filename, "a+") as f:
        f.seek(0)
        for line in f:
            if not line.strip():
                continue
            record = json.loads(line)
            key, value = list(record.items())[0]
            new_data[key] = CacheEntry(**value)
    self.add_from_dict(new_data=new_data, write_now=write_now)
259
-
260
def add_from_sqlite(self, db_path: str, write_now: Optional[bool] = True):
    """
    Add entries to the cache from an SQLite database.

    :param db_path: Path of the SQLite database file, opened through `SQLiteDict`.
    :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
    """
    # CacheEntry was previously referenced here without being imported,
    # which raised NameError as soon as the database held one entry.
    from edsl.data.CacheEntry import CacheEntry
    from edsl.data.SQLiteDict import SQLiteDict

    db = SQLiteDict(db_path)
    new_data = {}
    for key, value in db.items():
        # NOTE(review): assumes `value` is a mapping of CacheEntry fields;
        # confirm against what SQLiteDict.items() actually yields.
        new_data[key] = CacheEntry(**value)
    self.add_from_dict(new_data=new_data, write_now=write_now)
273
-
274
@classmethod
def from_sqlite_db(cls, db_path: str) -> Cache:
    """
    Build a Cache whose data is a `SQLiteDict` opened on `db_path`.

    :param db_path: Path of the SQLite database file.
    """
    from edsl.data.SQLiteDict import SQLiteDict

    backing_store = SQLiteDict(db_path)
    return cls(data=backing_store)
282
-
283
@classmethod
def from_local_cache(cls) -> Cache:
    """
    Build a Cache from the database named by the ``EDSL_DATABASE_PATH`` setting.

    The ``sqlite:///`` prefix is removed from the configured value and the
    remainder is used as the database path.
    """
    from edsl.config import CONFIG

    database_url = CONFIG.get("EDSL_DATABASE_PATH")
    sqlite_file = database_url.replace("sqlite:///", "")
    return cls.from_sqlite_db(sqlite_file)
294
-
295
@classmethod
def from_jsonl(cls, jsonlfile: str, db_path: Optional[str] = None) -> Cache:
    """
    Construct a Cache from a JSONL file.

    :param jsonlfile: The path to the JSONL file of cache entries.
    :param db_path: The path to the SQLite database used to store the cache.
    :raises FileNotFoundError: If no file exists at `jsonlfile`.

    * If `db_path` is None, the cache will be stored in memory, as a dictionary.
    * If `db_path` is provided, the cache will be stored in an SQLite database.
    """
    from edsl.data.SQLiteDict import SQLiteDict

    if not os.path.exists(jsonlfile):
        raise FileNotFoundError(f"File {jsonlfile} not found")

    data = {} if db_path is None else SQLiteDict(db_path)

    # Build through `cls` (not a hard-coded Cache) so a subclass gets an
    # instance of itself, as with the other alternate constructors.
    cache = cls(data=data)
    cache.add_from_jsonl(jsonlfile)
    return cache
320
-
321
def write_sqlite_db(self, db_path: str) -> None:
    """
    Copy every entry of this cache into a `SQLiteDict` opened on `db_path`.

    :param db_path: Path of the SQLite database file to write to.
    """
    ## TODO: Check to make sure not over-writing (?)
    ## Should be added to SQLiteDict constructor (?)
    from edsl.data.SQLiteDict import SQLiteDict

    target_db = SQLiteDict(db_path)
    for entry_key, entry in self.data.items():
        target_db[entry_key] = entry
332
-
333
def write(self, filename: Optional[str] = None) -> None:
    """
    Write the cache to a file at the specified location.

    :param filename: Target path ending in ``.jsonl`` or ``.db``. When
        omitted, `self.filename` is used.
    :raises CacheError: If no filename is available, or the extension is
        neither ``.jsonl`` nor ``.db``.
    """
    if filename is None:
        filename = self.filename
    if filename is None:
        # Previously this fell through to `None.endswith` and failed with
        # an opaque AttributeError.
        raise CacheError(
            "No filename given and the cache has no filename to write to"
        )

    if filename.endswith(".jsonl"):
        self.write_jsonl(filename)
    elif filename.endswith(".db"):
        self.write_sqlite_db(filename)
    else:
        raise CacheError("Invalid file extension. Must be .jsonl or .db")
345
-
346
def write_jsonl(self, filename: str) -> None:
    """
    Write the cache to a JSONL file, one ``{key: entry_dict}`` object per line.

    :param filename: Target path; a relative path is resolved against the
        current working directory. An existing file is overwritten.
    """
    destination = os.path.join(os.getcwd(), filename)
    with open(destination, "w") as out:
        out.writelines(
            json.dumps({entry_key: entry.to_dict()}) + "\n"
            for entry_key, entry in self.data.items()
        )
354
-
355
def to_scenario_list(self):
    """Convert the cache to a `ScenarioList` with one `Scenario` per entry.

    Each scenario holds the entry's `to_dict()` fields plus a ``cache_key``
    field with the entry's key.
    """
    from edsl.scenarios.ScenarioList import ScenarioList
    from edsl.scenarios.Scenario import Scenario

    scenario_rows = []
    for cache_key, entry in self.data.items():
        row = entry.to_dict()
        row["cache_key"] = cache_key
        scenario_rows.append(Scenario(row))
    return ScenarioList(scenario_rows)
366
-
367
def __floordiv__(self, other: "Cache") -> "Cache":
    """
    Return a new Cache with the entries whose keys are in self but not in other.
    The ``//`` operator stands in for subtraction.

    :param other: Another Cache object to compare against
    :return: A new Cache object containing unique entries, with this cache's `immediate_write` setting
    :raises CacheError: If `other` is not a Cache

    >>> from edsl.data.CacheEntry import CacheEntry
    >>> ce1 = CacheEntry.example(randomize = True)
    >>> ce2 = CacheEntry.example(randomize = True)
    >>> ce2 = CacheEntry.example(randomize = True)
    >>> c1 = Cache(data={ce1.key: ce1, ce2.key: ce2})
    >>> c2 = Cache(data={ce1.key: ce1})
    >>> c3 = c1 // c2
    >>> len(c3)
    1
    >>> c3.data[ce2.key] == ce2
    True
    """
    if isinstance(other, Cache):
        only_here = {}
        for entry_key, entry in self.data.items():
            if entry_key not in other.data:
                only_here[entry_key] = entry
        return Cache(data=only_here, immediate_write=self.immediate_write)

    raise CacheError("Can only compare two caches")
392
-
393
- @classmethod
394
- def from_url(cls, db_path=None) -> Cache:
395
- """
396
- Construct a Cache object from a remote.
397
- """
398
- # ...do something here
399
- # return Cache(data=db)
400
- pass
401
-
402
- def __enter__(self):
403
- """
404
- Run when a context is entered.
405
- """
406
- return self
407
-
408
- def __exit__(self, exc_type, exc_value, traceback):
409
- """
410
- Run when a context is exited.
411
- """
412
- for key, entry in self.new_entries_to_write_later.items():
413
- self.data[key] = entry
414
-
415
- if self.filename:
416
- self.write(self.filename)
417
-
418
def __hash__(self):
    """Return the hash of the Cache, computed by `dict_hash` over its
    dictionary form without the version fields."""
    from edsl.utilities.utilities import dict_hash

    plain_form = self.to_dict(add_edsl_version=False)
    return dict_hash(plain_form)
423
-
424
def to_dict(self, add_edsl_version=True) -> dict:
    """
    Serialize the cache to a dictionary mapping each key to its entry's `to_dict()`.

    :param add_edsl_version: When true, also add the ``edsl_version`` and
        ``edsl_class_name`` fields.
    """
    serialized = {}
    for entry_key, entry in self.data.items():
        serialized[entry_key] = entry.to_dict()

    if not add_edsl_version:
        return serialized

    from edsl import __version__

    serialized["edsl_version"] = __version__
    serialized["edsl_class_name"] = "Cache"
    return serialized
433
-
434
- def _summary(self):
435
- return {"EDSL Class": "Cache", "Number of entries": len(self.data)}
436
-
437
- def table(
438
- self,
439
- *fields,
440
- tablefmt: Optional[str] = None,
441
- pretty_labels: Optional[dict] = None,
442
- ) -> str:
443
- return self.to_dataset().table(
444
- *fields, tablefmt=tablefmt, pretty_labels=pretty_labels
445
- )
446
-
447
- def select(self, *fields):
448
- return self.to_dataset().select(*fields)
449
-
450
- def tree(self, node_list: Optional[list[str]] = None):
451
- return self.to_scenario_list().tree(node_list)
452
-
453
- def to_dataset(self):
454
- return self.to_scenario_list().to_dataset()
455
-
456
- @classmethod
457
- @remove_edsl_version
458
- def from_dict(cls, data) -> Cache:
459
- """Construct a Cache from a dictionary."""
460
- from edsl.data.CacheEntry import CacheEntry
461
-
462
- newdata = {k: CacheEntry.from_dict(v) for k, v in data.items()}
463
- return cls(data=newdata)
464
-
465
- def __len__(self):
466
- """Return the number of CacheEntry objects in the Cache."""
467
- return len(self.data)
468
-
469
- # TODO: Same inputs could give different results and this could be useful
470
- # can't distinguish unless we do the ε trick or vary iterations
471
- def __eq__(self, other_cache: "Cache") -> bool:
472
- """
473
- Check if two Cache objects are equal.
474
- Does not verify their values are equal, only that they have the same keys.
475
- """
476
- if not isinstance(other_cache, Cache):
477
- return False
478
- return set(self.data.keys()) == set(other_cache.data.keys())
479
-
480
- def __add__(self, other: "Cache"):
481
- """
482
- Combine two caches.
483
- """
484
- if not isinstance(other, Cache):
485
- raise CacheError("Can only add two caches together")
486
- self.data.update(other.data)
487
- return self
488
-
489
- def __repr__(self):
490
- """
491
- Return a string representation of the Cache object.
492
- """
493
- return (
494
- f"Cache(data = {repr(self.data)}, immediate_write={self.immediate_write})"
495
- )
496
-
497
- ####################
498
- # EXAMPLES
499
- ####################
500
- def fetch_input_example(self) -> dict:
501
- """
502
- Create an example input for a 'fetch' operation.
503
- """
504
- from edsl.data.CacheEntry import CacheEntry
505
-
506
- return CacheEntry.fetch_input_example()
507
-
508
- def to_html(self):
509
- # json_str = json.dumps(self.data, indent=4)
510
- d = {k: v.to_dict() for k, v in self.data.items()}
511
- for key, value in d.items():
512
- for k, v in value.items():
513
- if isinstance(v, dict):
514
- d[key][k] = {kk: str(vv) for kk, vv in v.items()}
515
- else:
516
- d[key][k] = str(v)
517
-
518
- json_str = json.dumps(d, indent=4)
519
-
520
- # HTML template with the JSON string embedded
521
- html = f"""
522
- <!DOCTYPE html>
523
- <html>
524
- <head>
525
- <title>Display JSON</title>
526
- </head>
527
- <body>
528
- <pre id="jsonData"></pre>
529
- <script>
530
- var json = {json_str};
531
-
532
- // JSON.stringify with spacing to format
533
- document.getElementById('jsonData').textContent = JSON.stringify(json, null, 4);
534
- </script>
535
- </body>
536
- </html>
537
- """
538
- return html
539
-
540
- def subset(self, keys: list[str]) -> Cache:
541
- """
542
- Return a subset of the Cache with the specified keys.
543
- """
544
- new_data = {k: v for k, v in self.data.items() if k in keys}
545
- return Cache(data=new_data)
546
-
547
- def view(self) -> None:
548
- """View the Cache in a new browser tab."""
549
- import tempfile
550
- import webbrowser
551
-
552
- html_content = self.to_html()
553
- # Create a temporary file to hold the HTML
554
- with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html") as tmpfile:
555
- tmpfile.write(html_content)
556
- # Get the path to the temporary file
557
- filepath = tmpfile.name
558
-
559
- # Open the HTML file in a new browser tab
560
- webbrowser.open("file://" + filepath)
561
-
562
- @classmethod
563
- def example(cls, randomize: bool = False) -> Cache:
564
- """
565
- Returns an example Cache instance.
566
-
567
- :param randomize: If True, uses CacheEntry's randomize method.
568
- """
569
- from edsl.data.CacheEntry import CacheEntry
570
-
571
- return cls(
572
- data={
573
- CacheEntry.example(randomize).key: CacheEntry.example(),
574
- CacheEntry.example(randomize).key: CacheEntry.example(),
575
- }
576
- )
577
-
578
-
579
- if __name__ == "__main__":
580
- import doctest
581
-
582
- doctest.testmod(optionflags=doctest.ELLIPSIS)