edsl 0.1.47__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +303 -67
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +313 -167
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +4 -9
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +365 -220
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/{FileStore.py → file_store.py} +275 -189
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +18 -19
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/METADATA +1 -1
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -493
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -582
- edsl/data/CacheEntry.py +0 -238
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -544
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1301
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.47.dist-info/RECORD +0 -354
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
edsl/caching/cache.py
ADDED
@@ -0,0 +1,814 @@
|
|
1
|
+
"""
|
2
|
+
Cache implementation for storing and retrieving language model responses.
|
3
|
+
|
4
|
+
This module provides the Cache class, which is the core component of EDSL's caching system.
|
5
|
+
The caching system stores language model responses to avoid redundant API calls,
|
6
|
+
reducing costs and latency while improving reproducibility of results.
|
7
|
+
|
8
|
+
The Cache class handles:
|
9
|
+
- Storage and retrieval of model responses via key-based lookups
|
10
|
+
- Persistence to and from disk using various formats (.jsonl, .db)
|
11
|
+
- Merging and comparing caches from different sources
|
12
|
+
- Integration with remote caching systems
|
13
|
+
|
14
|
+
The primary workflow involves:
|
15
|
+
1. Fetching responses from cache if they exist
|
16
|
+
2. Storing new responses when they don't
|
17
|
+
3. Persisting cache state to disk when needed
|
18
|
+
|
19
|
+
Cache objects can be used:
|
20
|
+
- Directly by the user for explicit cache management
|
21
|
+
- Implicitly by the CacheHandler which manages cache selection and migrations
|
22
|
+
- In conjunction with remote caching services
|
23
|
+
|
24
|
+
Implementation Notes:
|
25
|
+
- Cache uses CacheEntry objects as its values
|
26
|
+
- Keys are hash-based identifiers of the input parameters
|
27
|
+
- Multiple storage backends are supported (dict, SQLiteDict)
|
28
|
+
"""
|
29
|
+
|
30
|
+
from __future__ import annotations
|
31
|
+
import json
|
32
|
+
import os
|
33
|
+
import warnings
|
34
|
+
from typing import Optional, Union, TYPE_CHECKING
|
35
|
+
from ..base import Base
|
36
|
+
|
37
|
+
from ..utilities import remove_edsl_version, dict_hash
|
38
|
+
from .exceptions import CacheError
|
39
|
+
|
40
|
+
class Cache(Base):
|
41
|
+
"""Cache for storing and retrieving language model responses.
|
42
|
+
|
43
|
+
The Cache class manages a collection of CacheEntry objects, providing methods for
|
44
|
+
storing, retrieving, and persisting language model responses. It serves as the core
|
45
|
+
component of EDSL's caching infrastructure, helping to reduce redundant API calls,
|
46
|
+
save costs, and ensure reproducibility.
|
47
|
+
|
48
|
+
Cache can use different storage backends:
|
49
|
+
- In-memory dictionary (default)
|
50
|
+
- SQLite database via SQLiteDict
|
51
|
+
- JSON lines file (.jsonl)
|
52
|
+
|
53
|
+
The cache operates by generating deterministic keys based on the model, parameters,
|
54
|
+
prompts, and iteration number. This allows for efficient lookup of cached responses
|
55
|
+
when identical requests are made.
|
56
|
+
|
57
|
+
Attributes:
|
58
|
+
data (dict or SQLiteDict): The primary storage for cache entries
|
59
|
+
new_entries (dict): Entries added in the current session
|
60
|
+
fetched_data (dict): Entries retrieved in the current session
|
61
|
+
filename (str, optional): Path for persistence if provided
|
62
|
+
immediate_write (bool): Whether to update data immediately (True) or defer (False)
|
63
|
+
|
64
|
+
Technical Notes:
|
65
|
+
- Can be used as a context manager to automatically persist changes on exit
|
66
|
+
- Supports serialization/deserialization via to_dict/from_dict methods
|
67
|
+
- Implements set operations (addition, subtraction) for combining caches
|
68
|
+
- Integrates with the broader EDSL caching infrastructure via CacheHandler
|
69
|
+
"""
|
70
|
+
|
71
|
+
__documentation__ = "https://docs.expectedparrot.com/en/latest/caching.html"
|
72
|
+
|
73
|
+
data = {}
|
74
|
+
|
75
|
+
def __init__(
    self,
    *,
    filename: Optional[str] = None,
    data: Optional[Union["SQLiteDict", dict]] = None,
    immediate_write: bool = True,
    method=None,
    verbose=False,
):
    """Set up a cache that is empty, wraps ``data``, or is loaded from ``filename``.

    Args:
        filename: Path ending in ``.jsonl`` or ``.db``. An existing file is
            loaded through ``add_from_jsonl`` / ``add_from_sqlite``. A missing
            ``.jsonl`` file only prints a notice; a missing ``.db`` file is
            silently treated as empty.
        data: Mapping used directly as the backing store when no filename
            is given.
        immediate_write: Stored on the instance; ``store`` consults it to decide
            whether new entries go straight into ``self.data``.
        method: Deprecated. Any non-None value triggers a DeprecationWarning
            in ``_perform_checks``.
        verbose: When true, ``fetch`` prints cache hits and misses.

    Raises:
        CacheError: If ``filename`` and ``data`` are both truthy, if the
            filename ends in neither ``.jsonl`` nor ``.db``, or if
            ``_perform_checks`` rejects the loaded values.
    """
    # Plain bookkeeping attributes; none of them depend on each other.
    self.filename = filename
    self.method = method
    self.verbose = verbose
    self.immediate_write = immediate_write
    self.coop = None
    self.fetched_data = {}
    self.new_entries = {}
    self.new_entries_to_write_later = {}

    if filename and data:
        raise CacheError("Cannot provide both filename and data")

    if filename is None:
        # No file involved: use the caller's mapping, or start empty.
        self.data = {} if data is None else data
    else:
        # A filename always starts from a fresh dict, then loads the file.
        self.data = {}
        if filename.endswith(".jsonl"):
            if not os.path.exists(filename):
                print(
                    f"File {filename} not found, but will write to this location."
                )
            else:
                self.add_from_jsonl(filename)
        elif filename.endswith(".db"):
            if os.path.exists(filename):
                self.add_from_sqlite(filename)
        else:
            raise CacheError("Invalid file extension. Must be .jsonl or .db")

    self._perform_checks()
|
145
|
+
|
146
|
+
def code(self):
    """No-op placeholder; always returns None."""
    # NOTE(review): a `raise NotImplementedError` was left commented out in the
    # original, so this deliberately stays a silent no-op.
    pass
|
149
|
+
|
150
|
+
def keys(self):
    """List every key currently held in ``self.data``.

    Returns:
        list: The keys of the backing store, copied into a new list.

    Examples:
        >>> from edsl import Cache
        >>> Cache.example().keys()
        ['5513286eb6967abc0511211f0402587d']
    """
    stored_keys = self.data.keys()
    return list(stored_keys)
|
164
|
+
|
165
|
+
def values(self):
    """List every value currently held in ``self.data``.

    Returns:
        list: The stored values, copied into a new list.

    Examples:
        >>> from edsl import Cache
        >>> Cache.example().values()
        [CacheEntry(...)]
    """
    stored_values = self.data.values()
    return list(stored_values)
|
179
|
+
|
180
|
+
def items(self):
    """Pair each cache key with its value, in the manner of ``dict.items()``.

    Returns:
        zip: A one-shot iterator of ``(key, value)`` tuples built from
        ``self.keys()`` and ``self.values()``.
    """
    all_keys = self.keys()
    all_values = self.values()
    return zip(all_keys, all_values)
|
190
|
+
|
191
|
+
def new_entries_cache(self) -> Cache:
    """Build a fresh Cache from this session's stored and fetched entries.

    ``self.new_entries`` and ``self.fetched_data`` are merged into a single
    dict; when both hold the same key, the ``fetched_data`` entry wins.
    """
    session_entries = dict(self.new_entries)
    session_entries.update(self.fetched_data)
    return Cache(data=session_entries)
|
194
|
+
|
195
|
+
def _perform_checks(self):
    """Validate the stored values and warn about the deprecated ``method`` argument.

    Raises:
        CacheError: If any value in ``self.data`` is not a CacheEntry.
    """
    from .cache_entry import CacheEntry

    for stored in self.data.values():
        if not isinstance(stored, CacheEntry):
            raise CacheError("Not all values are CacheEntry instances")

    if self.method is not None:
        warnings.warn("Argument `method` is deprecated", DeprecationWarning)
|
203
|
+
|
204
|
+
####################
|
205
|
+
# READ/WRITE
|
206
|
+
####################
|
207
|
+
def fetch(
    self,
    *,
    model: str,
    parameters: dict,
    system_prompt: str,
    user_prompt: str,
    iteration: int,
) -> tuple[Union[None, str], str]:
    """Look up a cached response for the given request.

    The lookup key is produced by ``CacheEntry.gen_key`` from the model,
    parameters, both prompts and the iteration number.

    Args:
        model: Language model identifier (e.g., "gpt-3.5-turbo").
        parameters: Model configuration parameters (e.g., temperature, max_tokens).
        system_prompt: The system instructions given to the model.
        user_prompt: The user query/prompt given to the model.
        iteration: The iteration number for this specific request.

    Returns:
        tuple: ``(response, key)`` where ``response`` is the matching entry's
        ``output`` attribute, or None when the key is not in ``self.data``,
        and ``key`` is the generated cache key.

    Technical Notes:
        - A hit is recorded in ``self.fetched_data``.
        - With ``verbose=True`` a hit/miss message is printed.

    Examples:
        >>> c = Cache()
        >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello",
        ...         user_prompt="Hi", iteration=1)[0] is None
        True
    """
    from .cache_entry import CacheEntry

    key = CacheEntry.gen_key(
        model=model,
        parameters=parameters,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        iteration=iteration,
    )
    entry = self.data.get(key, None)

    if entry is None:
        if self.verbose:
            print(f"Cache miss for key: {key}")
        return None, key

    if self.verbose:
        print(f"Cache hit for key: {key}")
    # Remember which entries were served from the cache in this session.
    self.fetched_data[key] = entry
    return entry.output, key
|
264
|
+
|
265
|
+
def store(
    self,
    model: str,
    parameters: dict,
    system_prompt: str,
    user_prompt: str,
    response: dict,
    iteration: int,
    service: str,
) -> str:
    """Wrap a model response in a CacheEntry and add it to the cache.

    Args:
        model: Language model identifier (e.g., "gpt-3.5-turbo").
        parameters: Model configuration parameters (e.g., temperature, max_tokens).
        system_prompt: The system instructions given to the model.
        user_prompt: The user query/prompt given to the model.
        response: The model's response; it is serialized with ``json.dumps``
            before being stored as the entry's output.
        iteration: The iteration number for this specific request.
        service: The service provider (e.g., "openai", "anthropic").

    Returns:
        str: The ``key`` attribute of the newly created entry.

    Storage Behavior:
        The entry is always recorded in ``self.new_entries``. In addition:
        - If ``self.immediate_write`` is true, it is written to ``self.data``.
        - Otherwise it is held in ``self.new_entries_to_write_later``.

    Examples:
        >>> from edsl import Cache, Model, Question
        >>> m = Model("test")
        >>> c = Cache()
        >>> len(c)
        0
        >>> results = Question.example("free_text").by(m).run(cache=c,
        ...     disable_remote_cache=True, disable_remote_inference=True)
        >>> len(c)
        1
    """
    from .cache_entry import CacheEntry

    entry = CacheEntry(
        model=model,
        parameters=parameters,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        output=json.dumps(response),
        iteration=iteration,
        service=service,
    )
    key = entry.key
    self.new_entries[key] = entry
    if self.immediate_write:
        self.data[key] = entry
    else:
        self.new_entries_to_write_later[key] = entry
    return key
|
333
|
+
|
334
|
+
def add_from_dict(
    self, new_data: dict[str, "CacheEntry"], write_now: Optional[bool] = True
) -> None:
    """Merge a mapping of cache entries into this cache.

    Every entry is validated before anything is merged, so a failure leaves
    the cache untouched.

    Args:
        new_data: Mapping from cache key to CacheEntry.
        write_now: Whether to write to ``self.data`` immediately (similar to
            ``immediate_write``); otherwise the entries are queued in
            ``self.new_entries_to_write_later``.

    Raises:
        CacheError: If a key already exists with a different value, or if a
            value is not a CacheEntry.
    """
    from .cache_entry import CacheEntry

    for entry_key, candidate in new_data.items():
        already_stored = entry_key in self.data
        if already_stored and candidate != self.data[entry_key]:
            raise CacheError("Mismatch in values")
        if not isinstance(candidate, CacheEntry):
            raise CacheError(f"Wrong type - the observed type is {type(candidate)}")

    self.new_entries.update(new_data)
    destination = self.data if write_now else self.new_entries_to_write_later
    destination.update(new_data)
|
356
|
+
|
357
|
+
def add_from_jsonl(self, filename: str, write_now: Optional[bool] = True) -> None:
    """Add entries to the cache from a JSONL file.

    Each non-blank line is parsed as a JSON object. Its first key is used as
    the cache key and the corresponding value is expanded into a CacheEntry.
    Blank or whitespace-only lines (such as a trailing empty line) are skipped.

    Args:
        filename: Path to the JSONL file.
        write_now: Whether to write to the cache immediately (similar to
            ``immediate_write``); passed through to ``add_from_dict``.
    """
    from .cache_entry import CacheEntry

    # "a+" is kept on purpose: it creates the file if it is missing instead of
    # raising. Append mode starts at the end of the file, hence the seek(0).
    with open(filename, "a+") as f:
        f.seek(0)
        lines = f.readlines()

    new_data = {}
    for line in lines:
        if not line.strip():
            # json.loads would raise on an empty line; nothing to load here.
            continue
        record = json.loads(line)
        key, value = list(record.items())[0]
        new_data[key] = CacheEntry(**value)
    self.add_from_dict(new_data=new_data, write_now=write_now)
|
375
|
+
|
376
|
+
def add_from_sqlite(self, db_path: str, write_now: Optional[bool] = True):
    """
    Load every entry of an SQLite database into this cache.

    The database is opened through ``SQLiteDict``; each stored value is
    expanded as keyword arguments into a ``CacheEntry`` and the resulting
    mapping is handed to ``add_from_dict``.

    :param db_path: Path passed to ``SQLiteDict``.
    :param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
    """
    from .sql_dict import SQLiteDict
    from .cache_entry import CacheEntry

    source = SQLiteDict(db_path)
    loaded = {}
    for entry_key, raw_entry in source.items():
        loaded[entry_key] = CacheEntry(**raw_entry)
    self.add_from_dict(new_data=loaded, write_now=write_now)
|
390
|
+
|
391
|
+
@classmethod
def from_sqlite_db(cls, db_path: str) -> Cache:
    """Build a cache whose data store is a ``SQLiteDict`` opened on ``db_path``."""
    from .sql_dict import SQLiteDict

    backing_store = SQLiteDict(db_path)
    return cls(data=backing_store)
|
397
|
+
|
398
|
+
@classmethod
def from_local_cache(cls) -> Cache:
    """Build a cache from the database named by the ``EDSL_DATABASE_PATH`` setting.

    The configured value has its ``sqlite:///`` prefix removed and the
    remainder is passed to ``from_sqlite_db``.
    """
    from ..config import CONFIG

    database_url = CONFIG.get("EDSL_DATABASE_PATH")
    local_path = database_url.replace("sqlite:///", "")
    return cls.from_sqlite_db(local_path)
|
407
|
+
|
408
|
+
@classmethod
def from_jsonl(cls, jsonlfile: str, db_path: Optional[str] = None) -> Cache:
    """
    Construct a Cache from a JSONL file.

    :param jsonlfile: The path to the JSONL file of cache entries.
    :param db_path: The path to the SQLite database used to store the cache.
    :raises FileNotFoundError: If ``jsonlfile`` does not exist.

    * If `db_path` is None, the cache will be stored in memory, as a dictionary.
    * If `db_path` is provided, the cache will be stored in an SQLite database.
    """
    from .sql_dict import SQLiteDict

    # Checked up front because add_from_jsonl opens the file in "a+" mode,
    # which would create a missing file instead of failing.
    if not os.path.exists(jsonlfile):
        raise FileNotFoundError(f"File {jsonlfile} not found")

    data = {} if db_path is None else SQLiteDict(db_path)

    # Instantiate through cls (like the other alternate constructors) so that
    # subclasses get an instance of their own type.
    cache = cls(data=data)
    cache.add_from_jsonl(jsonlfile)
    return cache
|
433
|
+
|
434
|
+
def write_sqlite_db(self, db_path: str) -> None:
    """
    Copy every entry of this cache into a ``SQLiteDict`` opened on ``db_path``.

    :param db_path: Path passed to ``SQLiteDict``.
    """
    # TODO: decide whether existing content at db_path must be protected from
    # being overwritten, and whether that check belongs in SQLiteDict itself.
    from .sql_dict import SQLiteDict

    target = SQLiteDict(db_path)
    for entry_key, entry in self.data.items():
        target[entry_key] = entry
|
445
|
+
|
446
|
+
def write(self, filename: Optional[str] = None) -> None:
    """
    Write the cache to a file, choosing the format from the file extension.

    :param filename: Destination path. When omitted, ``self.filename`` is used.
    :raises CacheError: If no filename is available, or if the name ends in
        neither ``.jsonl`` nor ``.db``.
    """
    if filename is None:
        filename = self.filename
    if filename is None:
        # Without this guard the extension check below would fail with an
        # unrelated AttributeError on None.
        raise CacheError(
            "No filename given and the cache has no default filename to write to"
        )

    if filename.endswith(".jsonl"):
        self.write_jsonl(filename)
    elif filename.endswith(".db"):
        self.write_sqlite_db(filename)
    else:
        raise CacheError("Invalid file extension. Must be .jsonl or .db")
|
458
|
+
|
459
|
+
def write_jsonl(self, filename: str) -> None:
    """
    Dump the cache to a JSONL file, one ``{key: entry_dict}`` object per line.

    :param filename: Destination path; it is joined onto the current working
        directory before opening. The file is opened in "w" mode.
    """
    destination = os.path.join(os.getcwd(), filename)
    with open(destination, "w") as handle:
        for entry_key, entry in self.data.items():
            serialized = json.dumps({entry_key: entry.to_dict()})
            handle.write(serialized + "\n")
|
467
|
+
|
468
|
+
def to_scenario_list(self):
    """Return a ``ScenarioList`` with one ``Scenario`` per cache entry.

    Each scenario is built from the entry's ``to_dict()`` output, extended
    with a ``"cache_key"`` field holding the entry's key in this cache.
    """
    from ..scenarios import ScenarioList, Scenario

    def _as_scenario(cache_key, entry):
        record = entry.to_dict()
        record["cache_key"] = cache_key
        return Scenario(record)

    return ScenarioList(
        [_as_scenario(cache_key, entry) for cache_key, entry in self.data.items()]
    )
|
478
|
+
|
479
|
+
def __floordiv__(self, other: "Cache") -> "Cache":
    """Return a new cache holding the entries whose keys are absent from ``other``.

    ``a // b`` acts as a set difference on cache keys: an entry of ``a`` is
    kept only when its key does not appear in ``b.data``. Entry contents are
    not compared.

    Args:
        other: The Cache whose keys are excluded from the result

    Returns:
        Cache: A new Cache built with this cache's immediate_write setting

    Raises:
        CacheError: If the provided object is not a Cache instance

    Examples:
        >>> from edsl.caching import CacheEntry
        >>> ce1 = CacheEntry.example(randomize=True)
        >>> ce2 = CacheEntry.example(randomize=True)
        >>> c1 = Cache(data={ce1.key: ce1, ce2.key: ce2})
        >>> c2 = Cache(data={ce1.key: ce1})
        >>> c3 = c1 // c2  # Get entries in c1 that aren't in c2
        >>> len(c3)
        1
        >>> c3.data[ce2.key] == ce2
        True
    """
    if not isinstance(other, Cache):
        raise CacheError("Can only compare two caches")

    remaining = {}
    for entry_key, entry in self.data.items():
        if entry_key in other.data:
            continue
        remaining[entry_key] = entry
    return Cache(data=remaining, immediate_write=self.immediate_write)
|
517
|
+
|
518
|
+
@classmethod
def from_url(cls, db_path=None) -> Cache:
    """
    Placeholder for constructing a Cache object from a remote.

    Not implemented: the method currently does nothing and returns None.
    """
    # Nothing is fetched or built yet.
    return None
|
526
|
+
|
527
|
+
def __enter__(self):
    """Enter a ``with`` block and hand back this cache.

    Example usage:
    ```python
    with Cache(filename="my_cache.db") as cache:
        # Use cache...
        # __exit__ runs when the block ends
    ```

    Returns:
        Cache: The cache instance itself
    """
    return self
|
541
|
+
|
542
|
+
def __exit__(self, exc_type, exc_value, traceback):
    """Flush deferred entries and persist the cache when a ``with`` block ends.

    Two steps are performed, in this order:
    1. Every entry in ``new_entries_to_write_later`` is copied into ``data``
    2. If ``self.filename`` is truthy, ``self.write(self.filename)`` is called

    Args:
        exc_type: Exception type if an exception was raised in the with block
        exc_value: Exception value if an exception was raised
        traceback: Traceback if an exception was raised

    The exception arguments are not inspected, and the method returns None.
    """
    # Step 1: move the deferred entries into the main data store.
    deferred = self.new_entries_to_write_later
    for entry_key, entry in deferred.items():
        self.data[entry_key] = entry

    # Step 2: persist only when a filename is available.
    if not self.filename:
        return
    self.write(self.filename)
|
567
|
+
|
568
|
+
def __hash__(self):
    """Hash the cache from its dictionary form, leaving out the version fields."""
    payload = self.to_dict(add_edsl_version=False)
    return dict_hash(payload)
|
572
|
+
|
573
|
+
def to_dict(self, add_edsl_version=True) -> dict:
    """Convert the cache into a plain dictionary.

    Every entry is serialized with its own ``to_dict`` method and stored
    under its cache key.

    Args:
        add_edsl_version: If True, two extra keys are added to the result:
            "edsl_version" (the package ``__version__``) and
            "edsl_class_name" (always "Cache")

    Returns:
        dict: A dictionary with the structure:
            {
                "key1": {cache_entry1_dict},
                "key2": {cache_entry2_dict},
                ...
                "edsl_version": "x.x.x",  # if add_edsl_version=True
                "edsl_class_name": "Cache"  # if add_edsl_version=True
            }
    """
    serialized = {}
    for entry_key, entry in self.data.items():
        serialized[entry_key] = entry.to_dict()

    if not add_edsl_version:
        return serialized

    from .. import __version__

    serialized["edsl_version"] = __version__
    serialized["edsl_class_name"] = "Cache"
    return serialized
|
607
|
+
|
608
|
+
def _summary(self) -> dict:
    """Return a two-field summary: the class label and the number of entries."""
    entry_count = len(self.data)
    return {"EDSL Class": "Cache", "Number of entries": entry_count}
|
610
|
+
|
611
|
+
def table(
    self,
    *fields,
    tablefmt: Optional[str] = None,
    pretty_labels: Optional[dict] = None,
) -> str:
    """Render the cache as a table by delegating to the dataset's ``table``.

    All arguments are forwarded unchanged to ``self.to_dataset().table``.
    """
    dataset = self.to_dataset()
    return dataset.table(*fields, tablefmt=tablefmt, pretty_labels=pretty_labels)
|
620
|
+
|
621
|
+
def select(self, *fields):
    """Forward ``fields`` to the ``select`` method of this cache's dataset view."""
    dataset = self.to_dataset()
    return dataset.select(*fields)
|
623
|
+
|
624
|
+
def tree(self, node_list: Optional[list[str]] = None):
    """Forward ``node_list`` to the ``tree`` method of this cache's scenario list."""
    scenario_list = self.to_scenario_list()
    return scenario_list.tree(node_list)
|
626
|
+
|
627
|
+
def to_dataset(self):
    """Convert the cache to a dataset by way of its scenario list."""
    scenario_list = self.to_scenario_list()
    return scenario_list.to_dataset()
|
629
|
+
|
630
|
+
@classmethod
@remove_edsl_version
def from_dict(cls, data) -> Cache:
    """Rebuild a Cache from a mapping of cache keys to serialized entries.

    Each value is turned back into an entry with ``CacheEntry.from_dict``.
    """
    from .cache_entry import CacheEntry

    entries = {}
    for entry_key, raw_entry in data.items():
        entries[entry_key] = CacheEntry.from_dict(raw_entry)
    return cls(data=entries)
|
638
|
+
|
639
|
+
def __len__(self):
    """Return how many entries are held in ``self.data``."""
    entry_count = len(self.data)
    return entry_count
|
642
|
+
|
643
|
+
# TODO: Same inputs could give different results and this could be useful
|
644
|
+
# can't distinguish unless we do the ε trick or vary iterations
|
645
|
+
def __eq__(self, other_cache: "Cache") -> bool:
    """
    Compare two caches by their sets of keys.

    Entry values are not compared. Any object that is not a Cache compares
    unequal.
    """
    if isinstance(other_cache, Cache):
        own_keys = set(self.data.keys())
        other_keys = set(other_cache.data.keys())
        return own_keys == other_keys
    return False
|
653
|
+
|
654
|
+
def __add__(self, other: "Cache"):
    """Merge another cache's entries into this one and return this cache.

    The merge is performed in place with ``self.data.update(other.data)``,
    so no new Cache is created and, for a key present in both caches, the
    entry from ``other`` replaces the one held here.

    Args:
        other: Another Cache object to merge into this one

    Returns:
        Cache: Self, with entries from other added

    Raises:
        CacheError: If the provided object is not a Cache instance

    Examples:
        >>> from edsl.caching import CacheEntry
        >>> ce1 = CacheEntry.example(randomize=True)
        >>> ce2 = CacheEntry.example(randomize=True)
        >>> c1 = Cache(data={ce1.key: ce1})
        >>> initial_len = len(c1)
        >>> c2 = Cache(data={ce2.key: ce2})
        >>> result = c1 + c2  # Add c2's entries to c1
        >>> len(c1) > initial_len  # Should have more entries now
        True
    """
    if isinstance(other, Cache):
        self.data.update(other.data)
        return self
    raise CacheError("Can only add two caches together")
|
691
|
+
|
692
|
+
def __repr__(self):
    """
    Build a string showing the cache's data and its immediate_write flag.
    """
    data_repr = repr(self.data)
    return f"Cache(data = {data_repr}, immediate_write={self.immediate_write})"
|
699
|
+
|
700
|
+
####################
|
701
|
+
# EXAMPLES
|
702
|
+
####################
|
703
|
+
def fetch_input_example(self) -> dict:
    """
    Return the example 'fetch' input provided by ``CacheEntry.fetch_input_example``.
    """
    from .cache_entry import CacheEntry

    example_input = CacheEntry.fetch_input_example()
    return example_input
|
710
|
+
|
711
|
+
def to_html(self):
    """Render the cache as a standalone HTML page that displays it as JSON.

    Each entry is serialized with ``to_dict``; every field value is converted
    to a string, and for dict-valued fields each inner value is converted
    instead. The resulting JSON is embedded in a ``<script>`` element that
    pretty-prints it into the page.

    Returns:
        str: The HTML document.
    """
    # Build a fresh structure instead of writing the stringified values back
    # into the dicts returned by to_dict(), which may be shared with the entries.
    rendered = {}
    for entry_key, entry in self.data.items():
        fields = {}
        for field_name, field_value in entry.to_dict().items():
            if isinstance(field_value, dict):
                fields[field_name] = {kk: str(vv) for kk, vv in field_value.items()}
            else:
                fields[field_name] = str(field_value)
        rendered[entry_key] = fields

    # Cached prompts and model outputs are arbitrary text. "<" only occurs
    # inside JSON strings, so replacing it with the equivalent \u003c escape
    # keeps the data identical while preventing sequences such as "</script>"
    # from terminating the script element.
    json_str = json.dumps(rendered, indent=4).replace("<", "\\u003c")

    # HTML template with the JSON string embedded
    html = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Display JSON</title>
    </head>
    <body>
        <pre id="jsonData"></pre>
        <script>
            var json = {json_str};

            // JSON.stringify with spacing to format
            document.getElementById('jsonData').textContent = JSON.stringify(json, null, 4);
        </script>
    </body>
    </html>
    """
    return html
|
742
|
+
|
743
|
+
def subset(self, keys: list[str]) -> Cache:
    """
    Build a new Cache containing only the entries whose key appears in ``keys``.

    Keys in ``keys`` that are not present in this cache are ignored.
    """
    selected = {}
    for entry_key, entry in self.data.items():
        if entry_key in keys:
            selected[entry_key] = entry
    return Cache(data=selected)
|
749
|
+
|
750
|
+
def view(self) -> None:
    """Write the cache's HTML rendering to a temporary file and open it in a browser."""
    import tempfile
    import webbrowser

    page = self.to_html()
    # delete=False keeps the file on disk after the handle is closed, so the
    # browser can still load it.
    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html") as tmpfile:
        tmpfile.write(page)
        filepath = tmpfile.name

    # Hand the file's location to the default browser.
    webbrowser.open("file://" + filepath)
|
764
|
+
|
765
|
+
@classmethod
def example(cls, randomize: bool = False) -> Cache:
    """Create an example Cache instance for testing and demonstration.

    Two example entries are created with ``CacheEntry.example(randomize)``
    and each is stored under its own ``key``, so every key in the returned
    cache matches the entry it maps to.

    Args:
        randomize: Passed through to ``CacheEntry.example``. If False, both
            example entries share one key, so the cache holds a single entry.

    Returns:
        Cache: A new Cache object containing example CacheEntry objects

    Examples:
        >>> cache = Cache.example()
        >>> len(cache) > 0
        True
        >>> from edsl.caching.cache_entry import CacheEntry
        >>> all(isinstance(entry, CacheEntry) for entry in cache.values())
        True

        >>> # Create examples with randomized content
        >>> cache1 = Cache.example(randomize=True)
        >>> cache2 = Cache.example(randomize=True)
        >>> # With randomization, keys should be different
        >>> len(cache1) > 0 and len(cache2) > 0
        True
    """
    from .cache_entry import CacheEntry

    # Create each entry once and reuse it for both the key and the value.
    # Previously the key came from one example() call and the value from a
    # separate, non-randomized call, so with randomize=True the key did not
    # belong to the stored entry.
    entries = [CacheEntry.example(randomize) for _ in range(2)]
    return cls(data={entry.key: entry for entry in entries})
|
809
|
+
|
810
|
+
|
811
|
+
if __name__ == "__main__":
    # When this module is executed directly, run its doctests. The ELLIPSIS
    # flag lets "..." in expected output match any text.
    import doctest

    doctest.testmod(optionflags=doctest.ELLIPSIS)
|