agent_os_kernel-3.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
caas/caching.py
ADDED
|
@@ -0,0 +1,834 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Context Caching for LLM APIs.
|
|
5
|
+
|
|
6
|
+
This module provides intelligent caching for context sent to LLM APIs,
|
|
7
|
+
leveraging provider-specific caching features (Anthropic's prompt caching,
|
|
8
|
+
OpenAI's predicted outputs) and local caching strategies.
|
|
9
|
+
|
|
10
|
+
Key Features:
|
|
11
|
+
- Provider-agnostic caching interface
|
|
12
|
+
- Anthropic prompt caching support (cache_control breakpoints)
|
|
13
|
+
- OpenAI predicted outputs / prefix caching detection
|
|
14
|
+
- Local semantic cache for repeated queries
|
|
15
|
+
- Cache statistics and cost tracking
|
|
16
|
+
- TTL-based cache expiration
|
|
17
|
+
|
|
18
|
+
Cost Savings:
|
|
19
|
+
- Anthropic: Up to 90% reduction on cached prompt tokens
|
|
20
|
+
- OpenAI: 50% reduction on cached prefix tokens
|
|
21
|
+
- Local cache: 100% reduction for exact/semantic matches
|
|
22
|
+
|
|
23
|
+
Example:
|
|
24
|
+
from caas.caching import (
|
|
25
|
+
ContextCache,
|
|
26
|
+
AnthropicCacheStrategy,
|
|
27
|
+
OpenAICacheStrategy,
|
|
28
|
+
CacheConfig,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Create cache with Anthropic strategy
|
|
32
|
+
cache = ContextCache(
|
|
33
|
+
strategy=AnthropicCacheStrategy(),
|
|
34
|
+
config=CacheConfig(ttl_seconds=3600)
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Prepare messages with cache breakpoints
|
|
38
|
+
messages = cache.prepare_messages(
|
|
39
|
+
system_prompt="You are a helpful assistant...",
|
|
40
|
+
context="Large document context here...",
|
|
41
|
+
user_message="Summarize the key points"
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# Track cache hits
|
|
45
|
+
stats = cache.get_stats()
|
|
46
|
+
print(f"Cache hit rate: {stats['hit_rate']:.1%}")
|
|
47
|
+
print(f"Estimated savings: ${stats['estimated_savings']:.2f}")
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
from __future__ import annotations
|
|
51
|
+
|
|
52
|
+
import hashlib
|
|
53
|
+
import json
|
|
54
|
+
import time
|
|
55
|
+
from abc import ABC, abstractmethod
|
|
56
|
+
from dataclasses import dataclass, field
|
|
57
|
+
from datetime import datetime, timezone
|
|
58
|
+
from enum import Enum
|
|
59
|
+
from typing import Any, Dict, List, Optional, Protocol, Tuple, Union
|
|
60
|
+
from collections import OrderedDict
|
|
61
|
+
import threading
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class CacheProvider(str, Enum):
    """LLM backends that a caching strategy can target.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase provider name.
    """

    ANTHROPIC = "anthropic"
    OPENAI = "openai"
    # Local-only caching: no provider-side cache is involved.
    LOCAL = "local"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class CacheType(str, Enum):
    """Outcome categories for a cache lookup.

    Members are also ``str`` instances, so each one compares equal to its
    string value.
    """

    # Served by provider-side caching (Anthropic/OpenAI).
    PROVIDER_CACHE = "provider_cache"
    # Served from the local cache by exact match.
    LOCAL_EXACT = "local_exact"
    # Served from the local cache by semantic similarity.
    LOCAL_SEMANTIC = "local_semantic"
    # Nothing matched.
    MISS = "miss"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class CacheConfig:
    """Settings that control context caching.

    Attributes:
        ttl_seconds: Lifetime of a cached entry in seconds (default: 3600,
            i.e. one hour).
        max_entries: Upper bound on the number of local cache entries.
        min_tokens_for_caching: Smallest context size, in tokens, that is
            considered for caching.
        semantic_threshold: Similarity threshold for the semantic cache,
            in the range 0-1.
        enable_provider_cache: Toggle for provider-specific caching.
        enable_local_cache: Toggle for local caching.
        track_costs: Toggle for cost-savings tracking.
    """

    ttl_seconds: int = 3600
    max_entries: int = 1000
    # Contexts below 1024 tokens are not cached.
    min_tokens_for_caching: int = 1024
    semantic_threshold: float = 0.95
    enable_provider_cache: bool = True
    enable_local_cache: bool = True
    track_costs: bool = True
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class CacheEntry:
    """One stored item in the cache.

    Attributes:
        key: Cache key (hash of content).
        content: The cached content.
        metadata: Additional metadata attached to the entry.
        created_at: Creation timestamp.
        last_accessed: Timestamp of the most recent access.
        access_count: How many times the entry has been accessed.
        token_count: Estimated token count of the content.
    """

    key: str
    content: str
    metadata: Dict[str, Any]
    created_at: datetime
    last_accessed: datetime
    access_count: int = 1
    token_count: int = 0

    def is_expired(self, ttl_seconds: int) -> bool:
        """Return True when the entry is older than ``ttl_seconds``.

        Age is measured from ``created_at`` to the current UTC time.
        """
        elapsed = datetime.now(timezone.utc) - self.created_at
        return elapsed.total_seconds() > ttl_seconds
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@dataclass
class CacheResult:
    """Outcome of a single cache lookup.

    Attributes:
        cache_type: Which kind of hit occurred, or a miss.
        cached_content: The cached content, when there was a hit.
        cache_key: Key used for caching.
        token_savings: Estimated number of tokens saved.
        cost_savings: Estimated cost saved, in USD.
    """

    cache_type: CacheType
    cached_content: Optional[str] = None
    cache_key: Optional[str] = None
    token_savings: int = 0
    cost_savings: float = 0.0
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass
class CacheStats:
    """Counters describing cache performance.

    Attributes:
        total_requests: Total number of cache requests.
        provider_hits: Hits served by the provider cache.
        local_exact_hits: Hits served by local exact match.
        local_semantic_hits: Hits served by local semantic match.
        misses: Number of cache misses.
        total_tokens_saved: Total tokens saved.
        total_cost_saved: Total cost saved, in USD.
        current_entries: Current number of cache entries.
    """

    total_requests: int = 0
    provider_hits: int = 0
    local_exact_hits: int = 0
    local_semantic_hits: int = 0
    misses: int = 0
    total_tokens_saved: int = 0
    total_cost_saved: float = 0.0
    current_entries: int = 0

    @property
    def hit_rate(self) -> float:
        """Fraction of requests that were hits of any kind.

        Returns 0.0 when no requests have been recorded.
        """
        requests = self.total_requests
        if requests == 0:
            return 0.0
        all_hits = sum(
            (self.provider_hits, self.local_exact_hits, self.local_semantic_hits)
        )
        return all_hits / requests

    def to_dict(self) -> Dict[str, Any]:
        """Return the counters plus the derived ``hit_rate`` as a dict."""
        # The tuple fixes the key order; "hit_rate" resolves to the property.
        exported = (
            "total_requests",
            "provider_hits",
            "local_exact_hits",
            "local_semantic_hits",
            "misses",
            "hit_rate",
            "total_tokens_saved",
            "total_cost_saved",
            "current_entries",
        )
        return {name: getattr(self, name) for name in exported}
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class CacheStrategy(ABC):
    """Interface that every provider-specific caching strategy implements."""

    @property
    @abstractmethod
    def provider(self) -> CacheProvider:
        """The provider this strategy targets."""

    @abstractmethod
    def prepare_messages(
        self,
        system_prompt: Optional[str],
        context: str,
        messages: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Build the message list, adding caching hints where applicable.

        Args:
            system_prompt: Optional system prompt.
            context: The context to cache.
            messages: The conversation messages.

        Returns:
            Messages formatted for the provider, with cache hints.
        """

    @abstractmethod
    def estimate_savings(self, token_count: int, is_cached: bool) -> Tuple[int, float]:
        """Estimate the token and cost savings obtained from caching.

        Args:
            token_count: Number of tokens in the cached content.
            is_cached: Whether the content was cached.

        Returns:
            Tuple of ``(tokens_saved, cost_saved_usd)``.
        """
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class AnthropicCacheStrategy(CacheStrategy):
    """
    Caching strategy for the Anthropic API.

    Targets Anthropic's prompt caching feature, which provides:
    - 90% cost reduction on cached input tokens
    - 5-minute TTL for cached prompts
    - Requires min 1024 tokens for caching (2048 for Claude 3.5 Haiku)

    See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
    """

    # Anthropic pricing (as of 2024), in USD per million tokens.
    # Claude 3.5 Sonnet: $3/M input, $0.30/M cached input.
    INPUT_COST_PER_MILLION = 3.00
    CACHED_COST_PER_MILLION = 0.30
    # Writing to the cache carries a 25% premium over normal input.
    CACHE_WRITE_COST_PER_MILLION = 3.75

    # Text estimated below this many tokens gets no cache breakpoint.
    MIN_TOKENS_FOR_CACHE = 1024

    @property
    def provider(self) -> CacheProvider:
        """Identify this strategy as the Anthropic one."""
        return CacheProvider.ANTHROPIC

    def prepare_messages(
        self,
        system_prompt: Optional[str],
        context: str,
        messages: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Build messages carrying Anthropic ``cache_control`` breakpoints.

        The system prompt and the context (whichever are non-empty, in that
        order) become text blocks of a single leading ``system`` message.
        A block whose estimated size reaches ``MIN_TOKENS_FOR_CACHE`` gets
        ``cache_control: {"type": "ephemeral"}``. The conversation
        ``messages`` follow unchanged.
        """
        blocks = [
            self._text_block(text) for text in (system_prompt, context) if text
        ]
        head: List[Dict[str, Any]] = []
        if blocks:
            head.append({
                "role": "system",
                "content": blocks,
            })
        return head + list(messages)

    def estimate_savings(self, token_count: int, is_cached: bool) -> Tuple[int, float]:
        """
        Estimate the savings obtained from Anthropic caching.

        Returns:
            Tuple of ``(tokens_saved, cost_saved_usd)``; ``(0, 0.0)`` when
            the content was not cached or is below the caching minimum.
        """
        if not is_cached:
            return (0, 0.0)
        if token_count < self.MIN_TOKENS_FOR_CACHE:
            return (0, 0.0)

        millions = token_count / 1_000_000
        uncached_price = millions * self.INPUT_COST_PER_MILLION
        cached_price = millions * self.CACHED_COST_PER_MILLION

        # The same tokens are still sent, only cheaper; "tokens saved" is the
        # 90% discount expressed in token terms.
        discounted_tokens = int(token_count * 0.9)

        return (discounted_tokens, uncached_price - cached_price)

    def _estimate_tokens(self, text: str) -> int:
        """Rough token estimate: four characters per token."""
        chars_per_token = 4
        return len(text) // chars_per_token

    def _text_block(self, text: str) -> Dict[str, Any]:
        """Wrap ``text`` in a text block, marking it cacheable if long enough."""
        block: Dict[str, Any] = {"type": "text", "text": text}
        if self._estimate_tokens(text) >= self.MIN_TOKENS_FOR_CACHE:
            block["cache_control"] = {"type": "ephemeral"}
        return block
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
class OpenAICacheStrategy(CacheStrategy):
    """
    Caching strategy for the OpenAI API.

    Targets OpenAI's automatic prompt caching, which provides:
    - 50% cost reduction on cached input tokens
    - Automatic caching for prompts > 1024 tokens
    - Cache expires after 5-60 minutes of inactivity

    See: https://platform.openai.com/docs/guides/prompt-caching
    """

    # OpenAI pricing (GPT-4o as of 2024), in USD per million tokens.
    INPUT_COST_PER_MILLION = 2.50
    # Cached input is billed at a 50% discount.
    CACHED_COST_PER_MILLION = 1.25

    MIN_TOKENS_FOR_CACHE = 1024

    @property
    def provider(self) -> CacheProvider:
        """Identify this strategy as the OpenAI one."""
        return CacheProvider.OPENAI

    def prepare_messages(
        self,
        system_prompt: Optional[str],
        context: str,
        messages: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Build messages for OpenAI (no special cache markup is needed).

        OpenAI caching is automatic, so the only job here is to place the
        static content first: the system prompt and the context are merged
        into one leading ``system`` message, followed by ``messages``.
        """
        # Static content first; a separator is used only when both are present.
        static_parts = [text for text in (system_prompt, context) if text]

        head: List[Dict[str, Any]] = []
        if static_parts:
            head.append({
                "role": "system",
                "content": "\n\n---\n\n".join(static_parts),
            })
        return head + list(messages)

    def estimate_savings(self, token_count: int, is_cached: bool) -> Tuple[int, float]:
        """
        Estimate the savings obtained from OpenAI caching.

        Returns:
            Tuple of ``(tokens_saved, cost_saved_usd)``; ``(0, 0.0)`` when
            the content was not cached or is below the caching minimum.
        """
        if not is_cached:
            return (0, 0.0)
        if token_count < self.MIN_TOKENS_FOR_CACHE:
            return (0, 0.0)

        millions = token_count / 1_000_000
        uncached_price = millions * self.INPUT_COST_PER_MILLION
        cached_price = millions * self.CACHED_COST_PER_MILLION

        # The 50% discount expressed in token terms.
        discounted_tokens = int(token_count * 0.5)

        return (discounted_tokens, uncached_price - cached_price)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
class LocalCacheStrategy(CacheStrategy):
    """
    Local-only caching strategy.

    Uses a local LRU cache to store and retrieve responses.
    Provides 100% savings on exact matches.
    """

    @property
    def provider(self) -> CacheProvider:
        """Identify this strategy as the local-only one."""
        return CacheProvider.LOCAL

    def prepare_messages(
        self,
        system_prompt: Optional[str],
        context: str,
        messages: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Merge prompt and context into one ``system`` message; no cache hints.

        The system prompt and the context (whichever are non-empty) are
        joined by a blank line, and ``messages`` follow unchanged.
        """
        static_parts = [text for text in (system_prompt, context) if text]

        head: List[Dict[str, Any]] = []
        if static_parts:
            head.append({
                "role": "system",
                "content": "\n\n".join(static_parts),
            })
        return head + list(messages)

    def estimate_savings(self, token_count: int, is_cached: bool) -> Tuple[int, float]:
        """Return 100% token savings on a local hit, ``(0, 0.0)`` otherwise."""
        if is_cached:
            # Priced at an assumed average of $2 per million tokens.
            return (token_count, (token_count / 1_000_000) * 2.00)
        return (0, 0.0)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
class LRUCache:
    """Thread-safe LRU cache of ``CacheEntry`` objects keyed by ``entry.key``.

    Entries live in an ``OrderedDict`` ordered from least to most recently
    used. Every public method holds a re-entrant lock while it touches the
    mapping.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty cache.

        Args:
            max_size: Maximum number of entries kept. A value of zero or
                less disables storage: ``put`` becomes a no-op.
        """
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
        self._max_size = max_size
        self._lock = threading.RLock()

    def get(self, key: str, ttl_seconds: int) -> Optional[CacheEntry]:
        """Return the live entry stored under ``key``, or ``None``.

        An entry older than ``ttl_seconds`` is deleted and reported as
        absent. A hit marks the entry as most recently used and updates its
        ``access_count`` and ``last_accessed`` fields.
        """
        with self._lock:
            if key not in self._cache:
                return None

            entry = self._cache[key]

            # Expired entries are dropped lazily, on lookup.
            if entry.is_expired(ttl_seconds):
                del self._cache[key]
                return None

            # Most recently used entries sit at the end of the mapping.
            self._cache.move_to_end(key)

            entry.access_count += 1
            entry.last_accessed = datetime.now(timezone.utc)

            return entry

    def put(self, entry: CacheEntry) -> None:
        """Insert ``entry`` or replace the entry with the same key.

        Replacing an existing key never evicts another entry. Inserting a
        new key evicts least recently used entries until there is room.
        """
        with self._lock:
            # A non-positive capacity can hold nothing. Returning here also
            # keeps the eviction loop from calling popitem() on an empty
            # mapping, which raises KeyError.
            if self._max_size <= 0:
                return

            if entry.key in self._cache:
                # Overwriting does not grow the cache, so nothing is evicted.
                self._cache[entry.key] = entry
                self._cache.move_to_end(entry.key)
                return

            while len(self._cache) >= self._max_size:
                self._cache.popitem(last=False)

            # A new key is appended at the most-recently-used end.
            self._cache[entry.key] = entry

    def remove(self, key: str) -> bool:
        """Delete ``key`` if present; return whether an entry was removed."""
        with self._lock:
            if key in self._cache:
                del self._cache[key]
                return True
            return False

    def clear(self) -> None:
        """Remove every entry."""
        with self._lock:
            self._cache.clear()

    def __len__(self) -> int:
        """Return the number of stored entries, expired ones included."""
        with self._lock:
            return len(self._cache)

    def cleanup_expired(self, ttl_seconds: int) -> int:
        """Delete all entries older than ``ttl_seconds``; return the count."""
        with self._lock:
            # Collect keys first: the mapping must not change size while it
            # is being iterated.
            expired_keys = [
                key for key, entry in self._cache.items()
                if entry.is_expired(ttl_seconds)
            ]
            for key in expired_keys:
                del self._cache[key]
            return len(expired_keys)
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
class ContextCache:
    """
    Main context caching class.

    Provides unified caching interface for LLM APIs with support for:
    - Provider-specific caching (Anthropic, OpenAI)
    - Local LRU cache for exact matches
    - Statistics and cost tracking

    Example:
        cache = ContextCache(
            strategy=AnthropicCacheStrategy(),
            config=CacheConfig(ttl_seconds=3600)
        )

        # Check for cached response. Pass the context text itself; the
        # cache key is derived from it internally.
        result = cache.lookup(context)

        if result.cache_type == CacheType.MISS:
            # Make API call
            response = api.complete(...)
            cache.store(context, response)
    """

    def __init__(
        self,
        strategy: Optional[CacheStrategy] = None,
        config: Optional[CacheConfig] = None,
    ):
        """
        Initialize the context cache.

        Args:
            strategy: Provider-specific caching strategy
                (defaults to ``LocalCacheStrategy()``)
            config: Cache configuration (defaults to ``CacheConfig()``)
        """
        self._strategy = strategy or LocalCacheStrategy()
        self._config = config or CacheConfig()
        self._local_cache = LRUCache(max_size=self._config.max_entries)
        self._stats = CacheStats()
        # Guards ``self._stats``; the LRU cache carries its own lock.
        self._lock = threading.RLock()

    @property
    def strategy(self) -> CacheStrategy:
        """Get the current caching strategy."""
        return self._strategy

    @property
    def config(self) -> CacheConfig:
        """Get the cache configuration."""
        return self._config

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        """Roughly estimate a token count as one token per four characters."""
        return len(text) // 4

    def compute_key(self, content: str, metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Compute a cache key for content.

        Args:
            content: The content to hash
            metadata: Optional metadata to include in key; must be
                JSON-serializable

        Returns:
            First 32 hex characters of the SHA-256 digest of the content
            followed by the key-sorted JSON form of the metadata
        """
        key_data = content
        if metadata:
            # sort_keys makes the key independent of dict insertion order.
            key_data += json.dumps(metadata, sort_keys=True)

        return hashlib.sha256(key_data.encode()).hexdigest()[:32]

    def lookup(
        self,
        context: str,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> CacheResult:
        """
        Look up content in cache.

        Args:
            context: The context to look up
            metadata: Optional metadata for key computation

        Returns:
            CacheResult with cache type and any cached content. Only a
            ``LOCAL_EXACT`` result carries cached content and savings.
        """
        with self._lock:
            self._stats.total_requests += 1

        cache_key = self.compute_key(context, metadata)
        token_count = self._estimate_tokens(context)

        # Skip caching for small contexts
        if token_count < self._config.min_tokens_for_caching:
            with self._lock:
                self._stats.misses += 1
            return CacheResult(
                cache_type=CacheType.MISS,
                cache_key=cache_key,
            )

        # Check local cache first
        if self._config.enable_local_cache:
            entry = self._local_cache.get(cache_key, self._config.ttl_seconds)
            if entry:
                tokens_saved, cost_saved = self._strategy.estimate_savings(
                    entry.token_count, True
                )

                with self._lock:
                    self._stats.local_exact_hits += 1
                    self._stats.total_tokens_saved += tokens_saved
                    self._stats.total_cost_saved += cost_saved

                return CacheResult(
                    cache_type=CacheType.LOCAL_EXACT,
                    cached_content=entry.content,
                    cache_key=cache_key,
                    token_savings=tokens_saved,
                    cost_savings=cost_saved,
                )

        # No local hit - check if provider caching is enabled
        if self._config.enable_provider_cache:
            # Provider caching is handled at message preparation time
            # We return a result indicating provider cache should be used
            # NOTE(review): this counts a provider hit for every local miss,
            # without knowing whether the provider actually had it cached.
            with self._lock:
                self._stats.provider_hits += 1

            return CacheResult(
                cache_type=CacheType.PROVIDER_CACHE,
                cache_key=cache_key,
            )

        # Cache miss
        with self._lock:
            self._stats.misses += 1

        return CacheResult(
            cache_type=CacheType.MISS,
            cache_key=cache_key,
        )

    def store(
        self,
        context: str,
        response: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> str:
        """
        Store content in cache.

        Args:
            context: The context to cache
            response: Optional response to cache with context; when omitted
                (``None``) the context itself is stored as the content
            metadata: Optional metadata

        Returns:
            Cache key (returned even when the context is too small to be
            stored or the local cache is disabled)
        """
        cache_key = self.compute_key(context, metadata)
        token_count = self._estimate_tokens(context)

        # Don't cache small contexts
        if token_count < self._config.min_tokens_for_caching:
            return cache_key

        if self._config.enable_local_cache:
            now = datetime.now(timezone.utc)
            entry = CacheEntry(
                key=cache_key,
                # Explicit None check: an empty-string response is a real
                # response and must not be replaced by the context text.
                content=response if response is not None else context,
                metadata=metadata or {},
                created_at=now,
                last_accessed=now,
                token_count=token_count,
            )
            self._local_cache.put(entry)

            with self._lock:
                self._stats.current_entries = len(self._local_cache)

        return cache_key

    def prepare_messages(
        self,
        system_prompt: Optional[str] = None,
        context: str = "",
        messages: Optional[List[Dict[str, Any]]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Prepare messages with caching hints.

        Delegates to the configured strategy's ``prepare_messages``.

        Args:
            system_prompt: Optional system prompt
            context: Context to include
            messages: Conversation messages (``None`` is treated as empty)

        Returns:
            List of messages formatted for the provider
        """
        return self._strategy.prepare_messages(
            system_prompt=system_prompt,
            context=context,
            messages=messages or [],
        )

    def invalidate(self, cache_key: str) -> bool:
        """
        Invalidate a cache entry.

        Args:
            cache_key: Key of the entry to invalidate

        Returns:
            True if entry was removed, False if not found
        """
        removed = self._local_cache.remove(cache_key)
        if removed:
            with self._lock:
                self._stats.current_entries = len(self._local_cache)
        return removed

    def clear(self) -> None:
        """Clear all cache entries."""
        self._local_cache.clear()
        with self._lock:
            self._stats.current_entries = 0

    def cleanup(self) -> int:
        """
        Remove expired entries.

        Returns:
            Number of entries removed
        """
        removed = self._local_cache.cleanup_expired(self._config.ttl_seconds)
        with self._lock:
            self._stats.current_entries = len(self._local_cache)
        return removed

    def get_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns:
            Dictionary with cache statistics
        """
        with self._lock:
            # Refresh the entry count so the snapshot reflects evictions
            # and expirations that happened inside the LRU cache.
            self._stats.current_entries = len(self._local_cache)
            return self._stats.to_dict()

    def reset_stats(self) -> None:
        """Reset cache statistics, keeping the current entry count."""
        with self._lock:
            current_entries = len(self._local_cache)
            self._stats = CacheStats(current_entries=current_entries)
|
|
794
|
+
|
|
795
|
+
|
|
796
|
+
# Convenience function for creating caches
|
|
797
|
+
def create_cache(
    provider: Union[str, CacheProvider] = CacheProvider.LOCAL,
    **kwargs,
) -> ContextCache:
    """
    Build a ContextCache wired to the strategy for a given provider.

    Args:
        provider: A CacheProvider member, or its string value
            (matched case-insensitively)
        **kwargs: Forwarded unchanged to CacheConfig

    Returns:
        ContextCache using the Anthropic or OpenAI strategy for those
        providers and the local strategy for any other provider

    Example:
        # Anthropic cache
        cache = create_cache("anthropic", ttl_seconds=3600)

        # OpenAI cache
        cache = create_cache("openai", max_entries=500)

        # Local-only cache
        cache = create_cache("local")
    """
    # Normalize a string name to its enum member.
    if isinstance(provider, str):
        provider = CacheProvider(provider.lower())

    # Provider -> strategy class, checked in order; unmatched providers
    # fall back to the local strategy.
    strategy_table = (
        (CacheProvider.ANTHROPIC, AnthropicCacheStrategy),
        (CacheProvider.OPENAI, OpenAICacheStrategy),
    )
    strategy_cls = next(
        (cls for member, cls in strategy_table if provider == member),
        LocalCacheStrategy,
    )
    strategy: CacheStrategy = strategy_cls()

    cache_config = CacheConfig(**kwargs)

    return ContextCache(strategy=strategy, config=cache_config)
|