agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
caas/caching.py ADDED
@@ -0,0 +1,834 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ """
4
+ Context Caching for LLM APIs.
5
+
6
+ This module provides intelligent caching for context sent to LLM APIs,
7
+ leveraging provider-specific caching features (Anthropic's prompt caching,
8
+ OpenAI's predicted outputs) and local caching strategies.
9
+
10
+ Key Features:
11
+ - Provider-agnostic caching interface
12
+ - Anthropic prompt caching support (cache_control breakpoints)
13
+ - OpenAI predicted outputs / prefix caching detection
14
+ - Local semantic cache for repeated queries
15
+ - Cache statistics and cost tracking
16
+ - TTL-based cache expiration
17
+
18
+ Cost Savings:
19
+ - Anthropic: Up to 90% reduction on cached prompt tokens
20
+ - OpenAI: 50% reduction on cached prefix tokens
21
+ - Local cache: 100% reduction for exact/semantic matches
22
+
23
+ Example:
24
+ from caas.caching import (
25
+ ContextCache,
26
+ AnthropicCacheStrategy,
27
+ OpenAICacheStrategy,
28
+ CacheConfig,
29
+ )
30
+
31
+ # Create cache with Anthropic strategy
32
+ cache = ContextCache(
33
+ strategy=AnthropicCacheStrategy(),
34
+ config=CacheConfig(ttl_seconds=3600)
35
+ )
36
+
37
+ # Prepare messages with cache breakpoints
38
+ messages = cache.prepare_messages(
39
+ system_prompt="You are a helpful assistant...",
40
+ context="Large document context here...",
41
+ user_message="Summarize the key points"
42
+ )
43
+
44
+ # Track cache hits
45
+ stats = cache.get_stats()
46
+ print(f"Cache hit rate: {stats['hit_rate']:.1%}")
47
+ print(f"Estimated savings: ${stats['estimated_savings']:.2f}")
48
+ """
49
+
50
+ from __future__ import annotations
51
+
52
+ import hashlib
53
+ import json
54
+ import time
55
+ from abc import ABC, abstractmethod
56
+ from dataclasses import dataclass, field
57
+ from datetime import datetime, timezone
58
+ from enum import Enum
59
+ from typing import Any, Dict, List, Optional, Protocol, Tuple, Union
60
+ from collections import OrderedDict
61
+ import threading
62
+
63
+
64
class CacheProvider(str, Enum):
    """Backends that a caching strategy can target.

    Every member is also a ``str``, so it compares equal to its raw value
    (``CacheProvider.OPENAI == "openai"``).
    """

    # Hosted LLM providers.
    ANTHROPIC = "anthropic"
    OPENAI = "openai"

    # No provider involved: local-only caching.
    LOCAL = "local"
69
+
70
+
71
class CacheType(str, Enum):
    """Outcome categories for a cache lookup.

    Every member is also a ``str`` and compares equal to its raw value.
    """

    # Served by provider-side caching (Anthropic/OpenAI).
    PROVIDER_CACHE = "provider_cache"

    # Served locally by an exact match.
    LOCAL_EXACT = "local_exact"

    # Served locally by a semantic-similarity match.
    LOCAL_SEMANTIC = "local_semantic"

    # Nothing matched.
    MISS = "miss"
77
+
78
+
79
@dataclass
class CacheConfig:
    """Settings that control context caching.

    Attributes:
        ttl_seconds: How long a cached entry stays valid, in seconds
            (defaults to one hour).
        max_entries: Upper bound on the number of local cache entries.
        min_tokens_for_caching: Smallest context size, in tokens, that is
            considered for caching.
        semantic_threshold: Similarity cut-off (0-1) for semantic matches.
        enable_provider_cache: Toggle for provider-specific caching.
        enable_local_cache: Toggle for local caching.
        track_costs: Toggle for cost-savings tracking.
    """

    # Expiry and capacity.
    ttl_seconds: int = 3600
    max_entries: int = 1000

    # Eligibility and matching thresholds.
    min_tokens_for_caching: int = 1024  # only cache contexts >= 1024 tokens
    semantic_threshold: float = 0.95

    # Feature switches.
    enable_provider_cache: bool = True
    enable_local_cache: bool = True
    track_costs: bool = True
99
+
100
+
101
@dataclass
class CacheEntry:
    """A single cache entry.

    Attributes:
        key: Cache key (hash of content)
        content: The cached content
        metadata: Additional metadata
        created_at: When the entry was created
        last_accessed: When the entry was last accessed
        access_count: Number of times this entry was accessed
        token_count: Estimated token count
    """

    key: str
    content: str
    metadata: Dict[str, Any]
    created_at: datetime
    last_accessed: datetime
    access_count: int = 1
    token_count: int = 0

    def is_expired(self, ttl_seconds: int) -> bool:
        """Report whether the entry is older than ``ttl_seconds``.

        Age is measured from ``created_at`` to the current UTC time; the
        entry counts as expired only when the age is strictly greater than
        the TTL.

        Args:
            ttl_seconds: Maximum allowed age in seconds.

        Returns:
            True if the entry has outlived the TTL, False otherwise.
        """
        created = self.created_at
        # Subtracting a naive datetime from an aware one raises TypeError.
        # A timestamp without an offset is interpreted as UTC so entries
        # built from naive datetimes can still be aged.
        if created.utcoffset() is None:
            created = created.replace(tzinfo=timezone.utc)

        age = (datetime.now(timezone.utc) - created).total_seconds()
        return age > ttl_seconds
126
+
127
+
128
@dataclass
class CacheResult:
    """Outcome of one cache lookup.

    Attributes:
        cache_type: Which kind of hit occurred, or that it was a miss.
        cached_content: Content retrieved from the cache when there was a hit.
        cache_key: Key the lookup was performed with.
        token_savings: Estimated number of tokens saved.
        cost_savings: Estimated savings in USD.
    """

    # Required: the hit/miss classification.
    cache_type: CacheType

    # Optional payload and key; both default to "absent".
    cached_content: Optional[str] = None
    cache_key: Optional[str] = None

    # Savings estimates; zero unless filled in by the caller.
    token_savings: int = 0
    cost_savings: float = 0.0
144
+
145
+
146
@dataclass
class CacheStats:
    """Running counters describing cache performance.

    Attributes:
        total_requests: Number of cache requests seen.
        provider_hits: Requests served by the provider cache.
        local_exact_hits: Requests served by a local exact match.
        local_semantic_hits: Requests served by a local semantic match.
        misses: Requests that found nothing.
        total_tokens_saved: Accumulated token savings.
        total_cost_saved: Accumulated cost savings in USD.
        current_entries: Number of entries currently cached.
    """

    total_requests: int = 0
    provider_hits: int = 0
    local_exact_hits: int = 0
    local_semantic_hits: int = 0
    misses: int = 0
    total_tokens_saved: int = 0
    total_cost_saved: float = 0.0
    current_entries: int = 0

    @property
    def hit_rate(self) -> float:
        """Fraction of requests that were hits of any kind (0.0 if none yet)."""
        if not self.total_requests:
            # Nothing recorded yet; avoid dividing by zero.
            return 0.0
        hit_total = sum(
            (self.provider_hits, self.local_exact_hits, self.local_semantic_hits)
        )
        return hit_total / self.total_requests

    def to_dict(self) -> Dict[str, Any]:
        """Return the counters, plus the derived ``hit_rate``, as a dict."""
        # The tuple fixes the key order of the resulting mapping.
        exported = (
            "total_requests",
            "provider_hits",
            "local_exact_hits",
            "local_semantic_hits",
            "misses",
            "hit_rate",
            "total_tokens_saved",
            "total_cost_saved",
            "current_entries",
        )
        return {name: getattr(self, name) for name in exported}
190
+
191
+
192
class CacheStrategy(ABC):
    """Interface every provider-specific caching strategy implements."""

    @property
    @abstractmethod
    def provider(self) -> CacheProvider:
        """The provider this strategy targets."""

    @abstractmethod
    def prepare_messages(
        self,
        system_prompt: Optional[str],
        context: str,
        messages: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Build the message list, including any caching hints.

        Args:
            system_prompt: System prompt, or None when there is none
            context: Context that should be cached
            messages: Conversation messages

        Returns:
            Provider-formatted messages carrying cache hints
        """

    @abstractmethod
    def estimate_savings(self, token_count: int, is_cached: bool) -> Tuple[int, float]:
        """
        Estimate what caching saved, in tokens and in cost.

        Args:
            token_count: Token count of the cached content
            is_cached: Whether the content was served from cache

        Returns:
            Tuple of (tokens_saved, cost_saved_usd)
        """
234
+
235
+
236
class AnthropicCacheStrategy(CacheStrategy):
    """
    Strategy that marks prompt content for Anthropic's prompt caching.

    Figures recorded here for the feature:
    - 90% cost reduction on cached input tokens
    - 5-minute TTL for cached prompts
    - Requires min 1024 tokens for caching (2048 for Claude 3.5 Haiku)

    See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
    """

    # Anthropic pricing (as of 2024), in USD per million tokens.
    # Claude 3.5 Sonnet: $3/M input, $0.30/M cached input
    INPUT_COST_PER_MILLION = 3.00
    CACHED_COST_PER_MILLION = 0.30
    # 25% premium for writing to cache; not used by estimate_savings below.
    CACHE_WRITE_COST_PER_MILLION = 3.75

    # Content estimated below this many tokens gets no cache_control marker.
    MIN_TOKENS_FOR_CACHE = 1024

    @property
    def provider(self) -> CacheProvider:
        """This strategy targets Anthropic."""
        return CacheProvider.ANTHROPIC

    def prepare_messages(
        self,
        system_prompt: Optional[str],
        context: str,
        messages: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Build messages carrying Anthropic cache_control breakpoints.

        The system prompt and the context (whichever are non-empty, in that
        order) become text blocks of a single leading "system" entry. A
        block gets cache_control: {"type": "ephemeral"} when its estimated
        size reaches MIN_TOKENS_FOR_CACHE. The conversation messages follow
        unchanged.

        NOTE(review): the static content is returned as a message with role
        "system"; presumably the caller moves it to the API's separate
        system field before sending - confirm against the call site.
        """

        def as_block(text: str) -> Dict[str, Any]:
            # One text block, flagged as cacheable only when large enough.
            block: Dict[str, Any] = {"type": "text", "text": text}
            if self._estimate_tokens(text) >= self.MIN_TOKENS_FOR_CACHE:
                block["cache_control"] = {"type": "ephemeral"}
            return block

        system_blocks = [as_block(part) for part in (system_prompt, context) if part]

        prepared: List[Dict[str, Any]] = []
        if system_blocks:
            prepared.append({"role": "system", "content": system_blocks})

        # Conversation messages come after the static content.
        prepared.extend(messages)
        return prepared

    def estimate_savings(self, token_count: int, is_cached: bool) -> Tuple[int, float]:
        """
        Estimate what an Anthropic cache hit saved.

        Returns:
            Tuple of (tokens_saved, cost_saved_usd); (0, 0.0) when the
            content was not cached or is below MIN_TOKENS_FOR_CACHE.
        """
        if not is_cached:
            return (0, 0.0)
        if token_count < self.MIN_TOKENS_FOR_CACHE:
            return (0, 0.0)

        # Price the same tokens at the regular and at the cached rate.
        full_price = (token_count / 1_000_000) * self.INPUT_COST_PER_MILLION
        discounted_price = (token_count / 1_000_000) * self.CACHED_COST_PER_MILLION

        # The tokens are still sent; 90% is "saved" in cost terms only.
        equivalent_tokens = int(token_count * 0.9)

        return (equivalent_tokens, full_price - discounted_price)

    def _estimate_tokens(self, text: str) -> int:
        """Approximate the token count as one token per four characters."""
        return len(text) // 4
337
+
338
+
339
class OpenAICacheStrategy(CacheStrategy):
    """
    Strategy for OpenAI's automatic prompt caching.

    Figures recorded here for the feature:
    - 50% cost reduction on cached input tokens
    - Automatic caching once a prompt reaches 1024 tokens
    - Cache expires after 5-60 minutes of inactivity

    See: https://platform.openai.com/docs/guides/prompt-caching
    """

    # OpenAI pricing (GPT-4o as of 2024), in USD per million tokens.
    INPUT_COST_PER_MILLION = 2.50
    CACHED_COST_PER_MILLION = 1.25  # 50% discount

    # Below this many tokens estimate_savings reports no savings.
    MIN_TOKENS_FOR_CACHE = 1024

    @property
    def provider(self) -> CacheProvider:
        """This strategy targets OpenAI."""
        return CacheProvider.OPENAI

    def prepare_messages(
        self,
        system_prompt: Optional[str],
        context: str,
        messages: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Build messages for OpenAI; no cache markers are required.

        Caching is automatic on the provider side, so the only job here is
        ordering: the static content (system prompt, then context, joined
        by a horizontal-rule separator) goes first as one "system" message
        and the conversation messages follow.
        """
        static_parts = [part for part in (system_prompt, context) if part]

        prepared: List[Dict[str, Any]] = []
        if static_parts:
            prepared.append({
                "role": "system",
                "content": "\n\n---\n\n".join(static_parts),
            })

        # Conversation messages come after the static content.
        prepared.extend(messages)
        return prepared

    def estimate_savings(self, token_count: int, is_cached: bool) -> Tuple[int, float]:
        """
        Estimate what an OpenAI cache hit saved.

        Returns:
            Tuple of (tokens_saved, cost_saved_usd); (0, 0.0) when the
            content was not cached or is below MIN_TOKENS_FOR_CACHE.
        """
        if not is_cached:
            return (0, 0.0)
        if token_count < self.MIN_TOKENS_FOR_CACHE:
            return (0, 0.0)

        # Price the same tokens at the regular and at the cached rate.
        full_price = (token_count / 1_000_000) * self.INPUT_COST_PER_MILLION
        discounted_price = (token_count / 1_000_000) * self.CACHED_COST_PER_MILLION

        # Half of the tokens are "saved" in cost terms.
        equivalent_tokens = int(token_count * 0.5)

        return (equivalent_tokens, full_price - discounted_price)
418
+
419
+
420
class LocalCacheStrategy(CacheStrategy):
    """
    Strategy for local-only caching.

    No provider-side feature is involved: messages are assembled without
    cache hints and a hit is priced as saving every token.
    """

    @property
    def provider(self) -> CacheProvider:
        """This strategy is not tied to a hosted provider."""
        return CacheProvider.LOCAL

    def prepare_messages(
        self,
        system_prompt: Optional[str],
        context: str,
        messages: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Assemble messages as-is; local caching needs no special markup.

        The non-empty parts of (system prompt, context) are joined by a
        blank line into one leading "system" message, followed by the
        conversation messages.
        """
        static_parts = [part for part in (system_prompt, context) if part]

        prepared: List[Dict[str, Any]] = []
        if static_parts:
            prepared.append({
                "role": "system",
                "content": "\n\n".join(static_parts),
            })

        prepared.extend(messages)
        return prepared

    def estimate_savings(self, token_count: int, is_cached: bool) -> Tuple[int, float]:
        """Price a local hit as a 100% saving; a miss saves nothing."""
        if is_cached:
            # Assume average cost of $2/M tokens
            assumed_rate_per_million = 2.00
            return (token_count, (token_count / 1_000_000) * assumed_rate_per_million)
        return (0, 0.0)
466
+
467
+
468
class LRUCache:
    """Thread-safe LRU cache implementation.

    Entries live in an ``OrderedDict`` ordered from least to most recently
    used. Every public method runs under one re-entrant lock.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty cache.

        Args:
            max_size: Maximum number of entries to keep. A value of zero or
                less yields a cache that never stores anything.
        """
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
        self._max_size = max_size
        self._lock = threading.RLock()

    def get(self, key: str, ttl_seconds: int) -> Optional[CacheEntry]:
        """Get an entry from cache.

        An entry found to be expired is dropped and reported as missing.
        A live entry becomes the most recently used one and has its access
        statistics updated.

        Args:
            key: Key to look up.
            ttl_seconds: Maximum age, in seconds, for the entry to count
                as live.

        Returns:
            The entry, or None if it is absent or expired.
        """
        with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                return None

            # Check expiration
            if entry.is_expired(ttl_seconds):
                del self._cache[key]
                return None

            # Move to end (most recently used)
            self._cache.move_to_end(key)

            # Update access stats
            entry.access_count += 1
            entry.last_accessed = datetime.now(timezone.utc)

            return entry

    def put(self, entry: CacheEntry) -> None:
        """Add or update an entry in cache.

        The entry is stored under ``entry.key`` and becomes the most
        recently used one. Least recently used entries are evicted only
        when a *new* key would push the cache past ``max_size``.

        Args:
            entry: Entry to store.
        """
        with self._lock:
            # A non-positive capacity means nothing can be held; evicting
            # from an empty OrderedDict would raise KeyError.
            if self._max_size <= 0:
                return

            if entry.key in self._cache:
                # Replacing in place does not grow the cache, so nothing
                # else has to be evicted.
                self._cache[entry.key] = entry
                self._cache.move_to_end(entry.key)
                return

            # Remove oldest entries if at capacity
            while len(self._cache) >= self._max_size:
                self._cache.popitem(last=False)

            self._cache[entry.key] = entry

    def remove(self, key: str) -> bool:
        """Remove an entry from cache.

        Returns:
            True if the key was present and removed, False otherwise.
        """
        with self._lock:
            if key in self._cache:
                del self._cache[key]
                return True
            return False

    def clear(self) -> None:
        """Clear all entries."""
        with self._lock:
            self._cache.clear()

    def __len__(self) -> int:
        """Return the number of stored entries, expired ones included."""
        with self._lock:
            return len(self._cache)

    def cleanup_expired(self, ttl_seconds: int) -> int:
        """Remove expired entries. Returns count of removed entries."""
        with self._lock:
            # Collect the keys first: the dict cannot change size while it
            # is being iterated.
            expired_keys = [
                key for key, entry in self._cache.items()
                if entry.is_expired(ttl_seconds)
            ]
            for key in expired_keys:
                del self._cache[key]
            return len(expired_keys)
535
+
536
+
537
class ContextCache:
    """
    Main context caching class.

    Provides unified caching interface for LLM APIs with support for:
    - Provider-specific caching (Anthropic, OpenAI)
    - Local LRU cache for exact matches
    - Statistics and cost tracking

    ``lookup`` and ``store`` take the raw context text and derive the cache
    key themselves, so callers pass the same ``context`` (and ``metadata``)
    to both.

    Example:
        cache = ContextCache(
            strategy=AnthropicCacheStrategy(),
            config=CacheConfig(ttl_seconds=3600)
        )

        # Check for cached response
        result = cache.lookup(context)

        if result.cache_type != CacheType.LOCAL_EXACT:
            # No locally cached response: make the API call
            response = api.complete(...)
            cache.store(context, response)
    """

    def __init__(
        self,
        strategy: Optional[CacheStrategy] = None,
        config: Optional[CacheConfig] = None,
    ):
        """
        Initialize the context cache.

        Args:
            strategy: Provider-specific caching strategy; a
                ``LocalCacheStrategy`` is used when omitted
            config: Cache configuration; a default ``CacheConfig`` is used
                when omitted
        """
        self._strategy = strategy if strategy is not None else LocalCacheStrategy()
        self._config = config if config is not None else CacheConfig()
        self._local_cache = LRUCache(max_size=self._config.max_entries)
        self._stats = CacheStats()
        # Guards the statistics object.
        self._lock = threading.RLock()

    @property
    def strategy(self) -> CacheStrategy:
        """Get the current caching strategy."""
        return self._strategy

    @property
    def config(self) -> CacheConfig:
        """Get the cache configuration."""
        return self._config

    @staticmethod
    def _estimate_tokens(context: str) -> int:
        """Roughly estimate the token count as one token per four characters."""
        return len(context) // 4

    def _record_miss(self, cache_key: str) -> CacheResult:
        """Count a miss in the statistics and build the matching result."""
        with self._lock:
            self._stats.misses += 1
        return CacheResult(
            cache_type=CacheType.MISS,
            cache_key=cache_key,
        )

    def compute_key(self, content: str, metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Compute a cache key for content.

        Args:
            content: The content to hash
            metadata: Optional metadata to include in key; it is serialized
                with ``json.dumps(..., sort_keys=True)`` and appended to the
                content before hashing

        Returns:
            The first 32 hexadecimal characters of the SHA-256 digest
        """
        key_data = content
        if metadata:
            key_data += json.dumps(metadata, sort_keys=True)

        return hashlib.sha256(key_data.encode()).hexdigest()[:32]

    def lookup(
        self,
        context: str,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> CacheResult:
        """
        Look up content in cache.

        Args:
            context: The context to look up
            metadata: Optional metadata for key computation

        Returns:
            CacheResult with cache type and any cached content:
            ``LOCAL_EXACT`` on a local hit, ``PROVIDER_CACHE`` when there is
            no local hit but provider caching is enabled, ``MISS`` otherwise
            (including contexts below ``min_tokens_for_caching``)
        """
        with self._lock:
            self._stats.total_requests += 1

        cache_key = self.compute_key(context, metadata)

        # Skip caching for small contexts
        if self._estimate_tokens(context) < self._config.min_tokens_for_caching:
            return self._record_miss(cache_key)

        # Check local cache first
        if self._config.enable_local_cache:
            entry = self._local_cache.get(cache_key, self._config.ttl_seconds)
            # Compare against None explicitly so the hit does not depend on
            # the truthiness of the entry object.
            if entry is not None:
                tokens_saved, cost_saved = self._strategy.estimate_savings(
                    entry.token_count, True
                )

                with self._lock:
                    self._stats.local_exact_hits += 1
                    self._stats.total_tokens_saved += tokens_saved
                    self._stats.total_cost_saved += cost_saved

                return CacheResult(
                    cache_type=CacheType.LOCAL_EXACT,
                    cached_content=entry.content,
                    cache_key=cache_key,
                    token_savings=tokens_saved,
                    cost_savings=cost_saved,
                )

        # No local hit - check if provider caching is enabled
        if self._config.enable_provider_cache:
            # Provider caching is handled at message preparation time.
            # We return a result indicating provider cache should be used;
            # this is counted as a provider hit in the statistics.
            with self._lock:
                self._stats.provider_hits += 1

            return CacheResult(
                cache_type=CacheType.PROVIDER_CACHE,
                cache_key=cache_key,
            )

        # Cache miss
        return self._record_miss(cache_key)

    def store(
        self,
        context: str,
        response: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> str:
        """
        Store content in cache.

        Args:
            context: The context to cache
            response: Optional response to cache with context; when it is
                ``None`` the context itself is stored as the content. An
                empty string is a valid response and is stored as-is
            metadata: Optional metadata

        Returns:
            Cache key (also returned when nothing was stored because the
            context is too small or the local cache is disabled)
        """
        cache_key = self.compute_key(context, metadata)
        token_count = self._estimate_tokens(context)

        # Don't cache small contexts
        if token_count < self._config.min_tokens_for_caching:
            return cache_key

        if self._config.enable_local_cache:
            now = datetime.now(timezone.utc)
            entry = CacheEntry(
                key=cache_key,
                # Only fall back to the context when no response was given;
                # ``response or context`` would discard an empty response.
                content=context if response is None else response,
                metadata=metadata or {},
                created_at=now,
                last_accessed=now,
                token_count=token_count,
            )
            self._local_cache.put(entry)

            with self._lock:
                self._stats.current_entries = len(self._local_cache)

        return cache_key

    def prepare_messages(
        self,
        system_prompt: Optional[str] = None,
        context: str = "",
        messages: Optional[List[Dict[str, Any]]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Prepare messages with caching hints.

        Delegates to the configured strategy's ``prepare_messages``.

        Args:
            system_prompt: Optional system prompt
            context: Context to include
            messages: Conversation messages; an empty list is passed to the
                strategy when omitted

        Returns:
            List of messages as returned by the strategy
        """
        return self._strategy.prepare_messages(
            system_prompt=system_prompt,
            context=context,
            messages=messages or [],
        )

    def invalidate(self, cache_key: str) -> bool:
        """
        Invalidate a cache entry.

        Args:
            cache_key: Key of the entry to invalidate

        Returns:
            True if entry was removed, False if not found
        """
        removed = self._local_cache.remove(cache_key)
        if removed:
            with self._lock:
                self._stats.current_entries = len(self._local_cache)
        return removed

    def clear(self) -> None:
        """Clear all cache entries."""
        self._local_cache.clear()
        with self._lock:
            self._stats.current_entries = 0

    def cleanup(self) -> int:
        """
        Remove expired entries.

        Uses the configured ``ttl_seconds`` as the expiry threshold.

        Returns:
            Number of entries removed
        """
        removed = self._local_cache.cleanup_expired(self._config.ttl_seconds)
        with self._lock:
            self._stats.current_entries = len(self._local_cache)
        return removed

    def get_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns:
            Dictionary with cache statistics
        """
        with self._lock:
            # Refresh the entry count so the snapshot reflects the local
            # cache at the time of the call.
            self._stats.current_entries = len(self._local_cache)
            return self._stats.to_dict()

    def reset_stats(self) -> None:
        """Reset cache statistics, keeping the current entry count."""
        with self._lock:
            current_entries = len(self._local_cache)
            self._stats = CacheStats(current_entries=current_entries)
794
+
795
+
796
+ # Convenience function for creating caches
797
def create_cache(
    provider: Union[str, CacheProvider] = CacheProvider.LOCAL,
    **kwargs,
) -> ContextCache:
    """
    Build a ContextCache wired to the strategy for the given provider.

    Args:
        provider: Provider name (case-insensitive) or CacheProvider enum
        **kwargs: Additional config options passed to CacheConfig

    Returns:
        Configured ContextCache instance

    Raises:
        ValueError: If a provider name does not match any CacheProvider value

    Example:
        # Anthropic cache
        cache = create_cache("anthropic", ttl_seconds=3600)

        # OpenAI cache
        cache = create_cache("openai", max_entries=500)

        # Local-only cache
        cache = create_cache("local")
    """
    # Normalize a provider name into its enum member.
    resolved = CacheProvider(provider.lower()) if isinstance(provider, str) else provider

    # Pick the strategy class; any provider without a dedicated strategy
    # falls back to the local one.
    if resolved == CacheProvider.ANTHROPIC:
        strategy_factory = AnthropicCacheStrategy
    elif resolved == CacheProvider.OPENAI:
        strategy_factory = OpenAICacheStrategy
    else:
        strategy_factory = LocalCacheStrategy

    # Keyword arguments are evaluated left to right, so the strategy is
    # still constructed before the configuration.
    return ContextCache(
        strategy=strategy_factory(),
        config=CacheConfig(**kwargs),
    )