flashlite 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flashlite/__init__.py +169 -0
- flashlite/cache/__init__.py +14 -0
- flashlite/cache/base.py +194 -0
- flashlite/cache/disk.py +285 -0
- flashlite/cache/memory.py +157 -0
- flashlite/client.py +671 -0
- flashlite/config.py +154 -0
- flashlite/conversation/__init__.py +30 -0
- flashlite/conversation/context.py +319 -0
- flashlite/conversation/manager.py +385 -0
- flashlite/conversation/multi_agent.py +378 -0
- flashlite/core/__init__.py +13 -0
- flashlite/core/completion.py +145 -0
- flashlite/core/messages.py +130 -0
- flashlite/middleware/__init__.py +18 -0
- flashlite/middleware/base.py +90 -0
- flashlite/middleware/cache.py +121 -0
- flashlite/middleware/logging.py +159 -0
- flashlite/middleware/rate_limit.py +211 -0
- flashlite/middleware/retry.py +149 -0
- flashlite/observability/__init__.py +34 -0
- flashlite/observability/callbacks.py +155 -0
- flashlite/observability/inspect_compat.py +266 -0
- flashlite/observability/logging.py +293 -0
- flashlite/observability/metrics.py +221 -0
- flashlite/py.typed +0 -0
- flashlite/structured/__init__.py +31 -0
- flashlite/structured/outputs.py +189 -0
- flashlite/structured/schema.py +165 -0
- flashlite/templating/__init__.py +11 -0
- flashlite/templating/engine.py +217 -0
- flashlite/templating/filters.py +143 -0
- flashlite/templating/registry.py +165 -0
- flashlite/tools/__init__.py +74 -0
- flashlite/tools/definitions.py +382 -0
- flashlite/tools/execution.py +353 -0
- flashlite/types.py +233 -0
- flashlite-0.1.0.dist-info/METADATA +173 -0
- flashlite-0.1.0.dist-info/RECORD +41 -0
- flashlite-0.1.0.dist-info/WHEEL +4 -0
- flashlite-0.1.0.dist-info/licenses/LICENSE.md +21 -0
flashlite/__init__.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Flashlite - Batteries-included wrapper for litellm.
|
|
3
|
+
|
|
4
|
+
Features:
|
|
5
|
+
- Rate limiting with token bucket algorithm
|
|
6
|
+
- Retries with exponential backoff
|
|
7
|
+
- Jinja templating for prompts
|
|
8
|
+
- Async-first with sync wrappers
|
|
9
|
+
- Full passthrough of provider kwargs
|
|
10
|
+
- Response caching (memory and disk backends)
|
|
11
|
+
- Cost tracking and budget limits
|
|
12
|
+
- Structured logging and Inspect framework integration
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from .cache import CacheBackend, DiskCache, MemoryCache, generate_cache_key
|
|
16
|
+
from .client import Flashlite
|
|
17
|
+
from .config import FlashliteConfig, load_env_files, validate_api_keys
|
|
18
|
+
from .conversation import (
|
|
19
|
+
Agent,
|
|
20
|
+
ChatMessage,
|
|
21
|
+
ContextLimits,
|
|
22
|
+
ContextManager,
|
|
23
|
+
Conversation,
|
|
24
|
+
ConversationState,
|
|
25
|
+
MultiAgentChat,
|
|
26
|
+
Turn,
|
|
27
|
+
estimate_messages_tokens,
|
|
28
|
+
estimate_tokens,
|
|
29
|
+
truncate_messages,
|
|
30
|
+
)
|
|
31
|
+
from .core.messages import (
|
|
32
|
+
assistant_message,
|
|
33
|
+
format_messages,
|
|
34
|
+
system_message,
|
|
35
|
+
tool_message,
|
|
36
|
+
user_message,
|
|
37
|
+
)
|
|
38
|
+
from .observability import (
|
|
39
|
+
BudgetExceededError,
|
|
40
|
+
CallbackManager,
|
|
41
|
+
CostTracker,
|
|
42
|
+
FlashliteModelAPI,
|
|
43
|
+
InspectLogger,
|
|
44
|
+
StructuredLogger,
|
|
45
|
+
)
|
|
46
|
+
from .structured import (
|
|
47
|
+
StructuredOutputError,
|
|
48
|
+
generate_json_schema,
|
|
49
|
+
parse_json_response,
|
|
50
|
+
schema_to_prompt,
|
|
51
|
+
)
|
|
52
|
+
from .templating import TemplateEngine, TemplateRegistry
|
|
53
|
+
from .tools import (
|
|
54
|
+
ToolCall,
|
|
55
|
+
ToolDefinition,
|
|
56
|
+
ToolLoopResult,
|
|
57
|
+
ToolRegistry,
|
|
58
|
+
ToolResult,
|
|
59
|
+
format_tool_result,
|
|
60
|
+
run_tool_loop,
|
|
61
|
+
tool,
|
|
62
|
+
tool_from_pydantic,
|
|
63
|
+
tools_to_anthropic,
|
|
64
|
+
tools_to_openai,
|
|
65
|
+
)
|
|
66
|
+
from .types import (
|
|
67
|
+
CompletionError,
|
|
68
|
+
# Request/Response types
|
|
69
|
+
CompletionRequest,
|
|
70
|
+
CompletionResponse,
|
|
71
|
+
ConfigError,
|
|
72
|
+
# Exceptions
|
|
73
|
+
FlashliteError,
|
|
74
|
+
# Message types
|
|
75
|
+
Message,
|
|
76
|
+
MessageDict,
|
|
77
|
+
Messages,
|
|
78
|
+
RateLimitConfig,
|
|
79
|
+
RateLimitError,
|
|
80
|
+
# Configuration types
|
|
81
|
+
RetryConfig,
|
|
82
|
+
Role,
|
|
83
|
+
TemplateError,
|
|
84
|
+
ThinkingConfig,
|
|
85
|
+
UsageInfo,
|
|
86
|
+
ValidationError,
|
|
87
|
+
thinking_enabled,
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
__version__ = "0.1.0"
|
|
91
|
+
|
|
92
|
+
__all__ = [
|
|
93
|
+
# Main client
|
|
94
|
+
"Flashlite",
|
|
95
|
+
# Configuration
|
|
96
|
+
"FlashliteConfig",
|
|
97
|
+
"RetryConfig",
|
|
98
|
+
"RateLimitConfig",
|
|
99
|
+
"ThinkingConfig",
|
|
100
|
+
"thinking_enabled",
|
|
101
|
+
"load_env_files",
|
|
102
|
+
"validate_api_keys",
|
|
103
|
+
# Request/Response
|
|
104
|
+
"CompletionRequest",
|
|
105
|
+
"CompletionResponse",
|
|
106
|
+
"UsageInfo",
|
|
107
|
+
# Messages
|
|
108
|
+
"Message",
|
|
109
|
+
"Messages",
|
|
110
|
+
"MessageDict",
|
|
111
|
+
"Role",
|
|
112
|
+
"format_messages",
|
|
113
|
+
"user_message",
|
|
114
|
+
"system_message",
|
|
115
|
+
"assistant_message",
|
|
116
|
+
"tool_message",
|
|
117
|
+
# Templating
|
|
118
|
+
"TemplateEngine",
|
|
119
|
+
"TemplateRegistry",
|
|
120
|
+
# Caching
|
|
121
|
+
"CacheBackend",
|
|
122
|
+
"MemoryCache",
|
|
123
|
+
"DiskCache",
|
|
124
|
+
"generate_cache_key",
|
|
125
|
+
# Conversation
|
|
126
|
+
"Conversation",
|
|
127
|
+
"ConversationState",
|
|
128
|
+
"Turn",
|
|
129
|
+
"ContextManager",
|
|
130
|
+
"ContextLimits",
|
|
131
|
+
"estimate_tokens",
|
|
132
|
+
"estimate_messages_tokens",
|
|
133
|
+
"truncate_messages",
|
|
134
|
+
# Multi-agent
|
|
135
|
+
"MultiAgentChat",
|
|
136
|
+
"Agent",
|
|
137
|
+
"ChatMessage",
|
|
138
|
+
# Observability
|
|
139
|
+
"StructuredLogger",
|
|
140
|
+
"CostTracker",
|
|
141
|
+
"BudgetExceededError",
|
|
142
|
+
"CallbackManager",
|
|
143
|
+
"InspectLogger",
|
|
144
|
+
"FlashliteModelAPI",
|
|
145
|
+
# Structured outputs
|
|
146
|
+
"StructuredOutputError",
|
|
147
|
+
"generate_json_schema",
|
|
148
|
+
"schema_to_prompt",
|
|
149
|
+
"parse_json_response",
|
|
150
|
+
# Exceptions
|
|
151
|
+
"FlashliteError",
|
|
152
|
+
"CompletionError",
|
|
153
|
+
"RateLimitError",
|
|
154
|
+
"ValidationError",
|
|
155
|
+
"TemplateError",
|
|
156
|
+
"ConfigError",
|
|
157
|
+
# Tools
|
|
158
|
+
"tool",
|
|
159
|
+
"tool_from_pydantic",
|
|
160
|
+
"ToolDefinition",
|
|
161
|
+
"ToolRegistry",
|
|
162
|
+
"ToolCall",
|
|
163
|
+
"ToolResult",
|
|
164
|
+
"ToolLoopResult",
|
|
165
|
+
"run_tool_loop",
|
|
166
|
+
"tools_to_openai",
|
|
167
|
+
"tools_to_anthropic",
|
|
168
|
+
"format_tool_result",
|
|
169
|
+
]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Caching module for flashlite."""
|
|
2
|
+
|
|
3
|
+
from .base import CacheBackend, CacheEntry, generate_cache_key, is_cacheable_request
|
|
4
|
+
from .disk import DiskCache
|
|
5
|
+
from .memory import MemoryCache
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"CacheBackend",
|
|
9
|
+
"CacheEntry",
|
|
10
|
+
"generate_cache_key",
|
|
11
|
+
"is_cacheable_request",
|
|
12
|
+
"MemoryCache",
|
|
13
|
+
"DiskCache",
|
|
14
|
+
]
|
flashlite/cache/base.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Base cache protocol and key generation for flashlite."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ..types import CompletionRequest, CompletionResponse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class CacheEntry:
    """Pairs a cached completion response with its cache bookkeeping data."""

    # The completion response being cached.
    response: CompletionResponse
    # Hash of the originating request (the cache key material).
    request_hash: str
    # Unix timestamp at which the entry was stored.
    created_at: float
    # Lifetime in seconds; None disables expiration entirely.
    ttl: float | None = None

    def is_expired(self, current_time: float) -> bool:
        """Return True when *current_time* lies past this entry's deadline."""
        # Entries without a ttl never expire; otherwise compare against the
        # stored creation time plus the lifetime.
        return self.ttl is not None and current_time > self.created_at + self.ttl
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CacheBackend(ABC):
    """Interface that every flashlite cache backend implements.

    All operations are coroutines so concrete backends are free to perform
    real asynchronous I/O; in-memory implementations simply return
    immediately.
    """

    @abstractmethod
    async def get(self, key: str) -> CompletionResponse | None:
        """Look up *key* and return the stored response.

        Returns None when the key is absent or the entry has expired.
        """
        ...

    @abstractmethod
    async def set(
        self,
        key: str,
        response: CompletionResponse,
        ttl: float | None = None,
    ) -> None:
        """Store *response* under *key*.

        Args:
            key: The cache key.
            response: The response to cache.
            ttl: Time-to-live in seconds; None means the entry never expires.
        """
        ...

    @abstractmethod
    async def delete(self, key: str) -> bool:
        """Remove *key* from the cache; return True if it was present."""
        ...

    @abstractmethod
    async def clear(self) -> int:
        """Drop every entry and return how many were removed."""
        ...

    @abstractmethod
    async def size(self) -> int:
        """Return the current number of stored entries."""
        ...

    async def close(self) -> None:
        """Release backend resources; the default implementation is a no-op."""
        ...
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def generate_cache_key(request: CompletionRequest) -> str:
    """
    Generate a deterministic cache key for a completion request.

    The key covers the model name, the serialized messages, every
    output-affecting optional parameter that is set (temperature, token
    limits, top_p, stop, reasoning/thinking settings), and any extra kwargs
    except metadata-only ones.

    Note: Requests with temperature > 0 or reasoning enabled are typically
    not good candidates for caching since responses are non-deterministic.

    Args:
        request: The completion request

    Returns:
        A hex-encoded SHA-256 hash as the cache key
    """
    payload: dict[str, Any] = {
        "model": request.model,
        "messages": [dict(m) for m in request.messages],
    }

    # Fold in every optional parameter that influences the model's output,
    # but only when it is actually set, so absent params hash identically.
    for attr in (
        "temperature",
        "max_tokens",
        "max_completion_tokens",
        "top_p",
        "stop",
        "reasoning_effort",
        "thinking",
    ):
        value = getattr(request, attr)
        if value is not None:
            payload[attr] = value

    # Extra kwargs also shape the output, except metadata-only fields and
    # test helpers, which are deliberately excluded from the key.
    excluded = {"timeout", "metadata", "tags", "user", "mock_response"}
    payload.update(
        (f"extra.{name}", value)
        for name, value in request.extra_kwargs.items()
        if name not in excluded
    )

    # Sorted keys make the JSON form deterministic; hashing yields a
    # fixed-length key.
    blob = json.dumps(payload, sort_keys=True, default=str)
    return hashlib.sha256(blob.encode()).hexdigest()
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def is_cacheable_request(request: CompletionRequest) -> tuple[bool, str | None]:
    """
    Check whether a request is a good candidate for caching.

    Note: this currently never rejects a request outright — the first tuple
    element is always True. Non-deterministic settings (sampling temperature,
    reasoning/thinking modes, known reasoning-model names) are surfaced via
    the warning string instead, so callers can decide whether to cache.

    Returns:
        Tuple of (is_cacheable, warning_message).
        warning_message is None when no concerns were found; otherwise it is
        a "; "-joined list of reasons the cached response may not be reusable.
    """
    warnings: list[str] = []

    # Sampling with temperature > 0 is non-deterministic.
    if request.temperature is not None and request.temperature > 0:
        warnings.append(
            f"temperature={request.temperature} > 0 may produce different outputs"
        )

    # Reasoning / extended-thinking modes vary between runs by design.
    if request.reasoning_effort is not None:
        warnings.append(
            f"reasoning_effort='{request.reasoning_effort}' - "
            "reasoning models may produce varying outputs"
        )

    if request.thinking is not None:
        warnings.append(
            "thinking enabled - extended thinking models may produce varying outputs"
        )

    # Heuristic name check for known reasoning-model families.
    # NOTE(review): "o1"/"o3" are plain substring matches and can
    # false-positive on unrelated model names containing those characters.
    model_lower = request.model.lower()
    reasoning_model_patterns = ["o1", "o3", "claude-3-5-sonnet", "claude-sonnet-4"]
    if any(pattern in model_lower for pattern in reasoning_model_patterns):
        # Skip if a reasoning-related warning was already recorded above.
        if not any("reasoning" in w for w in warnings):
            warnings.append(
                f"model '{request.model}' appears to be a reasoning model - outputs may vary"
            )

    if warnings:
        return True, "; ".join(warnings)

    return True, None
|
flashlite/cache/disk.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
"""SQLite-based disk cache for persistent caching."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import sqlite3
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from ..types import CompletionResponse, UsageInfo
|
|
11
|
+
from .base import CacheBackend
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DiskCache(CacheBackend):
    """
    SQLite-based disk cache for persistent caching.

    This cache stores responses in a SQLite database, providing
    persistence across process restarts. All access is serialized through
    an asyncio lock; note that the underlying sqlite3 calls are blocking,
    so very large operations may briefly stall the event loop.

    Example:
        cache = DiskCache("./cache/completions.db", default_ttl=86400)

        # Store a response
        await cache.set(key, response)

        # Retrieve (returns None if expired or not found)
        cached = await cache.get(key)

        # Close when done
        await cache.close()
    """

    def __init__(
        self,
        path: str | Path,
        default_ttl: float | None = None,
        auto_vacuum: bool = True,
    ):
        """
        Initialize the disk cache.

        Args:
            path: Path to SQLite database file (will be created if doesn't exist)
            default_ttl: Default time-to-live in seconds (None = no expiration)
            auto_vacuum: Whether to run VACUUM on startup to reclaim space
        """
        self._path = Path(path)
        self._default_ttl = default_ttl
        self._auto_vacuum = auto_vacuum
        self._conn: sqlite3.Connection | None = None
        # Serializes all DB access; a sqlite3 connection is not async-safe.
        self._lock = asyncio.Lock()
        # Session-only counters; they are not persisted to disk.
        self._hits = 0
        self._misses = 0

        # Ensure parent directory exists
        self._path.parent.mkdir(parents=True, exist_ok=True)

        # Initialize database
        self._init_db()

    def _init_db(self) -> None:
        """Open the connection and create the schema if it doesn't exist."""
        # check_same_thread=False: access is guarded by self._lock instead.
        self._conn = sqlite3.connect(str(self._path), check_same_thread=False)
        self._conn.row_factory = sqlite3.Row

        # Create tables
        self._conn.execute("""
            CREATE TABLE IF NOT EXISTS cache (
                key TEXT PRIMARY KEY,
                response_json TEXT NOT NULL,
                created_at REAL NOT NULL,
                expires_at REAL,
                model TEXT,
                input_tokens INTEGER,
                output_tokens INTEGER
            )
        """)

        # Index speeds up the expiration filters in get()/size()/cleanup_expired()
        self._conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_expires_at ON cache(expires_at)
        """)

        self._conn.commit()

        # Optional vacuum to reclaim space
        if self._auto_vacuum:
            try:
                self._conn.execute("VACUUM")
            except sqlite3.OperationalError:
                # VACUUM can fail if database is locked
                pass

    def _serialize_response(self, response: CompletionResponse) -> str:
        """Serialize a CompletionResponse to JSON.

        NOTE(review): only content/model/finish_reason/usage survive the
        round trip; any other CompletionResponse fields are dropped — confirm
        that is acceptable for cached responses.
        """
        data: dict[str, Any] = {
            "content": response.content,
            "model": response.model,
            "finish_reason": response.finish_reason,
        }

        if response.usage:
            data["usage"] = {
                "input_tokens": response.usage.input_tokens,
                "output_tokens": response.usage.output_tokens,
                "total_tokens": response.usage.total_tokens,
            }

        return json.dumps(data)

    def _deserialize_response(self, json_str: str) -> CompletionResponse:
        """Deserialize a CompletionResponse from JSON."""
        data = json.loads(json_str)

        usage = None
        if "usage" in data:
            usage = UsageInfo(
                input_tokens=data["usage"]["input_tokens"],
                output_tokens=data["usage"]["output_tokens"],
                total_tokens=data["usage"]["total_tokens"],
            )

        return CompletionResponse(
            content=data["content"],
            model=data["model"],
            finish_reason=data.get("finish_reason"),
            usage=usage,
        )

    async def get(self, key: str) -> CompletionResponse | None:
        """Retrieve a cached response, or None if missing/expired.

        Raises:
            RuntimeError: if the cache has already been closed.
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            now = time.time()

            # Query for non-expired entry; expired rows are simply skipped
            # here (cleanup_expired() removes them physically).
            cursor = self._conn.execute(
                """
                SELECT response_json FROM cache
                WHERE key = ? AND (expires_at IS NULL OR expires_at > ?)
                """,
                (key, now),
            )
            row = cursor.fetchone()

            if row is None:
                self._misses += 1
                return None

            self._hits += 1
            return self._deserialize_response(row["response_json"])

    async def set(
        self,
        key: str,
        response: CompletionResponse,
        ttl: float | None = None,
    ) -> None:
        """Store a response in the cache.

        Args:
            key: The cache key.
            response: The response to cache.
            ttl: Per-entry time-to-live in seconds; falls back to the
                cache-wide default when None.

        Raises:
            RuntimeError: if the cache has already been closed.
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            effective_ttl = ttl if ttl is not None else self._default_ttl
            now = time.time()
            # Explicit None check: a ttl of 0 must mean "already expired",
            # not silently become "never expires" (0 is falsy).
            expires_at = now + effective_ttl if effective_ttl is not None else None

            response_json = self._serialize_response(response)

            self._conn.execute(
                """
                INSERT OR REPLACE INTO cache
                (key, response_json, created_at, expires_at, model, input_tokens, output_tokens)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    key,
                    response_json,
                    now,
                    expires_at,
                    response.model,
                    response.usage.input_tokens if response.usage else None,
                    response.usage.output_tokens if response.usage else None,
                ),
            )
            self._conn.commit()

    async def delete(self, key: str) -> bool:
        """Delete a cached entry; return True if the key existed."""
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            cursor = self._conn.execute("DELETE FROM cache WHERE key = ?", (key,))
            self._conn.commit()
            return cursor.rowcount > 0

    async def clear(self) -> int:
        """Clear all cached entries and reset hit/miss counters."""
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            cursor = self._conn.execute("SELECT COUNT(*) as count FROM cache")
            count = cursor.fetchone()["count"]

            self._conn.execute("DELETE FROM cache")
            self._conn.commit()

            self._hits = 0
            self._misses = 0

            return count

    async def size(self) -> int:
        """Get the number of cached entries (excluding expired)."""
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            now = time.time()
            cursor = self._conn.execute(
                """
                SELECT COUNT(*) as count FROM cache
                WHERE expires_at IS NULL OR expires_at > ?
                """,
                (now,),
            )
            return cursor.fetchone()["count"]

    async def cleanup_expired(self) -> int:
        """
        Physically remove expired entries from the cache.

        Returns:
            Number of entries removed
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            now = time.time()
            cursor = self._conn.execute(
                "DELETE FROM cache WHERE expires_at IS NOT NULL AND expires_at <= ?",
                (now,),
            )
            self._conn.commit()
            return cursor.rowcount

    async def close(self) -> None:
        """Close the database connection; further operations will raise."""
        async with self._lock:
            if self._conn is not None:
                self._conn.close()
                self._conn = None

    @property
    def hits(self) -> int:
        """Number of cache hits (this session only)."""
        return self._hits

    @property
    def misses(self) -> int:
        """Number of cache misses (this session only)."""
        return self._misses

    @property
    def hit_rate(self) -> float:
        """Cache hit rate (0.0 to 1.0) for this session."""
        total = self._hits + self._misses
        if total == 0:
            return 0.0
        return self._hits / total

    def stats(self) -> dict[str, Any]:
        """Get cache statistics as a plain dict."""
        return {
            "path": str(self._path),
            "hits": self._hits,
            "misses": self._misses,
            "hit_rate": self.hit_rate,
        }
|