flashlite 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. flashlite/__init__.py +169 -0
  2. flashlite/cache/__init__.py +14 -0
  3. flashlite/cache/base.py +194 -0
  4. flashlite/cache/disk.py +285 -0
  5. flashlite/cache/memory.py +157 -0
  6. flashlite/client.py +671 -0
  7. flashlite/config.py +154 -0
  8. flashlite/conversation/__init__.py +30 -0
  9. flashlite/conversation/context.py +319 -0
  10. flashlite/conversation/manager.py +385 -0
  11. flashlite/conversation/multi_agent.py +378 -0
  12. flashlite/core/__init__.py +13 -0
  13. flashlite/core/completion.py +145 -0
  14. flashlite/core/messages.py +130 -0
  15. flashlite/middleware/__init__.py +18 -0
  16. flashlite/middleware/base.py +90 -0
  17. flashlite/middleware/cache.py +121 -0
  18. flashlite/middleware/logging.py +159 -0
  19. flashlite/middleware/rate_limit.py +211 -0
  20. flashlite/middleware/retry.py +149 -0
  21. flashlite/observability/__init__.py +34 -0
  22. flashlite/observability/callbacks.py +155 -0
  23. flashlite/observability/inspect_compat.py +266 -0
  24. flashlite/observability/logging.py +293 -0
  25. flashlite/observability/metrics.py +221 -0
  26. flashlite/py.typed +0 -0
  27. flashlite/structured/__init__.py +31 -0
  28. flashlite/structured/outputs.py +189 -0
  29. flashlite/structured/schema.py +165 -0
  30. flashlite/templating/__init__.py +11 -0
  31. flashlite/templating/engine.py +217 -0
  32. flashlite/templating/filters.py +143 -0
  33. flashlite/templating/registry.py +165 -0
  34. flashlite/tools/__init__.py +74 -0
  35. flashlite/tools/definitions.py +382 -0
  36. flashlite/tools/execution.py +353 -0
  37. flashlite/types.py +233 -0
  38. flashlite-0.1.0.dist-info/METADATA +173 -0
  39. flashlite-0.1.0.dist-info/RECORD +41 -0
  40. flashlite-0.1.0.dist-info/WHEEL +4 -0
  41. flashlite-0.1.0.dist-info/licenses/LICENSE.md +21 -0
flashlite/__init__.py ADDED
@@ -0,0 +1,169 @@
1
+ """
2
+ Flashlite - Batteries-included wrapper for litellm.
3
+
4
+ Features:
5
+ - Rate limiting with token bucket algorithm
6
+ - Retries with exponential backoff
7
+ - Jinja templating for prompts
8
+ - Async-first with sync wrappers
9
+ - Full passthrough of provider kwargs
10
+ - Response caching (memory and disk backends)
11
+ - Cost tracking and budget limits
12
+ - Structured logging and Inspect framework integration
13
+ """
14
+
15
+ from .cache import CacheBackend, DiskCache, MemoryCache, generate_cache_key
16
+ from .client import Flashlite
17
+ from .config import FlashliteConfig, load_env_files, validate_api_keys
18
+ from .conversation import (
19
+ Agent,
20
+ ChatMessage,
21
+ ContextLimits,
22
+ ContextManager,
23
+ Conversation,
24
+ ConversationState,
25
+ MultiAgentChat,
26
+ Turn,
27
+ estimate_messages_tokens,
28
+ estimate_tokens,
29
+ truncate_messages,
30
+ )
31
+ from .core.messages import (
32
+ assistant_message,
33
+ format_messages,
34
+ system_message,
35
+ tool_message,
36
+ user_message,
37
+ )
38
+ from .observability import (
39
+ BudgetExceededError,
40
+ CallbackManager,
41
+ CostTracker,
42
+ FlashliteModelAPI,
43
+ InspectLogger,
44
+ StructuredLogger,
45
+ )
46
+ from .structured import (
47
+ StructuredOutputError,
48
+ generate_json_schema,
49
+ parse_json_response,
50
+ schema_to_prompt,
51
+ )
52
+ from .templating import TemplateEngine, TemplateRegistry
53
+ from .tools import (
54
+ ToolCall,
55
+ ToolDefinition,
56
+ ToolLoopResult,
57
+ ToolRegistry,
58
+ ToolResult,
59
+ format_tool_result,
60
+ run_tool_loop,
61
+ tool,
62
+ tool_from_pydantic,
63
+ tools_to_anthropic,
64
+ tools_to_openai,
65
+ )
66
+ from .types import (
67
+ CompletionError,
68
+ # Request/Response types
69
+ CompletionRequest,
70
+ CompletionResponse,
71
+ ConfigError,
72
+ # Exceptions
73
+ FlashliteError,
74
+ # Message types
75
+ Message,
76
+ MessageDict,
77
+ Messages,
78
+ RateLimitConfig,
79
+ RateLimitError,
80
+ # Configuration types
81
+ RetryConfig,
82
+ Role,
83
+ TemplateError,
84
+ ThinkingConfig,
85
+ UsageInfo,
86
+ ValidationError,
87
+ thinking_enabled,
88
+ )
89
+
90
+ __version__ = "0.1.0"
91
+
92
+ __all__ = [
93
+ # Main client
94
+ "Flashlite",
95
+ # Configuration
96
+ "FlashliteConfig",
97
+ "RetryConfig",
98
+ "RateLimitConfig",
99
+ "ThinkingConfig",
100
+ "thinking_enabled",
101
+ "load_env_files",
102
+ "validate_api_keys",
103
+ # Request/Response
104
+ "CompletionRequest",
105
+ "CompletionResponse",
106
+ "UsageInfo",
107
+ # Messages
108
+ "Message",
109
+ "Messages",
110
+ "MessageDict",
111
+ "Role",
112
+ "format_messages",
113
+ "user_message",
114
+ "system_message",
115
+ "assistant_message",
116
+ "tool_message",
117
+ # Templating
118
+ "TemplateEngine",
119
+ "TemplateRegistry",
120
+ # Caching
121
+ "CacheBackend",
122
+ "MemoryCache",
123
+ "DiskCache",
124
+ "generate_cache_key",
125
+ # Conversation
126
+ "Conversation",
127
+ "ConversationState",
128
+ "Turn",
129
+ "ContextManager",
130
+ "ContextLimits",
131
+ "estimate_tokens",
132
+ "estimate_messages_tokens",
133
+ "truncate_messages",
134
+ # Multi-agent
135
+ "MultiAgentChat",
136
+ "Agent",
137
+ "ChatMessage",
138
+ # Observability
139
+ "StructuredLogger",
140
+ "CostTracker",
141
+ "BudgetExceededError",
142
+ "CallbackManager",
143
+ "InspectLogger",
144
+ "FlashliteModelAPI",
145
+ # Structured outputs
146
+ "StructuredOutputError",
147
+ "generate_json_schema",
148
+ "schema_to_prompt",
149
+ "parse_json_response",
150
+ # Exceptions
151
+ "FlashliteError",
152
+ "CompletionError",
153
+ "RateLimitError",
154
+ "ValidationError",
155
+ "TemplateError",
156
+ "ConfigError",
157
+ # Tools
158
+ "tool",
159
+ "tool_from_pydantic",
160
+ "ToolDefinition",
161
+ "ToolRegistry",
162
+ "ToolCall",
163
+ "ToolResult",
164
+ "ToolLoopResult",
165
+ "run_tool_loop",
166
+ "tools_to_openai",
167
+ "tools_to_anthropic",
168
+ "format_tool_result",
169
+ ]
@@ -0,0 +1,14 @@
1
+ """Caching module for flashlite."""
2
+
3
+ from .base import CacheBackend, CacheEntry, generate_cache_key, is_cacheable_request
4
+ from .disk import DiskCache
5
+ from .memory import MemoryCache
6
+
7
+ __all__ = [
8
+ "CacheBackend",
9
+ "CacheEntry",
10
+ "generate_cache_key",
11
+ "is_cacheable_request",
12
+ "MemoryCache",
13
+ "DiskCache",
14
+ ]
@@ -0,0 +1,194 @@
1
+ """Base cache protocol and key generation for flashlite."""
2
+
3
+ import hashlib
4
+ import json
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass
7
+ from typing import Any
8
+
9
+ from ..types import CompletionRequest, CompletionResponse
10
+
11
+
12
@dataclass
class CacheEntry:
    """Cached completion response bundled with its bookkeeping metadata.

    Attributes are documented inline below. ``created_at`` and ``ttl`` are
    added together in :meth:`is_expired`, so they must share one time unit.
    """

    # The stored completion response.
    response: CompletionResponse
    # Hash identifying the request that produced ``response``.
    request_hash: str
    # Timestamp at which the entry was created.
    created_at: float
    # Lifetime measured from ``created_at``; None means the entry never expires.
    ttl: float | None = None

    def is_expired(self, current_time: float) -> bool:
        """Tell whether the entry's lifetime has elapsed.

        Args:
            current_time: The timestamp to evaluate expiry against.

        Returns:
            True only when a TTL is set and ``current_time`` is strictly
            greater than ``created_at + ttl``; False otherwise.
        """
        lifetime = self.ttl
        return lifetime is not None and current_time > self.created_at + lifetime
26
+
27
+
28
class CacheBackend(ABC):
    """Interface every flashlite cache backend has to implement.

    ``get``, ``set``, ``delete``, ``clear`` and ``size`` are abstract
    coroutines; ``close`` has a default no-op implementation.
    """

    @abstractmethod
    async def get(self, key: str) -> CompletionResponse | None:
        """
        Look up a cached response.

        Args:
            key: The cache key

        Returns:
            The cached CompletionResponse, or None when the key is missing
            or its entry has expired
        """

    @abstractmethod
    async def set(
        self,
        key: str,
        response: CompletionResponse,
        ttl: float | None = None,
    ) -> None:
        """
        Put a response into the cache.

        Args:
            key: The cache key
            response: The response to cache
            ttl: Time-to-live in seconds (None = no expiration)
        """

    @abstractmethod
    async def delete(self, key: str) -> bool:
        """
        Remove a single cached entry.

        Args:
            key: The cache key

        Returns:
            True if the key existed and was deleted
        """

    @abstractmethod
    async def clear(self) -> int:
        """
        Remove every cached entry.

        Returns:
            Number of entries cleared
        """

    @abstractmethod
    async def size(self) -> int:
        """
        Report how many entries the cache holds.

        Returns:
            Number of entries in the cache
        """

    async def close(self) -> None:
        """Release resources held by the backend; the default does nothing."""
97
+
98
+
99
def generate_cache_key(request: CompletionRequest) -> str:
    """
    Derive a deterministic cache key from a completion request.

    The key covers the model name, the messages (each converted with
    ``dict``), every sampling/reasoning parameter that is not None, and all
    extra kwargs except a small set treated as metadata or test helpers.

    Note: requests with temperature > 0 or reasoning enabled are typically
    poor caching candidates because their responses are non-deterministic.

    Args:
        request: The completion request

    Returns:
        A hex-encoded SHA-256 hash as the cache key
    """
    payload: dict[str, Any] = {
        "model": request.model,
        "messages": [dict(message) for message in request.messages],
    }

    # Optional parameters that influence the model output; unset ones are
    # left out of the payload entirely.
    output_affecting_fields = (
        "temperature",
        "max_tokens",
        "max_completion_tokens",
        "top_p",
        "stop",
        "reasoning_effort",
        "thinking",
    )
    for field_name in output_affecting_fields:
        field_value = getattr(request, field_name)
        if field_value is not None:
            payload[field_name] = field_value

    # Extra kwargs are namespaced under "extra."; metadata-only options and
    # test helpers are skipped so they do not fragment the cache.
    ignored_extras = {"timeout", "metadata", "tags", "user", "mock_response"}
    payload.update(
        {
            f"extra.{name}": value
            for name, value in request.extra_kwargs.items()
            if name not in ignored_extras
        }
    )

    # Sorted keys make the JSON text independent of insertion order.
    canonical = json.dumps(payload, sort_keys=True, default=str)

    # Hashing yields a fixed-length key regardless of request size.
    return hashlib.sha256(canonical.encode()).hexdigest()
151
+
152
+
153
def is_cacheable_request(request: CompletionRequest) -> tuple[bool, str | None]:
    """
    Assess whether a request is a sensible candidate for caching.

    The current implementation never rejects a request: the first tuple
    element is always True, and the second element carries any caveats.

    Args:
        request: The completion request to inspect

    Returns:
        Tuple of (is_cacheable, warning_message). warning_message joins all
        collected caveats with "; ", or is None when there are none.
    """
    notes: list[str] = []

    # A positive temperature makes sampling non-deterministic.
    temperature = request.temperature
    if temperature is not None and temperature > 0:
        notes.append(f"temperature={temperature} > 0 may produce different outputs")

    # Reasoning models are non-deterministic by nature.
    effort = request.reasoning_effort
    if effort is not None:
        notes.append(
            f"reasoning_effort='{effort}' - "
            "reasoning models may produce varying outputs"
        )

    if request.thinking is not None:
        notes.append(
            "thinking enabled - extended thinking models may produce varying outputs"
        )

    # Fall back to the model name, unless a reasoning caveat is already present.
    lowered_model = request.model.lower()
    known_reasoning_markers = ("o1", "o3", "claude-3-5-sonnet", "claude-sonnet-4")
    name_matches = any(marker in lowered_model for marker in known_reasoning_markers)
    if name_matches and not any("reasoning" in note for note in notes):
        notes.append(
            f"model '{request.model}' appears to be a reasoning model - outputs may vary"
        )

    return True, ("; ".join(notes) if notes else None)
@@ -0,0 +1,285 @@
1
+ """SQLite-based disk cache for persistent caching."""
2
+
3
+ import asyncio
4
+ import json
5
+ import sqlite3
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from ..types import CompletionResponse, UsageInfo
11
+ from .base import CacheBackend
12
+
13
+
14
class DiskCache(CacheBackend):
    """
    SQLite-based disk cache for persistent caching.

    This cache stores responses in a SQLite database, providing
    persistence across process restarts. All database access goes through
    a single connection guarded by an asyncio lock.

    Example:
        cache = DiskCache("./cache/completions.db", default_ttl=86400)

        # Store a response
        await cache.set(key, response)

        # Retrieve (returns None if expired or not found)
        cached = await cache.get(key)

        # Close when done
        await cache.close()
    """

    def __init__(
        self,
        path: str | Path,
        default_ttl: float | None = None,
        auto_vacuum: bool = True,
    ):
        """
        Initialize the disk cache.

        Creates the parent directory if needed and opens the database
        immediately.

        Args:
            path: Path to SQLite database file (will be created if doesn't exist)
            default_ttl: Default time-to-live in seconds (None = no expiration)
            auto_vacuum: Whether to run VACUUM on startup to reclaim space
        """
        self._path = Path(path)
        self._default_ttl = default_ttl
        self._auto_vacuum = auto_vacuum
        self._conn: sqlite3.Connection | None = None
        self._lock = asyncio.Lock()
        self._hits = 0
        self._misses = 0

        # Ensure parent directory exists
        self._path.parent.mkdir(parents=True, exist_ok=True)

        # Initialize database
        self._init_db()

    def _init_db(self) -> None:
        """Open the connection and create the schema if it is missing."""
        self._conn = sqlite3.connect(str(self._path), check_same_thread=False)
        # Row factory allows column access by name (e.g. row["count"]).
        self._conn.row_factory = sqlite3.Row

        # Create tables
        self._conn.execute("""
            CREATE TABLE IF NOT EXISTS cache (
                key TEXT PRIMARY KEY,
                response_json TEXT NOT NULL,
                created_at REAL NOT NULL,
                expires_at REAL,
                model TEXT,
                input_tokens INTEGER,
                output_tokens INTEGER
            )
        """)

        # Create index for expiration queries
        self._conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_expires_at ON cache(expires_at)
        """)

        self._conn.commit()

        # Optional vacuum to reclaim space
        if self._auto_vacuum:
            try:
                self._conn.execute("VACUUM")
            except sqlite3.OperationalError:
                # Best effort: VACUUM can fail if database is locked
                pass

    def _serialize_response(self, response: CompletionResponse) -> str:
        """
        Serialize a CompletionResponse to JSON.

        Only ``content``, ``model``, ``finish_reason`` and (when truthy) the
        usage token counts are written.
        """
        data: dict[str, Any] = {
            "content": response.content,
            "model": response.model,
            "finish_reason": response.finish_reason,
        }

        if response.usage:
            data["usage"] = {
                "input_tokens": response.usage.input_tokens,
                "output_tokens": response.usage.output_tokens,
                "total_tokens": response.usage.total_tokens,
            }

        return json.dumps(data)

    def _deserialize_response(self, json_str: str) -> CompletionResponse:
        """Rebuild a CompletionResponse from JSON written by ``_serialize_response``."""
        data = json.loads(json_str)

        usage = None
        if "usage" in data:
            usage = UsageInfo(
                input_tokens=data["usage"]["input_tokens"],
                output_tokens=data["usage"]["output_tokens"],
                total_tokens=data["usage"]["total_tokens"],
            )

        return CompletionResponse(
            content=data["content"],
            model=data["model"],
            finish_reason=data.get("finish_reason"),
            usage=usage,
        )

    async def get(self, key: str) -> CompletionResponse | None:
        """
        Retrieve a cached response.

        Args:
            key: The cache key

        Returns:
            The cached response, or None if the key is absent or expired.
            Updates the session hit/miss counters accordingly.

        Raises:
            RuntimeError: If the cache has been closed
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            now = time.time()

            # Query for non-expired entry
            cursor = self._conn.execute(
                """
                SELECT response_json FROM cache
                WHERE key = ? AND (expires_at IS NULL OR expires_at > ?)
                """,
                (key, now),
            )
            row = cursor.fetchone()

            if row is None:
                self._misses += 1
                return None

            self._hits += 1
            return self._deserialize_response(row["response_json"])

    async def set(
        self,
        key: str,
        response: CompletionResponse,
        ttl: float | None = None,
    ) -> None:
        """
        Store a response in the cache, replacing any existing entry for ``key``.

        Args:
            key: The cache key
            response: The response to cache
            ttl: Time-to-live in seconds; None falls back to ``default_ttl``,
                and if that is also None the entry never expires

        Raises:
            RuntimeError: If the cache has been closed
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            effective_ttl = ttl if ttl is not None else self._default_ttl
            now = time.time()
            # Compare against None rather than truthiness: a TTL of 0 must
            # produce an already-expired entry, not one that lives forever.
            expires_at = None if effective_ttl is None else now + effective_ttl

            response_json = self._serialize_response(response)

            self._conn.execute(
                """
                INSERT OR REPLACE INTO cache
                (key, response_json, created_at, expires_at, model, input_tokens, output_tokens)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    key,
                    response_json,
                    now,
                    expires_at,
                    response.model,
                    response.usage.input_tokens if response.usage else None,
                    response.usage.output_tokens if response.usage else None,
                ),
            )
            self._conn.commit()

    async def delete(self, key: str) -> bool:
        """
        Delete a cached entry.

        Args:
            key: The cache key

        Returns:
            True if a row was deleted

        Raises:
            RuntimeError: If the cache has been closed
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            cursor = self._conn.execute("DELETE FROM cache WHERE key = ?", (key,))
            self._conn.commit()
            return cursor.rowcount > 0

    async def clear(self) -> int:
        """
        Clear all cached entries and reset the session hit/miss counters.

        Returns:
            Number of rows that were in the table (expired ones included)

        Raises:
            RuntimeError: If the cache has been closed
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            cursor = self._conn.execute("SELECT COUNT(*) as count FROM cache")
            count = cursor.fetchone()["count"]

            self._conn.execute("DELETE FROM cache")
            self._conn.commit()

            self._hits = 0
            self._misses = 0

            return count

    async def size(self) -> int:
        """
        Get the number of cached entries (excluding expired).

        Raises:
            RuntimeError: If the cache has been closed
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            now = time.time()
            cursor = self._conn.execute(
                """
                SELECT COUNT(*) as count FROM cache
                WHERE expires_at IS NULL OR expires_at > ?
                """,
                (now,),
            )
            return cursor.fetchone()["count"]

    async def cleanup_expired(self) -> int:
        """
        Remove expired entries from the cache.

        Returns:
            Number of entries removed

        Raises:
            RuntimeError: If the cache has been closed
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("Cache has been closed")

            now = time.time()
            cursor = self._conn.execute(
                "DELETE FROM cache WHERE expires_at IS NOT NULL AND expires_at <= ?",
                (now,),
            )
            self._conn.commit()
            return cursor.rowcount

    async def close(self) -> None:
        """Close the database connection; calling it again is a no-op."""
        async with self._lock:
            if self._conn is not None:
                self._conn.close()
                self._conn = None

    @property
    def hits(self) -> int:
        """Number of cache hits (this session only)."""
        return self._hits

    @property
    def misses(self) -> int:
        """Number of cache misses (this session only)."""
        return self._misses

    @property
    def hit_rate(self) -> float:
        """Cache hit rate (0.0 to 1.0) for this session; 0.0 before any lookup."""
        total = self._hits + self._misses
        if total == 0:
            return 0.0
        return self._hits / total

    def stats(self) -> dict[str, Any]:
        """Get cache statistics: database path, hits, misses and hit rate."""
        return {
            "path": str(self._path),
            "hits": self._hits,
            "misses": self._misses,
            "hit_rate": self.hit_rate,
        }