flashlite 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flashlite/__init__.py +169 -0
- flashlite/cache/__init__.py +14 -0
- flashlite/cache/base.py +194 -0
- flashlite/cache/disk.py +285 -0
- flashlite/cache/memory.py +157 -0
- flashlite/client.py +671 -0
- flashlite/config.py +154 -0
- flashlite/conversation/__init__.py +30 -0
- flashlite/conversation/context.py +319 -0
- flashlite/conversation/manager.py +385 -0
- flashlite/conversation/multi_agent.py +378 -0
- flashlite/core/__init__.py +13 -0
- flashlite/core/completion.py +145 -0
- flashlite/core/messages.py +130 -0
- flashlite/middleware/__init__.py +18 -0
- flashlite/middleware/base.py +90 -0
- flashlite/middleware/cache.py +121 -0
- flashlite/middleware/logging.py +159 -0
- flashlite/middleware/rate_limit.py +211 -0
- flashlite/middleware/retry.py +149 -0
- flashlite/observability/__init__.py +34 -0
- flashlite/observability/callbacks.py +155 -0
- flashlite/observability/inspect_compat.py +266 -0
- flashlite/observability/logging.py +293 -0
- flashlite/observability/metrics.py +221 -0
- flashlite/py.typed +0 -0
- flashlite/structured/__init__.py +31 -0
- flashlite/structured/outputs.py +189 -0
- flashlite/structured/schema.py +165 -0
- flashlite/templating/__init__.py +11 -0
- flashlite/templating/engine.py +217 -0
- flashlite/templating/filters.py +143 -0
- flashlite/templating/registry.py +165 -0
- flashlite/tools/__init__.py +74 -0
- flashlite/tools/definitions.py +382 -0
- flashlite/tools/execution.py +353 -0
- flashlite/types.py +233 -0
- flashlite-0.1.0.dist-info/METADATA +173 -0
- flashlite-0.1.0.dist-info/RECORD +41 -0
- flashlite-0.1.0.dist-info/WHEEL +4 -0
- flashlite-0.1.0.dist-info/licenses/LICENSE.md +21 -0

flashlite/tools/execution.py
ADDED

@@ -0,0 +1,353 @@
"""Tool execution loop helpers for agentic patterns.

This module provides utilities for running tool execution loops where the
model can call tools and receive results in a conversation flow.
"""

import json
import logging
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any

from .definitions import ToolDefinition, format_tool_result

if TYPE_CHECKING:
    from ..client import Flashlite
    from ..types import CompletionResponse, Message

logger = logging.getLogger(__name__)


@dataclass
class ToolCall:
    """Represents a tool call from the model."""

    id: str
    name: str
    arguments: dict[str, Any]

    @classmethod
    def from_openai(cls, tool_call: dict[str, Any]) -> "ToolCall":
        """Parse from OpenAI tool call format."""
        func = tool_call.get("function", {})
        args_str = func.get("arguments", "{}")

        try:
            args = json.loads(args_str)
        except json.JSONDecodeError:
            args = {"raw": args_str}

        return cls(
            id=tool_call.get("id", ""),
            name=func.get("name", ""),
            arguments=args,
        )
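
# Sketch (editor's illustration, not part of execution.py): parsing an
# OpenAI-style tool_call dict, including the fallback that preserves
# malformed JSON arguments under a "raw" key instead of raising.
tc = ToolCall.from_openai(
    {"id": "call_1", "function": {"name": "get_weather", "arguments": '{"location": "NYC"}'}}
)
assert tc.arguments == {"location": "NYC"}

bad = ToolCall.from_openai({"id": "call_2", "function": {"name": "f", "arguments": "not json"}})
assert bad.arguments == {"raw": "not json"}
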
@dataclass
class ToolResult:
    """Result of executing a tool."""

    tool_call_id: str
    name: str
    result: Any
    error: str | None = None

    def to_message(self) -> dict[str, Any]:
        """Convert to tool result message."""
        return format_tool_result(
            tool_call_id=self.tool_call_id,
            result=self.error if self.error else self.result,
            is_error=self.error is not None,
        )


@dataclass
class ToolLoopResult:
    """Result of a complete tool execution loop."""

    messages: list["Message"]  # Full conversation history
    final_response: "CompletionResponse"  # Final response from model
    tool_calls_made: list[ToolCall] = field(default_factory=list)
    tool_results: list[ToolResult] = field(default_factory=list)
    iterations: int = 0

    @property
    def content(self) -> str:
        """Get the final response content."""
        return self.final_response.content


async def execute_tool(
    tool: ToolDefinition,
    arguments: dict[str, Any],
) -> tuple[Any, str | None]:
    """
    Execute a single tool with error handling.

    Returns:
        Tuple of (result, error_message). error_message is None on success.
    """
    try:
        result = await tool.execute(**arguments)
        return result, None
    except Exception as e:
        logger.warning(f"Tool {tool.name} failed: {e}")
        return None, str(e)


async def execute_tools_parallel(
    tools: dict[str, ToolDefinition],
    tool_calls: list[ToolCall],
) -> list[ToolResult]:
    """
    Execute multiple tool calls in parallel.

    Args:
        tools: Registry of available tools
        tool_calls: Tool calls to execute

    Returns:
        List of tool results
    """
    import asyncio

    async def run_one(tc: ToolCall) -> ToolResult:
        tool = tools.get(tc.name)
        if not tool:
            return ToolResult(
                tool_call_id=tc.id,
                name=tc.name,
                result=None,
                error=f"Unknown tool: {tc.name}",
            )

        result, error = await execute_tool(tool, tc.arguments)
        return ToolResult(
            tool_call_id=tc.id,
            name=tc.name,
            result=result,
            error=error,
        )

    return await asyncio.gather(*[run_one(tc) for tc in tool_calls])
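
# Sketch (editor's illustration, not part of execution.py): the unknown-tool
# path can be exercised without a real ToolDefinition, since a missing name
# is reported as a ToolResult error rather than raised as an exception.
import asyncio

calls = [ToolCall(id="call_1", name="missing", arguments={})]
results = asyncio.run(execute_tools_parallel({}, calls))
assert results[0].error == "Unknown tool: missing"
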
def extract_tool_calls(response: "CompletionResponse") -> list[ToolCall]:
    """
    Extract tool calls from a completion response.

    Args:
        response: The completion response

    Returns:
        List of tool calls (empty if none)
    """
    # Check raw response for tool_calls
    raw = response.raw_response
    if raw is None:
        return []

    # Handle litellm ModelResponse
    if hasattr(raw, "choices"):
        choices = raw.choices
        if choices and len(choices) > 0:
            message = choices[0].message
            if hasattr(message, "tool_calls") and message.tool_calls:
                return [ToolCall.from_openai(tc.model_dump()) for tc in message.tool_calls]

    # Handle dict response
    if isinstance(raw, dict):
        choices = raw.get("choices", [])
        if choices:
            message = choices[0].get("message", {})
            tool_calls = message.get("tool_calls", [])
            return [ToolCall.from_openai(tc) for tc in tool_calls]

    return []
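
# Sketch (editor's illustration, not part of execution.py): extracting tool
# calls from a dict-shaped raw response, using the CompletionResponse
# dataclass defined in flashlite/types.py below.
from flashlite.types import CompletionResponse

resp = CompletionResponse(
    content="",
    model="gpt-4o",
    raw_response={
        "choices": [
            {"message": {"tool_calls": [
                {"id": "call_1",
                 "function": {"name": "get_weather", "arguments": '{"location": "NYC"}'}}
            ]}}
        ]
    },
)
assert extract_tool_calls(resp)[0].name == "get_weather"
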
def build_tool_registry(
    tools: list[ToolDefinition | Callable[..., Any]],
) -> dict[str, ToolDefinition]:
    """
    Build a tool registry from a list of tools.

    Accepts both ToolDefinition objects and @tool decorated functions.
    """
    registry: dict[str, ToolDefinition] = {}
    for t in tools:
        if isinstance(t, ToolDefinition):
            registry[t.name] = t
        elif hasattr(t, "_tool_definition"):
            tool_def = t._tool_definition
            registry[tool_def.name] = tool_def
        else:
            raise ValueError(f"Not a valid tool: {t}")
    return registry
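
# Sketch (editor's illustration, not part of execution.py): registering a
# decorated function. The @tool decorator appears in run_tool_loop's docstring
# below; that it is importable from flashlite.tools and attaches
# _tool_definition is an assumption inferred from the hasattr check above.
from flashlite.tools import tool

@tool()
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

registry = build_tool_registry([add])
assert "add" in registry
# build_tool_registry([object()]) would raise ValueError("Not a valid tool: ...")
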
async def run_tool_loop(
    client: "Flashlite",
    messages: list["Message"],
    tools: list[ToolDefinition | Callable[..., Any]],
    *,
    model: str | None = None,
    max_iterations: int = 10,
    execute_parallel: bool = True,
    on_tool_call: Callable[[ToolCall], None] | None = None,
    on_tool_result: Callable[[ToolResult], None] | None = None,
    **completion_kwargs: Any,
) -> ToolLoopResult:
    """
    Run a tool execution loop until the model stops calling tools.

    This implements the standard agentic pattern:
    1. Call the model with messages and tools
    2. If the model requests tool calls, execute them
    3. Add the tool results to the messages and repeat
    4. Continue until the model returns without tool calls or max_iterations is reached

    Args:
        client: Flashlite client for completions
        messages: Initial messages
        tools: List of tools available to the model
        model: Model to use (defaults to client's default)
        max_iterations: Maximum tool call rounds (default: 10)
        execute_parallel: Execute multiple tool calls in parallel
        on_tool_call: Callback when a tool is called
        on_tool_result: Callback when a tool returns
        **completion_kwargs: Additional args passed to complete()

    Returns:
        ToolLoopResult with final response and history

    Example:
        @tool()
        def get_weather(location: str) -> str:
            '''Get weather for a location.'''
            return f"Weather in {location}: 72°F"

        result = await run_tool_loop(
            client=client,
            messages=[{"role": "user", "content": "What's the weather in NYC?"}],
            tools=[get_weather],
        )
        print(result.content)  # Final response after tool execution
    """
    # Build tool registry and convert to litellm format
    registry = build_tool_registry(tools)

    # Determine provider format based on model
    effective_model = model or client.config.default_model or ""
    model_lower = effective_model.lower()

    if "claude" in model_lower or "anthropic" in model_lower:
        from .definitions import tools_to_anthropic

        tools_param = tools_to_anthropic(tools)
    else:
        from .definitions import tools_to_openai

        tools_param = tools_to_openai(tools)

    # Track state
    current_messages = list(messages)
    all_tool_calls: list[ToolCall] = []
    all_tool_results: list[ToolResult] = []
    iterations = 0

    while iterations < max_iterations:
        iterations += 1

        # Call model with tools
        # Pass pre-converted tools via extra kwargs to avoid double-conversion
        response = await client.complete(
            model=model,
            messages=current_messages,
            **{**completion_kwargs, "tools": tools_param},
        )

        # Check for tool calls
        tool_calls = extract_tool_calls(response)

        if not tool_calls:
            # No more tool calls - we're done
            return ToolLoopResult(
                messages=current_messages,
                final_response=response,
                tool_calls_made=all_tool_calls,
                tool_results=all_tool_results,
                iterations=iterations,
            )

        # Add assistant message with tool calls
        assistant_msg: dict[str, Any] = {"role": "assistant", "content": response.content or ""}

        # Add tool_calls to message (needed for conversation continuity)
        if response.raw_response and hasattr(response.raw_response, "choices"):
            choices = response.raw_response.choices
            if choices and hasattr(choices[0].message, "tool_calls"):
                assistant_msg["tool_calls"] = [
                    tc.model_dump() for tc in choices[0].message.tool_calls
                ]
        current_messages.append(assistant_msg)

        # Execute tool calls
        if on_tool_call:
            for tc in tool_calls:
                on_tool_call(tc)

        all_tool_calls.extend(tool_calls)

        if execute_parallel:
            results = await execute_tools_parallel(registry, tool_calls)
        else:
            results = []
            for tc in tool_calls:
                tool = registry.get(tc.name)
                if tool:
                    result, error = await execute_tool(tool, tc.arguments)
                    results.append(
                        ToolResult(
                            tool_call_id=tc.id,
                            name=tc.name,
                            result=result,
                            error=error,
                        )
                    )
                else:
                    results.append(
                        ToolResult(
                            tool_call_id=tc.id,
                            name=tc.name,
                            result=None,
                            error=f"Unknown tool: {tc.name}",
                        )
                    )

        # Add tool results to messages
        for tr in results:
            if on_tool_result:
                on_tool_result(tr)
            all_tool_results.append(tr)
            current_messages.append(tr.to_message())

    # Max iterations reached
    logger.warning(f"Tool loop reached max iterations ({max_iterations})")

    # Make final call without tools to get a response
    response = await client.complete(
        model=model,
        messages=current_messages,
        **completion_kwargs,
    )

    return ToolLoopResult(
        messages=current_messages,
        final_response=response,
        tool_calls_made=all_tool_calls,
        tool_results=all_tool_results,
        iterations=iterations,
    )
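
The docstring example above covers the basic call; below is a minimal sketch of the callback hooks (an editor's illustration: that Flashlite is exported at the package root, that tool is exported from flashlite.tools, and that an argument-free Flashlite() picks up configuration from the environment are assumptions not confirmed by this diff).

import asyncio
from flashlite import Flashlite
from flashlite.tools import tool
from flashlite.tools.execution import run_tool_loop

@tool()
def get_weather(location: str) -> str:
    """Get weather for a location."""
    return f"Weather in {location}: 72°F"

async def main() -> None:
    client = Flashlite()  # assumed to read model/API key from the environment
    result = await run_tool_loop(
        client=client,
        messages=[{"role": "user", "content": "What's the weather in NYC?"}],
        tools=[get_weather],
        max_iterations=5,
        on_tool_call=lambda tc: print(f"-> {tc.name}({tc.arguments})"),
        on_tool_result=lambda tr: print(f"<- {tr.name}: {tr.error or tr.result}"),
    )
    print(result.content, f"({result.iterations} round(s))")

asyncio.run(main())
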
flashlite/types.py
ADDED
@@ -0,0 +1,233 @@
"""Shared types and protocols for flashlite."""

from collections.abc import Awaitable, Callable, Sequence
from dataclasses import dataclass, field
from typing import Any, Literal, Protocol, TypedDict, TypeVar

from pydantic import BaseModel

# Type aliases for messages
Role = Literal["system", "user", "assistant", "tool"]


class MessageDict(TypedDict, total=False):
    """A chat message in dictionary form."""

    role: Role
    content: str
    name: str
    tool_calls: list[dict[str, Any]]
    tool_call_id: str


Message = MessageDict | dict[str, Any]
Messages = Sequence[Message]


# Convenience function for creating thinking config
def thinking_enabled(budget_tokens: int) -> "ThinkingConfig":
    """
    Create an Anthropic extended thinking configuration.

    Args:
        budget_tokens: Maximum tokens for Claude's internal reasoning.
            Minimum is 1024. Larger budgets (16k+) recommended for complex tasks.

    Returns:
        ThinkingConfig dict to pass to complete()

    Example:
        await client.complete(
            model="claude-sonnet-4-5-20250929",
            messages="Solve this complex problem...",
            thinking=thinking_enabled(10000),
        )
    """
    return {"type": "enabled", "budget_tokens": budget_tokens}


class ThinkingConfig(TypedDict, total=False):
    """Configuration for Anthropic extended thinking."""

    type: Literal["enabled", "disabled"]
    budget_tokens: int

@dataclass
class CompletionRequest:
    """A request to complete a chat conversation."""

    model: str
    messages: Messages
    temperature: float | None = None
    max_tokens: int | None = None
    max_completion_tokens: int | None = None
    top_p: float | None = None
    stop: str | list[str] | None = None
    # OpenAI reasoning model parameters (o1, o3)
    reasoning_effort: Literal["low", "medium", "high"] | None = None
    # Anthropic extended thinking parameters (Claude)
    thinking: ThinkingConfig | None = None
    # Additional kwargs passed through to litellm
    extra_kwargs: dict[str, Any] = field(default_factory=dict)

    def to_litellm_kwargs(self) -> dict[str, Any]:
        """Convert to kwargs dict for litellm.completion()."""
        kwargs: dict[str, Any] = {
            "model": self.model,
            "messages": list(self.messages),
        }

        # Add optional parameters if set
        if self.temperature is not None:
            kwargs["temperature"] = self.temperature
        if self.max_tokens is not None:
            kwargs["max_tokens"] = self.max_tokens
        if self.max_completion_tokens is not None:
            kwargs["max_completion_tokens"] = self.max_completion_tokens
        if self.top_p is not None:
            kwargs["top_p"] = self.top_p
        if self.stop is not None:
            kwargs["stop"] = self.stop
        # OpenAI reasoning effort (o1, o3 models)
        if self.reasoning_effort is not None:
            kwargs["reasoning_effort"] = self.reasoning_effort
        # Anthropic extended thinking (Claude models)
        if self.thinking is not None:
            kwargs["thinking"] = self.thinking

        # Merge extra kwargs
        kwargs.update(self.extra_kwargs)

        return kwargs
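
# Sketch (editor's illustration, not part of types.py): unset optional fields
# are omitted from the litellm kwargs, while extra_kwargs are merged last and
# can therefore override the named parameters.
req = CompletionRequest(
    model="claude-sonnet-4-5-20250929",
    messages=[{"role": "user", "content": "hi"}],
    temperature=0.2,
    extra_kwargs={"metadata": {"trace_id": "abc"}},
)
assert req.to_litellm_kwargs() == {
    "model": "claude-sonnet-4-5-20250929",
    "messages": [{"role": "user", "content": "hi"}],
    "temperature": 0.2,
    "metadata": {"trace_id": "abc"},
}
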
@dataclass
class CompletionResponse:
    """A response from a completion request."""

    content: str
    model: str
    finish_reason: str | None = None
    usage: "UsageInfo | None" = None
    raw_response: Any = None

    @property
    def input_tokens(self) -> int:
        """Get input token count."""
        return self.usage.input_tokens if self.usage else 0

    @property
    def output_tokens(self) -> int:
        """Get output token count."""
        return self.usage.output_tokens if self.usage else 0


@dataclass
class UsageInfo:
    """Token usage information."""

    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0

    @classmethod
    def from_litellm(cls, usage: dict[str, Any] | None) -> "UsageInfo":
        """Create from litellm usage dict."""
        if not usage:
            return cls()
        return cls(
            input_tokens=usage.get("prompt_tokens", 0),
            output_tokens=usage.get("completion_tokens", 0),
            total_tokens=usage.get("total_tokens", 0),
        )


# Response model type variable
ResponseModelT = TypeVar("ResponseModelT", bound=BaseModel)
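
# Sketch (editor's illustration, not part of types.py): UsageInfo.from_litellm
# renames litellm's prompt/completion counters to flashlite's input/output.
usage = UsageInfo.from_litellm({"prompt_tokens": 12, "completion_tokens": 5, "total_tokens": 17})
assert (usage.input_tokens, usage.output_tokens, usage.total_tokens) == (12, 5, 17)
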
# Middleware protocol
class MiddlewareProtocol(Protocol):
    """Protocol for middleware that wraps completion calls."""

    async def __call__(
        self,
        request: CompletionRequest,
        next_handler: Callable[[CompletionRequest], Awaitable[CompletionResponse]],
    ) -> CompletionResponse:
        """Process a request, optionally delegating to the next handler."""
        ...
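
# Sketch (editor's illustration, not part of types.py): a minimal timing
# middleware satisfying MiddlewareProtocol. How middlewares are registered on
# the client is defined under flashlite/middleware/, which is not shown here.
import time

class TimingMiddleware:
    async def __call__(
        self,
        request: CompletionRequest,
        next_handler: Callable[[CompletionRequest], Awaitable[CompletionResponse]],
    ) -> CompletionResponse:
        start = time.perf_counter()
        response = await next_handler(request)
        print(f"{request.model}: {time.perf_counter() - start:.2f}s")
        return response
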
# Configuration types
@dataclass
class RetryConfig:
    """Configuration for retry behavior."""

    max_attempts: int = 3
    initial_delay: float = 1.0
    max_delay: float = 60.0
    exponential_base: float = 2.0
    jitter: bool = True
    # HTTP status codes to retry on
    retry_on_status: tuple[int, ...] = (429, 500, 502, 503, 504)


@dataclass
class RateLimitConfig:
    """Configuration for rate limiting."""

    requests_per_minute: float | None = None
    tokens_per_minute: float | None = None
    # If True, read limits from API response headers
    auto_detect: bool = False
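
# Sketch (editor's illustration): how these fields typically combine into an
# exponential backoff schedule. The package's actual retry logic lives in
# flashlite/middleware/retry.py, which is not shown in this section.
import random

def backoff_delay(cfg: RetryConfig, attempt: int) -> float:
    delay = min(cfg.initial_delay * cfg.exponential_base ** attempt, cfg.max_delay)
    if cfg.jitter:
        delay *= random.uniform(0.5, 1.5)  # one common jitter scheme; an assumption
    return delay

# Defaults give roughly 1s, 2s, 4s, ... capped at 60s (before jitter).
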
# Exceptions
class FlashliteError(Exception):
    """Base exception for flashlite errors."""

    pass


class CompletionError(FlashliteError):
    """Error during completion request."""

    def __init__(
        self,
        message: str,
        status_code: int | None = None,
        response: Any = None,
    ):
        super().__init__(message)
        self.status_code = status_code
        self.response = response


class RateLimitError(FlashliteError):
    """Rate limit exceeded."""

    def __init__(self, message: str, retry_after: float | None = None):
        super().__init__(message)
        self.retry_after = retry_after


class ValidationError(FlashliteError):
    """Response validation failed."""

    def __init__(self, message: str, errors: list[Any] | None = None):
        super().__init__(message)
        self.errors = errors or []


class TemplateError(FlashliteError):
    """Template rendering error."""

    pass


class ConfigError(FlashliteError):
    """Configuration error."""

    pass
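
RateLimitError is the only subclass here that carries retry_after, so callers can branch on it before falling back to the FlashliteError base. A minimal sketch (an editor's illustration, assuming client.complete raises these exception types, which this diff does not itself show):

from flashlite.types import FlashliteError, RateLimitError

async def safe_complete(client, **kwargs):
    try:
        return await client.complete(**kwargs)
    except RateLimitError as e:
        print(f"Rate limited; retry after {e.retry_after}s")
        raise
    except FlashliteError as e:
        print(f"Completion failed: {e}")
        raise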