flashlite 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. flashlite/__init__.py +169 -0
  2. flashlite/cache/__init__.py +14 -0
  3. flashlite/cache/base.py +194 -0
  4. flashlite/cache/disk.py +285 -0
  5. flashlite/cache/memory.py +157 -0
  6. flashlite/client.py +671 -0
  7. flashlite/config.py +154 -0
  8. flashlite/conversation/__init__.py +30 -0
  9. flashlite/conversation/context.py +319 -0
  10. flashlite/conversation/manager.py +385 -0
  11. flashlite/conversation/multi_agent.py +378 -0
  12. flashlite/core/__init__.py +13 -0
  13. flashlite/core/completion.py +145 -0
  14. flashlite/core/messages.py +130 -0
  15. flashlite/middleware/__init__.py +18 -0
  16. flashlite/middleware/base.py +90 -0
  17. flashlite/middleware/cache.py +121 -0
  18. flashlite/middleware/logging.py +159 -0
  19. flashlite/middleware/rate_limit.py +211 -0
  20. flashlite/middleware/retry.py +149 -0
  21. flashlite/observability/__init__.py +34 -0
  22. flashlite/observability/callbacks.py +155 -0
  23. flashlite/observability/inspect_compat.py +266 -0
  24. flashlite/observability/logging.py +293 -0
  25. flashlite/observability/metrics.py +221 -0
  26. flashlite/py.typed +0 -0
  27. flashlite/structured/__init__.py +31 -0
  28. flashlite/structured/outputs.py +189 -0
  29. flashlite/structured/schema.py +165 -0
  30. flashlite/templating/__init__.py +11 -0
  31. flashlite/templating/engine.py +217 -0
  32. flashlite/templating/filters.py +143 -0
  33. flashlite/templating/registry.py +165 -0
  34. flashlite/tools/__init__.py +74 -0
  35. flashlite/tools/definitions.py +382 -0
  36. flashlite/tools/execution.py +353 -0
  37. flashlite/types.py +233 -0
  38. flashlite-0.1.0.dist-info/METADATA +173 -0
  39. flashlite-0.1.0.dist-info/RECORD +41 -0
  40. flashlite-0.1.0.dist-info/WHEEL +4 -0
  41. flashlite-0.1.0.dist-info/licenses/LICENSE.md +21 -0
@@ -0,0 +1,353 @@
1
+ """Tool execution loop helpers for agentic patterns.
2
+
3
+ This module provides utilities for running tool execution loops where the
4
+ model can call tools and receive results in a conversation flow.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from collections.abc import Callable
10
+ from dataclasses import dataclass, field
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ from .definitions import ToolDefinition, format_tool_result
14
+
15
+ if TYPE_CHECKING:
16
+ from ..client import Flashlite
17
+ from ..types import CompletionResponse, Message
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclass
class ToolCall:
    """A single tool invocation requested by the model.

    Attributes:
        id: Provider-assigned identifier for this call (used to match results).
        name: Name of the tool to invoke.
        arguments: Parsed keyword arguments for the tool (always a dict).
    """

    id: str
    name: str
    arguments: dict[str, Any]

    @classmethod
    def from_openai(cls, tool_call: dict[str, Any]) -> "ToolCall":
        """Parse from OpenAI tool call format.

        Malformed argument payloads are preserved under a ``"raw"`` key
        instead of raising, so a bad model response never crashes the loop.
        """
        func = tool_call.get("function", {})
        args_str = func.get("arguments", "{}")

        try:
            args = json.loads(args_str)
        except json.JSONDecodeError:
            args = {"raw": args_str}

        # The model may emit valid JSON that is not an object (e.g. "[1, 2]").
        # Normalize it so `arguments` always matches its declared dict type
        # and can later be splatted into the tool via **arguments.
        if not isinstance(args, dict):
            args = {"raw": args_str}

        return cls(
            id=tool_call.get("id", ""),
            name=func.get("name", ""),
            arguments=args,
        )
46
+
47
+
48
+ @dataclass
49
+ class ToolResult:
50
+ """Result of executing a tool."""
51
+
52
+ tool_call_id: str
53
+ name: str
54
+ result: Any
55
+ error: str | None = None
56
+
57
+ def to_message(self) -> dict[str, Any]:
58
+ """Convert to tool result message."""
59
+ return format_tool_result(
60
+ tool_call_id=self.tool_call_id,
61
+ result=self.error if self.error else self.result,
62
+ is_error=self.error is not None,
63
+ )
64
+
65
+
66
@dataclass
class ToolLoopResult:
    """Outcome of a complete tool execution loop.

    Bundles the final model response with the accumulated conversation
    history and every tool call/result observed along the way.
    """

    # Full conversation history, including tool-call and tool-result messages.
    messages: list["Message"]
    # The last model response — the one that contained no tool calls.
    final_response: "CompletionResponse"
    tool_calls_made: list[ToolCall] = field(default_factory=list)
    tool_results: list[ToolResult] = field(default_factory=list)
    iterations: int = 0

    @property
    def content(self) -> str:
        """Text content of the final model response."""
        return self.final_response.content
80
+
81
+
82
async def execute_tool(
    tool: ToolDefinition,
    arguments: dict[str, Any],
) -> tuple[Any, str | None]:
    """
    Execute a single tool with error handling.

    Args:
        tool: The tool definition to run.
        arguments: Keyword arguments forwarded to the tool.

    Returns:
        Tuple of (result, error_message). error_message is None on success;
        on failure the result is None and the exception text is returned.
    """
    try:
        result = await tool.execute(**arguments)
    except Exception as e:
        # Failures become (None, message) instead of propagating, so one
        # bad tool call cannot abort the whole loop. Lazy %-style args
        # avoid string formatting when WARNING is disabled.
        logger.warning("Tool %s failed: %s", tool.name, e)
        return None, str(e)
    return result, None
98
+
99
+
100
async def execute_tools_parallel(
    tools: dict[str, ToolDefinition],
    tool_calls: list[ToolCall],
) -> list[ToolResult]:
    """
    Execute multiple tool calls concurrently.

    Args:
        tools: Registry of available tools, keyed by tool name.
        tool_calls: Tool calls to execute.

    Returns:
        List of tool results, in the same order as ``tool_calls``
        (asyncio.gather preserves ordering).
    """
    import asyncio

    async def run_one(tc: ToolCall) -> ToolResult:
        tool = tools.get(tc.name)
        if tool is None:
            # A call to an unregistered name becomes an error result
            # rather than an exception.
            return ToolResult(
                tool_call_id=tc.id,
                name=tc.name,
                result=None,
                error=f"Unknown tool: {tc.name}",
            )
        outcome, failure = await execute_tool(tool, tc.arguments)
        return ToolResult(
            tool_call_id=tc.id,
            name=tc.name,
            result=outcome,
            error=failure,
        )

    return await asyncio.gather(*(run_one(tc) for tc in tool_calls))
135
+
136
+
137
def extract_tool_calls(response: "CompletionResponse") -> list[ToolCall]:
    """
    Extract tool calls from a completion response.

    Supports both litellm ModelResponse objects (attribute access) and
    plain dict responses (key access).

    Args:
        response: The completion response.

    Returns:
        List of tool calls; empty when the response contains none.
    """
    raw = response.raw_response
    if raw is None:
        return []

    # litellm ModelResponse: navigate via attributes.
    if hasattr(raw, "choices"):
        choices = raw.choices
        if choices:
            msg = choices[0].message
            calls = getattr(msg, "tool_calls", None)
            if calls:
                return [ToolCall.from_openai(c.model_dump()) for c in calls]

    # Plain dict response: navigate via keys.
    if isinstance(raw, dict):
        dict_choices = raw.get("choices", [])
        if dict_choices:
            msg = dict_choices[0].get("message", {})
            return [ToolCall.from_openai(c) for c in msg.get("tool_calls", [])]

    return []
169
+
170
+
171
def build_tool_registry(
    tools: list[ToolDefinition | Callable[..., Any]],
) -> dict[str, ToolDefinition]:
    """
    Build a name -> definition registry from a list of tools.

    Accepts both ToolDefinition objects and @tool decorated functions
    (which carry their definition in a ``_tool_definition`` attribute).

    Raises:
        ValueError: If an entry is neither of the accepted forms.
    """
    registry: dict[str, ToolDefinition] = {}
    for entry in tools:
        if isinstance(entry, ToolDefinition):
            definition = entry
        elif hasattr(entry, "_tool_definition"):
            definition = entry._tool_definition
        else:
            raise ValueError(f"Not a valid tool: {entry}")
        registry[definition.name] = definition
    return registry
189
+
190
+
191
async def run_tool_loop(
    client: "Flashlite",
    messages: list["Message"],
    tools: list[ToolDefinition | Callable[..., Any]],
    *,
    model: str | None = None,
    max_iterations: int = 10,
    execute_parallel: bool = True,
    on_tool_call: Callable[[ToolCall], None] | None = None,
    on_tool_result: Callable[[ToolResult], None] | None = None,
    **completion_kwargs: Any,
) -> ToolLoopResult:
    """
    Run a tool execution loop until the model stops calling tools.

    This implements the standard agentic pattern:
    1. Call the model with messages and tools
    2. If model requests tool calls, execute them
    3. Add tool results to messages and repeat
    4. Continue until model returns without tool calls or max iterations

    Args:
        client: Flashlite client for completions
        messages: Initial messages
        tools: List of tools available to the model
        model: Model to use (defaults to client's default)
        max_iterations: Maximum tool call rounds (default: 10)
        execute_parallel: Execute multiple tool calls in parallel
        on_tool_call: Callback when a tool is called
        on_tool_result: Callback when a tool returns
        **completion_kwargs: Additional args passed to complete()

    Returns:
        ToolLoopResult with final response and history

    Example:
        @tool()
        def get_weather(location: str) -> str:
            '''Get weather for a location.'''
            return f"Weather in {location}: 72°F"

        result = await run_tool_loop(
            client=client,
            messages=[{"role": "user", "content": "What's the weather in NYC?"}],
            tools=[get_weather],
        )
        print(result.content)  # Final response after tool execution
    """
    # Build tool registry and convert to litellm format
    registry = build_tool_registry(tools)

    # Determine provider format based on model name substring match
    # ("claude"/"anthropic" -> Anthropic schema, everything else -> OpenAI).
    effective_model = model or client.config.default_model or ""
    model_lower = effective_model.lower()

    if "claude" in model_lower or "anthropic" in model_lower:
        from .definitions import tools_to_anthropic

        tools_param = tools_to_anthropic(tools)
    else:
        from .definitions import tools_to_openai

        tools_param = tools_to_openai(tools)

    # Track state: a private copy of the history plus accumulators across rounds.
    current_messages = list(messages)
    all_tool_calls: list[ToolCall] = []
    all_tool_results: list[ToolResult] = []
    iterations = 0

    while iterations < max_iterations:
        iterations += 1

        # Call model with tools
        # Pass pre-converted tools via extra kwargs to avoid double-conversion;
        # the dict merge also means a caller-supplied "tools" kwarg is overridden.
        response = await client.complete(
            model=model,
            messages=current_messages,
            **{**completion_kwargs, "tools": tools_param},
        )

        # Check for tool calls
        tool_calls = extract_tool_calls(response)

        if not tool_calls:
            # No more tool calls - we're done
            return ToolLoopResult(
                messages=current_messages,
                final_response=response,
                tool_calls_made=all_tool_calls,
                tool_results=all_tool_results,
                iterations=iterations,
            )

        # Add assistant message with tool calls
        assistant_msg: dict[str, Any] = {"role": "assistant", "content": response.content or ""}

        # Add tool_calls to message (needed for conversation continuity)
        # NOTE(review): only the attribute-style (litellm ModelResponse) raw
        # response is echoed here; extract_tool_calls also handles dict-shaped
        # raw responses, whose tool_calls would NOT be re-attached to the
        # assistant message — confirm dict-returning providers still continue
        # the conversation correctly.
        if response.raw_response and hasattr(response.raw_response, "choices"):
            choices = response.raw_response.choices
            if choices and hasattr(choices[0].message, "tool_calls"):
                assistant_msg["tool_calls"] = [
                    tc.model_dump() for tc in choices[0].message.tool_calls
                ]
        current_messages.append(assistant_msg)

        # Execute tool calls (notify observer before execution)
        if on_tool_call:
            for tc in tool_calls:
                on_tool_call(tc)

        all_tool_calls.extend(tool_calls)

        if execute_parallel:
            results = await execute_tools_parallel(registry, tool_calls)
        else:
            # Sequential path: same error handling as the parallel path,
            # but tools run one at a time in request order.
            results = []
            for tc in tool_calls:
                tool = registry.get(tc.name)
                if tool:
                    result, error = await execute_tool(tool, tc.arguments)
                    results.append(
                        ToolResult(
                            tool_call_id=tc.id,
                            name=tc.name,
                            result=result,
                            error=error,
                        )
                    )
                else:
                    results.append(
                        ToolResult(
                            tool_call_id=tc.id,
                            name=tc.name,
                            result=None,
                            error=f"Unknown tool: {tc.name}",
                        )
                    )

        # Add tool results to messages (and notify observer per result)
        for tr in results:
            if on_tool_result:
                on_tool_result(tr)
            all_tool_results.append(tr)
            current_messages.append(tr.to_message())

    # Max iterations reached
    logger.warning(f"Tool loop reached max iterations ({max_iterations})")

    # Make final call WITHOUT tools so the model must produce a plain
    # assistant answer instead of requesting yet another round.
    response = await client.complete(
        model=model,
        messages=current_messages,
        **completion_kwargs,
    )

    return ToolLoopResult(
        messages=current_messages,
        final_response=response,
        tool_calls_made=all_tool_calls,
        tool_results=all_tool_results,
        iterations=iterations,
    )
flashlite/types.py ADDED
@@ -0,0 +1,233 @@
1
+ """Shared types and protocols for flashlite."""
2
+
3
+ from collections.abc import Awaitable, Callable, Sequence
4
+ from dataclasses import dataclass, field
5
+ from typing import Any, Literal, Protocol, TypedDict, TypeVar
6
+
7
+ from pydantic import BaseModel
8
+
9
# Type aliases for messages
# Role: the closed set of allowed values for a chat message's "role" field.
Role = Literal["system", "user", "assistant", "tool"]
11
+
12
+
13
class MessageDict(TypedDict, total=False):
    """A chat message in dictionary form.

    ``total=False``: every key is optional, so partial messages (e.g. a
    bare role/content pair) type-check.
    """

    role: Role
    content: str
    name: str  # optional sender name — presumably the OpenAI "name" field; confirm
    tool_calls: list[dict[str, Any]]  # assistant-issued tool calls
    tool_call_id: str  # links a tool-role message back to its originating call
21
+
22
+
23
# A message is either the typed dict above or any plain dict.
Message = MessageDict | dict[str, Any]
# A conversation is any sequence of messages.
Messages = Sequence[Message]
25
+
26
+
27
+ # Convenience function for creating thinking config
28
+ def thinking_enabled(budget_tokens: int) -> "ThinkingConfig":
29
+ """
30
+ Create an Anthropic extended thinking configuration.
31
+
32
+ Args:
33
+ budget_tokens: Maximum tokens for Claude's internal reasoning.
34
+ Minimum is 1024. Larger budgets (16k+) recommended for complex tasks.
35
+
36
+ Returns:
37
+ ThinkingConfig dict to pass to complete()
38
+
39
+ Example:
40
+ await client.complete(
41
+ model="claude-sonnet-4-5-20250929",
42
+ messages="Solve this complex problem...",
43
+ thinking=thinking_enabled(10000),
44
+ )
45
+ """
46
+ return {"type": "enabled", "budget_tokens": budget_tokens}
47
+
48
+
49
class ThinkingConfig(TypedDict, total=False):
    """Configuration for Anthropic extended thinking.

    Passed through to litellm as-is; see thinking_enabled() for a helper
    that builds the "enabled" form.
    """

    type: Literal["enabled", "disabled"]
    budget_tokens: int  # reasoning-token budget (set alongside type="enabled")
54
+
55
+
56
@dataclass
class CompletionRequest:
    """A request to complete a chat conversation."""

    model: str
    messages: Messages
    temperature: float | None = None
    max_tokens: int | None = None
    max_completion_tokens: int | None = None
    top_p: float | None = None
    stop: str | list[str] | None = None
    # OpenAI reasoning model parameters (o1, o3)
    reasoning_effort: Literal["low", "medium", "high"] | None = None
    # Anthropic extended thinking parameters (Claude)
    thinking: ThinkingConfig | None = None
    # Additional kwargs passed through to litellm
    extra_kwargs: dict[str, Any] = field(default_factory=dict)

    def to_litellm_kwargs(self) -> dict[str, Any]:
        """Convert to a kwargs dict for litellm.completion().

        Only parameters that were explicitly set (not None) are included.
        ``extra_kwargs`` are merged last and may override anything.
        """
        # Candidate optional parameters, in the order they should appear.
        optional = {
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "max_completion_tokens": self.max_completion_tokens,
            "top_p": self.top_p,
            "stop": self.stop,
            # OpenAI reasoning effort (o1, o3 models)
            "reasoning_effort": self.reasoning_effort,
            # Anthropic extended thinking (Claude models)
            "thinking": self.thinking,
        }

        kwargs: dict[str, Any] = {
            "model": self.model,
            "messages": list(self.messages),
        }
        kwargs.update({k: v for k, v in optional.items() if v is not None})

        # Caller-supplied extras win over everything set above.
        kwargs.update(self.extra_kwargs)
        return kwargs
103
+
104
+
105
+ @dataclass
106
+ class CompletionResponse:
107
+ """A response from a completion request."""
108
+
109
+ content: str
110
+ model: str
111
+ finish_reason: str | None = None
112
+ usage: "UsageInfo | None" = None
113
+ raw_response: Any = None
114
+
115
+ @property
116
+ def input_tokens(self) -> int:
117
+ """Get input token count."""
118
+ return self.usage.input_tokens if self.usage else 0
119
+
120
+ @property
121
+ def output_tokens(self) -> int:
122
+ """Get output token count."""
123
+ return self.usage.output_tokens if self.usage else 0
124
+
125
+
126
+ @dataclass
127
+ class UsageInfo:
128
+ """Token usage information."""
129
+
130
+ input_tokens: int = 0
131
+ output_tokens: int = 0
132
+ total_tokens: int = 0
133
+
134
+ @classmethod
135
+ def from_litellm(cls, usage: dict[str, Any] | None) -> "UsageInfo":
136
+ """Create from litellm usage dict."""
137
+ if not usage:
138
+ return cls()
139
+ return cls(
140
+ input_tokens=usage.get("prompt_tokens", 0),
141
+ output_tokens=usage.get("completion_tokens", 0),
142
+ total_tokens=usage.get("total_tokens", 0),
143
+ )
144
+
145
+
146
# Response model type variable
# Bound to pydantic BaseModel — presumably used by the structured-output
# helpers to return instances of the caller's model class; confirm usage.
ResponseModelT = TypeVar("ResponseModelT", bound=BaseModel)
148
+
149
+
150
# Middleware protocol
class MiddlewareProtocol(Protocol):
    """Protocol for middleware that wraps completion calls.

    A middleware is an async callable receiving the request plus the next
    handler in the chain; it may short-circuit (return a response without
    calling ``next_handler``) or delegate and post-process the result.
    """

    async def __call__(
        self,
        request: CompletionRequest,
        next_handler: Callable[[CompletionRequest], Awaitable[CompletionResponse]],
    ) -> CompletionResponse:
        """Process a request, optionally delegating to the next handler."""
        ...
161
+
162
+
163
# Configuration types
@dataclass
class RetryConfig:
    """Configuration for retry behavior.

    Field names suggest exponential backoff with optional jitter; the exact
    delay formula lives in the retry middleware (not visible here) — confirm
    there before relying on precise semantics.
    """

    max_attempts: int = 3  # presumably includes the initial attempt — verify in retry middleware
    initial_delay: float = 1.0  # starting backoff delay (seconds, presumably)
    max_delay: float = 60.0  # cap on the backoff delay
    exponential_base: float = 2.0  # per-attempt delay multiplier
    jitter: bool = True  # randomize delays to avoid synchronized retries
    # HTTP status codes to retry on
    retry_on_status: tuple[int, ...] = (429, 500, 502, 503, 504)
175
+
176
+
177
@dataclass
class RateLimitConfig:
    """Configuration for rate limiting.

    A limit of None presumably means "unlimited" for that dimension —
    confirm against the rate-limit middleware.
    """

    requests_per_minute: float | None = None
    tokens_per_minute: float | None = None
    # If True, read limits from API response headers
    auto_detect: bool = False
185
+
186
+
187
# Exceptions
class FlashliteError(Exception):
    """Root of the flashlite exception hierarchy.

    Catch this to handle any library-specific failure.
    """
192
+
193
+
194
class CompletionError(FlashliteError):
    """Raised when a completion request fails.

    Attributes:
        status_code: HTTP status code of the failure, when known.
        response: Raw provider response associated with the failure.
    """

    def __init__(
        self,
        message: str,
        status_code: int | None = None,
        response: Any = None,
    ):
        self.status_code = status_code
        self.response = response
        super().__init__(message)
206
+
207
+
208
class RateLimitError(FlashliteError):
    """Raised when a rate limit is exceeded.

    Attributes:
        retry_after: Suggested wait (in seconds, presumably) before
            retrying, when the provider supplied one.
    """

    def __init__(self, message: str, retry_after: float | None = None):
        self.retry_after = retry_after
        super().__init__(message)
214
+
215
+
216
class ValidationError(FlashliteError):
    """Raised when response validation fails.

    Attributes:
        errors: Individual validation failures; empty list when none
            were provided.
    """

    def __init__(self, message: str, errors: list[Any] | None = None):
        super().__init__(message)
        # `errors or []` also maps an explicitly-passed empty list to a fresh one.
        self.errors = errors or []
222
+
223
+
224
class TemplateError(FlashliteError):
    """Raised when template rendering fails."""

    pass
228
+
229
+
230
class ConfigError(FlashliteError):
    """Raised for invalid or inconsistent configuration."""

    pass