flashlite 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flashlite/__init__.py +169 -0
- flashlite/cache/__init__.py +14 -0
- flashlite/cache/base.py +194 -0
- flashlite/cache/disk.py +285 -0
- flashlite/cache/memory.py +157 -0
- flashlite/client.py +671 -0
- flashlite/config.py +154 -0
- flashlite/conversation/__init__.py +30 -0
- flashlite/conversation/context.py +319 -0
- flashlite/conversation/manager.py +385 -0
- flashlite/conversation/multi_agent.py +378 -0
- flashlite/core/__init__.py +13 -0
- flashlite/core/completion.py +145 -0
- flashlite/core/messages.py +130 -0
- flashlite/middleware/__init__.py +18 -0
- flashlite/middleware/base.py +90 -0
- flashlite/middleware/cache.py +121 -0
- flashlite/middleware/logging.py +159 -0
- flashlite/middleware/rate_limit.py +211 -0
- flashlite/middleware/retry.py +149 -0
- flashlite/observability/__init__.py +34 -0
- flashlite/observability/callbacks.py +155 -0
- flashlite/observability/inspect_compat.py +266 -0
- flashlite/observability/logging.py +293 -0
- flashlite/observability/metrics.py +221 -0
- flashlite/py.typed +0 -0
- flashlite/structured/__init__.py +31 -0
- flashlite/structured/outputs.py +189 -0
- flashlite/structured/schema.py +165 -0
- flashlite/templating/__init__.py +11 -0
- flashlite/templating/engine.py +217 -0
- flashlite/templating/filters.py +143 -0
- flashlite/templating/registry.py +165 -0
- flashlite/tools/__init__.py +74 -0
- flashlite/tools/definitions.py +382 -0
- flashlite/tools/execution.py +353 -0
- flashlite/types.py +233 -0
- flashlite-0.1.0.dist-info/METADATA +173 -0
- flashlite-0.1.0.dist-info/RECORD +41 -0
- flashlite-0.1.0.dist-info/WHEEL +4 -0
- flashlite-0.1.0.dist-info/licenses/LICENSE.md +21 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""Structured logging for flashlite."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from datetime import UTC, datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, TextIO
|
|
12
|
+
|
|
13
|
+
from ..types import CompletionRequest, CompletionResponse
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class RequestLogEntry:
    """A structured log entry for a completion request."""

    request_id: str
    timestamp: str
    model: str
    messages: list[dict[str, Any]]
    parameters: dict[str, Any]

    def to_dict(self) -> dict[str, Any]:
        """Serialize this entry as a JSON-ready dictionary tagged "type": "request"."""
        payload: dict[str, Any] = {"type": "request"}
        for attr in ("request_id", "timestamp", "model", "messages", "parameters"):
            payload[attr] = getattr(self, attr)
        return payload
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ResponseLogEntry:
    """A structured log entry for a completion response."""

    request_id: str
    timestamp: str
    model: str
    content: str
    finish_reason: str | None
    input_tokens: int
    output_tokens: int
    total_tokens: int
    latency_ms: float
    cached: bool = False
    error: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize as a JSON-ready dict, nesting the token counts under "usage"."""
        token_usage = {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.total_tokens,
        }
        return {
            "type": "response",
            "request_id": self.request_id,
            "timestamp": self.timestamp,
            "model": self.model,
            "content": self.content,
            "finish_reason": self.finish_reason,
            "usage": token_usage,
            "latency_ms": self.latency_ms,
            "cached": self.cached,
            "error": self.error,
        }
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class StructuredLogger:
|
|
75
|
+
"""
|
|
76
|
+
A structured logger that outputs JSON-formatted log entries.
|
|
77
|
+
|
|
78
|
+
Can write to files, stdout, or both. Supports log rotation
|
|
79
|
+
and customizable formatting.
|
|
80
|
+
|
|
81
|
+
Example:
|
|
82
|
+
logger = StructuredLogger(
|
|
83
|
+
log_file="./logs/completions.jsonl",
|
|
84
|
+
log_level="INFO",
|
|
85
|
+
include_messages=True,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Log a request
|
|
89
|
+
logger.log_request(request, request_id)
|
|
90
|
+
|
|
91
|
+
# Log a response
|
|
92
|
+
logger.log_response(response, request_id, latency_ms)
|
|
93
|
+
"""
|
|
94
|
+
|
|
95
|
+
def __init__(
|
|
96
|
+
self,
|
|
97
|
+
log_file: str | Path | None = None,
|
|
98
|
+
log_level: str = "INFO",
|
|
99
|
+
include_messages: bool = True,
|
|
100
|
+
include_content: bool = True,
|
|
101
|
+
max_content_length: int | None = None,
|
|
102
|
+
redact_patterns: list[str] | None = None,
|
|
103
|
+
stdout: bool = False,
|
|
104
|
+
):
|
|
105
|
+
"""
|
|
106
|
+
Initialize the structured logger.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
log_file: Path to log file (JSONL format). None disables file logging.
|
|
110
|
+
log_level: Minimum log level ("DEBUG", "INFO", "WARNING", "ERROR")
|
|
111
|
+
include_messages: Whether to include full message content in logs
|
|
112
|
+
include_content: Whether to include response content in logs
|
|
113
|
+
max_content_length: Max length of content to log (None = unlimited)
|
|
114
|
+
redact_patterns: Patterns to redact from logs (e.g., API keys)
|
|
115
|
+
stdout: Whether to also log to stdout
|
|
116
|
+
"""
|
|
117
|
+
self._log_file: Path | None = Path(log_file) if log_file else None
|
|
118
|
+
self._log_level = getattr(logging, log_level.upper())
|
|
119
|
+
self._include_messages = include_messages
|
|
120
|
+
self._include_content = include_content
|
|
121
|
+
self._max_content_length = max_content_length
|
|
122
|
+
self._redact_patterns = redact_patterns or []
|
|
123
|
+
self._stdout = stdout
|
|
124
|
+
self._file_handle: TextIO | None = None
|
|
125
|
+
|
|
126
|
+
# Ensure log directory exists
|
|
127
|
+
if self._log_file:
|
|
128
|
+
self._log_file.parent.mkdir(parents=True, exist_ok=True)
|
|
129
|
+
self._file_handle = open(self._log_file, "a")
|
|
130
|
+
|
|
131
|
+
def _get_timestamp(self) -> str:
|
|
132
|
+
"""Get current timestamp in ISO format."""
|
|
133
|
+
return datetime.now(UTC).isoformat()
|
|
134
|
+
|
|
135
|
+
def _redact(self, text: str) -> str:
|
|
136
|
+
"""Redact sensitive patterns from text."""
|
|
137
|
+
for pattern in self._redact_patterns:
|
|
138
|
+
text = text.replace(pattern, "[REDACTED]")
|
|
139
|
+
return text
|
|
140
|
+
|
|
141
|
+
def _truncate(self, text: str) -> str:
|
|
142
|
+
"""Truncate text if max length is set."""
|
|
143
|
+
if self._max_content_length and len(text) > self._max_content_length:
|
|
144
|
+
return text[: self._max_content_length] + "... [truncated]"
|
|
145
|
+
return text
|
|
146
|
+
|
|
147
|
+
def _write_entry(self, entry: dict[str, Any]) -> None:
|
|
148
|
+
"""Write a log entry."""
|
|
149
|
+
json_str = json.dumps(entry, default=str)
|
|
150
|
+
|
|
151
|
+
if self._file_handle:
|
|
152
|
+
self._file_handle.write(json_str + "\n")
|
|
153
|
+
self._file_handle.flush()
|
|
154
|
+
|
|
155
|
+
if self._stdout:
|
|
156
|
+
print(json_str, file=sys.stdout)
|
|
157
|
+
|
|
158
|
+
def log_request(
|
|
159
|
+
self,
|
|
160
|
+
request: CompletionRequest,
|
|
161
|
+
request_id: str | None = None,
|
|
162
|
+
) -> str:
|
|
163
|
+
"""
|
|
164
|
+
Log a completion request.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
request: The completion request
|
|
168
|
+
request_id: Optional request ID (generated if not provided)
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
The request ID
|
|
172
|
+
"""
|
|
173
|
+
if request_id is None:
|
|
174
|
+
request_id = str(uuid.uuid4())
|
|
175
|
+
|
|
176
|
+
# Build parameters dict
|
|
177
|
+
params: dict[str, Any] = {}
|
|
178
|
+
if request.temperature is not None:
|
|
179
|
+
params["temperature"] = request.temperature
|
|
180
|
+
if request.max_tokens is not None:
|
|
181
|
+
params["max_tokens"] = request.max_tokens
|
|
182
|
+
if request.max_completion_tokens is not None:
|
|
183
|
+
params["max_completion_tokens"] = request.max_completion_tokens
|
|
184
|
+
if request.top_p is not None:
|
|
185
|
+
params["top_p"] = request.top_p
|
|
186
|
+
if request.stop is not None:
|
|
187
|
+
params["stop"] = request.stop
|
|
188
|
+
if request.reasoning_effort is not None:
|
|
189
|
+
params["reasoning_effort"] = request.reasoning_effort
|
|
190
|
+
if request.thinking is not None:
|
|
191
|
+
params["thinking"] = request.thinking
|
|
192
|
+
params.update(request.extra_kwargs)
|
|
193
|
+
|
|
194
|
+
# Build messages
|
|
195
|
+
messages: list[dict[str, Any]] = []
|
|
196
|
+
if self._include_messages:
|
|
197
|
+
for msg in request.messages:
|
|
198
|
+
msg_dict = dict(msg)
|
|
199
|
+
if "content" in msg_dict:
|
|
200
|
+
content = self._truncate(self._redact(str(msg_dict["content"])))
|
|
201
|
+
msg_dict["content"] = content
|
|
202
|
+
messages.append(msg_dict)
|
|
203
|
+
|
|
204
|
+
entry = RequestLogEntry(
|
|
205
|
+
request_id=request_id,
|
|
206
|
+
timestamp=self._get_timestamp(),
|
|
207
|
+
model=request.model,
|
|
208
|
+
messages=messages,
|
|
209
|
+
parameters=params,
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
self._write_entry(entry.to_dict())
|
|
213
|
+
return request_id
|
|
214
|
+
|
|
215
|
+
def log_response(
|
|
216
|
+
self,
|
|
217
|
+
response: CompletionResponse,
|
|
218
|
+
request_id: str,
|
|
219
|
+
latency_ms: float,
|
|
220
|
+
cached: bool = False,
|
|
221
|
+
) -> None:
|
|
222
|
+
"""
|
|
223
|
+
Log a completion response.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
response: The completion response
|
|
227
|
+
request_id: The corresponding request ID
|
|
228
|
+
latency_ms: Request latency in milliseconds
|
|
229
|
+
cached: Whether the response was from cache
|
|
230
|
+
"""
|
|
231
|
+
content = ""
|
|
232
|
+
if self._include_content:
|
|
233
|
+
content = self._truncate(self._redact(response.content))
|
|
234
|
+
|
|
235
|
+
entry = ResponseLogEntry(
|
|
236
|
+
request_id=request_id,
|
|
237
|
+
timestamp=self._get_timestamp(),
|
|
238
|
+
model=response.model,
|
|
239
|
+
content=content,
|
|
240
|
+
finish_reason=response.finish_reason,
|
|
241
|
+
input_tokens=response.usage.input_tokens if response.usage else 0,
|
|
242
|
+
output_tokens=response.usage.output_tokens if response.usage else 0,
|
|
243
|
+
total_tokens=response.usage.total_tokens if response.usage else 0,
|
|
244
|
+
latency_ms=latency_ms,
|
|
245
|
+
cached=cached,
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
self._write_entry(entry.to_dict())
|
|
249
|
+
|
|
250
|
+
def log_error(
|
|
251
|
+
self,
|
|
252
|
+
request_id: str,
|
|
253
|
+
error: Exception,
|
|
254
|
+
latency_ms: float,
|
|
255
|
+
) -> None:
|
|
256
|
+
"""
|
|
257
|
+
Log an error response.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
request_id: The corresponding request ID
|
|
261
|
+
error: The exception that occurred
|
|
262
|
+
latency_ms: Request latency in milliseconds
|
|
263
|
+
"""
|
|
264
|
+
entry = {
|
|
265
|
+
"type": "error",
|
|
266
|
+
"request_id": request_id,
|
|
267
|
+
"timestamp": self._get_timestamp(),
|
|
268
|
+
"error": str(error),
|
|
269
|
+
"error_type": type(error).__name__,
|
|
270
|
+
"latency_ms": latency_ms,
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
self._write_entry(entry)
|
|
274
|
+
|
|
275
|
+
def close(self) -> None:
|
|
276
|
+
"""Close the log file."""
|
|
277
|
+
if self._file_handle:
|
|
278
|
+
self._file_handle.close()
|
|
279
|
+
self._file_handle = None
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
@dataclass
class RequestContext:
    """Context for tracking a single request through the pipeline."""

    request_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    start_time: float = field(default_factory=time.perf_counter)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def elapsed_ms(self) -> float:
        """Milliseconds elapsed since this context was created."""
        now = time.perf_counter()
        return 1000.0 * (now - self.start_time)
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Metrics and cost tracking for flashlite."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from ..types import CompletionResponse
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Approximate costs per 1K tokens (USD) as of early 2025
# These are estimates and may be outdated - use litellm's cost tracking for accuracy
# Each value maps {"input": USD per 1K prompt tokens, "output": USD per 1K
# completion tokens}. Keys are matched first exactly, then as case-insensitive
# prefixes of the model name (see CostTracker._get_model_cost), so dated
# variants like "gpt-4o-2024-08-06" resolve to their base entry.
DEFAULT_COSTS: dict[str, dict[str, float]] = {
    # OpenAI
    "gpt-4o": {"input": 0.0025, "output": 0.01},
    "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
    "gpt-4-turbo": {"input": 0.01, "output": 0.03},
    "gpt-4": {"input": 0.03, "output": 0.06},
    "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
    "o1": {"input": 0.015, "output": 0.06},
    "o1-mini": {"input": 0.003, "output": 0.012},
    "o3-mini": {"input": 0.003, "output": 0.012},
    # Anthropic
    "claude-3-5-sonnet-20241022": {"input": 0.003, "output": 0.015},
    "claude-sonnet-4-20250514": {"input": 0.003, "output": 0.015},
    "claude-3-5-haiku-20241022": {"input": 0.001, "output": 0.005},
    "claude-3-opus-20240229": {"input": 0.015, "output": 0.075},
    # Add more as needed
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class CostMetrics:
    """Accumulated cost metrics."""

    # Running totals across every tracked response.
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_requests: int = 0
    # Estimated spend in USD, summed from per-response estimates.
    total_cost_usd: float = 0.0
    # Per-model breakdowns: model name -> estimated USD spent.
    cost_by_model: dict[str, float] = field(default_factory=dict)
    # Per-model token counts: model name -> {"input": n, "output": n}.
    tokens_by_model: dict[str, dict[str, int]] = field(default_factory=dict)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class CostTracker:
    """
    Tracks token usage and estimated costs across requests.

    Costs are rough estimates derived from per-model USD prices per 1K
    tokens (see DEFAULT_COSTS); use litellm's cost tracking when accuracy
    matters.

    Example:
        tracker = CostTracker(budget_limit=10.0)

        # Track a response
        tracker.track(response)

        # Check totals
        print(f"Total cost: ${tracker.total_cost:.2f}")
        print(f"Budget remaining: ${tracker.budget_remaining:.2f}")

        # Export report
        report = tracker.get_report()
    """

    def __init__(
        self,
        budget_limit: float | None = None,
        warn_at_percent: float = 80.0,
        custom_costs: dict[str, dict[str, float]] | None = None,
    ):
        """
        Initialize the cost tracker.

        Args:
            budget_limit: Maximum budget in USD (None = no limit)
            warn_at_percent: Warn when this percentage of budget is used
            custom_costs: Custom cost overrides per model
        """
        self._budget_limit = budget_limit
        self._warn_at_percent = warn_at_percent
        # Custom entries override defaults on key collisions.
        self._costs = {**DEFAULT_COSTS, **(custom_costs or {})}
        self._metrics = CostMetrics()
        # Ensures the soft-budget warning is logged at most once per reset.
        self._budget_warning_issued = False

    def _get_model_cost(self, model: str) -> dict[str, float]:
        """
        Get cost per 1K tokens for a model.

        Resolution order: exact key match; then the LONGEST known key that
        is a case-insensitive prefix of the model name (so "gpt-4o-2024-..."
        resolves to "gpt-4o" rather than "gpt-4", regardless of dict order);
        finally a gpt-4o-priced fallback estimate.
        """
        # Exact match
        if model in self._costs:
            return self._costs[model]

        # Longest-prefix match (e.g., "gpt-4o" matches "gpt-4o-2024-...").
        # The original code took the first match in insertion order, which
        # depends on dict ordering when keys overlap; longest wins instead.
        model_lower = model.lower()
        best_key: str | None = None
        best_cost: dict[str, float] | None = None
        for known_model, cost in self._costs.items():
            if model_lower.startswith(known_model.lower()):
                if best_key is None or len(known_model) > len(best_key):
                    best_key = known_model
                    best_cost = cost
        if best_cost is not None:
            return best_cost

        # Default to GPT-4o pricing as a reasonable estimate.
        # Lazy %-args avoid formatting work when DEBUG logging is disabled.
        logger.debug("Unknown model cost for '%s', using gpt-4o pricing estimate", model)
        return self._costs.get("gpt-4o", {"input": 0.0025, "output": 0.01})

    def track(self, response: CompletionResponse) -> float:
        """
        Track a completion response and return its cost.

        Args:
            response: The completion response to track

        Returns:
            The estimated cost in USD for this response

        Raises:
            BudgetExceededError: If the accumulated cost reaches budget_limit.
        """
        # Without usage data there is nothing to estimate.
        if not response.usage:
            return 0.0

        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        model = response.model

        # Calculate cost (prices are per 1K tokens).
        model_costs = self._get_model_cost(model)
        input_cost = (input_tokens / 1000) * model_costs["input"]
        output_cost = (output_tokens / 1000) * model_costs["output"]
        total_cost = input_cost + output_cost

        # Update aggregate metrics
        self._metrics.total_input_tokens += input_tokens
        self._metrics.total_output_tokens += output_tokens
        self._metrics.total_requests += 1
        self._metrics.total_cost_usd += total_cost

        # Per-model tracking
        if model not in self._metrics.cost_by_model:
            self._metrics.cost_by_model[model] = 0.0
            self._metrics.tokens_by_model[model] = {"input": 0, "output": 0}

        self._metrics.cost_by_model[model] += total_cost
        self._metrics.tokens_by_model[model]["input"] += input_tokens
        self._metrics.tokens_by_model[model]["output"] += output_tokens

        # Check budget after the update so this response counts toward it.
        self._check_budget()

        return total_cost

    def _check_budget(self) -> None:
        """Warn once past the soft threshold; raise once the hard limit is hit."""
        if self._budget_limit is None:
            return

        percent_used = (self._metrics.total_cost_usd / self._budget_limit) * 100

        # Warning threshold (issued at most once until reset()).
        if percent_used >= self._warn_at_percent and not self._budget_warning_issued:
            logger.warning(
                f"Budget warning: {percent_used:.1f}% of ${self._budget_limit:.2f} budget used "
                f"(${self._metrics.total_cost_usd:.4f} spent)"
            )
            self._budget_warning_issued = True

        # Hard limit
        if self._metrics.total_cost_usd >= self._budget_limit:
            raise BudgetExceededError(
                f"Budget limit of ${self._budget_limit:.2f} exceeded "
                f"(${self._metrics.total_cost_usd:.4f} spent)"
            )

    @property
    def total_cost(self) -> float:
        """Total estimated cost in USD."""
        return self._metrics.total_cost_usd

    @property
    def total_tokens(self) -> int:
        """Total tokens used (input + output)."""
        return self._metrics.total_input_tokens + self._metrics.total_output_tokens

    @property
    def total_requests(self) -> int:
        """Total number of requests tracked."""
        return self._metrics.total_requests

    @property
    def budget_remaining(self) -> float | None:
        """Remaining budget in USD (never negative), or None if no limit."""
        if self._budget_limit is None:
            return None
        return max(0.0, self._budget_limit - self._metrics.total_cost_usd)

    def get_report(self) -> dict[str, Any]:
        """
        Get a detailed cost report.

        Returns:
            Dictionary with totals, budget state, and a per-model breakdown.
        """
        return {
            "total_cost_usd": self._metrics.total_cost_usd,
            "total_requests": self._metrics.total_requests,
            "total_tokens": {
                "input": self._metrics.total_input_tokens,
                "output": self._metrics.total_output_tokens,
                "total": self.total_tokens,
            },
            "budget_limit_usd": self._budget_limit,
            "budget_remaining_usd": self.budget_remaining,
            "by_model": {
                model: {
                    "cost_usd": self._metrics.cost_by_model[model],
                    "tokens": self._metrics.tokens_by_model[model],
                }
                for model in self._metrics.cost_by_model
            },
        }

    def reset(self) -> None:
        """Reset all metrics and re-arm the budget warning."""
        self._metrics = CostMetrics()
        self._budget_warning_issued = False
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class BudgetExceededError(Exception):
    """Raised when accumulated spend reaches the configured budget limit."""
|
flashlite/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Structured outputs module for Pydantic model integration."""
|
|
2
|
+
|
|
3
|
+
from .outputs import (
|
|
4
|
+
StructuredOutputError,
|
|
5
|
+
extract_json_from_content,
|
|
6
|
+
format_validation_error_for_retry,
|
|
7
|
+
parse_json_response,
|
|
8
|
+
validate_response,
|
|
9
|
+
)
|
|
10
|
+
from .schema import (
|
|
11
|
+
format_schema_for_openai,
|
|
12
|
+
generate_json_schema,
|
|
13
|
+
get_field_descriptions,
|
|
14
|
+
is_supported_type,
|
|
15
|
+
schema_to_prompt,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Public API of flashlite.structured, re-exported above from .schema and .outputs.
__all__ = [
    # Schema generation
    "generate_json_schema",
    "schema_to_prompt",
    "get_field_descriptions",
    "format_schema_for_openai",
    "is_supported_type",
    # Parsing and validation
    "parse_json_response",
    "validate_response",
    "format_validation_error_for_retry",
    "extract_json_from_content",
    "StructuredOutputError",
]
|