flashlite 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. flashlite/__init__.py +169 -0
  2. flashlite/cache/__init__.py +14 -0
  3. flashlite/cache/base.py +194 -0
  4. flashlite/cache/disk.py +285 -0
  5. flashlite/cache/memory.py +157 -0
  6. flashlite/client.py +671 -0
  7. flashlite/config.py +154 -0
  8. flashlite/conversation/__init__.py +30 -0
  9. flashlite/conversation/context.py +319 -0
  10. flashlite/conversation/manager.py +385 -0
  11. flashlite/conversation/multi_agent.py +378 -0
  12. flashlite/core/__init__.py +13 -0
  13. flashlite/core/completion.py +145 -0
  14. flashlite/core/messages.py +130 -0
  15. flashlite/middleware/__init__.py +18 -0
  16. flashlite/middleware/base.py +90 -0
  17. flashlite/middleware/cache.py +121 -0
  18. flashlite/middleware/logging.py +159 -0
  19. flashlite/middleware/rate_limit.py +211 -0
  20. flashlite/middleware/retry.py +149 -0
  21. flashlite/observability/__init__.py +34 -0
  22. flashlite/observability/callbacks.py +155 -0
  23. flashlite/observability/inspect_compat.py +266 -0
  24. flashlite/observability/logging.py +293 -0
  25. flashlite/observability/metrics.py +221 -0
  26. flashlite/py.typed +0 -0
  27. flashlite/structured/__init__.py +31 -0
  28. flashlite/structured/outputs.py +189 -0
  29. flashlite/structured/schema.py +165 -0
  30. flashlite/templating/__init__.py +11 -0
  31. flashlite/templating/engine.py +217 -0
  32. flashlite/templating/filters.py +143 -0
  33. flashlite/templating/registry.py +165 -0
  34. flashlite/tools/__init__.py +74 -0
  35. flashlite/tools/definitions.py +382 -0
  36. flashlite/tools/execution.py +353 -0
  37. flashlite/types.py +233 -0
  38. flashlite-0.1.0.dist-info/METADATA +173 -0
  39. flashlite-0.1.0.dist-info/RECORD +41 -0
  40. flashlite-0.1.0.dist-info/WHEEL +4 -0
  41. flashlite-0.1.0.dist-info/licenses/LICENSE.md +21 -0
@@ -0,0 +1,293 @@
1
+ """Structured logging for flashlite."""
2
+
3
+ import json
4
+ import logging
5
+ import sys
6
+ import time
7
+ import uuid
8
+ from dataclasses import dataclass, field
9
+ from datetime import UTC, datetime
10
+ from pathlib import Path
11
+ from typing import Any, TextIO
12
+
13
+ from ..types import CompletionRequest, CompletionResponse
14
+
15
+
16
@dataclass
class RequestLogEntry:
    """A structured log entry describing one completion request."""

    request_id: str
    timestamp: str
    model: str
    messages: list[dict[str, Any]]
    parameters: dict[str, Any]

    def to_dict(self) -> dict[str, Any]:
        """Serialize this entry as a JSON-compatible dictionary.

        Key insertion order is preserved so the emitted JSONL lines are
        stable: the discriminator ``type`` comes first, then the fields.
        """
        payload: dict[str, Any] = {"type": "request"}
        payload["request_id"] = self.request_id
        payload["timestamp"] = self.timestamp
        payload["model"] = self.model
        payload["messages"] = self.messages
        payload["parameters"] = self.parameters
        return payload
36
+
37
+
38
@dataclass
class ResponseLogEntry:
    """A structured log entry describing one completion response."""

    request_id: str
    timestamp: str
    model: str
    content: str
    finish_reason: str | None
    input_tokens: int
    output_tokens: int
    total_tokens: int
    latency_ms: float
    cached: bool = False
    error: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize this entry as a JSON-compatible dictionary.

        Token counts are grouped under a nested ``usage`` mapping; key
        insertion order is preserved so emitted JSONL lines are stable.
        """
        usage = {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.total_tokens,
        }
        payload: dict[str, Any] = {"type": "response"}
        payload["request_id"] = self.request_id
        payload["timestamp"] = self.timestamp
        payload["model"] = self.model
        payload["content"] = self.content
        payload["finish_reason"] = self.finish_reason
        payload["usage"] = usage
        payload["latency_ms"] = self.latency_ms
        payload["cached"] = self.cached
        payload["error"] = self.error
        return payload
72
+
73
+
74
+ class StructuredLogger:
75
+ """
76
+ A structured logger that outputs JSON-formatted log entries.
77
+
78
+ Can write to files, stdout, or both. Supports log rotation
79
+ and customizable formatting.
80
+
81
+ Example:
82
+ logger = StructuredLogger(
83
+ log_file="./logs/completions.jsonl",
84
+ log_level="INFO",
85
+ include_messages=True,
86
+ )
87
+
88
+ # Log a request
89
+ logger.log_request(request, request_id)
90
+
91
+ # Log a response
92
+ logger.log_response(response, request_id, latency_ms)
93
+ """
94
+
95
+ def __init__(
96
+ self,
97
+ log_file: str | Path | None = None,
98
+ log_level: str = "INFO",
99
+ include_messages: bool = True,
100
+ include_content: bool = True,
101
+ max_content_length: int | None = None,
102
+ redact_patterns: list[str] | None = None,
103
+ stdout: bool = False,
104
+ ):
105
+ """
106
+ Initialize the structured logger.
107
+
108
+ Args:
109
+ log_file: Path to log file (JSONL format). None disables file logging.
110
+ log_level: Minimum log level ("DEBUG", "INFO", "WARNING", "ERROR")
111
+ include_messages: Whether to include full message content in logs
112
+ include_content: Whether to include response content in logs
113
+ max_content_length: Max length of content to log (None = unlimited)
114
+ redact_patterns: Patterns to redact from logs (e.g., API keys)
115
+ stdout: Whether to also log to stdout
116
+ """
117
+ self._log_file: Path | None = Path(log_file) if log_file else None
118
+ self._log_level = getattr(logging, log_level.upper())
119
+ self._include_messages = include_messages
120
+ self._include_content = include_content
121
+ self._max_content_length = max_content_length
122
+ self._redact_patterns = redact_patterns or []
123
+ self._stdout = stdout
124
+ self._file_handle: TextIO | None = None
125
+
126
+ # Ensure log directory exists
127
+ if self._log_file:
128
+ self._log_file.parent.mkdir(parents=True, exist_ok=True)
129
+ self._file_handle = open(self._log_file, "a")
130
+
131
+ def _get_timestamp(self) -> str:
132
+ """Get current timestamp in ISO format."""
133
+ return datetime.now(UTC).isoformat()
134
+
135
+ def _redact(self, text: str) -> str:
136
+ """Redact sensitive patterns from text."""
137
+ for pattern in self._redact_patterns:
138
+ text = text.replace(pattern, "[REDACTED]")
139
+ return text
140
+
141
+ def _truncate(self, text: str) -> str:
142
+ """Truncate text if max length is set."""
143
+ if self._max_content_length and len(text) > self._max_content_length:
144
+ return text[: self._max_content_length] + "... [truncated]"
145
+ return text
146
+
147
+ def _write_entry(self, entry: dict[str, Any]) -> None:
148
+ """Write a log entry."""
149
+ json_str = json.dumps(entry, default=str)
150
+
151
+ if self._file_handle:
152
+ self._file_handle.write(json_str + "\n")
153
+ self._file_handle.flush()
154
+
155
+ if self._stdout:
156
+ print(json_str, file=sys.stdout)
157
+
158
+ def log_request(
159
+ self,
160
+ request: CompletionRequest,
161
+ request_id: str | None = None,
162
+ ) -> str:
163
+ """
164
+ Log a completion request.
165
+
166
+ Args:
167
+ request: The completion request
168
+ request_id: Optional request ID (generated if not provided)
169
+
170
+ Returns:
171
+ The request ID
172
+ """
173
+ if request_id is None:
174
+ request_id = str(uuid.uuid4())
175
+
176
+ # Build parameters dict
177
+ params: dict[str, Any] = {}
178
+ if request.temperature is not None:
179
+ params["temperature"] = request.temperature
180
+ if request.max_tokens is not None:
181
+ params["max_tokens"] = request.max_tokens
182
+ if request.max_completion_tokens is not None:
183
+ params["max_completion_tokens"] = request.max_completion_tokens
184
+ if request.top_p is not None:
185
+ params["top_p"] = request.top_p
186
+ if request.stop is not None:
187
+ params["stop"] = request.stop
188
+ if request.reasoning_effort is not None:
189
+ params["reasoning_effort"] = request.reasoning_effort
190
+ if request.thinking is not None:
191
+ params["thinking"] = request.thinking
192
+ params.update(request.extra_kwargs)
193
+
194
+ # Build messages
195
+ messages: list[dict[str, Any]] = []
196
+ if self._include_messages:
197
+ for msg in request.messages:
198
+ msg_dict = dict(msg)
199
+ if "content" in msg_dict:
200
+ content = self._truncate(self._redact(str(msg_dict["content"])))
201
+ msg_dict["content"] = content
202
+ messages.append(msg_dict)
203
+
204
+ entry = RequestLogEntry(
205
+ request_id=request_id,
206
+ timestamp=self._get_timestamp(),
207
+ model=request.model,
208
+ messages=messages,
209
+ parameters=params,
210
+ )
211
+
212
+ self._write_entry(entry.to_dict())
213
+ return request_id
214
+
215
+ def log_response(
216
+ self,
217
+ response: CompletionResponse,
218
+ request_id: str,
219
+ latency_ms: float,
220
+ cached: bool = False,
221
+ ) -> None:
222
+ """
223
+ Log a completion response.
224
+
225
+ Args:
226
+ response: The completion response
227
+ request_id: The corresponding request ID
228
+ latency_ms: Request latency in milliseconds
229
+ cached: Whether the response was from cache
230
+ """
231
+ content = ""
232
+ if self._include_content:
233
+ content = self._truncate(self._redact(response.content))
234
+
235
+ entry = ResponseLogEntry(
236
+ request_id=request_id,
237
+ timestamp=self._get_timestamp(),
238
+ model=response.model,
239
+ content=content,
240
+ finish_reason=response.finish_reason,
241
+ input_tokens=response.usage.input_tokens if response.usage else 0,
242
+ output_tokens=response.usage.output_tokens if response.usage else 0,
243
+ total_tokens=response.usage.total_tokens if response.usage else 0,
244
+ latency_ms=latency_ms,
245
+ cached=cached,
246
+ )
247
+
248
+ self._write_entry(entry.to_dict())
249
+
250
+ def log_error(
251
+ self,
252
+ request_id: str,
253
+ error: Exception,
254
+ latency_ms: float,
255
+ ) -> None:
256
+ """
257
+ Log an error response.
258
+
259
+ Args:
260
+ request_id: The corresponding request ID
261
+ error: The exception that occurred
262
+ latency_ms: Request latency in milliseconds
263
+ """
264
+ entry = {
265
+ "type": "error",
266
+ "request_id": request_id,
267
+ "timestamp": self._get_timestamp(),
268
+ "error": str(error),
269
+ "error_type": type(error).__name__,
270
+ "latency_ms": latency_ms,
271
+ }
272
+
273
+ self._write_entry(entry)
274
+
275
+ def close(self) -> None:
276
+ """Close the log file."""
277
+ if self._file_handle:
278
+ self._file_handle.close()
279
+ self._file_handle = None
280
+
281
+
282
@dataclass
class RequestContext:
    """Per-request tracking context carried through the pipeline.

    Holds a generated request ID, the creation time, and free-form
    metadata for middleware to attach values to.
    """

    request_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    start_time: float = field(default_factory=time.perf_counter)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def elapsed_ms(self) -> float:
        """Milliseconds elapsed since this context was created."""
        now = time.perf_counter()
        return 1000.0 * (now - self.start_time)
@@ -0,0 +1,221 @@
1
+ """Metrics and cost tracking for flashlite."""
2
+
3
+ import logging
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+ from ..types import CompletionResponse
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ # Approximate costs per 1K tokens (USD) as of early 2025
13
+ # These are estimates and may be outdated - use litellm's cost tracking for accuracy
14
+ DEFAULT_COSTS: dict[str, dict[str, float]] = {
15
+ # OpenAI
16
+ "gpt-4o": {"input": 0.0025, "output": 0.01},
17
+ "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
18
+ "gpt-4-turbo": {"input": 0.01, "output": 0.03},
19
+ "gpt-4": {"input": 0.03, "output": 0.06},
20
+ "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
21
+ "o1": {"input": 0.015, "output": 0.06},
22
+ "o1-mini": {"input": 0.003, "output": 0.012},
23
+ "o3-mini": {"input": 0.003, "output": 0.012},
24
+ # Anthropic
25
+ "claude-3-5-sonnet-20241022": {"input": 0.003, "output": 0.015},
26
+ "claude-sonnet-4-20250514": {"input": 0.003, "output": 0.015},
27
+ "claude-3-5-haiku-20241022": {"input": 0.001, "output": 0.005},
28
+ "claude-3-opus-20240229": {"input": 0.015, "output": 0.075},
29
+ # Add more as needed
30
+ }
31
+
32
+
33
@dataclass
class CostMetrics:
    """Accumulated cost metrics across all tracked requests."""

    # Running token totals summed over every tracked response.
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    # Number of responses tracked.
    total_requests: int = 0
    # Estimated spend in USD, summed from per-response estimates.
    total_cost_usd: float = 0.0
    # Per-model breakdown: model name -> accumulated cost in USD.
    cost_by_model: dict[str, float] = field(default_factory=dict)
    # Per-model breakdown: model name -> {"input": tokens, "output": tokens}.
    tokens_by_model: dict[str, dict[str, int]] = field(default_factory=dict)
43
+
44
+
45
class CostTracker:
    """
    Tracks token usage and estimated costs across requests.

    Example:
        tracker = CostTracker(budget_limit=10.0)

        # Track a response
        tracker.track(response)

        # Check totals
        print(f"Total cost: ${tracker.total_cost:.2f}")
        print(f"Budget remaining: ${tracker.budget_remaining:.2f}")

        # Export report
        report = tracker.get_report()
    """

    def __init__(
        self,
        budget_limit: float | None = None,
        warn_at_percent: float = 80.0,
        custom_costs: dict[str, dict[str, float]] | None = None,
    ):
        """
        Initialize the cost tracker.

        Args:
            budget_limit: Maximum budget in USD (None = no limit)
            warn_at_percent: Warn when this percentage of budget is used
            custom_costs: Custom cost overrides per model
        """
        self._budget_limit = budget_limit
        self._warn_at_percent = warn_at_percent
        # Custom entries override the built-in defaults.
        self._costs = {**DEFAULT_COSTS, **(custom_costs or {})}
        self._metrics = CostMetrics()
        self._budget_warning_issued = False

    def _get_model_cost(self, model: str) -> dict[str, float]:
        """
        Get cost per 1K tokens for a model.

        Resolution order: exact key match, then the LONGEST case-insensitive
        prefix match (so "gpt-4o-mini-2024-..." resolves to "gpt-4o-mini"
        rather than "gpt-4o"), then a gpt-4o-based fallback estimate.
        """
        # Exact match
        if model in self._costs:
            return self._costs[model]

        # Fix: choose the longest matching prefix rather than the first one
        # encountered in dict iteration order. The old first-match behavior
        # depended on insertion order and could select a less specific
        # model's pricing (e.g. "gpt-4o" for a "gpt-4o-mini-..." model).
        model_lower = model.lower()
        best_key: str | None = None
        for known_model in self._costs:
            if model_lower.startswith(known_model.lower()) and (
                best_key is None or len(known_model) > len(best_key)
            ):
                best_key = known_model
        if best_key is not None:
            return self._costs[best_key]

        # Default to GPT-4o pricing as a reasonable estimate
        logger.debug(f"Unknown model cost for '{model}', using gpt-4o pricing estimate")
        return self._costs.get("gpt-4o", {"input": 0.0025, "output": 0.01})

    def track(self, response: CompletionResponse) -> float:
        """
        Track a completion response and return its cost.

        Args:
            response: The completion response to track

        Returns:
            The estimated cost in USD for this response

        Raises:
            BudgetExceededError: If this response pushes total spend over
                the configured budget limit. Metrics are still updated.
        """
        # No usage info means nothing billable to record.
        if not response.usage:
            return 0.0

        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        model = response.model

        # Costs are quoted per 1K tokens.
        model_costs = self._get_model_cost(model)
        input_cost = (input_tokens / 1000) * model_costs["input"]
        output_cost = (output_tokens / 1000) * model_costs["output"]
        total_cost = input_cost + output_cost

        # Update aggregate metrics.
        self._metrics.total_input_tokens += input_tokens
        self._metrics.total_output_tokens += output_tokens
        self._metrics.total_requests += 1
        self._metrics.total_cost_usd += total_cost

        # Per-model tracking.
        if model not in self._metrics.cost_by_model:
            self._metrics.cost_by_model[model] = 0.0
            self._metrics.tokens_by_model[model] = {"input": 0, "output": 0}

        self._metrics.cost_by_model[model] += total_cost
        self._metrics.tokens_by_model[model]["input"] += input_tokens
        self._metrics.tokens_by_model[model]["output"] += output_tokens

        # Check budget AFTER recording, so the report reflects actual spend
        # even when the limit is breached.
        self._check_budget()

        return total_cost

    def _check_budget(self) -> None:
        """Warn once at the soft threshold; raise at the hard limit."""
        if self._budget_limit is None:
            return

        percent_used = (self._metrics.total_cost_usd / self._budget_limit) * 100

        # Soft warning threshold (issued at most once per tracker lifetime).
        if percent_used >= self._warn_at_percent and not self._budget_warning_issued:
            logger.warning(
                f"Budget warning: {percent_used:.1f}% of ${self._budget_limit:.2f} budget used "
                f"(${self._metrics.total_cost_usd:.4f} spent)"
            )
            self._budget_warning_issued = True

        # Hard limit
        if self._metrics.total_cost_usd >= self._budget_limit:
            raise BudgetExceededError(
                f"Budget limit of ${self._budget_limit:.2f} exceeded "
                f"(${self._metrics.total_cost_usd:.4f} spent)"
            )

    @property
    def total_cost(self) -> float:
        """Total estimated cost in USD."""
        return self._metrics.total_cost_usd

    @property
    def total_tokens(self) -> int:
        """Total tokens used (input + output)."""
        return self._metrics.total_input_tokens + self._metrics.total_output_tokens

    @property
    def total_requests(self) -> int:
        """Total number of requests tracked."""
        return self._metrics.total_requests

    @property
    def budget_remaining(self) -> float | None:
        """Remaining budget in USD (never negative), or None if no limit."""
        if self._budget_limit is None:
            return None
        return max(0.0, self._budget_limit - self._metrics.total_cost_usd)

    def get_report(self) -> dict[str, Any]:
        """
        Get a detailed cost report.

        Returns:
            Dictionary with overall totals, budget state, and a per-model
            cost/token breakdown.
        """
        return {
            "total_cost_usd": self._metrics.total_cost_usd,
            "total_requests": self._metrics.total_requests,
            "total_tokens": {
                "input": self._metrics.total_input_tokens,
                "output": self._metrics.total_output_tokens,
                "total": self.total_tokens,
            },
            "budget_limit_usd": self._budget_limit,
            "budget_remaining_usd": self.budget_remaining,
            "by_model": {
                model: {
                    "cost_usd": self._metrics.cost_by_model[model],
                    "tokens": self._metrics.tokens_by_model[model],
                }
                for model in self._metrics.cost_by_model
            },
        }

    def reset(self) -> None:
        """Reset all metrics and re-arm the budget warning."""
        self._metrics = CostMetrics()
        self._budget_warning_issued = False
216
+
217
+
218
class BudgetExceededError(Exception):
    """Raised when accumulated spend reaches or passes the budget limit."""
flashlite/py.typed ADDED
File without changes
@@ -0,0 +1,31 @@
1
"""Structured outputs module for Pydantic model integration."""

# Re-export the public API from the implementation modules.
from .outputs import (
    StructuredOutputError,
    extract_json_from_content,
    format_validation_error_for_retry,
    parse_json_response,
    validate_response,
)
from .schema import (
    format_schema_for_openai,
    generate_json_schema,
    get_field_descriptions,
    is_supported_type,
    schema_to_prompt,
)

# Explicit public API of the subpackage.
__all__ = [
    # Schema generation
    "generate_json_schema",
    "schema_to_prompt",
    "get_field_descriptions",
    "format_schema_for_openai",
    "is_supported_type",
    # Parsing and validation
    "parse_json_response",
    "validate_response",
    "format_validation_error_for_retry",
    "extract_json_from_content",
    "StructuredOutputError",
]