coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +219 -0
- coderouter/config/schemas.py +235 -2
- coderouter/guards/__init__.py +6 -4
- coderouter/guards/backend_health.py +34 -0
- coderouter/guards/continuous_probe.py +349 -0
- coderouter/guards/drift_actions.py +111 -0
- coderouter/guards/drift_detection.py +308 -0
- coderouter/guards/self_healing.py +413 -0
- coderouter/guards/tool_loop.py +71 -0
- coderouter/ingress/anthropic_routes.py +106 -12
- coderouter/ingress/app.py +129 -0
- coderouter/logging.py +370 -0
- coderouter/metrics/collector.py +168 -0
- coderouter/metrics/prometheus.py +141 -0
- coderouter/output_filters.py +95 -4
- coderouter/routing/adaptive.py +23 -0
- coderouter/routing/budget.py +35 -0
- coderouter/routing/fallback.py +496 -5
- coderouter/state/__init__.py +15 -0
- coderouter/state/audit_log.py +269 -0
- coderouter/state/replay.py +316 -0
- coderouter/state/request_log.py +178 -0
- coderouter/state/store.py +212 -0
- coderouter/translation/tool_repair.py +42 -1
- coderouter_cli-2.2.0.dist-info/METADATA +243 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/RECORD +29 -20
- coderouter_cli-2.0.0.dist-info/METADATA +0 -559
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Structured JSONL audit log (v2.0-K).
|
|
2
|
+
|
|
3
|
+
Captures guard activations, chain fallbacks, budget warnings,
|
|
4
|
+
self-healing events, and drift transitions as append-only JSONL
|
|
5
|
+
records. Implements ``logging.Handler`` so it taps the same
|
|
6
|
+
structured log stream that :class:`MetricsCollector` observes — no
|
|
7
|
+
second instrumentation path needed.
|
|
8
|
+
|
|
9
|
+
Architecture
|
|
10
|
+
============
|
|
11
|
+
|
|
12
|
+
::
|
|
13
|
+
|
|
14
|
+
logger.info("backend-health-changed", extra={...})
|
|
15
|
+
│
|
|
16
|
+
├─ MetricsCollector.emit() → in-memory counters
|
|
17
|
+
└─ AuditLogHandler.emit() → append JSONL line to disk
|
|
18
|
+
|
|
19
|
+
Only *audit-worthy* events are written (guard state changes, chain
|
|
20
|
+
decisions, cost/budget events, self-healing lifecycle). High-frequency
|
|
21
|
+
per-request events (``try-provider``, ``provider-ok``) are excluded to
|
|
22
|
+
keep the log small.
|
|
23
|
+
|
|
24
|
+
File rotation
|
|
25
|
+
=============
|
|
26
|
+
|
|
27
|
+
Simple single-backup rotation: when the active file exceeds
|
|
28
|
+
``max_bytes``, it is renamed to ``audit.jsonl.1`` (overwriting any
|
|
29
|
+
existing backup) and a fresh ``audit.jsonl`` is started. One backup
|
|
30
|
+
is enough for the typical use case (reviewing yesterday's events while
|
|
31
|
+
today's stream runs).
|
|
32
|
+
|
|
33
|
+
Thread safety
|
|
34
|
+
=============
|
|
35
|
+
|
|
36
|
+
Inherits ``logging.Handler``'s built-in lock (``self.lock``) via the
|
|
37
|
+
``acquire()``/``release()`` protocol. File writes are atomic single
|
|
38
|
+
lines (no partial-line interleaving) because Python's stdlib file
|
|
39
|
+
``write()`` of a single string ≤ PIPE_BUF is POSIX-atomic on Linux.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
from __future__ import annotations
|
|
43
|
+
|
|
44
|
+
import json
|
|
45
|
+
import logging
|
|
46
|
+
from datetime import UTC, datetime
|
|
47
|
+
from pathlib import Path
|
|
48
|
+
|
|
49
|
+
# Events that are audit-worthy: guard state changes, chain decisions,
# cost/budget, self-healing lifecycle, drift, probing milestones.
# ``AuditLogHandler.emit`` writes a record only when its ``msg`` is a
# member of this set; every other log message is ignored by the handler.
_AUDIT_EVENTS: frozenset[str] = frozenset(
    {
        # Backend health
        "backend-health-changed",
        "demote-unhealthy-provider",
        # Self-healing (v2.0-J)
        "self-healing-exclude",
        "self-healing-restore",
        "self-healing-restart",
        "self-healing-recovery-probe",
        # Budget / cost
        "skip-budget-exceeded",
        "chain-budget-exceeded",
        # Chain gate events
        "chain-paid-gate-blocked",
        "chain-memory-pressure-blocked",
        "chain-uniform-auth-failure",
        # Memory pressure
        "memory-pressure-detected",
        # Drift (v2.0-G)
        "drift-detected",
        "drift-promoted",
        "drift-reload-attempted",
        "drift-recovered",
        # Context budget (v2.0-F)
        "context-budget-warning",
        "context-budget-trimmed",
        # Tool loop (L3)
        "tool-loop-detected",
        # Probe milestones (v2.0-I)
        "probe-capabilities-drift",
        # Startup / shutdown
        "coderouter-startup",
        "coderouter-shutdown",
    }
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class AuditLogHandler(logging.Handler):
|
|
90
|
+
"""Append-only JSONL handler for audit-worthy events.
|
|
91
|
+
|
|
92
|
+
Public API:
|
|
93
|
+
|
|
94
|
+
- Constructor: ``AuditLogHandler(log_path, max_bytes=10_485_760)``
|
|
95
|
+
- Inherited ``emit()`` is called automatically by the logging
|
|
96
|
+
framework for every log record.
|
|
97
|
+
- :meth:`close()` — flush and close the file handle.
|
|
98
|
+
|
|
99
|
+
Only events whose ``record.msg`` is in :data:`_AUDIT_EVENTS` are
|
|
100
|
+
written. Everything else is silently ignored (zero I/O cost for
|
|
101
|
+
non-audit log lines).
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
def __init__(
|
|
105
|
+
self,
|
|
106
|
+
log_path: str | Path,
|
|
107
|
+
*,
|
|
108
|
+
max_bytes: int = 10_485_760,
|
|
109
|
+
) -> None:
|
|
110
|
+
super().__init__(level=logging.DEBUG)
|
|
111
|
+
self._log_path = Path(log_path)
|
|
112
|
+
self._max_bytes = max_bytes
|
|
113
|
+
self._log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
114
|
+
self._file = open(self._log_path, "a", encoding="utf-8") # noqa: SIM115
|
|
115
|
+
|
|
116
|
+
def emit(self, record: logging.LogRecord) -> None:
|
|
117
|
+
"""Write an audit line if the event is audit-worthy."""
|
|
118
|
+
if record.msg not in _AUDIT_EVENTS:
|
|
119
|
+
return
|
|
120
|
+
try:
|
|
121
|
+
self.acquire()
|
|
122
|
+
try:
|
|
123
|
+
line = self._format_line(record)
|
|
124
|
+
self._file.write(line)
|
|
125
|
+
self._file.flush()
|
|
126
|
+
self._maybe_rotate()
|
|
127
|
+
finally:
|
|
128
|
+
self.release()
|
|
129
|
+
except Exception:
|
|
130
|
+
self.handleError(record)
|
|
131
|
+
|
|
132
|
+
def close(self) -> None:
|
|
133
|
+
"""Flush and close the underlying file."""
|
|
134
|
+
self.acquire()
|
|
135
|
+
try:
|
|
136
|
+
if self._file and not self._file.closed:
|
|
137
|
+
self._file.flush()
|
|
138
|
+
self._file.close()
|
|
139
|
+
finally:
|
|
140
|
+
self.release()
|
|
141
|
+
super().close()
|
|
142
|
+
|
|
143
|
+
# ------------------------------------------------------------------
|
|
144
|
+
# Internals
|
|
145
|
+
# ------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
def _format_line(self, record: logging.LogRecord) -> str:
|
|
148
|
+
"""Build a single JSONL line from a log record."""
|
|
149
|
+
payload: dict[str, object] = {
|
|
150
|
+
"ts": datetime.now(UTC).isoformat(),
|
|
151
|
+
"event": record.msg,
|
|
152
|
+
"level": record.levelname,
|
|
153
|
+
}
|
|
154
|
+
# Merge structured extras (skip stdlib internal fields).
|
|
155
|
+
_stdlib_keys = {
|
|
156
|
+
"name",
|
|
157
|
+
"msg",
|
|
158
|
+
"args",
|
|
159
|
+
"created",
|
|
160
|
+
"filename",
|
|
161
|
+
"funcName",
|
|
162
|
+
"levelname",
|
|
163
|
+
"levelno",
|
|
164
|
+
"lineno",
|
|
165
|
+
"module",
|
|
166
|
+
"msecs",
|
|
167
|
+
"pathname",
|
|
168
|
+
"process",
|
|
169
|
+
"processName",
|
|
170
|
+
"relativeCreated",
|
|
171
|
+
"stack_info",
|
|
172
|
+
"thread",
|
|
173
|
+
"threadName",
|
|
174
|
+
"exc_info",
|
|
175
|
+
"exc_text",
|
|
176
|
+
"message",
|
|
177
|
+
"taskName",
|
|
178
|
+
}
|
|
179
|
+
for key, value in record.__dict__.items():
|
|
180
|
+
if key.startswith("_") or key in _stdlib_keys:
|
|
181
|
+
continue
|
|
182
|
+
payload[key] = value
|
|
183
|
+
return json.dumps(payload, default=str) + "\n"
|
|
184
|
+
|
|
185
|
+
def _maybe_rotate(self) -> None:
|
|
186
|
+
"""Rotate if the current file exceeds max_bytes."""
|
|
187
|
+
try:
|
|
188
|
+
size = self._file.tell()
|
|
189
|
+
if size < self._max_bytes:
|
|
190
|
+
return
|
|
191
|
+
self._file.close()
|
|
192
|
+
backup = self._log_path.with_suffix(".jsonl.1")
|
|
193
|
+
# Overwrite any existing backup.
|
|
194
|
+
if backup.exists():
|
|
195
|
+
backup.unlink()
|
|
196
|
+
self._log_path.rename(backup)
|
|
197
|
+
self._file = open(self._log_path, "a", encoding="utf-8") # noqa: SIM115
|
|
198
|
+
except OSError:
|
|
199
|
+
# If rotation fails, just keep writing to the current file.
|
|
200
|
+
if self._file.closed:
|
|
201
|
+
self._file = open(self._log_path, "a", encoding="utf-8") # noqa: SIM115
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def read_audit_log(
|
|
205
|
+
log_path: str | Path,
|
|
206
|
+
*,
|
|
207
|
+
tail: int | None = None,
|
|
208
|
+
event_filter: str | None = None,
|
|
209
|
+
since: str | None = None,
|
|
210
|
+
) -> list[dict[str, object]]:
|
|
211
|
+
"""Read and filter audit log entries.
|
|
212
|
+
|
|
213
|
+
Parameters:
|
|
214
|
+
|
|
215
|
+
- ``tail`` — return only the last N entries.
|
|
216
|
+
- ``event_filter`` — only entries whose ``event`` field contains
|
|
217
|
+
this substring (case-insensitive).
|
|
218
|
+
- ``since`` — only entries with ``ts >= since`` (ISO 8601 prefix
|
|
219
|
+
match).
|
|
220
|
+
|
|
221
|
+
Returns a list of parsed dicts, newest last.
|
|
222
|
+
"""
|
|
223
|
+
path = Path(log_path)
|
|
224
|
+
if not path.exists():
|
|
225
|
+
return []
|
|
226
|
+
|
|
227
|
+
entries: list[dict[str, object]] = []
|
|
228
|
+
with open(path, encoding="utf-8") as f:
|
|
229
|
+
for line in f:
|
|
230
|
+
line = line.strip()
|
|
231
|
+
if not line:
|
|
232
|
+
continue
|
|
233
|
+
try:
|
|
234
|
+
entry = json.loads(line)
|
|
235
|
+
except json.JSONDecodeError:
|
|
236
|
+
continue
|
|
237
|
+
|
|
238
|
+
if event_filter and event_filter.lower() not in str(
|
|
239
|
+
entry.get("event", "")
|
|
240
|
+
).lower():
|
|
241
|
+
continue
|
|
242
|
+
|
|
243
|
+
if since:
|
|
244
|
+
ts = str(entry.get("ts", ""))
|
|
245
|
+
if ts < since:
|
|
246
|
+
continue
|
|
247
|
+
|
|
248
|
+
entries.append(entry)
|
|
249
|
+
|
|
250
|
+
if tail is not None and tail > 0:
|
|
251
|
+
entries = entries[-tail:]
|
|
252
|
+
|
|
253
|
+
return entries
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def summarize_audit_log(entries: list[dict[str, object]]) -> dict[str, int]:
    """Count audit entries per event type.

    Entries without an ``event`` key are counted under ``"unknown"``.
    The returned dict is ordered by descending count; event types with
    equal counts keep the order in which they were first seen.
    """
    counts: dict[str, int] = {}
    for record in entries:
        name = str(record.get("event", "unknown"))
        if name in counts:
            counts[name] += 1
        else:
            counts[name] = 1
    # sorted() is stable even with reverse=True, so ties keep first-seen order.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# Names exported by a star-import of this module.
__all__ = [
    "AuditLogHandler",
    "read_audit_log",
    "summarize_audit_log",
]
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
"""Replay analysis engine (v2.0-K Replay framework).
|
|
2
|
+
|
|
3
|
+
Provides statistical A/B comparison of request journal entries across
|
|
4
|
+
providers. Since the request journal records only metadata (token
|
|
5
|
+
counts, cost, streaming flag) — **not** request/response bodies — this
|
|
6
|
+
is *statistical replay*, not literal re-execution.
|
|
7
|
+
|
|
8
|
+
Typical use: an operator changes the fallback chain (swap provider A
|
|
9
|
+
for provider B) and wants to know how the new routing affected cost,
|
|
10
|
+
token counts, and request distribution compared to the previous window.
|
|
11
|
+
|
|
12
|
+
Usage::
|
|
13
|
+
|
|
14
|
+
from coderouter.state.request_log import read_request_log
|
|
15
|
+
from coderouter.state.replay import compare_providers, summarize_window
|
|
16
|
+
|
|
17
|
+
entries = read_request_log("~/.coderouter/state/requests.jsonl")
|
|
18
|
+
summary = summarize_window(entries)
|
|
19
|
+
comparison = compare_providers(entries, "anthropic-api", "openrouter-free")
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import math
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
|
|
27
|
+
# ------------------------------------------------------------------
|
|
28
|
+
# Per-provider summary
|
|
29
|
+
# ------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ProviderSummary:
    """Running totals and derived averages for a single provider.

    The ``total_*`` and ``*_count`` fields are accumulated by the caller;
    the ``avg_*`` and ``*_ratio`` fields stay at ``0.0`` until
    :meth:`_finalize` is invoked.
    """

    provider: str
    request_count: int = 0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost_usd: float = 0.0
    total_cost_savings_usd: float = 0.0
    total_cache_read_tokens: int = 0
    total_cache_creation_tokens: int = 0
    streaming_count: int = 0
    # Derived values, filled in by _finalize()
    avg_input_tokens: float = 0.0
    avg_output_tokens: float = 0.0
    avg_cost_usd: float = 0.0
    streaming_ratio: float = 0.0
    cache_hit_ratio: float = 0.0

    def _finalize(self) -> None:
        """Fill in the per-request averages and the two ratios.

        Averages and the streaming ratio are only computed when at least
        one request was counted.  The cache hit ratio is cache-read tokens
        divided by (input tokens + cache-read tokens) and is only computed
        when that sum is positive.
        """
        requests = self.request_count
        if requests > 0:
            self.avg_input_tokens = self.total_input_tokens / requests
            self.avg_output_tokens = self.total_output_tokens / requests
            self.avg_cost_usd = self.total_cost_usd / requests
            self.streaming_ratio = self.streaming_count / requests

        cached = self.total_cache_read_tokens
        prompt_tokens = self.total_input_tokens + cached
        if prompt_tokens > 0:
            self.cache_hit_ratio = cached / prompt_tokens
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class WindowSummary:
    """Full window summary across all providers."""

    # Overall totals across every entry in the window.
    total_requests: int = 0
    total_cost_usd: float = 0.0
    total_cost_savings_usd: float = 0.0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    # Per-provider breakdown, keyed by provider name.
    providers: dict[str, ProviderSummary] = field(default_factory=dict)
    # Smallest and largest ``ts`` strings seen in the window; empty when
    # no entry carried a timestamp.
    first_ts: str = ""
    last_ts: str = ""
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def summarize_window(entries: list[dict[str, object]]) -> WindowSummary:
    """Aggregate request journal entries into a :class:`WindowSummary`.

    Numeric fields that are missing or ``null`` in an entry count as zero,
    a missing or ``null`` provider is grouped under ``"unknown"``, and a
    missing or ``null`` ``ts`` does not affect the window bounds.

    Parameters
    ----------
    entries
        Parsed JSONL dicts from :func:`read_request_log`.

    Returns
    -------
    WindowSummary
        Per-provider and overall statistics.
    """
    summary = WindowSummary()
    for entry in entries:
        # ``or`` (rather than a .get default) also covers an explicit JSON
        # null, which would otherwise become the provider name "None".
        provider = str(entry.get("provider") or "unknown")
        ps = summary.providers.get(provider)
        if ps is None:
            ps = summary.providers[provider] = ProviderSummary(provider=provider)
        ps.request_count += 1

        # int(None) / float(None) raise TypeError; ``or 0`` maps a JSON
        # null (or missing key) to zero so one sparse entry cannot abort
        # the whole summary.
        input_tokens = int(entry.get("input_tokens") or 0)
        output_tokens = int(entry.get("output_tokens") or 0)
        cost_usd = float(entry.get("cost_usd") or 0.0)
        cost_savings = float(entry.get("cost_savings_usd") or 0.0)
        cache_read = int(entry.get("cache_read_input_tokens") or 0)
        cache_creation = int(entry.get("cache_creation_input_tokens") or 0)
        streaming = bool(entry.get("streaming", False))

        ps.total_input_tokens += input_tokens
        ps.total_output_tokens += output_tokens
        ps.total_cost_usd += cost_usd
        ps.total_cost_savings_usd += cost_savings
        ps.total_cache_read_tokens += cache_read
        ps.total_cache_creation_tokens += cache_creation
        if streaming:
            ps.streaming_count += 1

        summary.total_requests += 1
        summary.total_cost_usd += cost_usd
        summary.total_cost_savings_usd += cost_savings
        summary.total_input_tokens += input_tokens
        summary.total_output_tokens += output_tokens

        # Window bounds are tracked by string comparison of the timestamps.
        ts = str(entry.get("ts") or "")
        if ts:
            if not summary.first_ts or ts < summary.first_ts:
                summary.first_ts = ts
            if not summary.last_ts or ts > summary.last_ts:
                summary.last_ts = ts

    # Derived averages/ratios are only meaningful once all entries are in.
    for ps in summary.providers.values():
        ps._finalize()

    return summary
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ------------------------------------------------------------------
|
|
138
|
+
# A/B provider comparison
|
|
139
|
+
# ------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass
class ProviderComparison:
    """Side-by-side comparison of two providers."""

    # Aggregated statistics for each side of the comparison.
    provider_a: ProviderSummary
    provider_b: ProviderSummary
    # Deltas: B - A (positive = B is larger)
    delta_avg_input_tokens: float = 0.0
    delta_avg_output_tokens: float = 0.0
    delta_avg_cost_usd: float = 0.0
    delta_total_cost_usd: float = 0.0
    # Percentage changes (relative to A; NaN if A is zero)
    pct_avg_cost_change: float = 0.0
    pct_total_cost_change: float = 0.0
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def compare_providers(
    entries: list[dict[str, object]],
    provider_a: str,
    provider_b: str,
) -> ProviderComparison:
    """Build an A/B comparison of two providers from one journal.

    Parameters
    ----------
    entries
        Parsed JSONL dicts from :func:`read_request_log`.
    provider_a, provider_b
        Names of the providers to compare.  Entries belonging to any
        other provider are left out.

    Returns
    -------
    ProviderComparison
        Both providers' summaries plus ``B - A`` deltas.  The percentage
        fields are relative to A and are NaN when A's value is not
        positive.
    """

    def _stats_for(name: str) -> ProviderSummary:
        # Summarize only this provider's rows; fall back to an empty
        # summary when the provider never appears.
        matching = [row for row in entries if str(row.get("provider", "")) == name]
        window = summarize_window(matching)
        return window.providers.get(name, ProviderSummary(provider=name))

    def _pct_change(base: float, other: float) -> float:
        if base > 0:
            return (other - base) / base * 100.0
        return float("nan")

    stats_a = _stats_for(provider_a)
    stats_b = _stats_for(provider_b)

    return ProviderComparison(
        provider_a=stats_a,
        provider_b=stats_b,
        delta_avg_input_tokens=stats_b.avg_input_tokens - stats_a.avg_input_tokens,
        delta_avg_output_tokens=stats_b.avg_output_tokens - stats_a.avg_output_tokens,
        delta_avg_cost_usd=stats_b.avg_cost_usd - stats_a.avg_cost_usd,
        delta_total_cost_usd=stats_b.total_cost_usd - stats_a.total_cost_usd,
        pct_avg_cost_change=_pct_change(stats_a.avg_cost_usd, stats_b.avg_cost_usd),
        pct_total_cost_change=_pct_change(
            stats_a.total_cost_usd, stats_b.total_cost_usd
        ),
    )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# ------------------------------------------------------------------
|
|
215
|
+
# CLI table formatting helpers
|
|
216
|
+
# ------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def format_summary_table(summary: WindowSummary) -> str:
    """Format a :class:`WindowSummary` as plain text for the terminal.

    The output starts with the window bounds and overall totals, followed
    by a fixed-width table with one row per provider, ordered by total
    cost (highest first).
    """
    header = (
        f"{'Provider':<25} {'Reqs':>6} {'AvgIn':>8} {'AvgOut':>8} "
        f"{'AvgCost':>10} {'TotalCost':>10} {'Cache%':>7} {'Stream%':>8}"
    )
    totals = (
        f"Total: {summary.total_requests} requests, "
        f"${summary.total_cost_usd:.4f} cost, "
        f"${summary.total_cost_savings_usd:.4f} savings"
    )
    out: list[str] = [
        f"Window: {summary.first_ts} → {summary.last_ts}",
        totals,
        "",
        header,
        "-" * len(header),
    ]

    by_spend = sorted(
        summary.providers.values(),
        key=lambda item: item.total_cost_usd,
        reverse=True,
    )
    out.extend(
        f"{row.provider:<25} {row.request_count:>6} "
        f"{row.avg_input_tokens:>8.0f} {row.avg_output_tokens:>8.0f} "
        f"${row.avg_cost_usd:>9.4f} ${row.total_cost_usd:>9.4f} "
        f"{row.cache_hit_ratio * 100:>6.1f}% "
        f"{row.streaming_ratio * 100:>7.1f}%"
        for row in by_spend
    )

    return "\n".join(out)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def format_comparison_table(comp: ProviderComparison) -> str:
    """Render a :class:`ProviderComparison` as a CLI table.

    Returns a plain-text side-by-side comparison table, followed by a
    blank line and up to two one-line verdicts (per-request cost and
    total spend).  A verdict is omitted when its percentage is NaN, i.e.
    when provider A has no positive cost to compare against.  An exact
    0% change is reported as "the same" rather than as "more".
    """
    a = comp.provider_a
    b = comp.provider_b

    def _row(label: str, va: object, vb: object, delta: float, fmt: str = ".0f") -> str:
        # The delta always carries an explicit sign so direction is visible.
        d_str = f"{delta:+{fmt}}"
        return f"{label:<25} {va!s:<20} {vb!s:<20} {d_str:>12}"

    hdr = f"{'Metric':<25} {a.provider:<20} {b.provider:<20} {'Delta':>12}"
    lines: list[str] = [
        hdr,
        "-" * len(hdr),
        _row("Requests", a.request_count, b.request_count,
             b.request_count - a.request_count),
        _row("Avg input tokens", f"{a.avg_input_tokens:.0f}",
             f"{b.avg_input_tokens:.0f}",
             comp.delta_avg_input_tokens),
        _row("Avg output tokens", f"{a.avg_output_tokens:.0f}",
             f"{b.avg_output_tokens:.0f}",
             comp.delta_avg_output_tokens),
        _row("Avg cost (USD)", f"${a.avg_cost_usd:.4f}",
             f"${b.avg_cost_usd:.4f}",
             comp.delta_avg_cost_usd, fmt=".4f"),
        _row("Total cost (USD)", f"${a.total_cost_usd:.4f}",
             f"${b.total_cost_usd:.4f}",
             comp.delta_total_cost_usd, fmt=".4f"),
        # Ratio deltas are shown in percentage points.
        _row("Cache hit ratio", f"{a.cache_hit_ratio * 100:.1f}%",
             f"{b.cache_hit_ratio * 100:.1f}%",
             (b.cache_hit_ratio - a.cache_hit_ratio) * 100, fmt=".1f"),
        _row("Streaming ratio", f"{a.streaming_ratio * 100:.1f}%",
             f"{b.streaming_ratio * 100:.1f}%",
             (b.streaming_ratio - a.streaming_ratio) * 100, fmt=".1f"),
        # Cost change summary
        "",
    ]

    pct_avg = comp.pct_avg_cost_change
    if not math.isnan(pct_avg):
        if pct_avg == 0:
            # Identical cost is neither cheaper nor more expensive.
            lines.append(
                f"Per-request: {b.provider} costs the same as {a.provider}"
            )
        else:
            direction = "cheaper" if pct_avg < 0 else "more expensive"
            lines.append(
                f"Per-request: {b.provider} is {abs(pct_avg):.1f}% "
                f"{direction} than {a.provider}"
            )

    pct_total = comp.pct_total_cost_change
    if not math.isnan(pct_total):
        if pct_total == 0:
            lines.append(
                f"Total spend: {b.provider} spent the same as {a.provider}"
            )
        else:
            direction = "less" if pct_total < 0 else "more"
            lines.append(
                f"Total spend: {b.provider} spent {abs(pct_total):.1f}% "
                f"{direction} than {a.provider}"
            )

    return "\n".join(lines)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
# Names exported by a star-import of this module.
__all__ = [
    "ProviderComparison",
    "ProviderSummary",
    "WindowSummary",
    "compare_providers",
    "format_comparison_table",
    "format_summary_table",
    "summarize_window",
]
|