coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,269 @@
1
+ """Structured JSONL audit log (v2.0-K).
2
+
3
+ Captures guard activations, chain fallbacks, budget warnings,
4
+ self-healing events, and drift transitions as append-only JSONL
5
+ records. Implements ``logging.Handler`` so it taps the same
6
+ structured log stream that :class:`MetricsCollector` observes — no
7
+ second instrumentation path needed.
8
+
9
+ Architecture
10
+ ============
11
+
12
+ ::
13
+
14
+ logger.info("backend-health-changed", extra={...})
15
+
16
+ ├─ MetricsCollector.emit() → in-memory counters
17
+ └─ AuditLogHandler.emit() → append JSONL line to disk
18
+
19
+ Only *audit-worthy* events are written (guard state changes, chain
20
+ decisions, cost/budget events, self-healing lifecycle). High-frequency
21
+ per-request events (``try-provider``, ``provider-ok``) are excluded to
22
+ keep the log small.
23
+
24
+ File rotation
25
+ =============
26
+
27
+ Simple single-backup rotation: when the active file exceeds
28
+ ``max_bytes``, it is renamed to ``audit.jsonl.1`` (overwriting any
29
+ existing backup) and a fresh ``audit.jsonl`` is started. One backup
30
+ is enough for the typical use case (reviewing yesterday's events while
31
+ today's stream runs).
32
+
33
+ Thread safety
34
+ =============
35
+
36
+ Inherits ``logging.Handler``'s built-in lock (``self.lock``) via the
37
+ ``acquire()``/``release()`` protocol. Each record is written as one
38
+ complete line while that lock is held, so threads sharing a handler
39
+ never interleave partial lines (separate processes are not covered).
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ import json
45
+ import logging
46
+ from datetime import UTC, datetime
47
+ from pathlib import Path
48
+
49
# Allow-list of event names that are persisted to the audit log.  A log
# record is written only when its message is exactly one of these names;
# everything else is dropped by the handler.
_AUDIT_EVENTS: frozenset[str] = frozenset(
    (
        # --- backend health -------------------------------------------
        "backend-health-changed",
        "demote-unhealthy-provider",
        # --- self-healing lifecycle (v2.0-J) --------------------------
        "self-healing-exclude",
        "self-healing-restore",
        "self-healing-restart",
        "self-healing-recovery-probe",
        # --- budget / cost --------------------------------------------
        "skip-budget-exceeded",
        "chain-budget-exceeded",
        # --- chain gates ----------------------------------------------
        "chain-paid-gate-blocked",
        "chain-memory-pressure-blocked",
        "chain-uniform-auth-failure",
        # --- memory pressure ------------------------------------------
        "memory-pressure-detected",
        # --- drift (v2.0-G) -------------------------------------------
        "drift-detected",
        "drift-promoted",
        "drift-reload-attempted",
        "drift-recovered",
        # --- context budget (v2.0-F) ----------------------------------
        "context-budget-warning",
        "context-budget-trimmed",
        # --- tool loop (L3) -------------------------------------------
        "tool-loop-detected",
        # --- probe milestones (v2.0-I) --------------------------------
        "probe-capabilities-drift",
        # --- process lifecycle ----------------------------------------
        "coderouter-startup",
        "coderouter-shutdown",
    )
)
87
+
88
+
89
+ class AuditLogHandler(logging.Handler):
90
+ """Append-only JSONL handler for audit-worthy events.
91
+
92
+ Public API:
93
+
94
+ - Constructor: ``AuditLogHandler(log_path, max_bytes=10_485_760)``
95
+ - Inherited ``emit()`` is called automatically by the logging
96
+ framework for every log record.
97
+ - :meth:`close()` — flush and close the file handle.
98
+
99
+ Only events whose ``record.msg`` is in :data:`_AUDIT_EVENTS` are
100
+ written. Everything else is silently ignored (zero I/O cost for
101
+ non-audit log lines).
102
+ """
103
+
104
+ def __init__(
105
+ self,
106
+ log_path: str | Path,
107
+ *,
108
+ max_bytes: int = 10_485_760,
109
+ ) -> None:
110
+ super().__init__(level=logging.DEBUG)
111
+ self._log_path = Path(log_path)
112
+ self._max_bytes = max_bytes
113
+ self._log_path.parent.mkdir(parents=True, exist_ok=True)
114
+ self._file = open(self._log_path, "a", encoding="utf-8") # noqa: SIM115
115
+
116
+ def emit(self, record: logging.LogRecord) -> None:
117
+ """Write an audit line if the event is audit-worthy."""
118
+ if record.msg not in _AUDIT_EVENTS:
119
+ return
120
+ try:
121
+ self.acquire()
122
+ try:
123
+ line = self._format_line(record)
124
+ self._file.write(line)
125
+ self._file.flush()
126
+ self._maybe_rotate()
127
+ finally:
128
+ self.release()
129
+ except Exception:
130
+ self.handleError(record)
131
+
132
+ def close(self) -> None:
133
+ """Flush and close the underlying file."""
134
+ self.acquire()
135
+ try:
136
+ if self._file and not self._file.closed:
137
+ self._file.flush()
138
+ self._file.close()
139
+ finally:
140
+ self.release()
141
+ super().close()
142
+
143
+ # ------------------------------------------------------------------
144
+ # Internals
145
+ # ------------------------------------------------------------------
146
+
147
+ def _format_line(self, record: logging.LogRecord) -> str:
148
+ """Build a single JSONL line from a log record."""
149
+ payload: dict[str, object] = {
150
+ "ts": datetime.now(UTC).isoformat(),
151
+ "event": record.msg,
152
+ "level": record.levelname,
153
+ }
154
+ # Merge structured extras (skip stdlib internal fields).
155
+ _stdlib_keys = {
156
+ "name",
157
+ "msg",
158
+ "args",
159
+ "created",
160
+ "filename",
161
+ "funcName",
162
+ "levelname",
163
+ "levelno",
164
+ "lineno",
165
+ "module",
166
+ "msecs",
167
+ "pathname",
168
+ "process",
169
+ "processName",
170
+ "relativeCreated",
171
+ "stack_info",
172
+ "thread",
173
+ "threadName",
174
+ "exc_info",
175
+ "exc_text",
176
+ "message",
177
+ "taskName",
178
+ }
179
+ for key, value in record.__dict__.items():
180
+ if key.startswith("_") or key in _stdlib_keys:
181
+ continue
182
+ payload[key] = value
183
+ return json.dumps(payload, default=str) + "\n"
184
+
185
+ def _maybe_rotate(self) -> None:
186
+ """Rotate if the current file exceeds max_bytes."""
187
+ try:
188
+ size = self._file.tell()
189
+ if size < self._max_bytes:
190
+ return
191
+ self._file.close()
192
+ backup = self._log_path.with_suffix(".jsonl.1")
193
+ # Overwrite any existing backup.
194
+ if backup.exists():
195
+ backup.unlink()
196
+ self._log_path.rename(backup)
197
+ self._file = open(self._log_path, "a", encoding="utf-8") # noqa: SIM115
198
+ except OSError:
199
+ # If rotation fails, just keep writing to the current file.
200
+ if self._file.closed:
201
+ self._file = open(self._log_path, "a", encoding="utf-8") # noqa: SIM115
202
+
203
+
204
def read_audit_log(
    log_path: str | Path,
    *,
    tail: int | None = None,
    event_filter: str | None = None,
    since: str | None = None,
) -> list[dict[str, object]]:
    """Read and filter audit log entries.

    Parameters:

    - ``log_path`` — JSONL file to read; a missing file yields ``[]``.
    - ``tail`` — return only the last N matching entries.  ``None`` or a
      non-positive value means "no limit".
    - ``event_filter`` — only entries whose ``event`` field contains
      this substring (case-insensitive).
    - ``since`` — only entries with ``ts >= since``.  The comparison is a
      plain string comparison, so an ISO 8601 prefix such as
      ``"2024-05-01"`` works; entries without ``ts`` are dropped when
      this filter is active.

    Blank lines, lines that are not valid JSON, and JSON values that are
    not objects are skipped.

    Returns a list of parsed dicts, newest last.
    """
    needle = event_filter.lower() if event_filter else None

    entries: list[dict[str, object]] = []
    try:
        # errors="replace": a line torn by a crash mid-write must not make
        # the rest of the log unreadable.
        with open(Path(log_path), encoding="utf-8", errors="replace") as f:
            for raw_line in f:
                text = raw_line.strip()
                if not text:
                    continue
                try:
                    entry = json.loads(text)
                except json.JSONDecodeError:
                    continue
                # A line such as ``[1, 2]`` or ``42`` is valid JSON but not
                # an audit record; it has no ``.get`` and must be skipped.
                if not isinstance(entry, dict):
                    continue

                if needle and needle not in str(entry.get("event", "")).lower():
                    continue

                if since and str(entry.get("ts", "")) < since:
                    continue

                entries.append(entry)
    except (FileNotFoundError, NotADirectoryError):
        # No log yet (or it vanished between calls): nothing to report.
        return []

    if tail is not None and tail > 0:
        entries = entries[-tail:]

    return entries
254
+
255
+
256
def summarize_audit_log(entries: list[dict[str, object]]) -> dict[str, int]:
    """Count audit entries per event name.

    Entries lacking an ``event`` field are counted under ``"unknown"``.
    The returned dict is ordered from most to least frequent; events with
    equal counts keep the order in which they were first seen.
    """
    tally: dict[str, int] = {}
    for record in entries:
        name = str(record.get("event", "unknown"))
        tally[name] = tally.get(name, 0) + 1

    # A reverse sort is still stable, so ties keep first-seen order.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)
263
+
264
+
265
+ __all__ = [
266
+ "AuditLogHandler",
267
+ "read_audit_log",
268
+ "summarize_audit_log",
269
+ ]
@@ -0,0 +1,316 @@
1
+ """Replay analysis engine (v2.0-K Replay framework).
2
+
3
+ Provides statistical A/B comparison of request journal entries across
4
+ providers. Since the request journal records only metadata (token
5
+ counts, cost, streaming flag) — **not** request/response bodies — this
6
+ is *statistical replay*, not literal re-execution.
7
+
8
+ Typical use: an operator changes the fallback chain (swap provider A
9
+ for provider B) and wants to know how the new routing affected cost,
10
+ token counts, and request distribution compared to the previous window.
11
+
12
+ Usage::
13
+
14
+ from coderouter.state.request_log import read_request_log
15
+ from coderouter.state.replay import compare_providers, summarize_window
16
+
17
+ entries = read_request_log("~/.coderouter/state/requests.jsonl")
18
+ summary = summarize_window(entries)
19
+ comparison = compare_providers(entries, "anthropic-api", "openrouter-free")
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import math
25
+ from dataclasses import dataclass, field
26
+
27
+ # ------------------------------------------------------------------
28
+ # Per-provider summary
29
+ # ------------------------------------------------------------------
30
+
31
+
32
@dataclass
class ProviderSummary:
    """Running totals and derived per-request figures for one provider."""

    provider: str
    # Raw accumulators, incremented once per journal entry.
    request_count: int = 0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost_usd: float = 0.0
    total_cost_savings_usd: float = 0.0
    total_cache_read_tokens: int = 0
    total_cache_creation_tokens: int = 0
    streaming_count: int = 0
    # Derived values; they stay at 0.0 until _finalize() has run.
    avg_input_tokens: float = 0.0
    avg_output_tokens: float = 0.0
    avg_cost_usd: float = 0.0
    streaming_ratio: float = 0.0
    cache_hit_ratio: float = 0.0

    def _finalize(self) -> None:
        """Fill in the averages and ratios from the accumulated totals.

        Per-request averages and the streaming ratio are only computed
        when at least one request was counted.  The cache hit ratio is
        ``cache_read / (input + cache_read)`` and is only computed when
        that denominator is positive.
        """
        requests = self.request_count
        if requests > 0:
            self.avg_input_tokens = self.total_input_tokens / requests
            self.avg_output_tokens = self.total_output_tokens / requests
            self.avg_cost_usd = self.total_cost_usd / requests
            self.streaming_ratio = self.streaming_count / requests

        cached = self.total_cache_read_tokens
        denominator = self.total_input_tokens + cached
        if denominator > 0:
            self.cache_hit_ratio = cached / denominator
63
+
64
+
65
@dataclass
class WindowSummary:
    """Totals for a whole journal window plus a per-provider breakdown."""

    # Overall counters across every provider in the window.
    total_requests: int = 0
    total_cost_usd: float = 0.0
    total_cost_savings_usd: float = 0.0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    # Provider name -> that provider's summary (fresh dict per instance).
    providers: dict[str, ProviderSummary] = field(default_factory=dict)
    # Smallest / largest ``ts`` string seen; empty when none was seen.
    first_ts: str = ""
    last_ts: str = ""
77
+
78
+
79
def _journal_number(entry: dict[str, object], key: str) -> object:
    """Return ``entry[key]``, mapping a missing key or JSON ``null`` to 0."""
    value = entry.get(key)
    return 0 if value is None else value


def summarize_window(entries: list[dict[str, object]]) -> WindowSummary:
    """Aggregate request journal entries into a :class:`WindowSummary`.

    Numeric fields that are absent or ``null`` in an entry count as zero,
    so one incomplete journal line cannot abort the whole summary.
    Entries without a ``provider`` are grouped under ``"unknown"``.

    Parameters
    ----------
    entries
        Parsed JSONL dicts from :func:`read_request_log`.

    Returns
    -------
    WindowSummary
        Per-provider and overall statistics.  ``first_ts`` / ``last_ts``
        are the smallest / largest ``ts`` strings seen (string
        comparison), or ``""`` when no entry carries a timestamp.
    """
    summary = WindowSummary()
    for entry in entries:
        provider = str(entry.get("provider", "unknown"))
        ps = summary.providers.get(provider)
        if ps is None:
            ps = summary.providers[provider] = ProviderSummary(provider=provider)

        ps.request_count += 1

        # ``dict.get(key, 0)`` only covers a *missing* key; a key stored as
        # JSON null comes back as None and would make int()/float() raise.
        input_tokens = int(_journal_number(entry, "input_tokens"))
        output_tokens = int(_journal_number(entry, "output_tokens"))
        cost_usd = float(_journal_number(entry, "cost_usd"))
        cost_savings = float(_journal_number(entry, "cost_savings_usd"))
        cache_read = int(_journal_number(entry, "cache_read_input_tokens"))
        cache_creation = int(_journal_number(entry, "cache_creation_input_tokens"))
        streaming = bool(entry.get("streaming", False))

        ps.total_input_tokens += input_tokens
        ps.total_output_tokens += output_tokens
        ps.total_cost_usd += cost_usd
        ps.total_cost_savings_usd += cost_savings
        ps.total_cache_read_tokens += cache_read
        ps.total_cache_creation_tokens += cache_creation
        if streaming:
            ps.streaming_count += 1

        summary.total_requests += 1
        summary.total_cost_usd += cost_usd
        summary.total_cost_savings_usd += cost_savings
        summary.total_input_tokens += input_tokens
        summary.total_output_tokens += output_tokens

        # A null timestamp must not be stringified to "None" and then be
        # mistaken for a window boundary.
        raw_ts = entry.get("ts")
        ts = "" if raw_ts is None else str(raw_ts)
        if ts:
            if not summary.first_ts or ts < summary.first_ts:
                summary.first_ts = ts
            if not summary.last_ts or ts > summary.last_ts:
                summary.last_ts = ts

    for ps in summary.providers.values():
        ps._finalize()

    return summary
135
+
136
+
137
+ # ------------------------------------------------------------------
138
+ # A/B provider comparison
139
+ # ------------------------------------------------------------------
140
+
141
+
142
@dataclass
class ProviderComparison:
    """Two provider summaries placed side by side, with their differences."""

    provider_a: ProviderSummary
    provider_b: ProviderSummary
    # Absolute differences, computed as B minus A (positive: B is larger).
    delta_avg_input_tokens: float = 0.0
    delta_avg_output_tokens: float = 0.0
    delta_avg_cost_usd: float = 0.0
    delta_total_cost_usd: float = 0.0
    # Relative cost change in percent, measured against A; NaN when A's
    # figure is zero and no percentage can be formed.
    pct_avg_cost_change: float = 0.0
    pct_total_cost_change: float = 0.0
156
+
157
+
158
def compare_providers(
    entries: list[dict[str, object]],
    provider_a: str,
    provider_b: str,
) -> ProviderComparison:
    """Build an A/B comparison of two providers from one journal.

    Parameters
    ----------
    entries
        Parsed JSONL dicts from :func:`read_request_log`.
    provider_a, provider_b
        Names of the providers to compare; entries belonging to any
        other provider play no part in the result.

    Returns
    -------
    ProviderComparison
        Both summaries, the B-minus-A deltas, and the percentage cost
        changes relative to A (NaN when A's cost figure is not positive).
        A provider with no entries is represented by an all-zero summary.
    """

    def _stats_for(name: str) -> ProviderSummary:
        # Summarize only this provider's rows; fall back to an empty
        # summary when the journal holds none.
        rows = [row for row in entries if str(row.get("provider", "")) == name]
        return summarize_window(rows).providers.get(
            name, ProviderSummary(provider=name)
        )

    def _pct_change(new: float, base: float) -> float:
        # Relative change in percent; undefined without a positive base.
        if base > 0:
            return (new - base) / base * 100.0
        return float("nan")

    stats_a = _stats_for(provider_a)
    stats_b = _stats_for(provider_b)

    return ProviderComparison(
        provider_a=stats_a,
        provider_b=stats_b,
        delta_avg_input_tokens=stats_b.avg_input_tokens - stats_a.avg_input_tokens,
        delta_avg_output_tokens=stats_b.avg_output_tokens - stats_a.avg_output_tokens,
        delta_avg_cost_usd=stats_b.avg_cost_usd - stats_a.avg_cost_usd,
        delta_total_cost_usd=stats_b.total_cost_usd - stats_a.total_cost_usd,
        pct_avg_cost_change=_pct_change(stats_b.avg_cost_usd, stats_a.avg_cost_usd),
        pct_total_cost_change=_pct_change(
            stats_b.total_cost_usd, stats_a.total_cost_usd
        ),
    )
212
+
213
+
214
+ # ------------------------------------------------------------------
215
+ # CLI table formatting helpers
216
+ # ------------------------------------------------------------------
217
+
218
+
219
def format_summary_table(summary: WindowSummary) -> str:
    """Format a :class:`WindowSummary` as plain text for the terminal.

    The output is a two-line overview, a blank line, a column header with
    a dashed rule, and one row per provider ordered by total cost, most
    expensive first.  Lines are joined with ``\\n`` and there is no
    trailing newline.
    """
    header = (
        f"{'Provider':<25} {'Reqs':>6} {'AvgIn':>8} {'AvgOut':>8} "
        f"{'AvgCost':>10} {'TotalCost':>10} {'Cache%':>7} {'Stream%':>8}"
    )
    overview = (
        f"Total: {summary.total_requests} requests, "
        f"${summary.total_cost_usd:.4f} cost, "
        f"${summary.total_cost_savings_usd:.4f} savings"
    )

    by_cost = sorted(
        summary.providers.values(),
        key=lambda item: item.total_cost_usd,
        reverse=True,
    )
    rows = [
        (
            f"{ps.provider:<25} {ps.request_count:>6} "
            f"{ps.avg_input_tokens:>8.0f} {ps.avg_output_tokens:>8.0f} "
            f"${ps.avg_cost_usd:>9.4f} ${ps.total_cost_usd:>9.4f} "
            f"{ps.cache_hit_ratio * 100:>6.1f}% "
            f"{ps.streaming_ratio * 100:>7.1f}%"
        )
        for ps in by_cost
    ]

    return "\n".join(
        [
            f"Window: {summary.first_ts} → {summary.last_ts}",
            overview,
            "",
            header,
            "-" * len(header),
            *rows,
        ]
    )
250
+
251
+
252
def format_comparison_table(comp: ProviderComparison) -> str:
    """Format a :class:`ProviderComparison` as a plain-text table.

    One row per metric with A's value, B's value and the signed B-minus-A
    delta, followed by a blank line and up to two sentences describing
    the relative cost change.  A sentence is left out when its percentage
    is NaN.
    """
    a, b = comp.provider_a, comp.provider_b

    header = f"{'Metric':<25} {a.provider:<20} {b.provider:<20} {'Delta':>12}"
    out: list[str] = [header, "-" * len(header)]

    # (label, value for A, value for B, delta, format spec of the delta)
    metrics = [
        ("Requests", a.request_count, b.request_count,
         b.request_count - a.request_count, ".0f"),
        ("Avg input tokens", f"{a.avg_input_tokens:.0f}",
         f"{b.avg_input_tokens:.0f}", comp.delta_avg_input_tokens, ".0f"),
        ("Avg output tokens", f"{a.avg_output_tokens:.0f}",
         f"{b.avg_output_tokens:.0f}", comp.delta_avg_output_tokens, ".0f"),
        ("Avg cost (USD)", f"${a.avg_cost_usd:.4f}",
         f"${b.avg_cost_usd:.4f}", comp.delta_avg_cost_usd, ".4f"),
        ("Total cost (USD)", f"${a.total_cost_usd:.4f}",
         f"${b.total_cost_usd:.4f}", comp.delta_total_cost_usd, ".4f"),
        ("Cache hit ratio", f"{a.cache_hit_ratio * 100:.1f}%",
         f"{b.cache_hit_ratio * 100:.1f}%",
         (b.cache_hit_ratio - a.cache_hit_ratio) * 100, ".1f"),
        ("Streaming ratio", f"{a.streaming_ratio * 100:.1f}%",
         f"{b.streaming_ratio * 100:.1f}%",
         (b.streaming_ratio - a.streaming_ratio) * 100, ".1f"),
    ]
    for label, left, right, delta, spec in metrics:
        signed = f"{delta:+{spec}}"  # always show the sign of the delta
        out.append(f"{label:<25} {left!s:<20} {right!s:<20} {signed:>12}")

    # Cost change summary
    out.append("")
    avg_pct = comp.pct_avg_cost_change
    if not math.isnan(avg_pct):
        direction = "cheaper" if avg_pct < 0 else "more expensive"
        out.append(
            f"Per-request: {b.provider} is {abs(comp.pct_avg_cost_change):.1f}% "
            f"{direction} than {a.provider}"
        )
    total_pct = comp.pct_total_cost_change
    if not math.isnan(total_pct):
        direction = "less" if total_pct < 0 else "more"
        out.append(
            f"Total spend: {b.provider} spent {abs(comp.pct_total_cost_change):.1f}% "
            f"{direction} than {a.provider}"
        )

    return "\n".join(out)
306
+
307
+
308
+ __all__ = [
309
+ "ProviderComparison",
310
+ "ProviderSummary",
311
+ "WindowSummary",
312
+ "compare_providers",
313
+ "format_comparison_table",
314
+ "format_summary_table",
315
+ "summarize_window",
316
+ ]