coderouter-cli 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/__init__.py +17 -0
- coderouter/__main__.py +6 -0
- coderouter/adapters/__init__.py +23 -0
- coderouter/adapters/anthropic_native.py +502 -0
- coderouter/adapters/base.py +220 -0
- coderouter/adapters/openai_compat.py +395 -0
- coderouter/adapters/registry.py +17 -0
- coderouter/cli.py +345 -0
- coderouter/cli_stats.py +751 -0
- coderouter/config/__init__.py +10 -0
- coderouter/config/capability_registry.py +339 -0
- coderouter/config/env_file.py +295 -0
- coderouter/config/loader.py +73 -0
- coderouter/config/schemas.py +515 -0
- coderouter/data/__init__.py +7 -0
- coderouter/data/model-capabilities.yaml +86 -0
- coderouter/doctor.py +1596 -0
- coderouter/env_security.py +434 -0
- coderouter/errors.py +29 -0
- coderouter/ingress/__init__.py +5 -0
- coderouter/ingress/anthropic_routes.py +205 -0
- coderouter/ingress/app.py +144 -0
- coderouter/ingress/dashboard_routes.py +493 -0
- coderouter/ingress/metrics_routes.py +92 -0
- coderouter/ingress/openai_routes.py +153 -0
- coderouter/logging.py +315 -0
- coderouter/metrics/__init__.py +39 -0
- coderouter/metrics/collector.py +471 -0
- coderouter/metrics/prometheus.py +221 -0
- coderouter/output_filters.py +407 -0
- coderouter/routing/__init__.py +13 -0
- coderouter/routing/auto_router.py +244 -0
- coderouter/routing/capability.py +285 -0
- coderouter/routing/fallback.py +611 -0
- coderouter/translation/__init__.py +57 -0
- coderouter/translation/anthropic.py +204 -0
- coderouter/translation/convert.py +1291 -0
- coderouter/translation/tool_repair.py +236 -0
- coderouter_cli-1.7.0.dist-info/METADATA +509 -0
- coderouter_cli-1.7.0.dist-info/RECORD +43 -0
- coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
- coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
- coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
coderouter/cli_stats.py
ADDED
|
@@ -0,0 +1,751 @@
|
|
|
1
|
+
"""``coderouter stats`` — CLI / TUI for the metrics endpoint (v1.5-C).
|
|
2
|
+
|
|
3
|
+
This module is split into three layers so the core logic is testable
|
|
4
|
+
without a terminal:
|
|
5
|
+
|
|
6
|
+
1. **Data fetch** (:func:`fetch_snapshot`) — stdlib ``urllib`` GET of
|
|
7
|
+
``/metrics.json``. Returns parsed dict or :class:`FetchError` on transport /
|
|
8
|
+
parse error. No curses, no sleeping.
|
|
9
|
+
2. **Pure render layer** (:func:`build_provider_rows`,
|
|
10
|
+
:func:`build_gates_summary`, :func:`build_recent_rows`,
|
|
11
|
+
:func:`format_text`) — canonical ``dict → dataclass / str``
|
|
12
|
+
transforms. No I/O, no side effects — they just take a snapshot
|
|
13
|
+
and produce display-ready shapes.
|
|
14
|
+
3. **Drivers** (:func:`run_once`, :func:`run_tui`) — the actual user-
|
|
15
|
+
facing execution. ``run_once`` prints a single plain-text dump
|
|
16
|
+
(for scripts / non-tty); ``run_tui`` is the curses loop.
|
|
17
|
+
|
|
18
|
+
Why stdlib-only (no ``rich`` / ``textual``)
|
|
19
|
+
plan.md §12.3.5: dependencies go up with every library we add, and
|
|
20
|
+
the design memo explicitly calls out "stdlib ``curses`` + 1s clear-
|
|
21
|
+
redraw". ``curses`` is in stdlib on macOS and Linux (Windows would
|
|
22
|
+
need ``windows-curses`` but CodeRouter is primarily a local-dev tool
|
|
23
|
+
for Unix shells). ``rich`` adds ~500kB and a dependency, for which
|
|
24
|
+
the payoff — prettier borders — is not worth the 5-dep budget.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import json
|
|
30
|
+
import sys
|
|
31
|
+
import urllib.error
|
|
32
|
+
import urllib.request
|
|
33
|
+
from dataclasses import dataclass
|
|
34
|
+
from datetime import UTC, datetime
|
|
35
|
+
from typing import Any, Final
|
|
36
|
+
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Defaults
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
# Matches ``coderouter serve --host 127.0.0.1 --port 4000`` from cli.py.
# A deliberate choice to leave the default localhost-only — remote
# scraping goes through the Prometheus endpoint (v1.5-B), not this TUI.
DEFAULT_URL: Final[str] = "http://127.0.0.1:4000/metrics.json"

# 1-second refresh (design memo §12.3.5). Short enough to feel live, long
# enough that the HTTP overhead is negligible even on busy machines.
# Used as the default ``interval_s`` of ``run_tui``.
DEFAULT_INTERVAL_S: Final[float] = 1.0

# A 2-second HTTP timeout — the endpoint is on localhost, so anything
# slower than that means the server is wedged and the TUI should surface
# that rather than hang. Used as the default ``timeout_s`` of
# ``fetch_snapshot``.
_FETCH_TIMEOUT_S: Final[float] = 2.0

# Thresholds for the provider-health dot. Derived from "what an operator
# expects of a healthy provider" rather than a statistical definition —
# anything below 95% success merits attention. ``_compute_health`` compares
# ``ok / attempts`` against these: >= green floor → "green", >= yellow
# floor → "yellow", otherwise "red".
_HEALTH_GREEN_MIN_RATE: Final[float] = 0.95
_HEALTH_YELLOW_MIN_RATE: Final[float] = 0.80
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Structured display types
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass(frozen=True)
class ProviderRow:
    """Immutable display record for one line of the "Providers" panel.

    The fields correspond to the columns of the wireframe (§12.3.5.1).
    ``health`` is deliberately a plain string token (``green`` /
    ``yellow`` / ``red`` / ``gray``): the curses driver translates it
    into a color pair, so tests can build and compare rows without
    touching any curses constants.
    """

    name: str
    attempts: int
    ok: int
    failed: int
    failed_midstream: int
    last_error: str  # short one-line description or "-"
    health: str  # "green" | "yellow" | "red" | "gray"

    @property
    def ok_rate_pct(self) -> int:
        """Success percentage, ``ok / attempts * 100``, rounded to an int.

        A provider with no attempts reports 100 so the UI shows a
        neutral value instead of ``0%`` for something never tried.
        """
        return round(self.ok * 100 / self.attempts) if self.attempts > 0 else 100
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclass(frozen=True)
class GatesSummary:
    """Scalar counters backing the "Fallback & Gates" panel.

    This is an at-a-glance surface ("is any gate firing?") rather than a
    drill-down, so every headline value is a single number; the two
    ``*_breakdown`` dicts carry the per-key counts those totals were
    summed from.
    """

    total_requests: int
    total_failed: int
    fallback_rate_pct: float  # failed / total_requests * 100; 0.0 with no requests
    paid_gate_blocked: int
    degraded_total: int
    degraded_breakdown: dict[str, int]  # capability -> count
    filters_applied_total: int
    filters_breakdown: dict[str, int]  # filter name -> count
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass(frozen=True)
|
|
118
|
+
class RecentRow:
|
|
119
|
+
"""One entry from the recent-events ring buffer.
|
|
120
|
+
|
|
121
|
+
Driver decides coloring based on ``event`` (e.g. ``provider-failed``
|
|
122
|
+
gets a red cell on the status column).
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
ts: str # "HH:MM:SS" extracted from the full ts
|
|
126
|
+
event: str
|
|
127
|
+
provider: str
|
|
128
|
+
stream: bool | None
|
|
129
|
+
status: int | None # only populated for provider-failed*
|
|
130
|
+
is_failure: bool
|
|
131
|
+
|
|
132
|
+
@property
|
|
133
|
+
def status_text(self) -> str:
|
|
134
|
+
"""Short human label for the status column.
|
|
135
|
+
|
|
136
|
+
Maps each event to one of: ``ok`` (provider-ok), ``try``
|
|
137
|
+
(try-provider), ``FAIL`` / ``FAIL (<status>)`` for the failed
|
|
138
|
+
variants. The driver uses this in the recent panel cell.
|
|
139
|
+
"""
|
|
140
|
+
if self.event == "provider-ok":
|
|
141
|
+
return "ok"
|
|
142
|
+
if self.event == "try-provider":
|
|
143
|
+
return "try"
|
|
144
|
+
if self.event.startswith("provider-failed"):
|
|
145
|
+
return f"FAIL ({self.status})" if self.status is not None else "FAIL"
|
|
146
|
+
return self.event
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# 1. Data fetch
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass
class FetchError:
    """Failure result returned (not raised) by :func:`fetch_snapshot`.

    Holds a short, one-line, operator-readable reason intended for the
    TUI status area. Because it is a distinct type from the snapshot
    ``dict``, callers separate "fetch failed" from "have data" with a
    plain ``isinstance`` check.
    """

    message: str
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def fetch_snapshot(
    url: str, *, timeout_s: float = _FETCH_TIMEOUT_S
) -> dict[str, Any] | FetchError:
    """HTTP GET ``url`` and parse the body as a JSON object.

    Args:
        url: Address of the ``/metrics.json`` endpoint.
        timeout_s: Timeout in seconds handed to ``urllib``.

    Returns:
        The parsed ``dict`` on success; otherwise a :class:`FetchError`
        carrying a compact, operator-readable reason.

    Never raises — the TUI loop relies on this swallowing transport
    hiccups (a refused connection while the server is starting, for
    example) without crashing.
    """
    try:
        with urllib.request.urlopen(url, timeout=timeout_s) as resp:
            raw = resp.read()
    except urllib.error.URLError as exc:
        return FetchError(f"connection failed: {exc.reason}")
    except TimeoutError:  # pragma: no cover - only reproducible under load
        return FetchError("timeout")
    except Exception as exc:  # pragma: no cover - defensive catch-all
        return FetchError(f"fetch failed: {exc}")

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as exc:
        return FetchError(f"invalid JSON: {exc.msg}")
    except UnicodeDecodeError as exc:
        # ``json.loads`` decodes a bytes payload before parsing; a body that
        # is not valid UTF-8/16/32 raises UnicodeDecodeError, which is *not*
        # a JSONDecodeError and would otherwise escape the "never raises"
        # contract.
        return FetchError(f"invalid JSON: undecodable body ({exc.reason})")

    if not isinstance(data, dict):
        return FetchError("unexpected non-object response")
    return data
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
# 2. Pure render layer
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def build_provider_rows(snapshot: dict[str, Any]) -> list[ProviderRow]:
    """Transform the snapshot's ``providers`` list into rows sorted by name.

    Sorting is alphabetical (not by attempts) so re-runs show providers
    in a stable order — operators glancing at the TUI benefit from
    muscle memory ("local is the first row") over a "hottest-first"
    ordering that bounces around.

    A missing or ``null`` ``providers`` key yields an empty list, the
    same way the other builders treat their missing/``null`` sections.
    """
    rows: list[ProviderRow] = []
    # ``or []`` (rather than a ``.get`` default) also covers an explicit
    # JSON ``null``, which would otherwise fail on iteration.
    for entry in snapshot.get("providers") or []:
        attempts = int(entry.get("attempts", 0))
        outcomes = entry.get("outcomes", {}) or {}
        ok = int(outcomes.get("ok", 0))
        failed_midstream = int(outcomes.get("failed_midstream", 0))
        rows.append(
            ProviderRow(
                name=str(entry.get("name", "")),
                attempts=attempts,
                ok=ok,
                failed=int(outcomes.get("failed", 0)),
                failed_midstream=failed_midstream,
                last_error=_format_last_error(entry.get("last_error")),
                health=_compute_health(
                    attempts=attempts, ok=ok, failed_midstream=failed_midstream
                ),
            )
        )
    rows.sort(key=lambda r: r.name)
    return rows
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def build_gates_summary(snapshot: dict[str, Any]) -> GatesSummary:
    """Reduce the snapshot's ``counters`` block to a :class:`GatesSummary`.

    ``total_failed`` adds up ``failed`` and ``failed_midstream`` over
    every entry of ``counters["provider_outcomes"]``.
    ``fallback_rate_pct`` is that total as a percentage of
    ``requests_total``; with zero requests it is reported as 0.0 instead
    of dividing by zero, matching the TUI's "no data yet" state.
    """
    counters = snapshot.get("counters", {}) or {}
    total_requests = int(counters.get("requests_total", 0))

    per_provider = counters.get("provider_outcomes", {}) or {}
    total_failed = sum(
        int(outcome.get("failed", 0)) + int(outcome.get("failed_midstream", 0))
        for outcome in per_provider.values()
    )

    if total_requests > 0:
        fallback_rate_pct = total_failed * 100 / total_requests
    else:
        fallback_rate_pct = 0.0

    degraded = dict(counters.get("capability_degraded", {}) or {})
    filters = dict(counters.get("output_filter_applied", {}) or {})
    blocked = int(counters.get("chain_paid_gate_blocked_total", 0))

    return GatesSummary(
        total_requests=total_requests,
        total_failed=total_failed,
        fallback_rate_pct=fallback_rate_pct,
        paid_gate_blocked=blocked,
        degraded_total=sum(degraded.values()),
        degraded_breakdown=degraded,
        filters_applied_total=sum(filters.values()),
        filters_breakdown=filters,
    )
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def build_recent_rows(
    snapshot: dict[str, Any], *, failures_only: bool = False
) -> list[RecentRow]:
    """Transform the snapshot's ``recent`` ring into display rows.

    Args:
        snapshot: Parsed metrics snapshot.
        failures_only: Drives the ``[f]`` key binding in the TUI — when
            true, only ``provider-failed*`` events are returned so an
            operator debugging a fallback chain is not distracted by
            "happy path" ok lines.

    v1.5-E: when ``snapshot["config"]["display_timezone"]`` is a
    non-empty string, the ``ts`` column is rendered in that IANA zone.
    Unset → the raw ``HH:MM:SS`` from the ring. A zone name that cannot
    be loaded (unknown, or not a well-formed key) silently falls back
    to the raw time so the table still renders.
    """
    config = snapshot.get("config", {}) or {}
    tz_name = config.get("display_timezone")
    target_tz: ZoneInfo | None = None
    if isinstance(tz_name, str) and tz_name:
        try:
            target_tz = ZoneInfo(tz_name)
        except (ZoneInfoNotFoundError, ValueError, OSError):
            # ZoneInfoNotFoundError: unknown key. ValueError: key is not a
            # normalized relative path (absolute path, "..", …).
            # NOTE(review): OSError is defensive — some interpreter versions
            # reportedly surface keys that name a tzdata directory that way;
            # confirm against the supported Python versions.
            target_tz = None

    rows: list[RecentRow] = []
    for entry in snapshot.get("recent", []) or []:
        event = str(entry.get("event", ""))
        is_failure = event.startswith("provider-failed")
        if failures_only and not is_failure:
            continue
        ts_full = str(entry.get("ts", ""))
        status_raw = entry.get("status")
        stream = entry.get("stream")
        rows.append(
            RecentRow(
                ts=_format_ts_in_tz(ts_full, target_tz),
                event=event,
                provider=str(entry.get("provider", "")),
                # Anything that is not a real bool / int is shown as unknown.
                stream=bool(stream) if isinstance(stream, bool) else None,
                status=int(status_raw) if isinstance(status_raw, int) else None,
                is_failure=is_failure,
            )
        )
    return rows
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _format_ts_in_tz(ts_full: str, tz: ZoneInfo | None) -> str:
|
|
319
|
+
"""Render a ``YYYY-MM-DDTHH:MM:SS`` UTC stamp as ``HH:MM:SS`` in ``tz``.
|
|
320
|
+
|
|
321
|
+
When ``tz`` is ``None`` (unset / typo / zoneinfo missing), strips the
|
|
322
|
+
date prefix and returns the naive UTC time — this preserves the
|
|
323
|
+
pre-v1.5-E behavior. Anything that fails to parse as an ISO datetime
|
|
324
|
+
also falls back to the naive slice so the column never blanks.
|
|
325
|
+
"""
|
|
326
|
+
naive = ts_full.split("T", 1)[-1] if "T" in ts_full else ts_full
|
|
327
|
+
if tz is None or not ts_full:
|
|
328
|
+
return naive
|
|
329
|
+
try:
|
|
330
|
+
dt_utc = datetime.fromisoformat(ts_full).replace(tzinfo=UTC)
|
|
331
|
+
except ValueError:
|
|
332
|
+
return naive
|
|
333
|
+
return dt_utc.astimezone(tz).strftime("%H:%M:%S")
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def format_text(snapshot: dict[str, Any], *, width: int = 80) -> str:
    """Render a snapshot as the plain-text dump used by ``--once`` mode.

    The output has a header line, a dashed rule of ``min(width, 80)``
    characters, and three blocks mirroring the TUI panels (Providers /
    Fallback & Gates / Recent, the latter limited to the last 10
    events). It is intentionally a single function: ``--once`` is one
    atomic output a shell script can grep without caring about layout.
    """
    startup = snapshot.get("startup", {}) or {}
    config = snapshot.get("config", {}) or {}
    profile = str(
        startup.get("default_profile") or config.get("default_profile") or "?"
    )
    uptime_s = float(snapshot.get("uptime_s", 0.0))
    gates = build_gates_summary(snapshot)
    providers = build_provider_rows(snapshot)
    recent = build_recent_rows(snapshot)
    # v1.5-E: show the configured display zone in the header so a reader of
    # piped output knows which zone the Recent column uses ("UTC" if unset).
    tz_label = str(config.get("display_timezone") or "UTC")

    def _suffix(breakdown: dict[str, int]) -> str:
        # Parenthesised per-key counts, or nothing when the dict is empty.
        return f" ({_fmt_breakdown(breakdown)})" if breakdown else ""

    out: list[str] = [
        f"coderouter stats — profile: {profile} uptime: {_fmt_uptime(uptime_s)} "
        f"requests: {gates.total_requests} tz: {tz_label}",
        "-" * min(width, 80),
        "Providers",
        f" {'name':<22} {'att':>5} {'ok%':>5} {'failed':>7} {'last error':<25}",
    ]
    if providers:
        out.extend(
            f" {prov.name:<22} {prov.attempts:>5} {prov.ok_rate_pct:>5} "
            f"{prov.failed + prov.failed_midstream:>7} "
            f"{_truncate(prov.last_error, 25):<25}"
            for prov in providers
        )
    else:
        out.append(" (no requests seen yet)")

    out += [
        "",
        "Fallback & Gates",
        f" fallback rate: {gates.fallback_rate_pct:5.1f}% "
        f"({gates.total_failed}/{gates.total_requests})",
        f" paid-gate blocked: {gates.paid_gate_blocked}",
        f" capability degraded: {gates.degraded_total}"
        + _suffix(gates.degraded_breakdown),
        f" output-filter applied: {gates.filters_applied_total}"
        + _suffix(gates.filters_breakdown),
        "",
        "Recent",
    ]
    if recent:
        out.extend(
            f" {rec.ts:<8} {rec.provider:<22} {rec.status_text}"
            for rec in recent[-10:]
        )
    else:
        out.append(" (no events yet)")
    return "\n".join(out) + "\n"
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
# ---------------------------------------------------------------------------
|
|
412
|
+
# 3. Drivers
|
|
413
|
+
# ---------------------------------------------------------------------------
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def run_once(url: str) -> int:
    """Fetch a single snapshot and print it as plain text.

    :func:`main` also selects this path when stdout is not a TTY, so
    ``coderouter stats | grep foo`` works in scripts.

    Returns:
        0 after writing the dump to stdout; 2 when the fetch failed, in
        which case the reason is printed to stderr (the doctor
        "needs-tuning" convention of "non-fatal but actionable").
    """
    result = fetch_snapshot(url)
    if not isinstance(result, FetchError):
        sys.stdout.write(format_text(result))
        return 0
    print(f"coderouter stats: {result.message}", file=sys.stderr)
    return 2
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def run_tui(url: str, *, interval_s: float = DEFAULT_INTERVAL_S) -> int:  # pragma: no cover
    """curses driver. Refreshes roughly every ``interval_s`` seconds.

    Not unit-tested because curses needs a real terminal — this function
    stays thin (data fetch + :mod:`curses` bookkeeping) and the content
    construction lives in :func:`build_provider_rows` &c., which ARE
    tested.

    ``curses`` is imported lazily so merely importing this module does
    not require it.

    Keys: ``q`` quits (returns 0), ``p`` toggles pause, ``r`` fetches
    immediately, ``f`` toggles the failures-only filter.

    Args:
        url: Metrics endpoint passed to :func:`fetch_snapshot`.
        interval_s: Refresh period. It is implemented with
            ``curses.halfdelay``, which only accepts 1..255 tenths of a
            second, so the effective period is clamped to 0.1–25.5 s.
    """
    import curses

    # halfdelay() rejects values outside 1..255; clamp instead of crashing
    # on e.g. ``--interval 30``.
    tenths = min(255, max(1, int(interval_s * 10)))

    def _driver(stdscr: Any) -> int:
        try:
            curses.curs_set(0)
        except curses.error:
            # Terminal cannot hide the cursor — purely cosmetic, carry on.
            pass
        curses.use_default_colors()
        _init_color_pairs(curses)
        # halfdelay: getch blocks up to N tenths of a second before
        # returning -1, which is what paces the refresh loop.
        curses.halfdelay(tenths)

        paused = False
        failures_only = False
        last_snap: dict[str, Any] | FetchError = FetchError("connecting…")
        while True:
            if not paused:
                last_snap = fetch_snapshot(url)
            stdscr.erase()
            height, width = stdscr.getmaxyx()
            if isinstance(last_snap, FetchError):
                _draw_error_screen(curses, stdscr, last_snap, width=width)
            else:
                _draw_frame(
                    curses,
                    stdscr,
                    snapshot=last_snap,
                    width=width,
                    height=height,
                    paused=paused,
                    failures_only=failures_only,
                )
            stdscr.refresh()
            ch = stdscr.getch()
            if ch in (ord("q"), ord("Q")):
                return 0
            if ch in (ord("p"), ord("P")):
                paused = not paused
            if ch in (ord("r"), ord("R")):
                last_snap = fetch_snapshot(url)
            if ch in (ord("f"), ord("F")):
                failures_only = not failures_only
            # KEY_RESIZE handled implicitly: next loop re-reads getmaxyx

    return int(curses.wrapper(_driver))
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def main(argv_url: str, *, interval: float, once: bool) -> int:
    """Entry point invoked from :mod:`coderouter.cli`.

    The interactive TUI is used only when ``once`` is false *and* stdout
    is a TTY. Otherwise a single plain-text snapshot is printed, so
    piping to grep or redirecting to a file works without an extra flag,
    and an explicit ``--once`` is honoured on a TTY too.

    Returns:
        The exit code of whichever driver ran.
    """
    if not once and sys.stdout.isatty():
        return run_tui(argv_url, interval_s=interval)
    return run_once(argv_url)
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
# ---------------------------------------------------------------------------
|
|
504
|
+
# Curses drawing helpers (imported lazily inside run_tui so this module
|
|
505
|
+
# stays importable on systems without curses — e.g. windows CI runners).
|
|
506
|
+
# ---------------------------------------------------------------------------
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
# curses color-pair numbers used by the TUI. ``_init_color_pairs`` registers
# pairs 1-5 with curses; the draw helpers read them back via
# ``curses.color_pair(...)``. Kept as plain ints so this module does not need
# ``curses`` at import time.
_COLOR_GREEN_PAIR = 1
_COLOR_YELLOW_PAIR = 2
_COLOR_RED_PAIR = 3
_COLOR_GRAY_PAIR = 4
_COLOR_DIM_PAIR = 5
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def _init_color_pairs(curses: Any) -> None:  # pragma: no cover - curses-only
    """Register the five color pairs the TUI draws with.

    Each ``_COLOR_*_PAIR`` number is bound to a foreground color on the
    background ``-1``. The driver calls ``use_default_colors`` first, so
    ``-1`` means "terminal default" (honors the user's color scheme).
    """
    foregrounds = (
        (_COLOR_GREEN_PAIR, curses.COLOR_GREEN),
        (_COLOR_YELLOW_PAIR, curses.COLOR_YELLOW),
        (_COLOR_RED_PAIR, curses.COLOR_RED),
        (_COLOR_GRAY_PAIR, curses.COLOR_WHITE),
        (_COLOR_DIM_PAIR, -1),
    )
    for pair_number, foreground in foregrounds:
        curses.init_pair(pair_number, foreground, -1)
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def _color_for_health(curses: Any, token: str) -> int:  # pragma: no cover - curses-only
    """Translate a health token into a curses attribute.

    ``green`` and ``yellow`` map to their plain color pair, ``red`` to
    the red pair in bold, and any other token (including ``gray``) to
    the gray pair dimmed.
    """
    if token == "red":
        return int(curses.color_pair(_COLOR_RED_PAIR) | curses.A_BOLD)
    plain_pairs = {"green": _COLOR_GREEN_PAIR, "yellow": _COLOR_YELLOW_PAIR}
    if token in plain_pairs:
        return int(curses.color_pair(plain_pairs[token]))
    return int(curses.color_pair(_COLOR_GRAY_PAIR) | curses.A_DIM)
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def _draw_frame(  # pragma: no cover - curses-only
    curses: Any,
    stdscr: Any,
    *,
    snapshot: dict[str, Any],
    width: int,
    height: int,
    paused: bool,
    failures_only: bool,
) -> None:
    """Render one frame: header bar, the three panels, and the footer.

    The driver owns the bookkeeping (``paused`` / ``failures_only``) and
    passes it in; this function is purely draw-from-state. ``height`` /
    ``width`` come from ``getmaxyx`` each frame, so KEY_RESIZE is handled
    implicitly.

    The footer with the key hints is always drawn on the last line, even
    when the terminal is too short for every panel.
    """
    _draw_frame_panels(
        curses,
        stdscr,
        snapshot=snapshot,
        width=width,
        height=height,
        failures_only=failures_only,
    )

    # Footer / keybind hints
    footer = f" [q]uit [r]efresh [p]ause{' ✔' if paused else ''} [f]ailures{' ✔' if failures_only else ''} "
    try:
        stdscr.addnstr(height - 1, 0, footer.ljust(width), width, curses.A_REVERSE)
    except curses.error:
        # The full-width footer touches the window's lower-right cell;
        # curses prints the text and *then* reports an error because the
        # cursor cannot advance. The bar is already on screen, so ignore it.
        pass


def _draw_frame_panels(  # pragma: no cover - curses-only
    curses: Any,
    stdscr: Any,
    *,
    snapshot: dict[str, Any],
    width: int,
    height: int,
    failures_only: bool,
) -> None:
    """Draw the header bar and the providers / gates / recent panels.

    Returns early (leaving the remaining panels undrawn) as soon as the
    next panel would run into the last two lines of the window.
    """
    startup = snapshot.get("startup", {}) or {}
    config = snapshot.get("config", {}) or {}
    profile = str(
        startup.get("default_profile") or config.get("default_profile") or "?"
    )
    uptime = _fmt_uptime(float(snapshot.get("uptime_s", 0.0)))
    tz_label = str(config.get("display_timezone") or "UTC")
    gates = build_gates_summary(snapshot)
    providers = build_provider_rows(snapshot)
    recent = build_recent_rows(snapshot, failures_only=failures_only)

    row = 0
    header = (
        f" coderouter stats profile: {profile} uptime: {uptime} "
        f"requests: {gates.total_requests} tz: {tz_label} "
    )
    stdscr.addnstr(row, 0, header.ljust(width), width, curses.A_REVERSE)
    row += 1
    stdscr.addnstr(
        row,
        0,
        " providers ".ljust(width, "─"),
        width,
        curses.A_BOLD,
    )
    row += 1
    stdscr.addnstr(
        row,
        0,
        f" {'provider':<22} {'att':>5} {'ok%':>5} {'failed':>7} {'last error':<25}",
        width,
        curses.A_DIM,
    )
    row += 1
    for pr in providers:
        if row >= height - 2:
            break
        line = (
            f" {pr.name:<22} {pr.attempts:>5} {pr.ok_rate_pct:>4}% "
            f"{pr.failed + pr.failed_midstream:>7} {_truncate(pr.last_error, 25):<25}"
        )
        stdscr.addnstr(row, 0, line.ljust(width), width, _color_for_health(curses, pr.health))
        row += 1

    row += 1
    if row >= height - 2:
        return
    stdscr.addnstr(row, 0, " fallback / gates ".ljust(width, "─"), width, curses.A_BOLD)
    row += 1
    rate = gates.fallback_rate_pct
    rate_color = (
        _COLOR_GREEN_PAIR if rate < 5 else _COLOR_YELLOW_PAIR if rate < 20 else _COLOR_RED_PAIR
    )
    stdscr.addnstr(
        row,
        0,
        f" fallback rate: {rate:5.1f}% ({gates.total_failed}/{gates.total_requests})",
        width,
        int(curses.color_pair(rate_color)),
    )
    row += 1
    stdscr.addnstr(row, 0, f" paid-gate blocked: {gates.paid_gate_blocked}", width)
    row += 1
    stdscr.addnstr(
        row,
        0,
        f" capability degraded: {gates.degraded_total}"
        + (f" ({_fmt_breakdown(gates.degraded_breakdown)})" if gates.degraded_breakdown else ""),
        width,
    )
    row += 1
    stdscr.addnstr(
        row,
        0,
        f" output-filter applied: {gates.filters_applied_total}"
        + (f" ({_fmt_breakdown(gates.filters_breakdown)})" if gates.filters_breakdown else ""),
        width,
    )
    row += 2

    if row >= height - 2:
        return
    title = " recent (failures only) " if failures_only else " recent "
    stdscr.addnstr(row, 0, title.ljust(width, "─"), width, curses.A_BOLD)
    row += 1

    # Rows left for events, keeping one spare line above the footer.
    visible = height - row - 2
    if visible <= 0:
        # ``recent[-0:]`` would be the *whole* list (oldest first), so an
        # exactly-full screen must draw nothing rather than a stale entry.
        return
    for rr in recent[-visible:]:
        attr = int(curses.color_pair(_COLOR_RED_PAIR) | curses.A_BOLD) if rr.is_failure else 0
        line = f" {rr.ts:<8} {rr.provider:<22} {rr.status_text}"
        stdscr.addnstr(row, 0, line.ljust(width), width, attr)
        row += 1
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def _draw_error_screen(  # pragma: no cover - curses-only
    curses: Any, stdscr: Any, err: FetchError, *, width: int
) -> None:
    """Draw the minimal "cannot reach the endpoint" screen.

    Writes a reverse-video title bar on row 0, the fetch error message
    in red on row 2, and two dimmed troubleshooting hints on rows 4-5.
    """
    title_bar = " coderouter stats ".ljust(width)
    stdscr.addnstr(0, 0, title_bar, width, curses.A_REVERSE)

    text_width = width - 2  # body lines start at column 2
    red = int(curses.color_pair(_COLOR_RED_PAIR))
    stdscr.addnstr(2, 2, f"cannot reach metrics endpoint: {err.message}", text_width, red)

    hints = (
        (4, " - is the server running? try: coderouter serve"),
        (5, " - check --url"),
    )
    for line_no, hint in hints:
        stdscr.addnstr(line_no, 2, hint, text_width, curses.A_DIM)
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
# ---------------------------------------------------------------------------
|
|
679
|
+
# Tiny internal helpers
|
|
680
|
+
# ---------------------------------------------------------------------------
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
def _compute_health(*, attempts: int, ok: int, failed_midstream: int) -> str:
    """Turn a provider's counters into a health token.

    Returns ``"gray"`` when there were no attempts. Any mid-stream
    failure forces ``"red"`` regardless of the success rate — the client
    saw a partial response, which should draw an operator's eye even at
    low volume. Otherwise ``ok / attempts`` is compared against the
    green and yellow floors, falling through to ``"red"``.
    """
    if attempts <= 0:
        return "gray"
    if failed_midstream > 0:
        return "red"
    success_rate = ok / attempts
    tiers = (
        (_HEALTH_GREEN_MIN_RATE, "green"),
        (_HEALTH_YELLOW_MIN_RATE, "yellow"),
    )
    for floor, token in tiers:
        if success_rate >= floor:
            return token
    return "red"
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
def _format_last_error(raw: Any) -> str:
    """Condense a provider's last-error value into a short one-liner.

    ``raw`` is read as a dict with ``status`` and ``error`` keys;
    anything that is not a dict yields ``"-"``. The error text is
    truncated to 40 characters. Results, by what is present:

    * status and error → ``"<status> <error>"``
    * error only → ``"<error>"``
    * status only → ``"status=<status>"``
    * neither → ``"-"`` (keeps the column width stable)
    """
    if not isinstance(raw, dict):
        return "-"
    status = raw.get("status")
    error = str(raw.get("error") or "")
    has_status = status is not None
    if error:
        short = _truncate(error, 40)
        return f"{status} {short}" if has_status else short
    return f"status={status}" if has_status else "-"
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def _fmt_uptime(seconds: float) -> str:
|
|
724
|
+
"""Humanize uptime: "12s", "3m 04s", "1h 23m"."""
|
|
725
|
+
s = int(seconds)
|
|
726
|
+
if s < 60:
|
|
727
|
+
return f"{s}s"
|
|
728
|
+
if s < 3600:
|
|
729
|
+
m, r = divmod(s, 60)
|
|
730
|
+
return f"{m}m {r:02d}s"
|
|
731
|
+
h, r = divmod(s, 3600)
|
|
732
|
+
m = r // 60
|
|
733
|
+
return f"{h}h {m:02d}m"
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def _fmt_breakdown(counter: dict[str, int]) -> str:
|
|
737
|
+
"""Render a breakdown dict as ``k:v k2:v2`` with stable key order."""
|
|
738
|
+
return " ".join(f"{k}:{v}" for k, v in sorted(counter.items()))
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def _truncate(text: str, max_chars: int) -> str:
|
|
742
|
+
"""Trim text with an ellipsis when it exceeds ``max_chars``.
|
|
743
|
+
|
|
744
|
+
Used for the last-error column and recent-event error fields —
|
|
745
|
+
the single ``…`` terminator matches :mod:`coderouter.metrics.collector`'s
|
|
746
|
+
internal truncation so the TUI output is byte-aligned with the
|
|
747
|
+
snapshot payload.
|
|
748
|
+
"""
|
|
749
|
+
if len(text) <= max_chars:
|
|
750
|
+
return text
|
|
751
|
+
return text[: max_chars - 1] + "…"
|