coderouter-cli 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. coderouter/__init__.py +17 -0
  2. coderouter/__main__.py +6 -0
  3. coderouter/adapters/__init__.py +23 -0
  4. coderouter/adapters/anthropic_native.py +502 -0
  5. coderouter/adapters/base.py +220 -0
  6. coderouter/adapters/openai_compat.py +395 -0
  7. coderouter/adapters/registry.py +17 -0
  8. coderouter/cli.py +345 -0
  9. coderouter/cli_stats.py +751 -0
  10. coderouter/config/__init__.py +10 -0
  11. coderouter/config/capability_registry.py +339 -0
  12. coderouter/config/env_file.py +295 -0
  13. coderouter/config/loader.py +73 -0
  14. coderouter/config/schemas.py +515 -0
  15. coderouter/data/__init__.py +7 -0
  16. coderouter/data/model-capabilities.yaml +86 -0
  17. coderouter/doctor.py +1596 -0
  18. coderouter/env_security.py +434 -0
  19. coderouter/errors.py +29 -0
  20. coderouter/ingress/__init__.py +5 -0
  21. coderouter/ingress/anthropic_routes.py +205 -0
  22. coderouter/ingress/app.py +144 -0
  23. coderouter/ingress/dashboard_routes.py +493 -0
  24. coderouter/ingress/metrics_routes.py +92 -0
  25. coderouter/ingress/openai_routes.py +153 -0
  26. coderouter/logging.py +315 -0
  27. coderouter/metrics/__init__.py +39 -0
  28. coderouter/metrics/collector.py +471 -0
  29. coderouter/metrics/prometheus.py +221 -0
  30. coderouter/output_filters.py +407 -0
  31. coderouter/routing/__init__.py +13 -0
  32. coderouter/routing/auto_router.py +244 -0
  33. coderouter/routing/capability.py +285 -0
  34. coderouter/routing/fallback.py +611 -0
  35. coderouter/translation/__init__.py +57 -0
  36. coderouter/translation/anthropic.py +204 -0
  37. coderouter/translation/convert.py +1291 -0
  38. coderouter/translation/tool_repair.py +236 -0
  39. coderouter_cli-1.7.0.dist-info/METADATA +509 -0
  40. coderouter_cli-1.7.0.dist-info/RECORD +43 -0
  41. coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
  42. coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
  43. coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,92 @@
1
+ """Metrics endpoint — ``GET /metrics.json`` (v1.5-A).
2
+
3
+ The endpoint returns a JSON-safe snapshot from the process-global
4
+ :class:`coderouter.metrics.MetricsCollector`. It is mounted at the
5
+ root (no ``/v1`` prefix) because the metrics payload is not part of
6
+ the OpenAI / Anthropic API surface and because Prometheus-shaped
7
+ exporters conventionally live at ``/metrics`` on the root.
8
+
9
+ v1.5 scope (plan.md §12.3.4)
10
+ - ``GET /metrics.json`` — JSON shape, internal / dashboard consumer.
11
+ - ``GET /metrics`` — v1.5-B: Prometheus text exposition
12
+ format, content-type ``text/plain; version=0.0.4; charset=utf-8``.
13
+ Same collector singleton as ``/metrics.json``.
14
+ - ``GET /dashboard`` — HTML one-pager. Lands in v1.5-D.
15
+
16
+ The JSON handler merges a little context from ``app.state`` (namely the
17
+ resolved config's allow_paid + paid-vs-free provider classification)
18
+ so the dashboard can compute the "local / free / paid" usage-mix
19
+ without each UI re-reading providers.yaml. The Prometheus handler
20
+ stays strict-spec — only the metrics payload, no extra stanzas — so
21
+ ``promtool check metrics`` round-trips cleanly.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from typing import Any
27
+
28
+ from fastapi import APIRouter, Request
29
+ from fastapi.responses import PlainTextResponse
30
+
31
+ from coderouter.metrics import format_prometheus, get_collector
32
+
33
+ router = APIRouter()
34
+
35
+ # Prometheus text exposition v0.0.4 content type. Prom parsers will fall
36
+ # back to plain ``text/plain`` if missing, but being explicit pins the
37
+ # negotiated media type when a Grafana Agent or OTel collector probes us.
38
+ _PROM_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8"
39
+
40
+
41
@router.get("/metrics.json")
async def metrics_json(request: Request) -> dict[str, Any]:
    """Return the current MetricsCollector snapshot as JSON.

    A ``config`` stanza sourced from ``app.state.config`` is merged into
    the snapshot. That data is static for the lifetime of the process
    (providers.yaml is loaded once at startup), so re-emitting it per
    request is cheap; the dashboard uses it to classify providers into
    local / free / paid for the usage-mix bar without a second endpoint
    round-trip.
    """
    snapshot = get_collector().snapshot()

    config = getattr(request.app.state, "config", None)
    if config is None:
        return snapshot

    provider_rows = [
        {
            "name": provider.name,
            "kind": provider.kind,
            "paid": provider.paid,
            # ``HttpUrl`` is not JSON-serializable directly in Pydantic v2;
            # the cast also keeps the shape stable if Pydantic switches types.
            "base_url": str(provider.base_url),
        }
        for provider in config.providers
    ]
    profile_rows = [
        {"name": profile.name, "providers": list(profile.providers)}
        for profile in config.profiles
    ]
    snapshot["config"] = {
        "default_profile": config.default_profile,
        "allow_paid": config.allow_paid,
        # v1.5-E: display-only TZ hint for /dashboard + coderouter stats.
        # Stays ``None`` when unset so clients can keep their UTC fallback
        # without probing for a string value.
        "display_timezone": config.display_timezone,
        "providers": provider_rows,
        "profiles": profile_rows,
    }
    return snapshot
79
+
80
+
81
@router.get("/metrics", response_class=PlainTextResponse)
async def metrics_prometheus() -> PlainTextResponse:
    """Prometheus text exposition endpoint (v1.5-B).

    Convention-compliant path for Prometheus scrapers. Renders the same
    counters the JSON snapshot surfaces, formatted per
    https://prometheus.io/docs/instrumenting/exposition_formats/ .
    Complements :func:`metrics_json` rather than replacing it — JSON
    serves the internal UI, this serves external time-series databases.
    """
    snapshot = get_collector().snapshot()
    return PlainTextResponse(
        content=format_prometheus(snapshot),
        media_type=_PROM_CONTENT_TYPE,
    )
@@ -0,0 +1,153 @@
1
+ """OpenAI-compatible routes: POST /v1/chat/completions (+ minimal /v1/models).
2
+
3
+ Profile selection precedence (first hit wins):
4
+ 1. JSON body field: {"profile": "fast", ...}
5
+ 2. HTTP header: X-CodeRouter-Profile: fast
6
+ 3. HTTP header: X-CodeRouter-Mode: coding (v0.6-D, via mode_aliases)
7
+ 4. auto_router (v1.6-A, fires only when default_profile == "auto")
8
+ 5. config.default_profile
9
+
10
+ Body wins over header so that a caller who can embed the field has final say
11
+ (useful when a single client talks to multiple routers behind a proxy that
12
+ rewrites headers). Mode sits below Profile because Mode is an INTENT
13
+ (``coding`` / ``long`` / ``fast``) and Profile is the concrete
14
+ implementation — when a caller specifies the concrete profile, respect it.
15
+
16
+ The auto router slot is intentionally narrow: it only fires when the operator
17
+ opts in via ``default_profile: auto`` (the reserved sentinel). For every other
18
+ configuration the chain behaves exactly as in v0.6-D — unresolved requests fall
19
+ through to the engine, which applies ``config.default_profile``.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import json
25
+ import time
26
+ from collections.abc import AsyncIterator
27
+ from typing import Any
28
+
29
+ from fastapi import APIRouter, Header, HTTPException, Request
30
+ from fastapi.responses import StreamingResponse
31
+
32
+ from coderouter.adapters.base import ChatRequest
33
+ from coderouter.logging import get_logger
34
+ from coderouter.routing import FallbackEngine, NoProvidersAvailableError
35
+ from coderouter.routing.auto_router import RESERVED_PROFILE_NAME, classify
36
+
37
+ router = APIRouter()
38
+ logger = get_logger(__name__)
39
+
40
+ _PROFILE_HEADER = "x-coderouter-profile"
41
+ _MODE_HEADER = "x-coderouter-mode"
42
+
43
+
44
@router.get("/models")
async def list_models(request: Request) -> dict[str, object]:
    """Minimal /v1/models so OpenAI SDKs that probe it don't choke."""
    config = request.app.state.config
    # One entry per configured provider; the provider name doubles as
    # the model id on this surface.
    model_rows = [
        {
            "id": provider.name,
            "object": "model",
            "created": int(time.time()),
            "owned_by": "coderouter",
        }
        for provider in config.providers
    ]
    return {"object": "list", "data": model_rows}
60
+
61
+
62
@router.post("/chat/completions", response_model=None)
async def chat_completions(
    payload: dict[str, Any],
    request: Request,
    x_coderouter_profile: str | None = Header(default=None, alias=_PROFILE_HEADER),
    x_coderouter_mode: str | None = Header(default=None, alias=_MODE_HEADER),
) -> StreamingResponse | dict[str, Any]:
    """OpenAI Chat Completions endpoint.

    Validates the body into :class:`ChatRequest`, resolves the profile
    per the precedence described in the module docstring, and dispatches
    to the engine. Streaming requests return a :class:`StreamingResponse`
    that serializes chunks onto the OpenAI SSE wire (``data: {json}`` +
    trailing ``data: [DONE]``); non-streaming requests return the JSON
    response body.

    Raises (as HTTP responses):
        422: body fails :class:`ChatRequest` validation.
        400: unknown mode or unknown profile name.
        502: no provider in the chain could serve a non-streaming request.
    """
    engine: FallbackEngine = request.app.state.engine
    config = request.app.state.config

    # Accept extension fields (e.g. "profile") without rejecting
    try:
        chat_req = ChatRequest.model_validate(payload)
    except Exception as exc:  # pydantic.ValidationError, etc.
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    # Header-based override (body wins if both are set — see module docstring)
    if chat_req.profile is None and x_coderouter_profile:
        chat_req.profile = x_coderouter_profile

    # v0.6-D: ``X-CodeRouter-Mode`` → mode_aliases → profile. Only kicks
    # in when neither body nor X-CodeRouter-Profile already nailed down
    # the profile (profile > mode precedence).
    if chat_req.profile is None and x_coderouter_mode:
        try:
            chat_req.profile = config.resolve_mode(x_coderouter_mode)
        except KeyError as exc:
            available = sorted(config.mode_aliases.keys())
            raise HTTPException(
                status_code=400,
                detail=(f"unknown mode {x_coderouter_mode!r}. available modes: {available}"),
            ) from exc
        logger.info(
            "mode-alias-resolved",
            extra={"mode": x_coderouter_mode, "profile": chat_req.profile},
        )

    # v1.6-A: auto router slot. Only fires when the operator opted in by
    # setting ``default_profile: auto`` and no higher-priority caller signal
    # (body / profile header / mode header) already nailed down a profile.
    # When inactive, the engine still falls through to
    # ``config.default_profile`` on its own — same semantics as pre-v1.6.
    if chat_req.profile is None and config.default_profile == RESERVED_PROFILE_NAME:
        chat_req.profile = classify(payload, config)

    # Validate profile exists before we kick off any upstream call
    if chat_req.profile is not None:
        try:
            config.profile_by_name(chat_req.profile)
        except KeyError as exc:
            available = [p.name for p in config.profiles]
            raise HTTPException(
                status_code=400,
                detail=(f"unknown profile {chat_req.profile!r}. available: {available}"),
            ) from exc

    # Streaming: hand the generator to Starlette. Once streaming starts,
    # errors can no longer change the HTTP status — _sse_iterator reports
    # provider exhaustion inside the SSE channel instead.
    if chat_req.stream:
        return StreamingResponse(
            _sse_iterator(engine, chat_req),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
        )

    # Non-streaming: chain exhaustion maps to 502 (upstream failure).
    try:
        response = await engine.generate(chat_req)
    except NoProvidersAvailableError as exc:
        raise HTTPException(status_code=502, detail=str(exc)) from exc

    return response.model_dump(exclude_none=True)
140
+
141
+
142
+ async def _sse_iterator(engine: FallbackEngine, chat_req: ChatRequest) -> AsyncIterator[str]:
143
+ """Wrap the engine's stream into SSE wire format."""
144
+ try:
145
+ async for chunk in engine.stream(chat_req):
146
+ data = chunk.model_dump(exclude_none=True)
147
+ yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
148
+ yield "data: [DONE]\n\n"
149
+ except NoProvidersAvailableError as exc:
150
+ # Encode the error inside the SSE channel — OpenAI clients handle this
151
+ err = {"error": {"message": str(exc), "type": "no_providers_available"}}
152
+ yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"
153
+ yield "data: [DONE]\n\n"
coderouter/logging.py ADDED
@@ -0,0 +1,315 @@
1
+ """Tiny structured-logging helper.
2
+
3
+ We don't pull in structlog/loguru — see plan.md §5.4. stdlib logging + a
4
+ custom formatter that emits JSON lines is enough for v0.1.
5
+
6
+ v0.5.1 additions
7
+ ``CapabilityDegradedReason`` / ``CapabilityDegradedPayload`` /
8
+ ``log_capability_degraded`` are the typed contract for the
9
+ ``capability-degraded`` log line (v0.5 gate trio). They live here —
10
+ rather than in ``coderouter/routing/capability.py`` where they fit
11
+ semantically — because (a) importing anything from the ``routing``
12
+ package eagerly triggers ``routing/__init__.py`` which pulls
13
+ ``FallbackEngine`` and creates a cycle with adapter modules that
14
+ want to emit the same log, and (b) logging.py is a dependency-free
15
+ leaf, so it is the safest home for a cross-cutting log shape.
16
+ ``capability.py`` re-exports all three for discoverability.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import logging
23
+ import sys
24
+ from typing import Any, Literal, TypedDict
25
+
26
+
27
class JsonLineFormatter(logging.Formatter):
    """Serialize every log record as a single JSON line."""

    def format(self, record: logging.LogRecord) -> str:
        """Render *record* as one JSON-encoded line.

        The standard ``logging`` bookkeeping attributes (levelname,
        funcName, lineno, …) are filtered out; anything attached via
        ``extra={...}`` survives, so a structured call like
        ``logger.info("evt", extra={"provider": "ollama"})`` surfaces
        ``"provider": "ollama"`` verbatim in the output line.
        """
        # Attributes every LogRecord carries out of the box; anything
        # else in ``record.__dict__`` came from the caller's ``extra``.
        stock_attrs = {
            "args", "asctime", "created", "exc_info", "exc_text",
            "filename", "funcName", "levelname", "levelno", "lineno",
            "message", "module", "msecs", "msg", "name", "pathname",
            "process", "processName", "relativeCreated", "stack_info",
            "thread", "threadName", "taskName",
        }
        line: dict[str, Any] = {
            "ts": self.formatTime(record, datefmt="%Y-%m-%dT%H:%M:%S"),
            "level": record.levelname,
            "logger": record.name,
            "msg": record.getMessage(),
        }
        line.update(
            (key, val)
            for key, val in record.__dict__.items()
            if key not in stock_attrs
        )
        if record.exc_info:
            line["exc"] = self.formatException(record.exc_info)
        return json.dumps(line, ensure_ascii=False)
77
+
78
+
79
def configure_logging(level: str = "INFO") -> None:
    """Install JSON-line logging on the root logger. Idempotent."""
    root = logging.getLogger()
    root.setLevel(level.upper())
    # Strip any pre-existing handlers so repeated calls (e.g. dev-server
    # reload) never stack duplicates.
    while root.handlers:
        root.removeHandler(root.handlers[0])
    json_handler = logging.StreamHandler(sys.stderr)
    json_handler.setFormatter(JsonLineFormatter())
    root.addHandler(json_handler)
89
+
90
+
91
def get_logger(name: str) -> logging.Logger:
    """Return the stdlib logger for *name*.

    Deliberately a one-line indirection over :func:`logging.getLogger`
    so modules import from :mod:`coderouter.logging` rather than the
    stdlib directly — any future logger customization (tags, adapters,
    …) then lands in exactly one place.
    """
    return logging.getLogger(name)
97
+
98
+
99
# ---------------------------------------------------------------------------
# v0.5.1: capability-degraded log shape
#
# Single chokepoint for the log line emitted by the v0.5 capability gates
# (thinking / cache_control / reasoning). See the module docstring for why
# this lives in logging.py rather than in capability.py.
# ---------------------------------------------------------------------------

CapabilityDegradedReason = Literal[
    "provider-does-not-support",
    "translation-lossy",
    "non-standard-field",
]
"""Why a capability was degraded.

- ``provider-does-not-support``: the provider's wire format would 400 on
  the field. v0.5-A thinking gate; the request-side strip happens before
  the upstream call.
- ``translation-lossy``: the field has no equivalent in the target wire
  format, so it is dropped during translation. v0.5-B cache_control;
  observability only — the gate itself strips nothing (the translation
  layer already drops the marker).
- ``non-standard-field``: upstream emits a field that is not in the spec
  the ingress speaks, so it is stripped at the response-side boundary.
  v0.5-C reasoning field.
"""


class CapabilityDegradedPayload(TypedDict):
    """Structured shape of the ``capability-degraded`` log record.

    Fields
        provider: the ``name:`` of the ProviderConfig that degraded, so
            operators can correlate with the ``provider-failed`` /
            ``provider-ok`` lines sharing that key.
        dropped: capability names affected. Single-element today
            (``["thinking"]`` / ``["cache_control"]`` / ``["reasoning"]``)
            but typed as a list so one call can report several
            simultaneous drops later without a schema break.
        reason: see ``CapabilityDegradedReason``.
    """

    provider: str
    dropped: list[str]
    reason: CapabilityDegradedReason


def log_capability_degraded(
    logger: logging.Logger,
    *,
    provider: str,
    dropped: list[str],
    reason: CapabilityDegradedReason,
) -> None:
    """Emit a ``capability-degraded`` log record with the unified shape.

    Single chokepoint for the log. Keyword-only arguments force callers
    through the TypedDict contract at the static-type level. The
    ``logger`` is passed in so the record's ``logger`` name (captured by
    JsonLineFormatter) reflects the site of the degradation —
    request-side gates emit under ``coderouter.routing.fallback``,
    response-side under ``coderouter.adapters.openai_compat`` — a useful
    distinction when reading the log alongside the surrounding
    ``try-provider`` / ``provider-ok`` trail.
    """
    record: CapabilityDegradedPayload = {
        "provider": provider,
        "dropped": dropped,
        "reason": reason,
    }
    logger.info("capability-degraded", extra=record)
170
+
171
+
172
# ---------------------------------------------------------------------------
# v0.6-C: chain-paid-gate-blocked log shape
#
# Motivation (plan.md §9.3 #3, "declarative ALLOW_PAID gate"):
#   v0.1 already filters ``paid: true`` providers from the chain when
#   ``allow_paid=False`` (per-provider INFO ``skip-paid-provider``), but
#   when the gate filters the ENTIRE chain down to empty, the operator's
#   only symptom is a generic ``NoProvidersAvailableError``. A dedicated
#   aggregate warn makes the gate "declarative" in the same sense as the
#   v0.5 capability gates: the rule is visible in one line.
#
# Scope:
#   - Fires once per request (the 4 engine entry points), and only when
#     the chain resolves to ZERO adapters AND at least one provider was
#     filtered out by the paid gate. Mixed chains where a free provider
#     survives stay quiet and proceed into the normal try-provider /
#     provider-failed trail.
#   - ``skip-paid-provider`` is still emitted per-provider at INFO, so
#     per-provider traceability stays intact; this warn sits at a coarser
#     granularity (one line per blocked chain).
# ---------------------------------------------------------------------------

_DEFAULT_PAID_GATE_HINT: str = (
    "set ALLOW_PAID=true, mark a provider paid=false, "
    "or add a free provider to this profile's chain"
)


class ChainPaidGateBlockedPayload(TypedDict):
    """Structured shape of the ``chain-paid-gate-blocked`` log record.

    Fields
        profile: the active profile name (resolved — i.e. after falling
            back to ``default_profile`` — not the raw user-supplied
            value).
        blocked_providers: providers on this chain that were ``paid:
            true`` and filtered out by the gate, in chain order (matching
            what the individual ``skip-paid-provider`` INFO lines
            report).
        hint: one-line remediation suggestion — stable text so it can be
            grepped, overridable at the call site when context-specific
            advice is warranted.
    """

    profile: str
    blocked_providers: list[str]
    hint: str


def log_chain_paid_gate_blocked(
    logger: logging.Logger,
    *,
    profile: str,
    blocked_providers: list[str],
    hint: str = _DEFAULT_PAID_GATE_HINT,
) -> None:
    """Emit a ``chain-paid-gate-blocked`` warn with the unified shape.

    Single chokepoint mirroring :func:`log_capability_degraded`. Logged
    at WARNING (not INFO) because an empty chain is always a config
    problem the operator needs to see, whereas the per-provider
    ``skip-paid-provider`` can stay at INFO (the chain as a whole may
    still be viable).
    """
    record: ChainPaidGateBlockedPayload = {
        "profile": profile,
        "blocked_providers": blocked_providers,
        "hint": hint,
    }
    logger.warning("chain-paid-gate-blocked", extra=record)
244
+
245
+
246
# ---------------------------------------------------------------------------
# v1.0-A: output-filter-applied log shape
#
# Motivation (plan.md §10.2 "output cleaning" / v0.7 retrospective
# "transformations need a probe"):
#   ``output_filters`` is an operator opt-in (declared in providers.yaml)
#   rather than a passive / silent strip, so it does not fit the
#   ``capability-degraded`` vocabulary — nothing is "degraded" when a
#   user explicitly asked for scrubbing. A dedicated typed log line keeps
#   the observability surface legible: grep ``output-filter-applied`` to
#   see exactly when a filter fired, for which provider, via which
#   filters.
#
# Scope:
#   - Fires ONCE per generate()/stream() call (log-once, mirroring the
#     v0.5-C reasoning-strip dedupe).
#   - Only fires when at least one filter actually modified the stream;
#     a chain that is configured but never triggered stays quiet.
# ---------------------------------------------------------------------------


class OutputFilterAppliedPayload(TypedDict):
    """Structured shape of the ``output-filter-applied`` log record.

    Fields
        provider: the ``name:`` of the ProviderConfig whose adapter ran
            the chain — correlates with the surrounding ``provider-ok`` /
            ``provider-failed`` log lines.
        filters: names of the filters that actually modified the stream
            (a subset of the configured chain, preserving declaration
            order). Single-entry today when only ``strip_thinking``
            triggers, multi-entry once an operator enables two or more.
        streaming: True when emitted from the streaming path, False from
            the non-streaming path — lets an operator distinguish
            "filter fired mid-stream" from "filter fired on the final
            body" without cross-referencing request metadata.
    """

    provider: str
    filters: list[str]
    streaming: bool


def log_output_filter_applied(
    logger: logging.Logger,
    *,
    provider: str,
    filters: list[str],
    streaming: bool,
) -> None:
    """Emit an ``output-filter-applied`` INFO record.

    Single chokepoint mirroring :func:`log_capability_degraded`; called
    at most once per request/stream (the adapter threads a dedupe flag
    on the enclosing call). ``filters`` SHOULD be the subset that
    actually modified text (see ``OutputFilterChain.applied_filters``),
    not the declared chain — so a chain of ``[strip_thinking,
    strip_stop_markers]`` where only the first triggers logs
    ``filters=["strip_thinking"]``.
    """
    record: OutputFilterAppliedPayload = {
        "provider": provider,
        "filters": filters,
        "streaming": streaming,
    }
    logger.info("output-filter-applied", extra=record)
@@ -0,0 +1,39 @@
1
+ """CodeRouter metrics collection (v1.5-A).
2
+
3
+ The metrics layer taps the existing structured-logging stream rather than
4
+ adding new instrumentation hooks throughout the routing/adapter code.
5
+ Rationale (plan.md §12.3.1): every metric the v1.5 dashboard needs is
6
+ already in a ``capability-degraded`` / ``provider-ok`` / ``try-provider``
7
+ / ``output-filter-applied`` / ``chain-paid-gate-blocked`` / ``skip-paid-
8
+ provider`` / ``provider-failed`` record — so wiring a
9
+ ``logging.Handler`` subclass onto the root logger gives us lossless
10
+ collection with zero risk of regression.
11
+
12
+ Public surface
13
+ :class:`MetricsCollector`
14
+ ``logging.Handler`` subclass that maintains in-memory counters,
15
+ last-error snapshots per provider, and a ring buffer of recent
16
+ events. ``snapshot()`` returns a JSON-safe dict consumed by the
17
+ ``/metrics.json`` endpoint.
18
+
19
+ :func:`get_collector` / :func:`install_collector`
20
+ Module-level singleton accessors. The ingress ``create_app``
21
+ lifespan calls ``install_collector()`` at startup; ``/metrics.json``
22
+ and tests read via ``get_collector()``. Idempotent.
23
+ """
24
+
25
+ from coderouter.metrics.collector import (
26
+ MetricsCollector,
27
+ get_collector,
28
+ install_collector,
29
+ uninstall_collector,
30
+ )
31
+ from coderouter.metrics.prometheus import format_prometheus
32
+
33
+ __all__ = [
34
+ "MetricsCollector",
35
+ "format_prometheus",
36
+ "get_collector",
37
+ "install_collector",
38
+ "uninstall_collector",
39
+ ]