coderouter-cli 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/__init__.py +17 -0
- coderouter/__main__.py +6 -0
- coderouter/adapters/__init__.py +23 -0
- coderouter/adapters/anthropic_native.py +502 -0
- coderouter/adapters/base.py +220 -0
- coderouter/adapters/openai_compat.py +395 -0
- coderouter/adapters/registry.py +17 -0
- coderouter/cli.py +345 -0
- coderouter/cli_stats.py +751 -0
- coderouter/config/__init__.py +10 -0
- coderouter/config/capability_registry.py +339 -0
- coderouter/config/env_file.py +295 -0
- coderouter/config/loader.py +73 -0
- coderouter/config/schemas.py +515 -0
- coderouter/data/__init__.py +7 -0
- coderouter/data/model-capabilities.yaml +86 -0
- coderouter/doctor.py +1596 -0
- coderouter/env_security.py +434 -0
- coderouter/errors.py +29 -0
- coderouter/ingress/__init__.py +5 -0
- coderouter/ingress/anthropic_routes.py +205 -0
- coderouter/ingress/app.py +144 -0
- coderouter/ingress/dashboard_routes.py +493 -0
- coderouter/ingress/metrics_routes.py +92 -0
- coderouter/ingress/openai_routes.py +153 -0
- coderouter/logging.py +315 -0
- coderouter/metrics/__init__.py +39 -0
- coderouter/metrics/collector.py +471 -0
- coderouter/metrics/prometheus.py +221 -0
- coderouter/output_filters.py +407 -0
- coderouter/routing/__init__.py +13 -0
- coderouter/routing/auto_router.py +244 -0
- coderouter/routing/capability.py +285 -0
- coderouter/routing/fallback.py +611 -0
- coderouter/translation/__init__.py +57 -0
- coderouter/translation/anthropic.py +204 -0
- coderouter/translation/convert.py +1291 -0
- coderouter/translation/tool_repair.py +236 -0
- coderouter_cli-1.7.0.dist-info/METADATA +509 -0
- coderouter_cli-1.7.0.dist-info/RECORD +43 -0
- coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
- coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
- coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Metrics endpoint — ``GET /metrics.json`` (v1.5-A).
|
|
2
|
+
|
|
3
|
+
The endpoint returns a JSON-safe snapshot from the process-global
|
|
4
|
+
:class:`coderouter.metrics.MetricsCollector`. It is mounted at the
|
|
5
|
+
root (no ``/v1`` prefix) because the metrics payload is not part of
|
|
6
|
+
the OpenAI / Anthropic API surface and because Prometheus-shaped
|
|
7
|
+
exporters conventionally live at ``/metrics`` on the root.
|
|
8
|
+
|
|
9
|
+
v1.5 scope (plan.md §12.3.4)
|
|
10
|
+
- ``GET /metrics.json`` — JSON shape, internal / dashboard consumer.
|
|
11
|
+
- ``GET /metrics`` — v1.5-B: Prometheus text exposition
|
|
12
|
+
format, content-type ``text/plain; version=0.0.4; charset=utf-8``.
|
|
13
|
+
Same collector singleton as ``/metrics.json``.
|
|
14
|
+
- ``GET /dashboard`` — HTML one-pager. Lands in v1.5-D.
|
|
15
|
+
|
|
16
|
+
The JSON handler merges a little context from ``app.state`` (namely the
|
|
17
|
+
resolved config's allow_paid + paid-vs-free provider classification)
|
|
18
|
+
so the dashboard can compute the "local / free / paid" usage-mix
|
|
19
|
+
without each UI re-reading providers.yaml. The Prometheus handler
|
|
20
|
+
stays strict-spec — only the metrics payload, no extra stanzas — so
|
|
21
|
+
``promtool check metrics`` round-trips cleanly.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
from fastapi import APIRouter, Request
|
|
29
|
+
from fastapi.responses import PlainTextResponse
|
|
30
|
+
|
|
31
|
+
from coderouter.metrics import format_prometheus, get_collector
|
|
32
|
+
|
|
33
|
+
# Router carrying the ``/metrics.json`` and ``/metrics`` handlers below.
router = APIRouter()

# Prometheus text exposition v0.0.4 content type. Prom parsers will fall
# back to plain ``text/plain`` if missing, but being explicit pins the
# negotiated media type when a Grafana Agent or OTel collector probes us.
_PROM_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@router.get("/metrics.json")
async def metrics_json(request: Request) -> dict[str, Any]:
    """Serve the collector's current snapshot as a JSON document.

    When ``app.state.config`` is set, a ``config`` stanza is attached to
    the snapshot: default profile, ``allow_paid`` flag, display timezone,
    and a summary of every provider and profile. The config is loaded once
    at startup, so re-emitting it on each request is cheap, and it lets
    the dashboard classify providers (local / free / paid) without a
    second round-trip.

    Returns:
        The snapshot dict, with the extra ``config`` key when a config is
        available on the application state.
    """
    snapshot = get_collector().snapshot()

    config = getattr(request.app.state, "config", None)
    if config is None:
        # No config attached to the app: return the bare snapshot.
        return snapshot

    stanza: dict[str, Any] = {
        "default_profile": config.default_profile,
        "allow_paid": config.allow_paid,
        # v1.5-E: display-only TZ hint for /dashboard + coderouter stats.
        # Left as ``None`` when unset so clients keep their UTC fallback.
        "display_timezone": config.display_timezone,
    }

    provider_rows = []
    for provider in config.providers:
        provider_rows.append(
            {
                "name": provider.name,
                "kind": provider.kind,
                "paid": provider.paid,
                # ``HttpUrl`` is not directly JSON-serializable in
                # Pydantic v2; the cast also keeps the shape stable.
                "base_url": str(provider.base_url),
            }
        )
    stanza["providers"] = provider_rows

    profile_rows = []
    for profile in config.profiles:
        profile_rows.append(
            {"name": profile.name, "providers": list(profile.providers)}
        )
    stanza["profiles"] = profile_rows

    snapshot["config"] = stanza
    return snapshot
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@router.get("/metrics", response_class=PlainTextResponse)
async def metrics_prometheus() -> PlainTextResponse:
    """Serve the collector snapshot in Prometheus text exposition format.

    This is the conventional scrape path for Prometheus. The body is the
    collector snapshot rendered by ``format_prometheus`` (see
    https://prometheus.io/docs/instrumenting/exposition_formats/ ) and is
    sent with the explicit v0.0.4 content type. It complements
    :func:`metrics_json` rather than replacing it: JSON feeds the internal
    UI, this feeds external time-series databases.
    """
    return PlainTextResponse(
        content=format_prometheus(get_collector().snapshot()),
        media_type=_PROM_CONTENT_TYPE,
    )
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""OpenAI-compatible routes: POST /v1/chat/completions (+ minimal /v1/models).
|
|
2
|
+
|
|
3
|
+
Profile selection precedence (first hit wins):
|
|
4
|
+
1. JSON body field: {"profile": "fast", ...}
|
|
5
|
+
2. HTTP header: X-CodeRouter-Profile: fast
|
|
6
|
+
3. HTTP header: X-CodeRouter-Mode: coding (v0.6-D, via mode_aliases)
|
|
7
|
+
4. auto_router (v1.6-A, fires only when default_profile == "auto")
|
|
8
|
+
5. config.default_profile
|
|
9
|
+
|
|
10
|
+
Body wins over header so that a caller who can embed the field has final say
|
|
11
|
+
(useful when a single client talks to multiple routers behind a proxy that
|
|
12
|
+
rewrites headers). Mode sits below Profile because Mode is an INTENT
|
|
13
|
+
(``coding`` / ``long`` / ``fast``) and Profile is the concrete
|
|
14
|
+
implementation — when a caller specifies the concrete profile, respect it.
|
|
15
|
+
|
|
16
|
+
The auto router slot is intentionally narrow: it only fires when the operator
|
|
17
|
+
opts in via ``default_profile: auto`` (the reserved sentinel). For every other
|
|
18
|
+
configuration the chain behaves exactly as in v0.6-D — unresolved requests fall
|
|
19
|
+
through to the engine, which applies ``config.default_profile``.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import json
|
|
25
|
+
import time
|
|
26
|
+
from collections.abc import AsyncIterator
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from fastapi import APIRouter, Header, HTTPException, Request
|
|
30
|
+
from fastapi.responses import StreamingResponse
|
|
31
|
+
|
|
32
|
+
from coderouter.adapters.base import ChatRequest
|
|
33
|
+
from coderouter.logging import get_logger
|
|
34
|
+
from coderouter.routing import FallbackEngine, NoProvidersAvailableError
|
|
35
|
+
from coderouter.routing.auto_router import RESERVED_PROFILE_NAME, classify
|
|
36
|
+
|
|
37
|
+
# Router for the OpenAI-compatible endpoints defined in this module.
router = APIRouter()
logger = get_logger(__name__)

# Request header names used for per-request profile / mode selection;
# the module docstring describes their precedence.
_PROFILE_HEADER = "x-coderouter-profile"
_MODE_HEADER = "x-coderouter-mode"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@router.get("/models")
async def list_models(request: Request) -> dict[str, object]:
    """List configured providers in the OpenAI ``/models`` shape.

    Exists so OpenAI SDKs that probe the models endpoint get a valid
    answer. Each configured provider becomes one ``model`` entry whose
    ``id`` is the provider name.
    """
    entries = []
    for provider in request.app.state.config.providers:
        entries.append(
            {
                "id": provider.name,
                "object": "model",
                # Timestamp is taken per entry at response time.
                "created": int(time.time()),
                "owned_by": "coderouter",
            }
        )
    return {"object": "list", "data": entries}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@router.post("/chat/completions", response_model=None)
async def chat_completions(
    payload: dict[str, Any],
    request: Request,
    x_coderouter_profile: str | None = Header(default=None, alias=_PROFILE_HEADER),
    x_coderouter_mode: str | None = Header(default=None, alias=_MODE_HEADER),
) -> StreamingResponse | dict[str, Any]:
    """Handle an OpenAI-style Chat Completions request.

    The body is validated into :class:`ChatRequest`, then the profile is
    resolved (first hit wins): body ``profile`` field, the profile header,
    the mode header mapped through ``config.resolve_mode``, and finally
    the auto router when ``config.default_profile`` is the reserved
    sentinel. A request that still carries no profile is passed to the
    engine unchanged.

    Returns:
        A :class:`StreamingResponse` emitting SSE frames when the request
        asks for streaming, otherwise the engine response as a dict with
        ``None`` fields dropped.

    Raises:
        HTTPException: 422 when the body fails validation, 400 for an
            unknown mode or profile, 502 when the non-streaming call
            raises ``NoProvidersAvailableError``.
    """
    state = request.app.state
    engine: FallbackEngine = state.engine
    config = state.config

    # Extension fields such as "profile" are accepted by the model itself.
    try:
        chat_req = ChatRequest.model_validate(payload)
    except Exception as exc:  # pydantic.ValidationError, etc.
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    # Profile header applies only when the body did not name a profile.
    if chat_req.profile is None and x_coderouter_profile:
        chat_req.profile = x_coderouter_profile

    # v0.6-D: mode header -> mode_aliases -> profile, consulted only when
    # neither the body nor the profile header settled the profile.
    if chat_req.profile is None and x_coderouter_mode:
        try:
            chat_req.profile = config.resolve_mode(x_coderouter_mode)
        except KeyError as exc:
            known_modes = sorted(config.mode_aliases.keys())
            mode_error = (
                f"unknown mode {x_coderouter_mode!r}. available modes: {known_modes}"
            )
            raise HTTPException(status_code=400, detail=mode_error) from exc
        logger.info(
            "mode-alias-resolved",
            extra={"mode": x_coderouter_mode, "profile": chat_req.profile},
        )

    # v1.6-A: auto router slot, active only when the operator opted in via
    # ``default_profile: auto`` and no caller signal picked a profile.
    if chat_req.profile is None and config.default_profile == RESERVED_PROFILE_NAME:
        chat_req.profile = classify(payload, config)

    # Reject unknown profiles before any upstream call is attempted.
    requested_profile = chat_req.profile
    if requested_profile is not None:
        try:
            config.profile_by_name(requested_profile)
        except KeyError as exc:
            known_profiles = [profile.name for profile in config.profiles]
            profile_error = (
                f"unknown profile {requested_profile!r}. available: {known_profiles}"
            )
            raise HTTPException(status_code=400, detail=profile_error) from exc

    if not chat_req.stream:
        try:
            response = await engine.generate(chat_req)
        except NoProvidersAvailableError as exc:
            raise HTTPException(status_code=502, detail=str(exc)) from exc
        return response.model_dump(exclude_none=True)

    return StreamingResponse(
        _sse_iterator(engine, chat_req),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def _sse_iterator(engine: FallbackEngine, chat_req: ChatRequest) -> AsyncIterator[str]:
    """Yield the engine's stream as OpenAI-style SSE frames.

    Each chunk becomes one ``data: {json}`` frame, followed by a final
    ``data: [DONE]`` frame. If the engine raises
    ``NoProvidersAvailableError``, an error object is emitted as a frame
    (again followed by ``[DONE]``) instead of propagating the exception.
    """

    def _frame(body: Any) -> str:
        # One SSE event: JSON payload plus the blank-line terminator.
        return f"data: {json.dumps(body, ensure_ascii=False)}\n\n"

    terminator = "data: [DONE]\n\n"
    try:
        async for piece in engine.stream(chat_req):
            yield _frame(piece.model_dump(exclude_none=True))
        yield terminator
    except NoProvidersAvailableError as exc:
        # Errors travel inside the SSE channel so clients can read them.
        failure = {"error": {"message": str(exc), "type": "no_providers_available"}}
        yield _frame(failure)
        yield terminator
|
coderouter/logging.py
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
"""Tiny structured-logging helper.
|
|
2
|
+
|
|
3
|
+
We don't pull in structlog/loguru — see plan.md §5.4. stdlib logging + a
|
|
4
|
+
custom formatter that emits JSON lines is enough for v0.1.
|
|
5
|
+
|
|
6
|
+
v0.5.1 additions
|
|
7
|
+
``CapabilityDegradedReason`` / ``CapabilityDegradedPayload`` /
|
|
8
|
+
``log_capability_degraded`` are the typed contract for the
|
|
9
|
+
``capability-degraded`` log line (v0.5 gate trio). They live here —
|
|
10
|
+
rather than in ``coderouter/routing/capability.py`` where they fit
|
|
11
|
+
semantically — because (a) importing anything from the ``routing``
|
|
12
|
+
package eagerly triggers ``routing/__init__.py`` which pulls
|
|
13
|
+
``FallbackEngine`` and creates a cycle with adapter modules that
|
|
14
|
+
want to emit the same log, and (b) logging.py is a dependency-free
|
|
15
|
+
leaf, so it is the safest home for a cross-cutting log shape.
|
|
16
|
+
``capability.py`` re-exports all three for discoverability.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
import logging
|
|
23
|
+
import sys
|
|
24
|
+
from typing import Any, Literal, TypedDict
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class JsonLineFormatter(logging.Formatter):
    """Emit each record as a single JSON line."""

    # Standard ``LogRecord`` attributes. They are either already surfaced
    # under the fixed keys (``ts`` / ``level`` / ``logger`` / ``msg``) or
    # are left out of the line; anything else on the record is treated as
    # a caller-supplied ``extra`` field.
    _STANDARD_ATTRS = frozenset(
        {
            "args",
            "asctime",
            "created",
            "exc_info",
            "exc_text",
            "filename",
            "funcName",
            "levelname",
            "levelno",
            "lineno",
            "message",
            "module",
            "msecs",
            "msg",
            "name",
            "pathname",
            "process",
            "processName",
            "relativeCreated",
            "stack_info",
            "thread",
            "threadName",
            "taskName",
        }
    )

    def format(self, record: logging.LogRecord) -> str:
        """Render a LogRecord as a single-line JSON string.

        The line always carries ``ts``, ``level``, ``logger`` and ``msg``.
        Standard ``logging`` attributes (levelname, funcName, lineno, ...)
        are excluded; everything attached via ``extra={...}`` is included,
        so ``logger.info("evt", extra={"provider": "ollama"})`` surfaces
        ``"provider": "ollama"`` in the output. When the record carries
        exception info, the formatted traceback is added under ``exc``.

        Extra values that JSON cannot encode natively are written as their
        ``str()`` form instead of raising, so one odd value cannot cost
        the whole log line.
        """
        payload: dict[str, Any] = {
            "ts": self.formatTime(record, datefmt="%Y-%m-%dT%H:%M:%S"),
            "level": record.levelname,
            "logger": record.name,
            "msg": record.getMessage(),
        }
        # Copy over the custom attributes attached via ``extra={...}``.
        payload.update(
            (key, value)
            for key, value in record.__dict__.items()
            if key not in self._STANDARD_ATTRS
        )
        if record.exc_info:
            payload["exc"] = self.formatException(record.exc_info)
        # ``default=str`` is deliberate: a formatter must not raise on an
        # unserializable extra.
        return json.dumps(payload, ensure_ascii=False, default=str)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def configure_logging(level: str = "INFO") -> None:
    """Route root-logger output through :class:`JsonLineFormatter`.

    Sets the root level from ``level`` (upper-cased), removes every
    handler currently attached to the root logger, and installs a single
    stderr stream handler using the JSON-line formatter. Because existing
    handlers are cleared first, calling this repeatedly does not stack
    duplicate handlers.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(level.upper())

    # Iterate over a copy: removeHandler mutates ``root_logger.handlers``.
    for existing in tuple(root_logger.handlers):
        root_logger.removeHandler(existing)

    json_handler = logging.StreamHandler(sys.stderr)
    json_handler.setFormatter(JsonLineFormatter())
    root_logger.addHandler(json_handler)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_logger(name: str) -> logging.Logger:
    """Return the stdlib logger registered under ``name``.

    A thin alias for :func:`logging.getLogger`, so modules import their
    logger from :mod:`coderouter.logging` and any future customization
    (tags, adapters, ...) has one place to live.
    """
    named_logger = logging.getLogger(name)
    return named_logger
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# ---------------------------------------------------------------------------
|
|
100
|
+
# v0.5.1: capability-degraded log shape
|
|
101
|
+
#
|
|
102
|
+
# Single chokepoint for the log line emitted by the v0.5 capability gates
|
|
103
|
+
# (thinking / cache_control / reasoning). See module docstring above for
|
|
104
|
+
# why this lives in logging.py rather than in capability.py.
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
# Closed set of reason codes carried by the ``capability-degraded`` log
# line; the string that follows documents each value.
CapabilityDegradedReason = Literal[
    "provider-does-not-support",
    "translation-lossy",
    "non-standard-field",
]
"""Why a capability was degraded.

- ``provider-does-not-support``: the provider's wire format would 400 on
  the field. v0.5-A thinking gate; request-side strip happens before the
  call.
- ``translation-lossy``: the field has no equivalent in the target wire
  format so it is dropped during translation. v0.5-B cache_control;
  observability only — no strip happens inside the gate itself (the
  translation layer already drops the marker).
- ``non-standard-field``: upstream emits a field that is not in the spec
  the ingress speaks, so we strip it on the response-side boundary.
  v0.5-C reasoning field.
"""
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class CapabilityDegradedPayload(TypedDict):
    """Fields attached to every ``capability-degraded`` log record.

    Keys
        provider: ``name:`` of the ProviderConfig that degraded; the same
            key appears on the ``provider-failed`` / ``provider-ok``
            lines, which lets operators correlate them.
        dropped: names of the affected capabilities. One element today
            (``["thinking"]`` / ``["cache_control"]`` / ``["reasoning"]``),
            kept as a list so several drops can be reported at once later
            without changing the schema.
        reason: one of the ``CapabilityDegradedReason`` values.
    """

    provider: str
    dropped: list[str]
    reason: CapabilityDegradedReason
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def log_capability_degraded(
    logger: logging.Logger,
    *,
    provider: str,
    dropped: list[str],
    reason: CapabilityDegradedReason,
) -> None:
    """Log one ``capability-degraded`` record at INFO level.

    All call sites go through this function so the record shape stays
    uniform; the keyword-only parameters mirror the
    ``CapabilityDegradedPayload`` keys. The caller passes its own
    ``logger`` so the emitted record carries the name of the module where
    the degradation happened.

    Args:
        logger: logger to emit on.
        provider: name of the provider whose capability was degraded.
        dropped: names of the capabilities affected.
        reason: why the capability was degraded.
    """
    fields: CapabilityDegradedPayload = {
        "provider": provider,
        "dropped": dropped,
        "reason": reason,
    }
    logger.info("capability-degraded", extra=fields)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
# v0.6-C: chain-paid-gate-blocked log shape
|
|
174
|
+
#
|
|
175
|
+
# Motivation (plan.md §9.3 #3, "宣言的 ALLOW_PAID gate"):
|
|
176
|
+
# v0.1 already filters ``paid: true`` providers from the chain when
|
|
177
|
+
# ``allow_paid=False`` (per-provider INFO ``skip-paid-provider``), but
|
|
178
|
+
# when the gate ends up filtering the ENTIRE chain to empty, the
|
|
179
|
+
# operator-visible symptom is a generic ``NoProvidersAvailableError``.
|
|
180
|
+
# A dedicated aggregate warn makes the gate "declarative" in the same
|
|
181
|
+
# sense as v0.5's capability gates: the rule is visible in one line.
|
|
182
|
+
#
|
|
183
|
+
# Scope:
|
|
184
|
+
# - Fires once per request (the 4 engine entry points), only when the
|
|
185
|
+
# chain resolves to ZERO adapters AND at least one provider was
|
|
186
|
+
# filtered out by the paid gate. Mixed chains where at least one
|
|
187
|
+
# free provider survives stay quiet — they proceed into the normal
|
|
188
|
+
# try-provider / provider-failed trail.
|
|
189
|
+
# - ``skip-paid-provider`` is still emitted per-provider at INFO so
|
|
190
|
+
# per-provider traceability is intact. This warn sits at a coarser
|
|
191
|
+
# granularity (one line per blocked chain).
|
|
192
|
+
# ---------------------------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
# Default remediation text for the ``chain-paid-gate-blocked`` warning;
# used as the default ``hint`` of ``log_chain_paid_gate_blocked``.
_DEFAULT_PAID_GATE_HINT: str = (
    "set ALLOW_PAID=true, mark a provider paid=false, "
    "or add a free provider to this profile's chain"
)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class ChainPaidGateBlockedPayload(TypedDict):
    """Fields attached to every ``chain-paid-gate-blocked`` log record.

    Keys
        profile: name of the active profile after resolution (that is,
            after any fallback to ``default_profile``), not the raw
            user-supplied value.
        blocked_providers: names of the ``paid: true`` providers on this
            chain that the gate filtered out, in chain order — the same
            providers the individual ``skip-paid-provider`` INFO lines
            report.
        hint: one-line remediation suggestion. The text is kept stable so
            it can be grepped; call sites may override it when more
            specific advice applies.
    """

    profile: str
    blocked_providers: list[str]
    hint: str
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def log_chain_paid_gate_blocked(
    logger: logging.Logger,
    *,
    profile: str,
    blocked_providers: list[str],
    hint: str = _DEFAULT_PAID_GATE_HINT,
) -> None:
    """Log one ``chain-paid-gate-blocked`` record at WARNING level.

    The counterpart of :func:`log_capability_degraded` for the paid gate.
    It is a warning rather than an info line because an empty chain is a
    configuration problem the operator needs to see, whereas a single
    ``skip-paid-provider`` can remain informational while the chain as a
    whole may still be viable.

    Args:
        logger: logger to emit on.
        profile: name of the profile whose chain was blocked.
        blocked_providers: names of the providers removed by the gate.
        hint: remediation text; defaults to the module-level hint.
    """
    fields: ChainPaidGateBlockedPayload = {
        "profile": profile,
        "blocked_providers": blocked_providers,
        "hint": hint,
    }
    logger.warning("chain-paid-gate-blocked", extra=fields)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
# ---------------------------------------------------------------------------
|
|
247
|
+
# v1.0-A: output-filter-applied log shape
|
|
248
|
+
#
|
|
249
|
+
# Motivation (plan.md §10.2 "出力クリーニング" / retrospective v0.7 "transformation
|
|
250
|
+
# には probe が伴う"):
|
|
251
|
+
# ``output_filters`` is an operator opt-in (declared in providers.yaml)
|
|
252
|
+
# rather than a passive / silent strip, so it does not fit the
|
|
253
|
+
# ``capability-degraded`` vocabulary — nothing is "degraded" when a user
|
|
254
|
+
# explicitly asked for scrubbing. A dedicated typed log line keeps the
|
|
255
|
+
# observability surface legible (grep for ``output-filter-applied`` to
|
|
256
|
+
# see exactly when a filter fired, for which provider, via which
|
|
257
|
+
# filters).
|
|
258
|
+
#
|
|
259
|
+
# Scope:
|
|
260
|
+
# - Fires ONCE per generate()/stream() call (log-once, mirroring the
|
|
261
|
+
# v0.5-C reasoning-strip dedupe).
|
|
262
|
+
# - Only fires when at least one filter actually modified the stream.
|
|
263
|
+
# A chain configured but never triggered stays quiet.
|
|
264
|
+
# ---------------------------------------------------------------------------
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
class OutputFilterAppliedPayload(TypedDict):
    """Fields attached to every ``output-filter-applied`` log record.

    Keys
        provider: ``name:`` of the ProviderConfig whose adapter ran the
            filter chain; correlates with the neighbouring
            ``provider-ok`` / ``provider-failed`` lines.
        filters: names of the filters that actually changed the output —
            a subset of the configured chain, in declaration order.
        streaming: ``True`` when emitted from the streaming path,
            ``False`` from the non-streaming path, so a reader can tell
            "fired mid-stream" from "fired on the final body" without
            consulting other request metadata.
    """

    provider: str
    filters: list[str]
    streaming: bool
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def log_output_filter_applied(
    logger: logging.Logger,
    *,
    provider: str,
    filters: list[str],
    streaming: bool,
) -> None:
    """Log one ``output-filter-applied`` record at INFO level.

    The counterpart of :func:`log_capability_degraded` for output
    filters. Callers are expected to invoke it at most once per
    request/stream, and to pass in ``filters`` only the filters that
    actually modified text (see ``OutputFilterChain.applied_filters``),
    not the whole declared chain: for a chain of ``[strip_thinking,
    strip_stop_markers]`` where only the first fires, pass
    ``filters=["strip_thinking"]``. This function does not enforce either
    expectation.

    Args:
        logger: logger to emit on.
        provider: name of the provider whose output was filtered.
        filters: names of the filters that modified the output.
        streaming: whether the call came from the streaming path.
    """
    fields: OutputFilterAppliedPayload = {
        "provider": provider,
        "filters": filters,
        "streaming": streaming,
    }
    logger.info("output-filter-applied", extra=fields)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""CodeRouter metrics collection (v1.5-A).
|
|
2
|
+
|
|
3
|
+
The metrics layer taps the existing structured-logging stream rather than
|
|
4
|
+
adding new instrumentation hooks throughout the routing/adapter code.
|
|
5
|
+
Rationale (plan.md §12.3.1): every metric the v1.5 dashboard needs is
|
|
6
|
+
already in a ``capability-degraded`` / ``provider-ok`` / ``try-provider``
|
|
7
|
+
/ ``output-filter-applied`` / ``chain-paid-gate-blocked`` / ``skip-paid-
|
|
8
|
+
provider`` / ``provider-failed`` record — so wiring a
|
|
9
|
+
``logging.Handler`` subclass onto the root logger gives us lossless
|
|
10
|
+
collection with zero risk of regression.
|
|
11
|
+
|
|
12
|
+
Public surface
|
|
13
|
+
:class:`MetricsCollector`
|
|
14
|
+
``logging.Handler`` subclass that maintains in-memory counters,
|
|
15
|
+
last-error snapshots per provider, and a ring buffer of recent
|
|
16
|
+
events. ``snapshot()`` returns a JSON-safe dict consumed by the
|
|
17
|
+
``/metrics.json`` endpoint.
|
|
18
|
+
|
|
19
|
+
:func:`get_collector` / :func:`install_collector`
|
|
20
|
+
Module-level singleton accessors. The ingress ``create_app``
|
|
21
|
+
lifespan calls ``install_collector()`` at startup; ``/metrics.json``
|
|
22
|
+
and tests read via ``get_collector()``. Idempotent.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from coderouter.metrics.collector import (
|
|
26
|
+
MetricsCollector,
|
|
27
|
+
get_collector,
|
|
28
|
+
install_collector,
|
|
29
|
+
uninstall_collector,
|
|
30
|
+
)
|
|
31
|
+
from coderouter.metrics.prometheus import format_prometheus
|
|
32
|
+
|
|
33
|
+
# Public names of the package: the collector class and its singleton
# accessors from ``collector``, plus the formatter from ``prometheus``.
__all__ = [
    "MetricsCollector",
    "format_prometheus",
    "get_collector",
    "install_collector",
    "uninstall_collector",
]
|