coderouter-cli 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/__init__.py +17 -0
- coderouter/__main__.py +6 -0
- coderouter/adapters/__init__.py +23 -0
- coderouter/adapters/anthropic_native.py +502 -0
- coderouter/adapters/base.py +220 -0
- coderouter/adapters/openai_compat.py +395 -0
- coderouter/adapters/registry.py +17 -0
- coderouter/cli.py +345 -0
- coderouter/cli_stats.py +751 -0
- coderouter/config/__init__.py +10 -0
- coderouter/config/capability_registry.py +339 -0
- coderouter/config/env_file.py +295 -0
- coderouter/config/loader.py +73 -0
- coderouter/config/schemas.py +515 -0
- coderouter/data/__init__.py +7 -0
- coderouter/data/model-capabilities.yaml +86 -0
- coderouter/doctor.py +1596 -0
- coderouter/env_security.py +434 -0
- coderouter/errors.py +29 -0
- coderouter/ingress/__init__.py +5 -0
- coderouter/ingress/anthropic_routes.py +205 -0
- coderouter/ingress/app.py +144 -0
- coderouter/ingress/dashboard_routes.py +493 -0
- coderouter/ingress/metrics_routes.py +92 -0
- coderouter/ingress/openai_routes.py +153 -0
- coderouter/logging.py +315 -0
- coderouter/metrics/__init__.py +39 -0
- coderouter/metrics/collector.py +471 -0
- coderouter/metrics/prometheus.py +221 -0
- coderouter/output_filters.py +407 -0
- coderouter/routing/__init__.py +13 -0
- coderouter/routing/auto_router.py +244 -0
- coderouter/routing/capability.py +285 -0
- coderouter/routing/fallback.py +611 -0
- coderouter/translation/__init__.py +57 -0
- coderouter/translation/anthropic.py +204 -0
- coderouter/translation/convert.py +1291 -0
- coderouter/translation/tool_repair.py +236 -0
- coderouter_cli-1.7.0.dist-info/METADATA +509 -0
- coderouter_cli-1.7.0.dist-info/RECORD +43 -0
- coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
- coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
- coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
"""Capability gate for request-level block normalization (v0.5-A, v0.7-A).
|
|
2
|
+
|
|
3
|
+
Purpose
|
|
4
|
+
Claude Code sends requests that carry Anthropic-specific body fields
|
|
5
|
+
(`thinking: {type: enabled}`, `cache_control: ...`) which only a subset
|
|
6
|
+
of models accept. Hitting a non-supporting model returns a 400 like
|
|
7
|
+
``"adaptive thinking is not supported on this model"`` (v0.4-D retro).
|
|
8
|
+
|
|
9
|
+
v0.5-A introduced a capability gate that:
|
|
10
|
+
1. Declares per-provider support via ``Capabilities.thinking`` in
|
|
11
|
+
``providers.yaml`` (explicit — honored verbatim).
|
|
12
|
+
2. Falls back to a declarative registry when unset (v0.7-A: was a
|
|
13
|
+
Python-literal regex in v0.5-A). The bundled default registry
|
|
14
|
+
at ``coderouter/data/model-capabilities.yaml`` encodes the
|
|
15
|
+
families we've verified accept the feature; users can extend /
|
|
16
|
+
override via ``~/.coderouter/model-capabilities.yaml``.
|
|
17
|
+
3. Lets the fallback engine prefer capable providers and silently
|
|
18
|
+
strip the block when it has to hand off to a non-capable one,
|
|
19
|
+
logging the degradation so operators can see it after the fact.
|
|
20
|
+
|
|
21
|
+
Design decisions
|
|
22
|
+
- Pure functions, no I/O at the gate level. The registry is a module-
|
|
23
|
+
level lazy-loaded singleton (one disk read per process); tests can
|
|
24
|
+
inject a custom registry via the ``registry=`` kwarg on each gate
|
|
25
|
+
function.
|
|
26
|
+
- Heuristic lives in YAML (v0.7-A) rather than scattered across
|
|
27
|
+
adapters or baked into regex. Adding a new Anthropic family is a
|
|
28
|
+
one-line YAML edit.
|
|
29
|
+
- ``strip_thinking`` returns a new ``AnthropicRequest`` instance (does
|
|
30
|
+
not mutate) — fallback chains may revisit the original.
|
|
31
|
+
- OpenAI-compat providers are not rejected by a hardcoded ``kind``
|
|
32
|
+
check anymore (v0.7-A); the registry simply does not declare any
|
|
33
|
+
openai_compat rules for thinking, so the lookup returns
|
|
34
|
+
``thinking=None`` which the gate treats as False. The per-provider
|
|
35
|
+
YAML escape hatch still lets users opt in explicitly.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from __future__ import annotations
|
|
39
|
+
|
|
40
|
+
from coderouter.config.capability_registry import (
|
|
41
|
+
CapabilityRegistry,
|
|
42
|
+
ResolvedCapabilities,
|
|
43
|
+
)
|
|
44
|
+
from coderouter.config.schemas import ProviderConfig
|
|
45
|
+
from coderouter.logging import (
|
|
46
|
+
CapabilityDegradedPayload,
|
|
47
|
+
CapabilityDegradedReason,
|
|
48
|
+
log_capability_degraded,
|
|
49
|
+
)
|
|
50
|
+
from coderouter.translation.anthropic import AnthropicRequest
|
|
51
|
+
|
|
52
|
+
# Re-export the v0.5.1 log-shape contract so consumers that already think
|
|
53
|
+
# of it as a capability concept can import it from here. The canonical
|
|
54
|
+
# home is ``coderouter.logging`` — see that module's docstring for why
|
|
55
|
+
# (short version: avoids a routing ↔ adapter import cycle).
|
|
56
|
+
__all__ = [
|
|
57
|
+
"CapabilityDegradedPayload",
|
|
58
|
+
"CapabilityDegradedReason",
|
|
59
|
+
"CapabilityRegistry",
|
|
60
|
+
"ResolvedCapabilities",
|
|
61
|
+
"anthropic_request_has_cache_control",
|
|
62
|
+
"anthropic_request_requires_thinking",
|
|
63
|
+
"get_default_registry",
|
|
64
|
+
"log_capability_degraded",
|
|
65
|
+
"provider_supports_cache_control",
|
|
66
|
+
"provider_supports_thinking",
|
|
67
|
+
"reset_default_registry",
|
|
68
|
+
"strip_thinking",
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# Registry: declarative model-capabilities.yaml (v0.7-A)
|
|
73
|
+
#
|
|
74
|
+
# Loaded lazily once per process. Tests can inject a custom registry via
|
|
75
|
+
# the ``registry=`` kwarg on the gate functions, or call
|
|
76
|
+
# ``reset_default_registry()`` to force a reload (picks up a user YAML
|
|
77
|
+
# written in a test fixture). See ``coderouter.config.capability_registry``
|
|
78
|
+
# for the schema and lookup semantics.
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
# Process-wide cache of the default capability registry. Stays ``None``
# until ``get_default_registry()`` loads it; ``reset_default_registry()``
# sets it back to ``None`` so the next lookup loads it again.
_DEFAULT_REGISTRY: CapabilityRegistry | None = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_default_registry() -> CapabilityRegistry:
    """Return the shared default capability registry, loading it on first use.

    The first call (or the first call after ``reset_default_registry()``)
    builds the registry with ``CapabilityRegistry.load_default()`` — the
    bundled ``coderouter/data/model-capabilities.yaml`` plus the optional
    ``~/.coderouter/model-capabilities.yaml`` — and stores it in the
    module-level ``_DEFAULT_REGISTRY``. Every later call returns that same
    cached object without loading again.
    """
    global _DEFAULT_REGISTRY
    cached = _DEFAULT_REGISTRY
    if cached is not None:
        # Fast path: already loaded earlier in this process.
        return cached
    cached = CapabilityRegistry.load_default()
    _DEFAULT_REGISTRY = cached
    return cached
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def reset_default_registry() -> None:
    """Drop the cached default registry so the next lookup rebuilds it.

    Meant for tests that write a user-level capabilities YAML in a fixture
    and need the gate functions to see it. Production code paths are not
    expected to call this.
    """
    # Clearing the module-level slot is enough: ``get_default_registry()``
    # reloads whenever it finds ``None`` here.
    global _DEFAULT_REGISTRY
    _DEFAULT_REGISTRY = None
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _resolve(
    provider: ProviderConfig,
    registry: CapabilityRegistry | None,
) -> ResolvedCapabilities:
    """Look up ``provider`` in a capability registry.

    ``registry`` is used when given; ``None`` selects the process-wide
    default from ``get_default_registry()``. The lookup is keyed on the
    provider's ``kind`` and ``model``; a missing/empty model is passed to
    the registry as the empty string.
    """
    if registry is None:
        registry = get_default_registry()
    return registry.lookup(kind=provider.kind, model=provider.model or "")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def provider_supports_thinking(
    provider: ProviderConfig,
    *,
    registry: CapabilityRegistry | None = None,
) -> bool:
    """Report whether ``provider`` accepts ``thinking: {type: enabled}`` blocks.

    Precedence:
        1. A truthy ``provider.capabilities.thinking`` (the explicit
           per-provider opt-in from providers.yaml) wins outright and the
           registry is not consulted.
        2. Otherwise the capability registry is asked about the provider's
           kind/model. Only a resolved ``thinking`` value that is literally
           ``True`` counts as support; ``None`` (no rule matched) and
           ``False`` both yield ``False``, so the caller strips the block
           and logs the degradation before the upstream call.

    Because step 1 only reacts to a truthy value, an explicit
    ``thinking: false`` on the provider behaves exactly like leaving it
    unset — the provider-level flag can promote to True but cannot veto the
    registry. To hard-disable thinking on a model the registry declares
    capable, point the provider at an undeclared model or add a
    more-specific ``thinking: false`` rule earlier in
    ``~/.coderouter/model-capabilities.yaml``.

    ``registry`` exists for tests; production callers omit it and get the
    module-level default.
    """
    explicitly_enabled = bool(provider.capabilities.thinking)
    if not explicitly_enabled:
        resolved = _resolve(provider, registry)
        return resolved.thinking is True
    return True
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def anthropic_request_requires_thinking(request: AnthropicRequest) -> bool:
    """Tell whether ``request`` asks for extended thinking.

    ``thinking`` is not a declared field on ``AnthropicRequest`` (its shape
    is still evolving), so it is read from the Pydantic extras in
    ``request.model_extra``.

    Returns True only when the extra ``thinking`` value is a dict whose
    ``type`` equals ``"enabled"``. A missing key, ``None``, a non-dict
    value, or ``{type: disabled}`` all return False, so the gate fires only
    for requests that would actually switch the upstream into
    extended-thinking mode.
    """
    # NOTE(review): any other ``type`` value (e.g. "adaptive") is treated as
    # not requiring thinking — confirm that is the intended contract.
    candidate = (request.model_extra or {}).get("thinking")
    return isinstance(candidate, dict) and candidate.get("type") == "enabled"
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def strip_thinking(request: AnthropicRequest) -> AnthropicRequest:
    """Build a copy of ``request`` that carries no ``thinking`` field.

    The input is never mutated, so a fallback chain can keep the original
    around and retry it against a capable provider later. (With the default
    ordering capable providers come first, so this mostly matters in tests.)

    When the request has no ``thinking`` extra, the result is simply a deep
    copy. Otherwise the request is dumped, the ``thinking`` key is left out,
    and a new ``AnthropicRequest`` is validated from what remains. The
    CodeRouter-internal ``profile`` and ``anthropic_beta`` values are then
    copied across by hand, because they are excluded from the dumped body
    but are still needed by the engine / adapter.
    """
    if "thinking" not in (request.model_extra or {}):
        # Nothing to strip; still hand back a fresh object so callers can
        # rely on never receiving the instance they passed in.
        return request.model_copy(deep=True)

    # model_dump() includes extras, so filtering the dumped mapping and
    # re-validating yields a clean instance without the dropped key.
    payload = {
        key: value
        for key, value in request.model_dump().items()
        if key != "thinking"
    }
    rebuilt = AnthropicRequest.model_validate(payload)
    # exclude=True fields are omitted by model_dump(); restore them.
    rebuilt.profile = request.profile
    rebuilt.anthropic_beta = request.anthropic_beta
    return rebuilt
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# ---------------------------------------------------------------------------
|
|
197
|
+
# v0.5-B: cache_control observability
|
|
198
|
+
#
|
|
199
|
+
# Unlike `thinking`, cache_control doesn't produce a 400 on non-supporting
|
|
200
|
+
# providers — it's silently lost during Anthropic → OpenAI translation
|
|
201
|
+
# (the cache_control marker lives on content blocks and has no OpenAI
|
|
202
|
+
# wire equivalent). So the gate here is observability-only: we detect
|
|
203
|
+
# when cache_control is present, check whether the outgoing provider can
|
|
204
|
+
# honor it, and emit a structured log when it's about to be lost. We do
|
|
205
|
+
# NOT reorder the chain — the user's ordering almost certainly reflects
|
|
206
|
+
# a latency / cost intent that outweighs cache-hit savings.
|
|
207
|
+
#
|
|
208
|
+
# Footgun to be aware of (from the v0.4 retro §What was sharp):
|
|
209
|
+
# Anthropic's prompt cache has a 1024-token minimum. System prompts
|
|
210
|
+
# shorter than that silently report 0 cached tokens even on supported
|
|
211
|
+
# providers. That's an Anthropic-side constraint, not something this
|
|
212
|
+
# gate can fix — but it's worth noting here so nobody blames CodeRouter
|
|
213
|
+
# for 0 hits on small prompts.
|
|
214
|
+
# ---------------------------------------------------------------------------
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def provider_supports_cache_control(provider: ProviderConfig) -> bool:
    """Report whether ``provider`` carries ``cache_control`` markers end-to-end.

    Precedence:
        1. A truthy ``provider.capabilities.prompt_cache`` is an explicit
           opt-in and returns True regardless of kind. It is reserved for
           any future ``openai_compat`` upstream that extends its wire
           format to keep cache markers (none known as of 2026-04).
        2. Otherwise the answer depends on ``provider.kind`` alone:
           - ``anthropic`` → True. Native ``/v1/messages`` passthrough
             leaves cache_control intact (verified against
             api.anthropic.com on 2026-04-20, v0.4 retro §3: 1321 tokens
             written on call 1, 1321 read on call 2).
           - anything else, notably ``openai_compat`` → False. The OpenAI
             Chat Completions wire has no equivalent marker, so the
             ``to_chat_request`` translation drops it; the upstream may
             have prompt caching of its own, but CodeRouter cannot
             currently carry the marker through.

    This is a per-provider property and does not look at the request. Pair
    it with ``anthropic_request_has_cache_control`` in the engine to decide
    whether a degradation should be logged.
    """
    return bool(provider.capabilities.prompt_cache) or provider.kind == "anthropic"
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _block_has_cache_control(block: object) -> bool:
    """Return True when ``block`` is a dict containing a ``cache_control`` key.

    Only key presence is checked (the value is not inspected); any non-dict
    input returns False.
    """
    if not isinstance(block, dict):
        return False
    return "cache_control" in block
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def anthropic_request_has_cache_control(request: AnthropicRequest) -> bool:
    """Tell whether ``request`` contains at least one ``cache_control`` marker.

    Three places are searched, in this order, stopping at the first hit:
        - ``system`` when it is a list of blocks (the plain ``str``
          shorthand cannot carry a marker);
        - each entry of ``tools``, where the marker shows up in the tool's
          Pydantic extras (``model_extra``);
        - ``messages[*].content`` when it is a list of blocks (again, the
          ``str`` shorthand cannot carry one).
    """
    # System prompt blocks.
    system = request.system
    if isinstance(system, list) and any(
        _block_has_cache_control(entry) for entry in system
    ):
        return True

    # Tool definitions: the marker is an undeclared (extra) field.
    if any(
        "cache_control" in (tool.model_extra or {})
        for tool in request.tools or []
    ):
        return True

    # Message content blocks; string-only messages are skipped.
    return any(
        _block_has_cache_control(part)
        for message in request.messages
        if isinstance(message.content, list)
        for part in message.content
    )
|