coderouter-cli 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. coderouter/__init__.py +17 -0
  2. coderouter/__main__.py +6 -0
  3. coderouter/adapters/__init__.py +23 -0
  4. coderouter/adapters/anthropic_native.py +502 -0
  5. coderouter/adapters/base.py +220 -0
  6. coderouter/adapters/openai_compat.py +395 -0
  7. coderouter/adapters/registry.py +17 -0
  8. coderouter/cli.py +345 -0
  9. coderouter/cli_stats.py +751 -0
  10. coderouter/config/__init__.py +10 -0
  11. coderouter/config/capability_registry.py +339 -0
  12. coderouter/config/env_file.py +295 -0
  13. coderouter/config/loader.py +73 -0
  14. coderouter/config/schemas.py +515 -0
  15. coderouter/data/__init__.py +7 -0
  16. coderouter/data/model-capabilities.yaml +86 -0
  17. coderouter/doctor.py +1596 -0
  18. coderouter/env_security.py +434 -0
  19. coderouter/errors.py +29 -0
  20. coderouter/ingress/__init__.py +5 -0
  21. coderouter/ingress/anthropic_routes.py +205 -0
  22. coderouter/ingress/app.py +144 -0
  23. coderouter/ingress/dashboard_routes.py +493 -0
  24. coderouter/ingress/metrics_routes.py +92 -0
  25. coderouter/ingress/openai_routes.py +153 -0
  26. coderouter/logging.py +315 -0
  27. coderouter/metrics/__init__.py +39 -0
  28. coderouter/metrics/collector.py +471 -0
  29. coderouter/metrics/prometheus.py +221 -0
  30. coderouter/output_filters.py +407 -0
  31. coderouter/routing/__init__.py +13 -0
  32. coderouter/routing/auto_router.py +244 -0
  33. coderouter/routing/capability.py +285 -0
  34. coderouter/routing/fallback.py +611 -0
  35. coderouter/translation/__init__.py +57 -0
  36. coderouter/translation/anthropic.py +204 -0
  37. coderouter/translation/convert.py +1291 -0
  38. coderouter/translation/tool_repair.py +236 -0
  39. coderouter_cli-1.7.0.dist-info/METADATA +509 -0
  40. coderouter_cli-1.7.0.dist-info/RECORD +43 -0
  41. coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
  42. coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
  43. coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,285 @@
1
+ """Capability gate for request-level block normalization (v0.5-A, v0.7-A).
2
+
3
+ Purpose
4
+ Claude Code sends requests that carry Anthropic-specific body fields
5
+ (`thinking: {type: enabled}`, `cache_control: ...`) which only a subset
6
+ of models accept. Hitting a non-supporting model returns a 400 like
7
+ ``"adaptive thinking is not supported on this model"`` (v0.4-D retro).
8
+
9
+ v0.5-A introduced a capability gate that:
10
+ 1. Declares per-provider support via ``Capabilities.thinking`` in
11
+ ``providers.yaml`` (explicit — honored verbatim).
12
+ 2. Falls back to a declarative registry when unset (v0.7-A: was a
13
+ Python-literal regex in v0.5-A). The bundled default registry
14
+ at ``coderouter/data/model-capabilities.yaml`` encodes the
15
+ families we've verified accept the feature; users can extend /
16
+ override via ``~/.coderouter/model-capabilities.yaml``.
17
+ 3. Lets the fallback engine prefer capable providers and silently
18
+ strip the block when it has to hand off to a non-capable one,
19
+ logging the degradation so operators can see it after the fact.
20
+
21
+ Design decisions
22
+ - Pure functions, no I/O at the gate level. The registry is a module-
23
+ level lazy-loaded singleton (one disk read per process); tests can
24
+ inject a custom registry via the ``registry=`` kwarg on each gate
25
+ function.
26
+ - Heuristic lives in YAML (v0.7-A) rather than scattered across
27
+ adapters or baked into regex. Adding a new Anthropic family is a
28
+ one-line YAML edit.
29
+ - ``strip_thinking`` returns a new ``AnthropicRequest`` instance (does
30
+ not mutate) — fallback chains may revisit the original.
31
+ - OpenAI-compat providers are not rejected by a hardcoded ``kind``
32
+ check anymore (v0.7-A); the registry simply does not declare any
33
+ openai_compat rules for thinking, so the lookup returns
34
+ ``thinking=None`` which the gate treats as False. The per-provider
35
+ YAML escape hatch still lets users opt in explicitly.
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ from coderouter.config.capability_registry import (
41
+ CapabilityRegistry,
42
+ ResolvedCapabilities,
43
+ )
44
+ from coderouter.config.schemas import ProviderConfig
45
+ from coderouter.logging import (
46
+ CapabilityDegradedPayload,
47
+ CapabilityDegradedReason,
48
+ log_capability_degraded,
49
+ )
50
+ from coderouter.translation.anthropic import AnthropicRequest
51
+
52
# Public surface of this module.
#
# Besides the gate functions defined below, this list re-exports the
# v0.5.1 log-shape contract (``CapabilityDegradedPayload``,
# ``CapabilityDegradedReason``, ``log_capability_degraded``) and the
# registry types so that consumers who think of them as capability
# concepts can import them from here. The canonical home of the log
# contract is ``coderouter.logging`` — see that module's docstring for why
# (short version: avoids a routing ↔ adapter import cycle).
__all__ = [
    "CapabilityDegradedPayload",
    "CapabilityDegradedReason",
    "CapabilityRegistry",
    "ResolvedCapabilities",
    "anthropic_request_has_cache_control",
    "anthropic_request_requires_thinking",
    "get_default_registry",
    "log_capability_degraded",
    "provider_supports_cache_control",
    "provider_supports_thinking",
    "reset_default_registry",
    "strip_thinking",
]
70
+
71
# ---------------------------------------------------------------------------
# Registry: declarative model-capabilities.yaml (v0.7-A)
#
# Loaded lazily, once per process, by ``get_default_registry()``. Tests
# have two ways to control what the gate functions see:
#   * pass a custom registry through the ``registry=`` kwarg on the gate
#     functions, or
#   * call ``reset_default_registry()`` to force a reload (picks up a
#     user YAML written in a test fixture).
# See ``coderouter.config.capability_registry`` for the schema and lookup
# semantics.
# ---------------------------------------------------------------------------

# Process-wide cache slot; ``None`` means "not loaded yet" (or reset).
_DEFAULT_REGISTRY: CapabilityRegistry | None = None
82
+
83
+
84
def get_default_registry() -> CapabilityRegistry:
    """Return the shared, lazily-built default capability registry.

    The registry is built on first use by calling
    ``CapabilityRegistry.load_default()`` and then memoized in the
    module-level ``_DEFAULT_REGISTRY`` slot, so later calls return the same
    instance without loading again. Per the module notes, the default load
    covers the bundled ``coderouter/data/model-capabilities.yaml`` plus an
    optional ``~/.coderouter/model-capabilities.yaml`` override.

    Returns:
        The process-wide ``CapabilityRegistry`` instance.
    """
    global _DEFAULT_REGISTRY
    cached = _DEFAULT_REGISTRY
    if cached is None:
        # First access (or first access after a reset): build and memoize.
        cached = CapabilityRegistry.load_default()
        _DEFAULT_REGISTRY = cached
    return cached
95
+
96
+
97
def reset_default_registry() -> None:
    """Clear the memoized default registry.

    After this call the module-level cache slot is empty, so the next
    ``get_default_registry()`` builds a fresh registry instead of reusing
    the previous one. This exists for tests that stage a user YAML and need
    the gate to notice it; production code is not expected to call it.
    """
    global _DEFAULT_REGISTRY
    _DEFAULT_REGISTRY = None
105
+
106
+
107
def _resolve(
    provider: ProviderConfig,
    registry: CapabilityRegistry | None,
) -> ResolvedCapabilities:
    """Look up ``provider`` in a capability registry.

    Args:
        provider: Provider whose ``kind`` and ``model`` key the lookup. A
            falsy ``model`` is looked up as the empty string.
        registry: Registry to query; ``None`` selects the process-wide
            default from ``get_default_registry()``.

    Returns:
        Whatever ``registry.lookup(kind=..., model=...)`` yields.
    """
    active = get_default_registry() if registry is None else registry
    return active.lookup(kind=provider.kind, model=provider.model or "")
114
+
115
+
116
def provider_supports_thinking(
    provider: ProviderConfig,
    *,
    registry: CapabilityRegistry | None = None,
) -> bool:
    """Report whether ``provider`` accepts ``thinking: {type: enabled}`` blocks.

    The answer is resolved in two steps:

    1. A truthy ``provider.capabilities.thinking`` wins outright. This is
       the explicit per-provider opt-in from providers.yaml and has the
       highest precedence.
    2. Otherwise the capability registry is consulted through
       ``_resolve``. Only a resolved ``thinking`` value that is exactly
       ``True`` counts as support; ``None`` (documented as "no rule
       matched") and ``False`` both produce ``False``. This is the
       conservative default: the gate then strips the block and logs the
       degradation before the call.

    Note that an explicit ``thinking: false`` on the provider cannot be
    told apart from the unset default here, because both are falsy and
    both fall through to the registry, which can only promote to ``True``.
    To hard-disable thinking on a registry-capable model, point the
    provider at a model the registry does not declare, or add a
    more-specific ``thinking: false`` rule earlier in
    ``~/.coderouter/model-capabilities.yaml``.

    Args:
        provider: Provider configuration to evaluate.
        registry: Test hook. Production callers omit it and get the
            module-level default registry.

    Returns:
        ``True`` when the provider is considered thinking-capable.
    """
    opted_in = bool(provider.capabilities.thinking)
    if not opted_in:
        # No explicit opt-in: defer to the declarative registry.
        return _resolve(provider, registry).thinking is True
    return True
146
+
147
+
148
def anthropic_request_requires_thinking(request: AnthropicRequest) -> bool:
    """Tell whether ``request`` asks for extended thinking.

    ``thinking`` is not a declared field on ``AnthropicRequest`` (the shape
    is still evolving in beta), so it is read from the request's
    ``model_extra`` mapping rather than from an attribute.

    Only a dict-shaped block whose ``type`` equals ``"enabled"`` counts.
    A missing block, ``None``, a non-dict value, or ``{type: disabled}``
    all yield ``False``, so gating fires only for requests that would
    actually switch on the upstream's extended-thinking mode.
    """
    block = (request.model_extra or {}).get("thinking")
    return isinstance(block, dict) and block.get("type") == "enabled"
164
+
165
+
166
def strip_thinking(request: AnthropicRequest) -> AnthropicRequest:
    """Build a ``thinking``-free copy of ``request``.

    The input is never mutated, so a caller walking a fallback chain can
    hold on to the original and still send it, thinking block included, to
    a capable provider later on.

    Two paths:

    * ``thinking`` absent from ``model_extra``: return a deep
      ``model_copy`` so the "always a fresh object" contract still holds.
    * ``thinking`` present: dump the request, drop the ``thinking`` key,
      and re-validate into a new ``AnthropicRequest``. The
      CodeRouter-internal ``profile`` and ``anthropic_beta`` values are
      then copied across by hand. Per the original note they are declared
      ``exclude=True`` and so do not survive ``model_dump()``, yet the
      engine / adapter still need them.

    Returns:
        A new ``AnthropicRequest`` equivalent to ``request`` minus the
        ``thinking`` extra.
    """
    if "thinking" in (request.model_extra or {}):
        # Round-trip through a plain dict to rebuild a clean instance
        # without the dropped key.
        payload = {
            key: value
            for key, value in request.model_dump().items()
            if key != "thinking"
        }
        rebuilt = AnthropicRequest.model_validate(payload)
        # Restore the fields the dump leaves out.
        rebuilt.profile = request.profile
        rebuilt.anthropic_beta = request.anthropic_beta
        return rebuilt

    # Nothing to strip; still hand back a distinct copy (extras preserved).
    return request.model_copy(deep=True)
194
+
195
+
196
+ # ---------------------------------------------------------------------------
197
+ # v0.5-B: cache_control observability
198
+ #
199
+ # Unlike `thinking`, cache_control doesn't produce a 400 on non-supporting
200
+ # providers — it's silently lost during Anthropic → OpenAI translation
201
+ # (the cache_control marker lives on content blocks and has no OpenAI
202
+ # wire equivalent). So the gate here is observability-only: we detect
203
+ # when cache_control is present, check whether the outgoing provider can
204
+ # honor it, and emit a structured log when it's about to be lost. We do
205
+ # NOT reorder the chain — the user's ordering almost certainly reflects
206
+ # a latency / cost intent that outweighs cache-hit savings.
207
+ #
208
+ # Footgun to be aware of (from the v0.4 retro §What was sharp):
209
+ # Anthropic's prompt cache has a 1024-token minimum. System prompts
210
+ # shorter than that silently report 0 cached tokens even on supported
211
+ # providers. That's an Anthropic-side constraint, not something this
212
+ # gate can fix — but it's worth noting here so nobody blames CodeRouter
213
+ # for 0 hits on small prompts.
214
+ # ---------------------------------------------------------------------------
215
+
216
+
217
def provider_supports_cache_control(provider: ProviderConfig) -> bool:
    """Report whether ``cache_control`` markers survive the trip to ``provider``.

    Decision rule:

    1. A truthy ``provider.capabilities.prompt_cache`` is an explicit
       opt-in and returns ``True`` regardless of kind. It is reserved for
       a future ``openai_compat`` upstream that extends the wire format to
       keep cache markers (none known as of 2026-04).
    2. Otherwise the provider ``kind`` decides:

       * ``anthropic`` → ``True``. Native passthrough via ``/v1/messages``
         leaves ``cache_control`` intact (verified against
         api.anthropic.com on 2026-04-20; v0.4 retro §3: 1321 tokens
         written on call 1, 1321 read on call 2).
       * anything else, notably ``openai_compat`` → ``False``. The OpenAI
         Chat Completions wire has no equivalent marker, so the
         ``to_chat_request`` translation drops it on the Anthropic →
         OpenAI hop. The upstream may well cache prompts itself, but
         CodeRouter cannot carry the marker through today.

    The request is not inspected here, since this is a per-provider
    capability. Pair it with ``anthropic_request_has_cache_control`` in
    the engine to decide whether a degradation should be logged.
    """
    return bool(provider.capabilities.prompt_cache) or provider.kind == "anthropic"
244
+
245
+
246
def _block_has_cache_control(block: object) -> bool:
    """Return True only for a dict block that has a ``cache_control`` key.

    Non-dict values are rejected up front, so any block shape other than
    a plain dict is treated as carrying no marker.
    """
    if not isinstance(block, dict):
        return False
    return "cache_control" in block
249
+
250
+
251
def anthropic_request_has_cache_control(request: AnthropicRequest) -> bool:
    """Detect whether ``request`` carries a ``cache_control`` marker anywhere.

    The three places Anthropic allows the marker are scanned in order, and
    the first hit returns ``True``:

    * ``system``, only when it is a list of blocks; the shorthand ``str``
      form cannot carry a marker.
    * ``tools[*]``, where the marker is read from each tool's
      ``model_extra`` (it arrives through ``extra="allow"`` on
      ``AnthropicTool``).
    * ``messages[*].content``, again only when it is a list of blocks
      rather than the shorthand ``str``.

    Returns:
        ``True`` if at least one marker is found, otherwise ``False``.
    """
    # 1) system prompt blocks
    system_blocks = request.system
    if isinstance(system_blocks, list) and any(
        _block_has_cache_control(item) for item in system_blocks
    ):
        return True

    # 2) tool definitions (marker lives in the pydantic extras)
    if any(
        "cache_control" in (tool_def.model_extra or {})
        for tool_def in request.tools or []
    ):
        return True

    # 3) per-message content blocks
    for message in request.messages:
        parts = message.content
        if isinstance(parts, list) and any(
            _block_has_cache_control(part) for part in parts
        ):
            return True

    return False