coderouter-cli 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/__init__.py +17 -0
- coderouter/__main__.py +6 -0
- coderouter/adapters/__init__.py +23 -0
- coderouter/adapters/anthropic_native.py +502 -0
- coderouter/adapters/base.py +220 -0
- coderouter/adapters/openai_compat.py +395 -0
- coderouter/adapters/registry.py +17 -0
- coderouter/cli.py +345 -0
- coderouter/cli_stats.py +751 -0
- coderouter/config/__init__.py +10 -0
- coderouter/config/capability_registry.py +339 -0
- coderouter/config/env_file.py +295 -0
- coderouter/config/loader.py +73 -0
- coderouter/config/schemas.py +515 -0
- coderouter/data/__init__.py +7 -0
- coderouter/data/model-capabilities.yaml +86 -0
- coderouter/doctor.py +1596 -0
- coderouter/env_security.py +434 -0
- coderouter/errors.py +29 -0
- coderouter/ingress/__init__.py +5 -0
- coderouter/ingress/anthropic_routes.py +205 -0
- coderouter/ingress/app.py +144 -0
- coderouter/ingress/dashboard_routes.py +493 -0
- coderouter/ingress/metrics_routes.py +92 -0
- coderouter/ingress/openai_routes.py +153 -0
- coderouter/logging.py +315 -0
- coderouter/metrics/__init__.py +39 -0
- coderouter/metrics/collector.py +471 -0
- coderouter/metrics/prometheus.py +221 -0
- coderouter/output_filters.py +407 -0
- coderouter/routing/__init__.py +13 -0
- coderouter/routing/auto_router.py +244 -0
- coderouter/routing/capability.py +285 -0
- coderouter/routing/fallback.py +611 -0
- coderouter/translation/__init__.py +57 -0
- coderouter/translation/anthropic.py +204 -0
- coderouter/translation/convert.py +1291 -0
- coderouter/translation/tool_repair.py +236 -0
- coderouter_cli-1.7.0.dist-info/METADATA +509 -0
- coderouter_cli-1.7.0.dist-info/RECORD +43 -0
- coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
- coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
- coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,611 @@
|
|
|
1
|
+
"""Sequential fallback engine.
|
|
2
|
+
|
|
3
|
+
Behavior (plan.md §7):
|
|
4
|
+
1. Iterate the provider list of the chosen profile in order.
|
|
5
|
+
2. Skip paid providers when ALLOW_PAID is false.
|
|
6
|
+
3. Try generate() / stream() on each. If AdapterError(retryable=True) → next.
|
|
7
|
+
4. If all providers fail, raise NoProvidersAvailableError.
|
|
8
|
+
|
|
9
|
+
Dual entry points (v0.3.x-1):
|
|
10
|
+
The engine exposes both OpenAI-shaped (generate / stream) and
|
|
11
|
+
Anthropic-shaped (generate_anthropic / stream_anthropic) methods. The
|
|
12
|
+
Anthropic-shaped methods dispatch per-provider on `ProviderConfig.kind`:
|
|
13
|
+
- kind="anthropic": passthrough — no translation on either leg.
|
|
14
|
+
- kind="openai_compat": translate AnthropicRequest → ChatRequest,
|
|
15
|
+
call the adapter, translate ChatResponse /
|
|
16
|
+
stream chunks back. Tool-call repair runs on
|
|
17
|
+
non-streaming responses; streaming tool-turns
|
|
18
|
+
are downgraded to non-stream internally
|
|
19
|
+
(v0.3-D strategy).
|
|
20
|
+
|
|
21
|
+
Mixed chains are supported: a profile can list a native Anthropic
|
|
22
|
+
provider first and fall through to an openai_compat provider second.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from collections.abc import AsyncIterator
|
|
28
|
+
from typing import Final
|
|
29
|
+
|
|
30
|
+
from coderouter.adapters.anthropic_native import AnthropicAdapter
|
|
31
|
+
from coderouter.adapters.base import (
|
|
32
|
+
AdapterError,
|
|
33
|
+
BaseAdapter,
|
|
34
|
+
ChatRequest,
|
|
35
|
+
ChatResponse,
|
|
36
|
+
ProviderCallOverrides,
|
|
37
|
+
StreamChunk,
|
|
38
|
+
)
|
|
39
|
+
from coderouter.adapters.registry import build_adapter
|
|
40
|
+
from coderouter.config.schemas import CodeRouterConfig
|
|
41
|
+
from coderouter.errors import CodeRouterError
|
|
42
|
+
from coderouter.logging import get_logger, log_chain_paid_gate_blocked
|
|
43
|
+
from coderouter.routing.capability import (
|
|
44
|
+
anthropic_request_has_cache_control,
|
|
45
|
+
anthropic_request_requires_thinking,
|
|
46
|
+
log_capability_degraded,
|
|
47
|
+
provider_supports_cache_control,
|
|
48
|
+
provider_supports_thinking,
|
|
49
|
+
strip_thinking,
|
|
50
|
+
)
|
|
51
|
+
from coderouter.translation import (
|
|
52
|
+
AnthropicRequest,
|
|
53
|
+
AnthropicResponse,
|
|
54
|
+
AnthropicStreamEvent,
|
|
55
|
+
stream_chat_to_anthropic_events,
|
|
56
|
+
synthesize_anthropic_stream_from_response,
|
|
57
|
+
to_anthropic_response,
|
|
58
|
+
to_chat_request,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
logger = get_logger(__name__)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class NoProvidersAvailableError(CodeRouterError):
    """Raised when every provider in the chain has failed (or was filtered out)."""

    def __init__(self, profile: str, errors: list[AdapterError]) -> None:
        """Record the resolved profile and the per-provider failure list.

        An empty ``errors`` list is legal: it means the chain was filtered
        down to nothing before any call was attempted (e.g. the paid-gate
        blocked every provider). The rendered message then falls back to
        ``"no providers eligible"`` instead of an empty detail string.
        """
        self.profile = profile
        self.errors = errors
        joined = " | ".join(map(str, errors)) or "no providers eligible"
        super().__init__(f"profile={profile!r}: all providers failed: {joined}")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class MidStreamError(CodeRouterError):
    """Raised when a provider fails AFTER it has already emitted at least
    one chunk to the client. Fallback is not attempted (the client has
    received partial content, so switching providers would corrupt the
    stream). Callers should surface this as a terminal error event.
    """

    def __init__(self, provider: str, original: AdapterError) -> None:
        """Wrap the underlying :class:`AdapterError` with the provider name.

        The ingress layer catches this and converts it into an in-stream
        ``event: error`` (never a 5xx) because HTTP headers have already
        shipped by the time we know the stream failed.
        """
        self.provider = provider
        self.original = original
        message = f"provider {provider!r} failed mid-stream: {original}"
        super().__init__(message)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# v0.5.1 A-3: "probable misconfig" warn
|
|
102
|
+
#
|
|
103
|
+
# Motivation (from v0.5-verify.md §Follow-ons, 2026-04-20 re-verify):
|
|
104
|
+
# The first verify run hit OpenRouter with a mis-read env var and got
|
|
105
|
+
# 401 back. The single-provider chain short-circuited as it should, but
|
|
106
|
+
# the surface error was just "all providers failed" — operators had to
|
|
107
|
+
# grep the ``provider-failed`` line to spot the common 401 in the
|
|
108
|
+
# `error` field. A one-line warn at the aggregate level turns that
|
|
109
|
+
# grep-and-diagnose into a directly-readable hint.
|
|
110
|
+
#
|
|
111
|
+
# Scope:
|
|
112
|
+
# - Fires only when EVERY attempt in the chain returned the SAME
|
|
113
|
+
# non-retryable auth status (401 or 403). A mixed chain (one 401 +
|
|
114
|
+
# one 429, etc.) is ambiguous and stays quiet; so does any chain
|
|
115
|
+
# where at least one error was retryable (transient / rate-limit).
|
|
116
|
+
# - Auth-only by design. 400 "model not found" is also non-retryable
|
|
117
|
+
# but reflects a config-vs-upstream-reality mismatch that a generic
|
|
118
|
+
# "probable misconfig" hint would mis-diagnose. Widening later is
|
|
119
|
+
# cheap if we see the need.
|
|
120
|
+
# - Fires for single-provider chains too (the verify scenario). "Every
|
|
121
|
+
# attempt" is trivially all attempts when there is one.
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
|
|
124
|
+
_AUTH_STATUS_CODES: Final[frozenset[int]] = frozenset({401, 403})
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _warn_if_uniform_auth_failure(errors: list[AdapterError], *, profile: str) -> None:
|
|
128
|
+
"""Emit a ``chain-uniform-auth-failure`` warn when the whole chain 401/403'd.
|
|
129
|
+
|
|
130
|
+
Called from each of the four ``raise NoProvidersAvailableError`` sites
|
|
131
|
+
right before the raise. No-op when:
|
|
132
|
+
- ``errors`` is empty (nothing was attempted — e.g. every provider
|
|
133
|
+
was filtered out by paid-blocking).
|
|
134
|
+
- The first error's status is not in ``_AUTH_STATUS_CODES``.
|
|
135
|
+
- Any error has a different status_code, or is retryable.
|
|
136
|
+
|
|
137
|
+
The log is intentionally separate from the raised exception (which
|
|
138
|
+
stays unchanged for API stability) — it sits alongside the
|
|
139
|
+
``provider-failed`` lines and gives operators a single-line diagnosis
|
|
140
|
+
without changing the ingress response shape.
|
|
141
|
+
"""
|
|
142
|
+
if not errors:
|
|
143
|
+
return
|
|
144
|
+
status = errors[0].status_code
|
|
145
|
+
if status not in _AUTH_STATUS_CODES:
|
|
146
|
+
return
|
|
147
|
+
for exc in errors:
|
|
148
|
+
if exc.status_code != status or exc.retryable:
|
|
149
|
+
return
|
|
150
|
+
logger.warning(
|
|
151
|
+
"chain-uniform-auth-failure",
|
|
152
|
+
extra={
|
|
153
|
+
"profile": profile,
|
|
154
|
+
"status": status,
|
|
155
|
+
"count": len(errors),
|
|
156
|
+
"providers": [exc.provider for exc in errors],
|
|
157
|
+
"hint": "probable-misconfig",
|
|
158
|
+
},
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class FallbackEngine:
    """Sequential fallback router — the core of CodeRouter.

    Holds the resolved :class:`CodeRouterConfig` plus a pre-built adapter
    per provider (adapters are cheap but constructing them per-request
    would repeatedly re-read provider config). Exposes four entry
    points: :meth:`generate` / :meth:`stream` for OpenAI-shaped requests,
    :meth:`generate_anthropic` / :meth:`stream_anthropic` for Anthropic
    Messages API requests. See the module docstring for the per-kind
    translation behavior.
    """

    def __init__(self, config: CodeRouterConfig) -> None:
        """Pre-build one adapter per configured provider.

        Adapters are stateless with respect to requests (all state is
        held in the per-call ``ProviderCallOverrides``), so caching by
        provider name across requests is safe and avoids the cost of
        re-parsing YAML / re-resolving env vars on every request.
        """
        self.config = config
        # Cache adapters so we don't re-instantiate per request
        self._adapters: dict[str, BaseAdapter] = {
            p.name: build_adapter(p) for p in config.providers
        }

    def _resolve_profile_overrides(self, profile_name: str | None) -> ProviderCallOverrides:
        """v0.6-B: build the ProviderCallOverrides for the active profile.

        Invariant across every adapter call on one chain (profiles are
        immutable per request), so callers resolve this once at the top of
        each engine method and pass to every adapter invocation.
        """
        # None falls back to the configured default profile (same rule as
        # _resolve_chain, so both methods agree on which profile is active).
        chosen = profile_name or self.config.default_profile
        profile = self.config.profile_by_name(chosen)
        return ProviderCallOverrides(
            timeout_s=profile.timeout_s,
            append_system_prompt=profile.append_system_prompt,
        )

    def _resolve_chain(self, profile_name: str | None) -> list[BaseAdapter]:
        """Return the list of adapters to try, in order, for this profile.

        v0.6-C declarative ALLOW_PAID gate: when the paid gate filters
        the chain to zero adapters, emit ``chain-paid-gate-blocked`` at
        warn level via :func:`log_chain_paid_gate_blocked`. Per-provider
        ``skip-paid-provider`` info lines are still emitted (one per
        blocked provider) so per-provider traceability is intact; the
        warn sits at chain granularity for operator diagnosis.
        """
        chosen = profile_name or self.config.default_profile
        chain = self.config.profile_by_name(chosen)

        adapters: list[BaseAdapter] = []
        blocked_by_paid: list[str] = []
        for prov_name in chain.providers:
            try:
                provider_cfg = self.config.provider_by_name(prov_name)
            except KeyError:
                # Unknown provider names are skipped (warn, not fail) so a
                # stale profile entry doesn't take the whole chain down.
                logger.warning(
                    "skip-unknown-provider",
                    extra={"profile": chosen, "provider": prov_name},
                )
                continue
            if provider_cfg.paid and not self.config.allow_paid:
                logger.info(
                    "skip-paid-provider",
                    extra={"profile": chosen, "provider": prov_name},
                )
                blocked_by_paid.append(prov_name)
                continue
            adapters.append(self._adapters[prov_name])

        # v0.6-C: aggregate warn fires ONLY when the paid gate left the
        # chain empty. A mixed chain where at least one free provider
        # survives stays quiet (the normal try-provider / provider-
        # failed trail already narrates what happened).
        if not adapters and blocked_by_paid:
            log_chain_paid_gate_blocked(
                logger,
                profile=chosen,
                blocked_providers=blocked_by_paid,
            )
        return adapters

    def _resolve_anthropic_chain(self, request: AnthropicRequest) -> list[tuple[BaseAdapter, bool]]:
        """Resolve a chain, annotating each adapter with a ``will_degrade`` flag.

        v0.5-A capability gate: when ``request`` carries ``thinking: {type:
        enabled}`` and a provider does not support it (per
        ``provider_supports_thinking``), we still include that provider in
        the chain — it becomes a degraded-fallback. The block will be
        stripped before the call and a ``capability-degraded`` log line
        will fire. Capable providers are pulled to the front (stable sort)
        so the user's ordering is preserved within each bucket.

        Returns a list of ``(adapter, will_degrade)`` pairs in the order
        they should be tried. When the request has no capability
        requirement, all entries have ``will_degrade=False`` and the order
        matches ``_resolve_chain``.
        """
        base = self._resolve_chain(request.profile)
        if not anthropic_request_requires_thinking(request):
            return [(a, False) for a in base]

        # Two-bucket partition preserves the user's ordering inside each
        # bucket (a stable sort by capability).
        capable: list[tuple[BaseAdapter, bool]] = []
        degraded: list[tuple[BaseAdapter, bool]] = []
        for adapter in base:
            if provider_supports_thinking(adapter.config):
                capable.append((adapter, False))
            else:
                degraded.append((adapter, True))
        return capable + degraded

    async def generate(self, request: ChatRequest) -> ChatResponse:
        """Non-streaming OpenAI-shaped generation with sequential fallback.

        Walks the chain in order, returning the first provider's response.
        On retryable :class:`AdapterError` (transport failure, rate
        limit, upstream 5xx, etc.) the loop advances; on non-retryable
        errors it breaks immediately. When every provider has been tried
        without success, raises :class:`NoProvidersAvailableError` with
        the full per-provider error list so the ingress layer can
        surface a single 502.
        """
        adapters = self._resolve_chain(request.profile)
        overrides = self._resolve_profile_overrides(request.profile)
        errors: list[AdapterError] = []
        for adapter in adapters:
            logger.info(
                "try-provider",
                extra={"provider": adapter.name, "stream": False},
            )
            try:
                response = await adapter.generate(request, overrides=overrides)
                logger.info(
                    "provider-ok",
                    extra={"provider": adapter.name, "stream": False},
                )
                return response
            except AdapterError as exc:
                logger.warning(
                    "provider-failed",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                errors.append(exc)
                if not exc.retryable:
                    # Non-retryable: abandon the rest of the chain and fall
                    # through to the aggregate raise below.
                    break
        profile = request.profile or self.config.default_profile
        _warn_if_uniform_auth_failure(errors, profile=profile)
        raise NoProvidersAvailableError(profile=profile, errors=errors)

    async def stream(self, request: ChatRequest) -> AsyncIterator[StreamChunk]:
        """Stream from the first provider that successfully starts streaming.

        Important: once we begin yielding chunks from an adapter, we cannot
        fall back mid-stream (the client has already received partial content).
        We only fall through if the *initial* response is an error.
        """
        adapters: list[BaseAdapter] = self._resolve_chain(request.profile)
        overrides = self._resolve_profile_overrides(request.profile)
        errors: list[AdapterError] = []
        for adapter in adapters:
            logger.info(
                "try-provider",
                extra={"provider": adapter.name, "stream": True},
            )
            # Acquiring the iterator is lazy; connection/setup errors
            # surface on the first anext() below, which is why it sits
            # inside the try.
            stream_iter = adapter.stream(request, overrides=overrides)
            try:
                first = await anext(stream_iter)
            except StopAsyncIteration:
                # Adapter produced zero chunks — treat as failure, try next
                errors.append(AdapterError("empty stream", provider=adapter.name, retryable=True))
                continue
            except AdapterError as exc:
                logger.warning(
                    "provider-failed",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                errors.append(exc)
                if not exc.retryable:
                    break
                continue

            logger.info(
                "provider-ok",
                extra={"provider": adapter.name, "stream": True},
            )
            yield first
            # Mid-stream fallback guard: once the first byte is out the door,
            # any subsequent adapter exception is terminal — we cannot fall
            # back without risking duplicate / interleaved content reaching
            # the client.
            try:
                async for chunk in stream_iter:
                    yield chunk
            except AdapterError as exc:
                logger.warning(
                    "provider-failed-midstream",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                raise MidStreamError(adapter.name, exc) from exc
            return

        # NOTE: this is an async generator, so the exception below surfaces
        # to the caller on its first anext(), not at call time.
        profile = request.profile or self.config.default_profile
        _warn_if_uniform_auth_failure(errors, profile=profile)
        raise NoProvidersAvailableError(profile=profile, errors=errors)

    # ==================================================================
    # Anthropic-shaped entry points (v0.3.x-1)
    # ==================================================================
    #
    # These exist so the /v1/messages ingress can route to a `kind:
    # "anthropic"` provider without a lossy round-trip through the
    # OpenAI-shaped internal format. Per-provider dispatch:
    # - AnthropicAdapter: direct passthrough via generate_anthropic /
    #   stream_anthropic — no translation on either leg.
    # - any other adapter: translate AnthropicRequest → ChatRequest,
    #   call the OpenAI-shaped methods, translate the result back.
    #   Tool-call repair + v0.3-D downgrade happen on this path.

    async def generate_anthropic(self, request: AnthropicRequest) -> AnthropicResponse:
        """Non-streaming Anthropic request, per-provider dispatch."""
        chain = self._resolve_anthropic_chain(request)
        overrides = self._resolve_profile_overrides(request.profile)
        errors: list[AdapterError] = []
        # Declared tool names gate which tool_use blocks the translator
        # will accept when mapping the response back.
        tool_names = [t.name for t in request.tools] if request.tools else None

        for adapter, will_degrade in chain:
            is_native = isinstance(adapter, AnthropicAdapter)
            effective_request = request
            if will_degrade:
                # v0.5-A: strip unsupported blocks before handing to this
                # provider and emit a structured log so operators can see
                # the downgrade after the fact. Today only `thinking` is
                # gated; the list is surfaced in the log for forward-compat.
                effective_request = strip_thinking(request)
                log_capability_degraded(
                    logger,
                    provider=adapter.name,
                    dropped=["thinking"],
                    reason="provider-does-not-support",
                )
            # v0.5-B: observability-only gate for cache_control. The
            # field is silently dropped during Anthropic → OpenAI
            # translation for openai_compat providers — no strip is
            # needed here (to_chat_request already handles it) and no
            # chain reorder is done (user ordering preserved). We just
            # emit a log line so operators can see the lossiness.
            if anthropic_request_has_cache_control(request) and not provider_supports_cache_control(
                adapter.config
            ):
                log_capability_degraded(
                    logger,
                    provider=adapter.name,
                    dropped=["cache_control"],
                    reason="translation-lossy",
                )
            logger.info(
                "try-provider",
                extra={
                    "provider": adapter.name,
                    "stream": False,
                    "native_anthropic": is_native,
                    "degraded": will_degrade,
                },
            )
            try:
                # `is_native` is the same test as this `isinstance`; we do
                # it directly here so mypy narrows `adapter` to
                # AnthropicAdapter inside the branch (BaseAdapter itself
                # does not declare the Anthropic-shaped methods).
                if isinstance(adapter, AnthropicAdapter):
                    resp = await adapter.generate_anthropic(effective_request, overrides=overrides)
                else:
                    chat_req = to_chat_request(effective_request)
                    chat_req.stream = False
                    chat_resp = await adapter.generate(chat_req, overrides=overrides)
                    resp = to_anthropic_response(chat_resp, allowed_tool_names=tool_names)
            except AdapterError as exc:
                logger.warning(
                    "provider-failed",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                errors.append(exc)
                if not exc.retryable:
                    break
                continue

            logger.info(
                "provider-ok",
                extra={
                    "provider": adapter.name,
                    "stream": False,
                    "native_anthropic": is_native,
                },
            )
            return resp

        profile = request.profile or self.config.default_profile
        _warn_if_uniform_auth_failure(errors, profile=profile)
        raise NoProvidersAvailableError(profile=profile, errors=errors)

    async def stream_anthropic(
        self, request: AnthropicRequest
    ) -> AsyncIterator[AnthropicStreamEvent]:
        """Streaming Anthropic request, per-provider dispatch.

        For non-native providers with tools declared, we use the v0.3-D
        downgrade path (run the request non-streaming internally, repair
        tool calls, then synthesize an Anthropic SSE event sequence) —
        the same logic that used to live in the ingress. Consolidating
        it here keeps the ingress thin and lets native providers bypass
        the downgrade entirely (Anthropic emits structured tool_use
        blocks natively, no repair needed).
        """
        chain = self._resolve_anthropic_chain(request)
        overrides = self._resolve_profile_overrides(request.profile)
        errors: list[AdapterError] = []
        tool_names = [t.name for t in request.tools] if request.tools else None

        for adapter, will_degrade in chain:
            is_native = isinstance(adapter, AnthropicAdapter)
            downgrading = (not is_native) and bool(request.tools)
            effective_request = request
            if will_degrade:
                effective_request = strip_thinking(request)
                log_capability_degraded(
                    logger,
                    provider=adapter.name,
                    dropped=["thinking"],
                    reason="provider-does-not-support",
                )
            # v0.5-B: mirror of the non-streaming path — see comment there.
            if anthropic_request_has_cache_control(request) and not provider_supports_cache_control(
                adapter.config
            ):
                log_capability_degraded(
                    logger,
                    provider=adapter.name,
                    dropped=["cache_control"],
                    reason="translation-lossy",
                )
            logger.info(
                "try-provider",
                extra={
                    "provider": adapter.name,
                    "stream": True,
                    "native_anthropic": is_native,
                    "downgrade": downgrading,
                    "degraded": will_degrade,
                },
            )

            # Stage 1: acquire an AnthropicStreamEvent iterator. Failures
            # here are candidates for fallback (no bytes have been sent to
            # the client yet).
            event_iter: AsyncIterator[AnthropicStreamEvent]
            first: AnthropicStreamEvent
            try:
                # See the non-streaming branch above: `is_native` and this
                # isinstance test are the same check; we do it inline so
                # mypy narrows for stream_anthropic (not on BaseAdapter).
                if isinstance(adapter, AnthropicAdapter):
                    event_iter = adapter.stream_anthropic(effective_request, overrides=overrides)
                    first = await anext(event_iter)
                elif downgrading:
                    # v0.3-D downgrade: run non-streaming, repair, replay.
                    chat_req = to_chat_request(effective_request)
                    chat_req.stream = False
                    chat_resp = await adapter.generate(chat_req, overrides=overrides)
                    anth_resp = to_anthropic_response(chat_resp, allowed_tool_names=tool_names)
                    event_iter = synthesize_anthropic_stream_from_response(anth_resp)
                    first = await anext(event_iter)
                else:
                    chat_req = to_chat_request(effective_request)
                    chat_req.stream = True
                    event_iter = stream_chat_to_anthropic_events(
                        adapter.stream(chat_req, overrides=overrides)
                    )
                    first = await anext(event_iter)
            except StopAsyncIteration:
                # Zero events — treat like an empty stream and try the next
                # provider (same policy as stream()).
                errors.append(AdapterError("empty stream", provider=adapter.name, retryable=True))
                continue
            except AdapterError as exc:
                logger.warning(
                    "provider-failed",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                errors.append(exc)
                if not exc.retryable:
                    break
                continue

            logger.info(
                "provider-ok",
                extra={
                    "provider": adapter.name,
                    "stream": True,
                    "native_anthropic": is_native,
                    "downgrade": downgrading,
                },
            )
            yield first
            # Mid-stream guard identical to stream() — any error after the
            # first event is terminal.
            try:
                async for ev in event_iter:
                    yield ev
            except AdapterError as exc:
                logger.warning(
                    "provider-failed-midstream",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                raise MidStreamError(adapter.name, exc) from exc
            return

        profile = request.profile or self.config.default_profile
        _warn_if_uniform_auth_failure(errors, profile=profile)
        raise NoProvidersAvailableError(profile=profile, errors=errors)
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Wire-format translators between Anthropic Messages and internal ChatRequest.
|
|
2
|
+
|
|
3
|
+
The internal ChatRequest / ChatResponse / StreamChunk shapes mirror OpenAI
|
|
4
|
+
Chat Completions (see coderouter/adapters/base.py). This package contains the
|
|
5
|
+
bidirectional translation layer used by the Anthropic ingress:
|
|
6
|
+
|
|
7
|
+
AnthropicRequest --to_chat_request--> ChatRequest --> adapter
|
|
8
|
+
ChatResponse --to_anthropic_response--> AnthropicResponse
|
|
9
|
+
StreamChunk... --stream_to_anthropic_events--> AnthropicStreamEvent...
|
|
10
|
+
|
|
11
|
+
v0.2 scope: spec-level translation for text + tool_use content blocks.
|
|
12
|
+
v1.0 scope: tool_call JSON repair / format normalization across local models.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from coderouter.translation.anthropic import (
|
|
16
|
+
AnthropicContentBlock,
|
|
17
|
+
AnthropicImageBlock,
|
|
18
|
+
AnthropicMessage,
|
|
19
|
+
AnthropicRequest,
|
|
20
|
+
AnthropicResponse,
|
|
21
|
+
AnthropicStreamEvent,
|
|
22
|
+
AnthropicTextBlock,
|
|
23
|
+
AnthropicTool,
|
|
24
|
+
AnthropicToolResultBlock,
|
|
25
|
+
AnthropicToolUseBlock,
|
|
26
|
+
AnthropicUsage,
|
|
27
|
+
)
|
|
28
|
+
from coderouter.translation.convert import (
|
|
29
|
+
stream_anthropic_to_chat_chunks,
|
|
30
|
+
stream_chat_to_anthropic_events,
|
|
31
|
+
synthesize_anthropic_stream_from_response,
|
|
32
|
+
to_anthropic_request,
|
|
33
|
+
to_anthropic_response,
|
|
34
|
+
to_chat_request,
|
|
35
|
+
to_chat_response,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
"AnthropicContentBlock",
|
|
40
|
+
"AnthropicImageBlock",
|
|
41
|
+
"AnthropicMessage",
|
|
42
|
+
"AnthropicRequest",
|
|
43
|
+
"AnthropicResponse",
|
|
44
|
+
"AnthropicStreamEvent",
|
|
45
|
+
"AnthropicTextBlock",
|
|
46
|
+
"AnthropicTool",
|
|
47
|
+
"AnthropicToolResultBlock",
|
|
48
|
+
"AnthropicToolUseBlock",
|
|
49
|
+
"AnthropicUsage",
|
|
50
|
+
"stream_anthropic_to_chat_chunks",
|
|
51
|
+
"stream_chat_to_anthropic_events",
|
|
52
|
+
"synthesize_anthropic_stream_from_response",
|
|
53
|
+
"to_anthropic_request",
|
|
54
|
+
"to_anthropic_response",
|
|
55
|
+
"to_chat_request",
|
|
56
|
+
"to_chat_response",
|
|
57
|
+
]
|