coderouter-cli 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. coderouter/__init__.py +17 -0
  2. coderouter/__main__.py +6 -0
  3. coderouter/adapters/__init__.py +23 -0
  4. coderouter/adapters/anthropic_native.py +502 -0
  5. coderouter/adapters/base.py +220 -0
  6. coderouter/adapters/openai_compat.py +395 -0
  7. coderouter/adapters/registry.py +17 -0
  8. coderouter/cli.py +345 -0
  9. coderouter/cli_stats.py +751 -0
  10. coderouter/config/__init__.py +10 -0
  11. coderouter/config/capability_registry.py +339 -0
  12. coderouter/config/env_file.py +295 -0
  13. coderouter/config/loader.py +73 -0
  14. coderouter/config/schemas.py +515 -0
  15. coderouter/data/__init__.py +7 -0
  16. coderouter/data/model-capabilities.yaml +86 -0
  17. coderouter/doctor.py +1596 -0
  18. coderouter/env_security.py +434 -0
  19. coderouter/errors.py +29 -0
  20. coderouter/ingress/__init__.py +5 -0
  21. coderouter/ingress/anthropic_routes.py +205 -0
  22. coderouter/ingress/app.py +144 -0
  23. coderouter/ingress/dashboard_routes.py +493 -0
  24. coderouter/ingress/metrics_routes.py +92 -0
  25. coderouter/ingress/openai_routes.py +153 -0
  26. coderouter/logging.py +315 -0
  27. coderouter/metrics/__init__.py +39 -0
  28. coderouter/metrics/collector.py +471 -0
  29. coderouter/metrics/prometheus.py +221 -0
  30. coderouter/output_filters.py +407 -0
  31. coderouter/routing/__init__.py +13 -0
  32. coderouter/routing/auto_router.py +244 -0
  33. coderouter/routing/capability.py +285 -0
  34. coderouter/routing/fallback.py +611 -0
  35. coderouter/translation/__init__.py +57 -0
  36. coderouter/translation/anthropic.py +204 -0
  37. coderouter/translation/convert.py +1291 -0
  38. coderouter/translation/tool_repair.py +236 -0
  39. coderouter_cli-1.7.0.dist-info/METADATA +509 -0
  40. coderouter_cli-1.7.0.dist-info/RECORD +43 -0
  41. coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
  42. coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
  43. coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,611 @@
1
+ """Sequential fallback engine.
2
+
3
+ Behavior (plan.md §7):
4
+ 1. Iterate the provider list of the chosen profile in order.
5
+ 2. Skip paid providers when ALLOW_PAID is false.
6
+ 3. Try generate() / stream() on each. If AdapterError(retryable=True) → next.
7
+ 4. If all providers fail, raise NoProvidersAvailableError.
8
+
9
+ Dual entry points (v0.3.x-1):
10
+ The engine exposes both OpenAI-shaped (generate / stream) and
11
+ Anthropic-shaped (generate_anthropic / stream_anthropic) methods. The
12
+ Anthropic-shaped methods dispatch per-provider on `ProviderConfig.kind`:
13
+ - kind="anthropic": passthrough — no translation on either leg.
14
+ - kind="openai_compat": translate AnthropicRequest → ChatRequest,
15
+ call the adapter, translate ChatResponse /
16
+ stream chunks back. Tool-call repair runs on
17
+ non-streaming responses; streaming tool-turns
18
+ are downgraded to non-stream internally
19
+ (v0.3-D strategy).
20
+
21
+ Mixed chains are supported: a profile can list a native Anthropic
22
+ provider first and fall through to an openai_compat provider second.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from collections.abc import AsyncIterator
28
+ from typing import Final
29
+
30
+ from coderouter.adapters.anthropic_native import AnthropicAdapter
31
+ from coderouter.adapters.base import (
32
+ AdapterError,
33
+ BaseAdapter,
34
+ ChatRequest,
35
+ ChatResponse,
36
+ ProviderCallOverrides,
37
+ StreamChunk,
38
+ )
39
+ from coderouter.adapters.registry import build_adapter
40
+ from coderouter.config.schemas import CodeRouterConfig
41
+ from coderouter.errors import CodeRouterError
42
+ from coderouter.logging import get_logger, log_chain_paid_gate_blocked
43
+ from coderouter.routing.capability import (
44
+ anthropic_request_has_cache_control,
45
+ anthropic_request_requires_thinking,
46
+ log_capability_degraded,
47
+ provider_supports_cache_control,
48
+ provider_supports_thinking,
49
+ strip_thinking,
50
+ )
51
+ from coderouter.translation import (
52
+ AnthropicRequest,
53
+ AnthropicResponse,
54
+ AnthropicStreamEvent,
55
+ stream_chat_to_anthropic_events,
56
+ synthesize_anthropic_stream_from_response,
57
+ to_anthropic_response,
58
+ to_chat_request,
59
+ )
60
+
61
+ logger = get_logger(__name__)
62
+
63
+
64
class NoProvidersAvailableError(CodeRouterError):
    """Raised when every provider in the chain has failed (or was filtered out)."""

    def __init__(self, profile: str, errors: list[AdapterError]) -> None:
        """Record the resolved profile name and the per-provider error list.

        An empty ``errors`` list is legal: it means no provider call was
        even attempted (e.g. the paid-gate filtered out the entire
        chain). The rendered message then falls back to
        ``"no providers eligible"``.
        """
        self.profile = profile
        self.errors = errors
        if errors:
            detail = " | ".join(str(err) for err in errors)
        else:
            detail = "no providers eligible"
        super().__init__(f"profile={profile!r}: all providers failed: {detail}")
79
+
80
+
81
class MidStreamError(CodeRouterError):
    """A provider died after at least one chunk already reached the client.

    No fallback is attempted at this point: the client holds partial
    content, so restarting on another provider would interleave or
    duplicate output. Callers should surface this as a terminal error
    event on the already-open stream.
    """

    def __init__(self, provider: str, original: AdapterError) -> None:
        """Attach the failing provider's name to the underlying :class:`AdapterError`.

        The ingress layer converts this into an in-stream ``event: error``
        rather than a 5xx — by the time the failure is known, the HTTP
        response headers have already been sent.
        """
        self.original = original
        self.provider = provider
        super().__init__(f"provider {provider!r} failed mid-stream: {original}")
98
+
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # v0.5.1 A-3: "probable misconfig" warn
102
+ #
103
+ # Motivation (from v0.5-verify.md §Follow-ons, 2026-04-20 re-verify):
104
+ # The first verify run hit OpenRouter with a mis-read env var and got
105
+ # 401 back. The single-provider chain short-circuited as it should, but
106
+ # the surface error was just "all providers failed" — operators had to
107
+ # grep the ``provider-failed`` line to spot the common 401 in the
108
+ # `error` field. A one-line warn at the aggregate level turns that
109
+ # grep-and-diagnose into a directly-readable hint.
110
+ #
111
+ # Scope:
112
+ # - Fires only when EVERY attempt in the chain returned the SAME
113
+ # non-retryable auth status (401 or 403). A mixed chain (one 401 +
114
+ # one 429, etc.) is ambiguous and stays quiet; so does any chain
115
+ # where at least one error was retryable (transient / rate-limit).
116
+ # - Auth-only by design. 400 "model not found" is also non-retryable
117
+ # but reflects a config-vs-upstream-reality mismatch that a generic
118
+ # "probable misconfig" hint would mis-diagnose. Widening later is
119
+ # cheap if we see the need.
120
+ # - Fires for single-provider chains too (the verify scenario). "Every
121
+ # attempt" is trivially all attempts when there is one.
122
+ # ---------------------------------------------------------------------------
123
+
124
+ _AUTH_STATUS_CODES: Final[frozenset[int]] = frozenset({401, 403})
125
+
126
+
127
+ def _warn_if_uniform_auth_failure(errors: list[AdapterError], *, profile: str) -> None:
128
+ """Emit a ``chain-uniform-auth-failure`` warn when the whole chain 401/403'd.
129
+
130
+ Called from each of the four ``raise NoProvidersAvailableError`` sites
131
+ right before the raise. No-op when:
132
+ - ``errors`` is empty (nothing was attempted — e.g. every provider
133
+ was filtered out by paid-blocking).
134
+ - The first error's status is not in ``_AUTH_STATUS_CODES``.
135
+ - Any error has a different status_code, or is retryable.
136
+
137
+ The log is intentionally separate from the raised exception (which
138
+ stays unchanged for API stability) — it sits alongside the
139
+ ``provider-failed`` lines and gives operators a single-line diagnosis
140
+ without changing the ingress response shape.
141
+ """
142
+ if not errors:
143
+ return
144
+ status = errors[0].status_code
145
+ if status not in _AUTH_STATUS_CODES:
146
+ return
147
+ for exc in errors:
148
+ if exc.status_code != status or exc.retryable:
149
+ return
150
+ logger.warning(
151
+ "chain-uniform-auth-failure",
152
+ extra={
153
+ "profile": profile,
154
+ "status": status,
155
+ "count": len(errors),
156
+ "providers": [exc.provider for exc in errors],
157
+ "hint": "probable-misconfig",
158
+ },
159
+ )
160
+
161
+
162
class FallbackEngine:
    """Sequential fallback router — the core of CodeRouter.

    Holds the resolved :class:`CodeRouterConfig` plus a pre-built adapter
    per provider (adapters are cheap but constructing them per-request
    would repeatedly re-read provider config). Exposes four entry
    points: :meth:`generate` / :meth:`stream` for OpenAI-shaped requests,
    :meth:`generate_anthropic` / :meth:`stream_anthropic` for Anthropic
    Messages API requests. See the module docstring for the per-kind
    translation behavior.

    NOTE(review): the ``provider-failed`` / ``provider-failed-midstream``
    logging blocks are duplicated nearly verbatim across the four entry
    points — a candidate for extraction into a private helper if the
    extra fields ever diverge further.
    """

    def __init__(self, config: CodeRouterConfig) -> None:
        """Pre-build one adapter per configured provider.

        Adapters are stateless with respect to requests (all state is
        held in the per-call ``ProviderCallOverrides``), so caching by
        provider name across requests is safe and avoids the cost of
        re-parsing YAML / re-resolving env vars on every request.
        """
        self.config = config
        # Cache adapters so we don't re-instantiate per request
        self._adapters: dict[str, BaseAdapter] = {
            p.name: build_adapter(p) for p in config.providers
        }

    def _resolve_profile_overrides(self, profile_name: str | None) -> ProviderCallOverrides:
        """v0.6-B: build the ProviderCallOverrides for the active profile.

        Invariant across every adapter call on one chain (profiles are
        immutable per request), so callers resolve this once at the top of
        each engine method and pass to every adapter invocation.

        ``profile_name=None`` falls back to ``config.default_profile``.
        """
        chosen = profile_name or self.config.default_profile
        profile = self.config.profile_by_name(chosen)
        return ProviderCallOverrides(
            timeout_s=profile.timeout_s,
            append_system_prompt=profile.append_system_prompt,
        )

    def _resolve_chain(self, profile_name: str | None) -> list[BaseAdapter]:
        """Return the list of adapters to try, in order, for this profile.

        v0.6-C declarative ALLOW_PAID gate: when the paid gate filters
        the chain to zero adapters, emit ``chain-paid-gate-blocked`` at
        warn level via :func:`log_chain_paid_gate_blocked`. Per-provider
        ``skip-paid-provider`` info lines are still emitted (one per
        blocked provider) so per-provider traceability is intact; the
        warn sits at chain granularity for operator diagnosis.
        """
        chosen = profile_name or self.config.default_profile
        chain = self.config.profile_by_name(chosen)

        adapters: list[BaseAdapter] = []
        blocked_by_paid: list[str] = []
        for prov_name in chain.providers:
            try:
                provider_cfg = self.config.provider_by_name(prov_name)
            except KeyError:
                # Profile references a provider that isn't configured —
                # skip it rather than failing the whole chain.
                logger.warning(
                    "skip-unknown-provider",
                    extra={"profile": chosen, "provider": prov_name},
                )
                continue
            if provider_cfg.paid and not self.config.allow_paid:
                logger.info(
                    "skip-paid-provider",
                    extra={"profile": chosen, "provider": prov_name},
                )
                blocked_by_paid.append(prov_name)
                continue
            adapters.append(self._adapters[prov_name])

        # v0.6-C: aggregate warn fires ONLY when the paid gate left the
        # chain empty. A mixed chain where at least one free provider
        # survives stays quiet (the normal try-provider / provider-
        # failed trail already narrates what happened).
        if not adapters and blocked_by_paid:
            log_chain_paid_gate_blocked(
                logger,
                profile=chosen,
                blocked_providers=blocked_by_paid,
            )
        return adapters

    def _resolve_anthropic_chain(self, request: AnthropicRequest) -> list[tuple[BaseAdapter, bool]]:
        """Resolve a chain, annotating each adapter with a ``will_degrade`` flag.

        v0.5-A capability gate: when ``request`` carries ``thinking: {type:
        enabled}`` and a provider does not support it (per
        ``provider_supports_thinking``), we still include that provider in
        the chain — it becomes a degraded-fallback. The block will be
        stripped before the call and a ``capability-degraded`` log line
        will fire. Capable providers are pulled to the front (stable sort)
        so the user's ordering is preserved within each bucket.

        Returns a list of ``(adapter, will_degrade)`` pairs in the order
        they should be tried. When the request has no capability
        requirement, all entries have ``will_degrade=False`` and the order
        matches ``_resolve_chain``.
        """
        base = self._resolve_chain(request.profile)
        if not anthropic_request_requires_thinking(request):
            return [(a, False) for a in base]

        # Two-bucket partition preserves the user's ordering inside each
        # bucket (capable first, degraded last).
        capable: list[tuple[BaseAdapter, bool]] = []
        degraded: list[tuple[BaseAdapter, bool]] = []
        for adapter in base:
            if provider_supports_thinking(adapter.config):
                capable.append((adapter, False))
            else:
                degraded.append((adapter, True))
        return capable + degraded

    async def generate(self, request: ChatRequest) -> ChatResponse:
        """Non-streaming OpenAI-shaped generation with sequential fallback.

        Walks the chain in order, returning the first provider's response.
        On retryable :class:`AdapterError` (transport failure, rate
        limit, upstream 5xx, etc.) the loop advances; on non-retryable
        errors it breaks immediately. When every provider has been tried
        without success, raises :class:`NoProvidersAvailableError` with
        the full per-provider error list so the ingress layer can
        surface a single 502.
        """
        adapters = self._resolve_chain(request.profile)
        overrides = self._resolve_profile_overrides(request.profile)
        errors: list[AdapterError] = []
        for adapter in adapters:
            logger.info(
                "try-provider",
                extra={"provider": adapter.name, "stream": False},
            )
            try:
                response = await adapter.generate(request, overrides=overrides)
                logger.info(
                    "provider-ok",
                    extra={"provider": adapter.name, "stream": False},
                )
                return response
            except AdapterError as exc:
                logger.warning(
                    "provider-failed",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                errors.append(exc)
                if not exc.retryable:
                    # Non-retryable: stop walking the chain entirely.
                    break
        profile = request.profile or self.config.default_profile
        _warn_if_uniform_auth_failure(errors, profile=profile)
        raise NoProvidersAvailableError(profile=profile, errors=errors)

    async def stream(self, request: ChatRequest) -> AsyncIterator[StreamChunk]:
        """Stream from the first provider that successfully starts streaming.

        Important: once we begin yielding chunks from an adapter, we cannot
        fall back mid-stream (the client has already received partial content).
        We only fall through if the *initial* response is an error.
        """
        adapters: list[BaseAdapter] = self._resolve_chain(request.profile)
        overrides = self._resolve_profile_overrides(request.profile)
        errors: list[AdapterError] = []
        for adapter in adapters:
            logger.info(
                "try-provider",
                extra={"provider": adapter.name, "stream": True},
            )
            # presumably `adapter.stream` is an async generator, so the
            # call itself runs no body code — errors surface at the first
            # anext() inside the try below. TODO confirm against BaseAdapter.
            stream_iter = adapter.stream(request, overrides=overrides)
            try:
                first = await anext(stream_iter)
            except StopAsyncIteration:
                # Adapter produced zero chunks — treat as failure, try next
                # NOTE(review): unlike the other failure paths, this one
                # emits no ``provider-failed`` log line — confirm intentional.
                errors.append(AdapterError("empty stream", provider=adapter.name, retryable=True))
                continue
            except AdapterError as exc:
                logger.warning(
                    "provider-failed",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                errors.append(exc)
                if not exc.retryable:
                    break
                continue

            logger.info(
                "provider-ok",
                extra={"provider": adapter.name, "stream": True},
            )
            yield first
            # Mid-stream fallback guard: once the first byte is out the door,
            # any subsequent adapter exception is terminal — we cannot fall
            # back without risking duplicate / interleaved content reaching
            # the client.
            try:
                async for chunk in stream_iter:
                    yield chunk
            except AdapterError as exc:
                logger.warning(
                    "provider-failed-midstream",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                raise MidStreamError(adapter.name, exc) from exc
            return

        profile = request.profile or self.config.default_profile
        _warn_if_uniform_auth_failure(errors, profile=profile)
        raise NoProvidersAvailableError(profile=profile, errors=errors)

    # ==================================================================
    # Anthropic-shaped entry points (v0.3.x-1)
    # ==================================================================
    #
    # These exist so the /v1/messages ingress can route to a `kind:
    # "anthropic"` provider without a lossy round-trip through the
    # OpenAI-shaped internal format. Per-provider dispatch:
    #   - AnthropicAdapter: direct passthrough via generate_anthropic /
    #     stream_anthropic — no translation on either leg.
    #   - any other adapter: translate AnthropicRequest → ChatRequest,
    #     call the OpenAI-shaped methods, translate the result back.
    #     Tool-call repair + v0.3-D downgrade happen on this path.

    async def generate_anthropic(self, request: AnthropicRequest) -> AnthropicResponse:
        """Non-streaming Anthropic request, per-provider dispatch.

        Same fallback semantics as :meth:`generate`; see the section
        comment above for the per-provider translation behavior.
        """
        chain = self._resolve_anthropic_chain(request)
        overrides = self._resolve_profile_overrides(request.profile)
        errors: list[AdapterError] = []
        # Names of tools declared on the request (None when no tools);
        # handed to to_anthropic_response as ``allowed_tool_names``.
        tool_names = [t.name for t in request.tools] if request.tools else None

        for adapter, will_degrade in chain:
            is_native = isinstance(adapter, AnthropicAdapter)
            effective_request = request
            if will_degrade:
                # v0.5-A: strip unsupported blocks before handing to this
                # provider and emit a structured log so operators can see
                # the downgrade after the fact. Today only `thinking` is
                # gated; the list is surfaced in the log for forward-compat.
                effective_request = strip_thinking(request)
                log_capability_degraded(
                    logger,
                    provider=adapter.name,
                    dropped=["thinking"],
                    reason="provider-does-not-support",
                )
            # v0.5-B: observability-only gate for cache_control. The
            # field is silently dropped during Anthropic → OpenAI
            # translation for openai_compat providers — no strip is
            # needed here (to_chat_request already handles it) and no
            # chain reorder is done (user ordering preserved). We just
            # emit a log line so operators can see the lossiness.
            if anthropic_request_has_cache_control(request) and not provider_supports_cache_control(
                adapter.config
            ):
                log_capability_degraded(
                    logger,
                    provider=adapter.name,
                    dropped=["cache_control"],
                    reason="translation-lossy",
                )
            logger.info(
                "try-provider",
                extra={
                    "provider": adapter.name,
                    "stream": False,
                    "native_anthropic": is_native,
                    "degraded": will_degrade,
                },
            )
            try:
                # `is_native` is the same test as this `isinstance`; we do
                # it directly here so mypy narrows `adapter` to
                # AnthropicAdapter inside the branch (BaseAdapter itself
                # does not declare the Anthropic-shaped methods).
                if isinstance(adapter, AnthropicAdapter):
                    resp = await adapter.generate_anthropic(effective_request, overrides=overrides)
                else:
                    chat_req = to_chat_request(effective_request)
                    chat_req.stream = False
                    chat_resp = await adapter.generate(chat_req, overrides=overrides)
                    resp = to_anthropic_response(chat_resp, allowed_tool_names=tool_names)
            except AdapterError as exc:
                logger.warning(
                    "provider-failed",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                errors.append(exc)
                if not exc.retryable:
                    break
                continue

            logger.info(
                "provider-ok",
                extra={
                    "provider": adapter.name,
                    "stream": False,
                    "native_anthropic": is_native,
                },
            )
            return resp

        profile = request.profile or self.config.default_profile
        _warn_if_uniform_auth_failure(errors, profile=profile)
        raise NoProvidersAvailableError(profile=profile, errors=errors)

    async def stream_anthropic(
        self, request: AnthropicRequest
    ) -> AsyncIterator[AnthropicStreamEvent]:
        """Streaming Anthropic request, per-provider dispatch.

        For non-native providers with tools declared, we use the v0.3-D
        downgrade path (run the request non-streaming internally, repair
        tool calls, then synthesize an Anthropic SSE event sequence) —
        the same logic that used to live in the ingress. Consolidating
        it here keeps the ingress thin and lets native providers bypass
        the downgrade entirely (Anthropic emits structured tool_use
        blocks natively, no repair needed).
        """
        chain = self._resolve_anthropic_chain(request)
        overrides = self._resolve_profile_overrides(request.profile)
        errors: list[AdapterError] = []
        # Same role as in generate_anthropic: tool-name whitelist for the
        # translation layer (None when the request declares no tools).
        tool_names = [t.name for t in request.tools] if request.tools else None

        for adapter, will_degrade in chain:
            is_native = isinstance(adapter, AnthropicAdapter)
            downgrading = (not is_native) and bool(request.tools)
            effective_request = request
            if will_degrade:
                effective_request = strip_thinking(request)
                log_capability_degraded(
                    logger,
                    provider=adapter.name,
                    dropped=["thinking"],
                    reason="provider-does-not-support",
                )
            # v0.5-B: mirror of the non-streaming path — see comment there.
            if anthropic_request_has_cache_control(request) and not provider_supports_cache_control(
                adapter.config
            ):
                log_capability_degraded(
                    logger,
                    provider=adapter.name,
                    dropped=["cache_control"],
                    reason="translation-lossy",
                )
            logger.info(
                "try-provider",
                extra={
                    "provider": adapter.name,
                    "stream": True,
                    "native_anthropic": is_native,
                    "downgrade": downgrading,
                    "degraded": will_degrade,
                },
            )

            # Stage 1: acquire an AnthropicStreamEvent iterator. Failures
            # here are candidates for fallback (no bytes have been sent to
            # the client yet).
            event_iter: AsyncIterator[AnthropicStreamEvent]
            first: AnthropicStreamEvent
            try:
                # See the non-streaming branch above: `is_native` and this
                # isinstance test are the same check; we do it inline so
                # mypy narrows for stream_anthropic (not on BaseAdapter).
                if isinstance(adapter, AnthropicAdapter):
                    event_iter = adapter.stream_anthropic(effective_request, overrides=overrides)
                    first = await anext(event_iter)
                elif downgrading:
                    # v0.3-D downgrade: run non-streaming, repair, replay.
                    chat_req = to_chat_request(effective_request)
                    chat_req.stream = False
                    chat_resp = await adapter.generate(chat_req, overrides=overrides)
                    anth_resp = to_anthropic_response(chat_resp, allowed_tool_names=tool_names)
                    event_iter = synthesize_anthropic_stream_from_response(anth_resp)
                    first = await anext(event_iter)
                else:
                    chat_req = to_chat_request(effective_request)
                    chat_req.stream = True
                    event_iter = stream_chat_to_anthropic_events(
                        adapter.stream(chat_req, overrides=overrides)
                    )
                    first = await anext(event_iter)
            except StopAsyncIteration:
                # NOTE(review): as in stream(), no ``provider-failed`` log
                # line on the empty-stream path — confirm intentional.
                errors.append(AdapterError("empty stream", provider=adapter.name, retryable=True))
                continue
            except AdapterError as exc:
                logger.warning(
                    "provider-failed",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                errors.append(exc)
                if not exc.retryable:
                    break
                continue

            logger.info(
                "provider-ok",
                extra={
                    "provider": adapter.name,
                    "stream": True,
                    "native_anthropic": is_native,
                    "downgrade": downgrading,
                },
            )
            yield first
            # Mid-stream guard identical to stream() — any error after the
            # first event is terminal.
            try:
                async for ev in event_iter:
                    yield ev
            except AdapterError as exc:
                logger.warning(
                    "provider-failed-midstream",
                    extra={
                        "provider": adapter.name,
                        "status": exc.status_code,
                        "retryable": exc.retryable,
                        "error": str(exc)[:500],
                    },
                )
                raise MidStreamError(adapter.name, exc) from exc
            return

        profile = request.profile or self.config.default_profile
        _warn_if_uniform_auth_failure(errors, profile=profile)
        raise NoProvidersAvailableError(profile=profile, errors=errors)
@@ -0,0 +1,57 @@
1
+ """Wire-format translators between Anthropic Messages and internal ChatRequest.
2
+
3
+ The internal ChatRequest / ChatResponse / StreamChunk shapes mirror OpenAI
4
+ Chat Completions (see coderouter/adapters/base.py). This package contains the
5
+ bidirectional translation layer used by the Anthropic ingress:
6
+
7
+ AnthropicRequest --to_chat_request--> ChatRequest --> adapter
8
+ ChatResponse --to_anthropic_response--> AnthropicResponse
9
+ StreamChunk... --stream_to_anthropic_events--> AnthropicStreamEvent...
10
+
11
+ v0.2 scope: spec-level translation for text + tool_use content blocks.
12
+ v1.0 scope: tool_call JSON repair / format normalization across local models.
13
+ """
14
+
15
+ from coderouter.translation.anthropic import (
16
+ AnthropicContentBlock,
17
+ AnthropicImageBlock,
18
+ AnthropicMessage,
19
+ AnthropicRequest,
20
+ AnthropicResponse,
21
+ AnthropicStreamEvent,
22
+ AnthropicTextBlock,
23
+ AnthropicTool,
24
+ AnthropicToolResultBlock,
25
+ AnthropicToolUseBlock,
26
+ AnthropicUsage,
27
+ )
28
+ from coderouter.translation.convert import (
29
+ stream_anthropic_to_chat_chunks,
30
+ stream_chat_to_anthropic_events,
31
+ synthesize_anthropic_stream_from_response,
32
+ to_anthropic_request,
33
+ to_anthropic_response,
34
+ to_chat_request,
35
+ to_chat_response,
36
+ )
37
+
38
# Public API of the translation package: the Anthropic wire-format models
# (re-exported from .anthropic) plus the bidirectional converters and
# stream adapters (re-exported from .convert). Kept alphabetical within
# each group: models first, then functions.
__all__ = [
    "AnthropicContentBlock",
    "AnthropicImageBlock",
    "AnthropicMessage",
    "AnthropicRequest",
    "AnthropicResponse",
    "AnthropicStreamEvent",
    "AnthropicTextBlock",
    "AnthropicTool",
    "AnthropicToolResultBlock",
    "AnthropicToolUseBlock",
    "AnthropicUsage",
    "stream_anthropic_to_chat_chunks",
    "stream_chat_to_anthropic_events",
    "synthesize_anthropic_stream_from_response",
    "to_anthropic_request",
    "to_anthropic_response",
    "to_chat_request",
    "to_chat_response",
]