coderouter-cli 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. coderouter/__init__.py +17 -0
  2. coderouter/__main__.py +6 -0
  3. coderouter/adapters/__init__.py +23 -0
  4. coderouter/adapters/anthropic_native.py +502 -0
  5. coderouter/adapters/base.py +220 -0
  6. coderouter/adapters/openai_compat.py +395 -0
  7. coderouter/adapters/registry.py +17 -0
  8. coderouter/cli.py +345 -0
  9. coderouter/cli_stats.py +751 -0
  10. coderouter/config/__init__.py +10 -0
  11. coderouter/config/capability_registry.py +339 -0
  12. coderouter/config/env_file.py +295 -0
  13. coderouter/config/loader.py +73 -0
  14. coderouter/config/schemas.py +515 -0
  15. coderouter/data/__init__.py +7 -0
  16. coderouter/data/model-capabilities.yaml +86 -0
  17. coderouter/doctor.py +1596 -0
  18. coderouter/env_security.py +434 -0
  19. coderouter/errors.py +29 -0
  20. coderouter/ingress/__init__.py +5 -0
  21. coderouter/ingress/anthropic_routes.py +205 -0
  22. coderouter/ingress/app.py +144 -0
  23. coderouter/ingress/dashboard_routes.py +493 -0
  24. coderouter/ingress/metrics_routes.py +92 -0
  25. coderouter/ingress/openai_routes.py +153 -0
  26. coderouter/logging.py +315 -0
  27. coderouter/metrics/__init__.py +39 -0
  28. coderouter/metrics/collector.py +471 -0
  29. coderouter/metrics/prometheus.py +221 -0
  30. coderouter/output_filters.py +407 -0
  31. coderouter/routing/__init__.py +13 -0
  32. coderouter/routing/auto_router.py +244 -0
  33. coderouter/routing/capability.py +285 -0
  34. coderouter/routing/fallback.py +611 -0
  35. coderouter/translation/__init__.py +57 -0
  36. coderouter/translation/anthropic.py +204 -0
  37. coderouter/translation/convert.py +1291 -0
  38. coderouter/translation/tool_repair.py +236 -0
  39. coderouter_cli-1.7.0.dist-info/METADATA +509 -0
  40. coderouter_cli-1.7.0.dist-info/RECORD +43 -0
  41. coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
  42. coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
  43. coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,220 @@
1
+ """Common intermediate format + BaseAdapter ABC.
2
+
3
+ The shape mirrors OpenAI's Chat Completions API since memo.txt §2.4 chose
4
+ OpenAI-compat as the standard ingress. v0.2+ will add a separate Anthropic
5
+ adapter that converts Messages API into / out of this same format.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from abc import ABC, abstractmethod
11
+ from collections.abc import AsyncIterator
12
+ from typing import Any, Literal
13
+
14
+ from pydantic import BaseModel, ConfigDict, Field
15
+
16
+ from coderouter.config.schemas import ProviderConfig
17
+ from coderouter.errors import CodeRouterError
18
+
19
+
20
class Message(BaseModel):
    """A single chat message in OpenAI Chat Completions shape.

    Mirrors the OpenAI wire format (role + content, plus tool-call
    fields for assistant/tool turns). ``content`` is ``None`` on
    assistant messages that carry only ``tool_calls`` — the OpenAI
    spec allows this, and the Anthropic→OpenAI translation in
    :mod:`coderouter.translation.convert` emits it for tool-use turns.
    """

    # extra="allow" keeps unknown provider-specific keys on the model so
    # they round-trip through serialization instead of being dropped.
    model_config = ConfigDict(extra="allow")

    role: Literal["system", "user", "assistant", "tool"]
    # OpenAI spec allows content: null on assistant messages that carry only
    # tool_calls. Anthropic → OpenAI translation also produces this when an
    # assistant turn has only tool_use blocks (no text). A list value is the
    # multimodal content-parts shape (e.g. [{"type": "text", ...}]).
    content: str | list[dict[str, Any]] | None = None
    # Optional participant name (OpenAI `name` field).
    name: str | None = None
    # On role="tool" messages: links the tool result back to its call.
    tool_call_id: str | None = None
    # On assistant messages: tool invocations requested by the model.
    tool_calls: list[dict[str, Any]] | None = None
40
+
41
+
42
class ChatRequest(BaseModel):
    """An inbound OpenAI-shaped request to the engine.

    Accepts the standard OpenAI Chat Completions fields plus the
    CodeRouter-specific ``profile`` extension (carried in the body as
    ``{"profile": "fast"}``; excluded from any upstream serialization
    via ``Field(exclude=True)``). ``extra="allow"`` lets callers pass
    provider-specific knobs (e.g. Ollama's ``think: false``) straight
    through without a schema bump.
    """

    model_config = ConfigDict(extra="allow")

    # Optional: routing is decided by `profile`, so the adapter replaces
    # this with the provider's configured model anyway.
    model: str | None = None
    messages: list[Message]
    stream: bool = False
    temperature: float | None = None
    max_tokens: int | None = None
    top_p: float | None = None
    stop: list[str] | None = None
    tools: list[dict[str, Any]] | None = None
    tool_choice: Any | None = None

    # CodeRouter-specific extension (not sent upstream — exclude=True keeps
    # it out of model_dump / serialized payloads).
    profile: str | None = Field(default=None, exclude=True)
67
+
68
+
69
class ChatResponse(BaseModel):
    """A non-streaming response in OpenAI Chat Completions shape."""

    # extra="allow": upstream bodies may carry vendor fields; keep them.
    model_config = ConfigDict(extra="allow")

    id: str
    object: str = "chat.completion"
    # Unix timestamp, per the OpenAI response shape.
    created: int
    model: str
    choices: list[dict[str, Any]]
    usage: dict[str, Any] | None = None

    # Routing metadata — added by CodeRouter (which provider answered),
    # not present in the upstream response.
    coderouter_provider: str | None = Field(default=None)
83
+
84
+
85
class StreamChunk(BaseModel):
    """A single SSE chunk in OpenAI streaming format."""

    model_config = ConfigDict(extra="allow")

    id: str
    object: str = "chat.completion.chunk"
    # Unix timestamp, per the OpenAI chunk shape.
    created: int
    model: str
    choices: list[dict[str, Any]]
    # Present on the trailing chunk when a provider honors
    # `stream_options.include_usage=true`. Also populated by the
    # Anthropic→OpenAI reverse translation in
    # coderouter.translation.convert when mirroring `message_delta`
    # usage into an OpenAI stream.
    usage: dict[str, Any] | None = None
101
+
102
+
103
class AdapterError(CodeRouterError):
    """Raised when a provider call fails in a way the fallback engine should retry on."""

    def __init__(
        self,
        message: str,
        *,
        provider: str,
        status_code: int | None = None,
        retryable: bool = True,
    ) -> None:
        """Record the failing provider plus retry metadata.

        Args:
            message: Human-readable failure reason.
            provider: The ``ProviderConfig.name`` that failed — used by
                the fallback engine's log trail and by tests that assert
                WHICH provider raised.
            status_code: Upstream HTTP status when the failure came from
                an HTTP response; ``None`` for transport / JSON-parse /
                pre-flight failures.
            retryable: ``True`` lets the fallback engine advance to the
                next provider in the chain; ``False`` makes this error a
                terminal failure.
        """
        super().__init__(message)
        self.provider = provider
        self.status_code = status_code
        self.retryable = retryable

    def __str__(self) -> str:
        """Render as ``[provider status=NNN] message`` for log trails."""
        if self.status_code is None:
            suffix = ""
        else:
            suffix = f" status={self.status_code}"
        return f"[{self.provider}{suffix}] {super().__str__()}"
137
+
138
+
139
# v0.6-B: per-call overrides resolved from the active profile. The engine
# builds one instance per request (since a profile is invariant across its
# chain) and threads it through every adapter call on that chain. Adapters
# use :meth:`effective_timeout` / :meth:`effective_append_system_prompt` to
# pick the winning value (profile override > provider default).
#
# Design notes:
# - Both fields are Optional. ``None`` means "leave the provider default
#   alone" — so ``ProviderCallOverrides()`` is a safe no-op default and
#   legacy call sites that pass nothing keep their old behavior.
# - ``append_system_prompt=""`` is a meaningful explicit value: "for
#   this profile, clear the provider's directive". The adapter must
#   distinguish ``None`` (no override) from ``""`` (override-to-empty).
class ProviderCallOverrides(BaseModel):
    """Per-call provider overrides, resolved from the active profile."""

    # extra="forbid": an unknown override key is a config bug — fail loudly
    # rather than silently ignore it.
    model_config = ConfigDict(extra="forbid")

    # Per-request timeout in seconds; None = use the provider default.
    timeout_s: float | None = None
    # Replacement system-prompt directive; None = no override, "" = clear.
    append_system_prompt: str | None = None
159
+
160
+
161
class BaseAdapter(ABC):
    """Provider-specific adapter. Subclasses implement HTTP plumbing."""

    def __init__(self, config: ProviderConfig) -> None:
        """Bind this adapter to its :class:`ProviderConfig`.

        Subclasses rarely override this: HTTP clients are created
        lazily inside :meth:`generate` / :meth:`stream`, which lets
        every call honor a per-call timeout override.
        """
        self.config = config

    @property
    def name(self) -> str:
        """Shortcut for ``self.config.name`` — used in log trails and errors."""
        return self.config.name

    # ---- v0.6-B override resolution helpers -----------------------------
    def effective_timeout(self, overrides: ProviderCallOverrides | None) -> float:
        """Return the profile-level timeout when set, else the provider default."""
        if overrides is None or overrides.timeout_s is None:
            return self.config.timeout_s
        return overrides.timeout_s

    def effective_append_system_prompt(self, overrides: ProviderCallOverrides | None) -> str | None:
        """Resolve the system-prompt directive for this call.

        ``None`` in the override means "no override" → fall through to
        the provider's own directive. An explicit empty string means
        "clear the provider directive for this profile" — normalized to
        ``None`` so callers can skip injection entirely.
        """
        if overrides is None or overrides.append_system_prompt is None:
            return self.config.append_system_prompt
        return overrides.append_system_prompt or None

    @abstractmethod
    async def healthcheck(self) -> bool:
        """Lightweight check that the upstream is reachable. Return True if healthy."""

    @abstractmethod
    async def generate(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> ChatResponse:
        """Non-streaming completion. Raise AdapterError on failure.

        ``overrides`` carries profile-level timeouts / directives (v0.6-B).
        Legacy callers that pass nothing keep the pre-v0.6-B behavior.
        """

    @abstractmethod
    def stream(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> AsyncIterator[StreamChunk]:
        """Streaming completion. Yield StreamChunks. Raise AdapterError on failure."""
@@ -0,0 +1,395 @@
1
+ """OpenAI-compatible HTTP adapter.
2
+
3
+ Single adapter that covers:
4
+ - Local llama.cpp server (--api-server mode)
5
+ - Local Ollama (/v1 endpoint)
6
+ - LM Studio
7
+ - OpenRouter (free + paid)
8
+ - Together / Fireworks / Groq / DeepInfra
9
+ - Any OpenAI-shaped /v1/chat/completions endpoint
10
+
11
+ We deliberately do NOT use the openai SDK — see plan.md §5.4 (dependency
12
+ minimalism). All upstream calls are plain httpx.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ from collections.abc import AsyncIterator
19
+ from typing import Any
20
+
21
+ import httpx
22
+
23
+ from coderouter.adapters.base import (
24
+ AdapterError,
25
+ BaseAdapter,
26
+ ChatRequest,
27
+ ChatResponse,
28
+ ProviderCallOverrides,
29
+ StreamChunk,
30
+ )
31
+ from coderouter.config.loader import resolve_api_key
32
+ from coderouter.logging import (
33
+ get_logger,
34
+ log_capability_degraded,
35
+ log_output_filter_applied,
36
+ )
37
+ from coderouter.output_filters import OutputFilterChain
38
+
39
+ logger = get_logger(__name__)
40
+
41
# HTTP status codes that mean "fall through to next provider". Used by
# generate()/stream() to set AdapterError.retryable on upstream errors.
# - 404: upstream doesn't have the requested model — next provider has a
#   different model so try it
# - 408 / 504: timeouts
# - 425: too early
# - 429: rate limit
# - 5xx: upstream errors
_RETRYABLE_STATUSES = {404, 408, 425, 429, 500, 502, 503, 504}
49
+
50
+
51
+ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: bool) -> bool:
52
+ """Remove non-standard ``reasoning`` keys from a choices list, in place.
53
+
54
+ v0.5-C: Some OpenRouter free models (confirmed on
55
+ ``openai/gpt-oss-120b:free`` 2026-04-20) return a ``reasoning`` field
56
+ alongside ``content`` on each choice. The field is not in the OpenAI
57
+ Chat Completions spec and strict clients can reject the unknown key.
58
+ We strip it at the adapter boundary so downstream layers never see it.
59
+
60
+ Args:
61
+ choices: The ``choices`` list from the response body or stream chunk.
62
+ When None (or empty) the function is a no-op.
63
+ delta_key: ``True`` for stream chunks (look in ``choice["delta"]``),
64
+ ``False`` for non-streaming responses (look in ``choice["message"]``).
65
+
66
+ Returns:
67
+ True iff at least one ``reasoning`` key was removed. Callers use
68
+ this to decide whether to emit a one-shot log line.
69
+ """
70
+ if not choices:
71
+ return False
72
+ stripped = False
73
+ inner_key = "delta" if delta_key else "message"
74
+ for choice in choices:
75
+ if not isinstance(choice, dict):
76
+ continue
77
+ inner = choice.get(inner_key)
78
+ if isinstance(inner, dict) and "reasoning" in inner:
79
+ inner.pop("reasoning", None)
80
+ stripped = True
81
+ return stripped
82
+
83
+
84
class OpenAICompatAdapter(BaseAdapter):
    """Talks the OpenAI Chat Completions wire format over httpx."""

    def _headers(self) -> dict[str, str]:
        """Build per-request HTTP headers; injects ``Authorization`` if configured."""
        headers = {
            "Content-Type": "application/json",
            "User-Agent": "CodeRouter/0.1",
        }
        # Resolved on every request, so a key changed in the environment is
        # picked up without restarting the adapter.
        api_key = resolve_api_key(self.config.api_key_env)
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers

    def _prepare_messages(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> list[dict[str, Any]]:
        """Serialize messages and inject append_system_prompt if configured.

        v0.6-B: profile-level ``append_system_prompt`` (carried in
        ``overrides``) REPLACES the provider's own directive. An explicit
        empty string in the profile clears the provider directive.
        """
        # exclude_none keeps the wire payload minimal (no "name": null etc.).
        messages = [m.model_dump(exclude_none=True) for m in request.messages]
        directive = self.effective_append_system_prompt(overrides)
        if not directive:
            return messages

        # Augment an existing system message, or add a new one at the front.
        for msg in messages:
            if msg.get("role") == "system":
                existing = msg.get("content", "")
                if isinstance(existing, str):
                    msg["content"] = f"{existing}\n{directive}".strip()
                elif isinstance(existing, list):
                    # multimodal content — append a text block
                    msg["content"] = [*existing, {"type": "text", "text": directive}]
                else:
                    # Unexpected content type (e.g. None): overwrite with the
                    # directive rather than crash.
                    msg["content"] = directive
                # Only the first system message is augmented.
                return messages

        return [{"role": "system", "content": directive}, *messages]

    def _payload(
        self,
        request: ChatRequest,
        *,
        stream: bool,
        overrides: ProviderCallOverrides | None = None,
    ) -> dict[str, Any]:
        """Assemble the outbound JSON body for ``/v1/chat/completions``.

        The provider's configured ``model`` is always used (client's
        ``request.model`` is ignored by design — routing is a profile
        concern, not a client concern). When streaming, adds
        ``stream_options.include_usage`` so a terminal usage chunk
        arrives for accounting.
        """
        # CodeRouter routing is decided by `profile`, NOT by `request.model`.
        # The OpenAI API requires a `model` field in the body, but here it's
        # always set from the provider config — clients that pass arbitrary
        # placeholder strings (e.g. "anything") would otherwise blow up the
        # upstream with 404 model-not-found.
        #
        # Start from provider's extra_body (e.g. `think: false` for Ollama
        # thinking models) so that fields from the request can override them.
        body: dict[str, Any] = dict(self.config.extra_body)
        body.update(
            {
                "model": self.config.model,
                "messages": self._prepare_messages(request, overrides=overrides),
                "stream": stream,
            }
        )
        # Only forward fields the client actually set; None means "omit".
        for field in ("temperature", "max_tokens", "top_p", "stop", "tools", "tool_choice"):
            value = getattr(request, field, None)
            if value is not None:
                body[field] = value
        if stream:
            # Request a terminal usage chunk. Providers that honor this
            # (OpenAI, OpenRouter, Ollama >=0.x) will send one extra chunk
            # with `choices: []` and `usage: {prompt_tokens, completion_tokens, ...}`
            # at the end of the stream. Providers that don't understand the
            # flag silently ignore it — so it's safe to always send.
            # setdefault: an explicit stream_options from extra_body wins.
            body.setdefault("stream_options", {"include_usage": True})
        return body

    def _url(self) -> str:
        """Build the ``{base_url}/chat/completions`` endpoint URL."""
        # base_url is normalized to OpenAI shape: it should already include /v1
        # We just append /chat/completions.
        base = str(self.config.base_url).rstrip("/")
        return f"{base}/chat/completions"

    async def healthcheck(self) -> bool:
        """GET base_url/models — most OpenAI-compat servers expose this cheaply."""
        base = str(self.config.base_url).rstrip("/")
        url = f"{base}/models"
        try:
            # Short fixed timeout: a healthcheck should fail fast, not wait
            # out the (possibly long) per-call request timeout.
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(url, headers=self._headers())
                # 4xx still counts as "reachable" (e.g. auth-gated /models);
                # only 5xx / transport failure marks the upstream unhealthy.
                return resp.status_code < 500
        except httpx.HTTPError:
            return False

    async def generate(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> ChatResponse:
        """Single HTTP POST; raises :class:`AdapterError` on any failure.

        Transport / timeout / non-retryable-parse errors are always
        raised. HTTP 4xx/5xx are raised with ``retryable`` set from
        :data:`_RETRYABLE_STATUSES`. On success, applies the v0.5-C
        ``reasoning`` field strip and the v1.0-A output-filter chain to
        the response body before returning.
        """
        url = self._url()
        payload = self._payload(request, stream=False, overrides=overrides)
        timeout = self.effective_timeout(overrides)
        try:
            # Client is built per call so the v0.6-B timeout override applies.
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.post(url, json=payload, headers=self._headers())
        except httpx.TimeoutException as exc:
            # Checked before HTTPError: TimeoutException is a subclass, and
            # the message should say "timeout", not "transport error".
            raise AdapterError(
                f"timeout contacting {url}", provider=self.name, retryable=True
            ) from exc
        except httpx.HTTPError as exc:
            raise AdapterError(
                f"transport error: {exc}", provider=self.name, retryable=True
            ) from exc

        if resp.status_code >= 400:
            # Truncate the body: error pages can be huge and we only log it.
            raise AdapterError(
                f"{resp.status_code} from upstream: {resp.text[:200]}",
                provider=self.name,
                status_code=resp.status_code,
                retryable=resp.status_code in _RETRYABLE_STATUSES,
            )

        try:
            data = resp.json()
        except json.JSONDecodeError as exc:
            # A 2xx with a garbage body is a broken upstream, not a transient
            # condition — don't let the fallback engine retry it.
            raise AdapterError(
                f"invalid JSON from upstream: {exc}",
                provider=self.name,
                retryable=False,
            ) from exc

        # v0.5-C: passive strip of non-standard `reasoning` field on choices.
        # No-op when the provider opted into passthrough.
        if not self.config.capabilities.reasoning_passthrough and _strip_reasoning_field(
            data.get("choices"), delta_key=False
        ):
            log_capability_degraded(
                logger,
                provider=self.name,
                dropped=["reasoning"],
                reason="non-standard-field",
            )

        # v1.0-A: apply output_filters chain to each choice's message.content
        # (the non-standard `reasoning` field was already removed above, so
        # we only see the client-visible content). A fresh chain per call
        # keeps state-holding filters (strip_thinking) scoped to this request.
        if self.config.output_filters:
            chain = OutputFilterChain(self.config.output_filters)
            for choice in data.get("choices") or []:
                if not isinstance(choice, dict):
                    continue
                msg = choice.get("message")
                if isinstance(msg, dict):
                    content = msg.get("content")
                    # Only filter non-empty string content; tool-call-only
                    # turns (content: null) and multimodal lists pass through.
                    if isinstance(content, str) and content:
                        msg["content"] = chain.feed(content, eof=True)
            if chain.any_applied:
                log_output_filter_applied(
                    logger,
                    provider=self.name,
                    filters=chain.applied_filters(),
                    streaming=False,
                )

        # Tag the response with which provider answered
        data.setdefault("object", "chat.completion")
        return ChatResponse(coderouter_provider=self.name, **data)

    # NOTE: declared `async def` + `yield` → calling stream(...) returns an
    # async generator, which satisfies the base class's AsyncIterator return
    # contract (the base declares it as a plain `def` for exactly this reason).
    async def stream(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> AsyncIterator[StreamChunk]:
        """Yield :class:`StreamChunk` objects from an SSE response.

        Applies the v0.5-C reasoning strip and the v1.0-A output-filter
        chain incrementally (per SSE chunk). The chain is stateful so
        ``<think>`` / stop markers split across chunk boundaries are
        still recognized; at end-of-stream any held-back safe suffix is
        flushed in a synthesized content-only chunk.
        """
        url = self._url()
        payload = self._payload(request, stream=True, overrides=overrides)
        timeout = self.effective_timeout(overrides)
        # v0.5-C: one-shot dedupe flag for the `reasoning` strip log. We
        # log once per stream request on the first chunk that carried the
        # field, not per chunk — otherwise a long reasoning track would
        # produce dozens of duplicate log lines.
        strip_reasoning = not self.config.capabilities.reasoning_passthrough
        reasoning_logged = False

        # v1.0-A: stateful output_filters chain for the duration of this
        # stream. Handles `<think>...</think>` / stop markers that split
        # across SSE chunk boundaries. One chain instance per request;
        # `output_filter_logged` dedupes the one-shot info log.
        filter_chain: OutputFilterChain | None = (
            OutputFilterChain(self.config.output_filters) if self.config.output_filters else None
        )
        output_filter_logged = False
        # Captured for the closing flush chunk (if any): reuse the last
        # seen chunk's id/model so the flush emission looks native.
        last_chunk_template: dict[str, Any] | None = None
        try:
            async with (
                httpx.AsyncClient(timeout=timeout) as client,
                client.stream("POST", url, json=payload, headers=self._headers()) as resp,
            ):
                if resp.status_code >= 400:
                    # Must read the body explicitly — a streamed response has
                    # no eagerly-loaded .text. `body` is bytes, hence the !r.
                    body = await resp.aread()
                    raise AdapterError(
                        f"{resp.status_code} from upstream: {body[:200]!r}",
                        provider=self.name,
                        status_code=resp.status_code,
                        retryable=resp.status_code in _RETRYABLE_STATUSES,
                    )
                async for line in resp.aiter_lines():
                    if not line:
                        continue
                    # SSE format: lines start with "data: "
                    if line.startswith(":"):
                        continue  # comment / heartbeat
                    if not line.startswith("data:"):
                        continue
                    data_str = line[len("data:") :].strip()
                    if data_str == "[DONE]":
                        break
                    try:
                        payload_obj = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue  # skip malformed chunks rather than abort
                    if strip_reasoning:
                        stripped = _strip_reasoning_field(
                            payload_obj.get("choices"), delta_key=True
                        )
                        if stripped and not reasoning_logged:
                            log_capability_degraded(
                                logger,
                                provider=self.name,
                                dropped=["reasoning"],
                                reason="non-standard-field",
                            )
                            reasoning_logged = True
                    if filter_chain is not None:
                        for choice in payload_obj.get("choices") or []:
                            if not isinstance(choice, dict):
                                continue
                            delta = choice.get("delta")
                            if not isinstance(delta, dict):
                                continue
                            content = delta.get("content")
                            if isinstance(content, str) and content:
                                # No eof here — mid-stream feed may hold back
                                # a partial-tag suffix until more data arrives.
                                delta["content"] = filter_chain.feed(content)
                    last_chunk_template = payload_obj
                    yield StreamChunk(**payload_obj)

                # v1.0-A: flush the chain at end-of-stream. If filters held
                # back a partial-tag suffix that turned out NOT to be a tag,
                # emit one synthetic content-only chunk so the client sees
                # every safe byte. An unmatched `<think>` at EOF is silently
                # dropped (the filter treats the partial block as thinking).
                if filter_chain is not None:
                    tail = filter_chain.feed("", eof=True)
                    if tail and last_chunk_template is not None:
                        flush_chunk: dict[str, Any] = {
                            "id": last_chunk_template.get("id", ""),
                            "object": last_chunk_template.get("object", "chat.completion.chunk"),
                            "created": last_chunk_template.get("created", 0),
                            "model": last_chunk_template.get("model", self.config.model),
                            "choices": [{"index": 0, "delta": {"content": tail}}],
                        }
                        yield StreamChunk(**flush_chunk)
                    if filter_chain.any_applied and not output_filter_logged:
                        log_output_filter_applied(
                            logger,
                            provider=self.name,
                            filters=filter_chain.applied_filters(),
                            streaming=True,
                        )
                        output_filter_logged = True
        except httpx.TimeoutException as exc:
            raise AdapterError(
                f"timeout streaming from {url}", provider=self.name, retryable=True
            ) from exc
        except httpx.HTTPError as exc:
            raise AdapterError(
                f"transport error: {exc}", provider=self.name, retryable=True
            ) from exc
@@ -0,0 +1,17 @@
1
+ """Adapter factory — maps `kind` strings to adapter classes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from coderouter.adapters.anthropic_native import AnthropicAdapter
6
+ from coderouter.adapters.base import BaseAdapter
7
+ from coderouter.adapters.openai_compat import OpenAICompatAdapter
8
+ from coderouter.config.schemas import ProviderConfig
9
+
10
+
11
def build_adapter(provider: ProviderConfig) -> BaseAdapter:
    """Construct the adapter implementation matching ``provider.kind``.

    Raises:
        ValueError: when ``provider.kind`` names no known adapter.
    """
    kind = provider.kind
    if kind == "anthropic":
        return AnthropicAdapter(provider)
    if kind == "openai_compat":
        return OpenAICompatAdapter(provider)
    raise ValueError(f"Unknown adapter kind: {provider.kind!r}")