coderouter-cli 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/__init__.py +17 -0
- coderouter/__main__.py +6 -0
- coderouter/adapters/__init__.py +23 -0
- coderouter/adapters/anthropic_native.py +502 -0
- coderouter/adapters/base.py +220 -0
- coderouter/adapters/openai_compat.py +395 -0
- coderouter/adapters/registry.py +17 -0
- coderouter/cli.py +345 -0
- coderouter/cli_stats.py +751 -0
- coderouter/config/__init__.py +10 -0
- coderouter/config/capability_registry.py +339 -0
- coderouter/config/env_file.py +295 -0
- coderouter/config/loader.py +73 -0
- coderouter/config/schemas.py +515 -0
- coderouter/data/__init__.py +7 -0
- coderouter/data/model-capabilities.yaml +86 -0
- coderouter/doctor.py +1596 -0
- coderouter/env_security.py +434 -0
- coderouter/errors.py +29 -0
- coderouter/ingress/__init__.py +5 -0
- coderouter/ingress/anthropic_routes.py +205 -0
- coderouter/ingress/app.py +144 -0
- coderouter/ingress/dashboard_routes.py +493 -0
- coderouter/ingress/metrics_routes.py +92 -0
- coderouter/ingress/openai_routes.py +153 -0
- coderouter/logging.py +315 -0
- coderouter/metrics/__init__.py +39 -0
- coderouter/metrics/collector.py +471 -0
- coderouter/metrics/prometheus.py +221 -0
- coderouter/output_filters.py +407 -0
- coderouter/routing/__init__.py +13 -0
- coderouter/routing/auto_router.py +244 -0
- coderouter/routing/capability.py +285 -0
- coderouter/routing/fallback.py +611 -0
- coderouter/translation/__init__.py +57 -0
- coderouter/translation/anthropic.py +204 -0
- coderouter/translation/convert.py +1291 -0
- coderouter/translation/tool_repair.py +236 -0
- coderouter_cli-1.7.0.dist-info/METADATA +509 -0
- coderouter_cli-1.7.0.dist-info/RECORD +43 -0
- coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
- coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
- coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Common intermediate format + BaseAdapter ABC.
|
|
2
|
+
|
|
3
|
+
The shape mirrors OpenAI's Chat Completions API since memo.txt §2.4 chose
|
|
4
|
+
OpenAI-compat as the standard ingress. v0.2+ will add a separate Anthropic
|
|
5
|
+
adapter that converts Messages API into / out of this same format.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from collections.abc import AsyncIterator
|
|
12
|
+
from typing import Any, Literal
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
15
|
+
|
|
16
|
+
from coderouter.config.schemas import ProviderConfig
|
|
17
|
+
from coderouter.errors import CodeRouterError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Message(BaseModel):
    """A single chat message in OpenAI Chat Completions shape.

    Mirrors the OpenAI wire format (role + content, plus tool-call
    fields for assistant/tool turns). ``content`` is ``None`` on
    assistant messages that carry only ``tool_calls`` — the OpenAI
    spec allows this, and the Anthropic→OpenAI translation in
    :mod:`coderouter.translation.convert` emits it for tool-use turns.
    """

    # Unknown keys are preserved so provider-specific message fields
    # survive validation and round-trip through the router untouched.
    model_config = ConfigDict(extra="allow")

    role: Literal["system", "user", "assistant", "tool"]
    # OpenAI spec allows content: null on assistant messages that carry only
    # tool_calls. Anthropic → OpenAI translation also produces this when an
    # assistant turn has only tool_use blocks (no text). A list value is
    # multimodal content (typed blocks), per the OpenAI wire format.
    content: str | list[dict[str, Any]] | None = None
    # Optional participant name (OpenAI wire field).
    name: str | None = None
    # On role="tool" turns: correlates the result to the assistant's
    # earlier tool call (OpenAI wire field).
    tool_call_id: str | None = None
    # On assistant turns: the tool invocations requested by the model.
    tool_calls: list[dict[str, Any]] | None = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ChatRequest(BaseModel):
    """An inbound OpenAI-shaped request to the engine.

    Accepts the standard OpenAI Chat Completions fields plus the
    CodeRouter-specific ``profile`` extension (carried in the body as
    ``{"profile": "fast"}``; excluded from any upstream serialization
    via ``Field(exclude=True)``). ``extra="allow"`` lets callers pass
    provider-specific knobs (e.g. Ollama's ``think: false``) straight
    through without a schema bump.
    """

    # Keep unknown keys so provider-specific request fields pass through.
    model_config = ConfigDict(extra="allow")

    # Optional by design: routing is decided by `profile`, and adapters
    # substitute the provider-configured model into the upstream body.
    model: str | None = None
    messages: list[Message]
    stream: bool = False
    # Standard OpenAI sampling / tool fields. ``None`` means "client did
    # not set it" — adapters omit unset fields from the upstream payload.
    temperature: float | None = None
    max_tokens: int | None = None
    top_p: float | None = None
    stop: list[str] | None = None
    tools: list[dict[str, Any]] | None = None
    tool_choice: Any | None = None

    # CodeRouter-specific extension (not sent upstream)
    profile: str | None = Field(default=None, exclude=True)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ChatResponse(BaseModel):
    """A non-streaming response in OpenAI Chat Completions shape."""

    # Keep unknown upstream keys intact so extra provider fields
    # survive the round-trip back to the client.
    model_config = ConfigDict(extra="allow")

    id: str
    object: str = "chat.completion"
    # Creation timestamp as sent by the upstream (OpenAI wire field).
    created: int
    model: str
    choices: list[dict[str, Any]]
    # Token accounting, when the upstream provides it.
    usage: dict[str, Any] | None = None

    # Routing metadata — added by CodeRouter, not from upstream
    coderouter_provider: str | None = Field(default=None)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class StreamChunk(BaseModel):
    """A single SSE chunk in OpenAI streaming format."""

    # Keep unknown upstream keys so provider extras pass through.
    model_config = ConfigDict(extra="allow")

    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: list[dict[str, Any]]
    # Present on the trailing chunk when a provider honors
    # `stream_options.include_usage=true`. Also populated by the
    # Anthropic→OpenAI reverse translation in
    # coderouter.translation.convert when mirroring `message_delta`
    # usage into an OpenAI stream.
    usage: dict[str, Any] | None = None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class AdapterError(CodeRouterError):
    """Raised when a provider call fails in a way the fallback engine should retry on."""

    def __init__(
        self,
        message: str,
        *,
        provider: str,
        status_code: int | None = None,
        retryable: bool = True,
    ) -> None:
        """Create an adapter failure.

        Args:
            message: Human-readable description of what went wrong.
            provider: Name of the failing provider (``ProviderConfig.name``).
                The fallback engine's log trail — and tests asserting WHICH
                provider raised — rely on this.
            status_code: Upstream HTTP status when the failure came from a
                response; ``None`` for transport, JSON-parse, or pre-flight
                failures.
            retryable: ``True`` lets the fallback engine move on to the
                next provider in the chain; ``False`` makes this a terminal
                failure surfaced to the caller.
        """
        super().__init__(message)
        self.provider = provider
        self.status_code = status_code
        self.retryable = retryable

    def __str__(self) -> str:
        """Render as ``[provider status=NNN] message`` for log trails."""
        base = super().__str__()
        if self.status_code is None:
            return f"[{self.provider}] {base}"
        return f"[{self.provider} status={self.status_code}] {base}"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# v0.6-B: per-call overrides resolved from the active profile. The engine
# builds one instance per request (since a profile is invariant across its
# chain) and threads it through every adapter call on that chain. Adapters
# use :meth:`effective_timeout` / :meth:`effective_append_system_prompt` to
# pick the winning value (profile override > provider default).
#
# Design notes:
# - Both fields are Optional. ``None`` means "leave the provider default
#   alone" — so ``ProviderCallOverrides()`` is a safe no-op default and
#   legacy call sites that pass nothing keep their old behavior.
# - ``append_system_prompt=""`` is a meaningful explicit value: "for
#   this profile, clear the provider's directive". The adapter must
#   distinguish ``None`` (no override) from ``""`` (override-to-empty).
class ProviderCallOverrides(BaseModel):
    """Per-call provider overrides, resolved from the active profile."""

    # Strict: a typo'd override key should fail loudly, not be ignored.
    model_config = ConfigDict(extra="forbid")

    # Per-call request timeout in seconds; overrides the provider default.
    timeout_s: float | None = None
    # Replacement system-prompt directive; "" clears the provider's own.
    append_system_prompt: str | None = None
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class BaseAdapter(ABC):
    """Provider-specific adapter. Subclasses implement HTTP plumbing."""

    def __init__(self, config: ProviderConfig) -> None:
        """Bind the adapter to a :class:`ProviderConfig`.

        There is deliberately no eager HTTP-client setup here: clients
        are created lazily inside :meth:`generate` / :meth:`stream`, so
        every call can honor a per-call timeout override. Subclasses
        normally do not override this.
        """
        self.config = config

    @property
    def name(self) -> str:
        """Shortcut for ``self.config.name`` — used in log trails and errors."""
        return self.config.name

    # ---- v0.6-B override resolution helpers -----------------------------
    def effective_timeout(self, overrides: ProviderCallOverrides | None) -> float:
        """Profile override wins when set; else provider default."""
        override = None if overrides is None else overrides.timeout_s
        return self.config.timeout_s if override is None else override

    def effective_append_system_prompt(self, overrides: ProviderCallOverrides | None) -> str | None:
        """Profile override replaces provider directive when set.

        ``None`` means no override → fall through to provider. ``""``
        (explicit empty) means "clear the provider directive for this
        profile" → return None so the caller skips injection entirely.
        """
        if overrides is None or overrides.append_system_prompt is None:
            return self.config.append_system_prompt
        return overrides.append_system_prompt or None

    @abstractmethod
    async def healthcheck(self) -> bool:
        """Lightweight check that the upstream is reachable. Return True if healthy."""

    @abstractmethod
    async def generate(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> ChatResponse:
        """Non-streaming completion. Raise AdapterError on failure.

        ``overrides`` carries profile-level timeouts / directives (v0.6-B).
        Legacy callers that pass nothing keep the pre-v0.6-B behavior.
        """

    @abstractmethod
    def stream(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> AsyncIterator[StreamChunk]:
        """Streaming completion. Yield StreamChunks. Raise AdapterError on failure."""
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
"""OpenAI-compatible HTTP adapter.
|
|
2
|
+
|
|
3
|
+
Single adapter that covers:
|
|
4
|
+
- Local llama.cpp server (--api-server mode)
|
|
5
|
+
- Local Ollama (/v1 endpoint)
|
|
6
|
+
- LM Studio
|
|
7
|
+
- OpenRouter (free + paid)
|
|
8
|
+
- Together / Fireworks / Groq / DeepInfra
|
|
9
|
+
- Any OpenAI-shaped /v1/chat/completions endpoint
|
|
10
|
+
|
|
11
|
+
We deliberately do NOT use the openai SDK — see plan.md §5.4 (dependency
|
|
12
|
+
minimalism). All upstream calls are plain httpx.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from collections.abc import AsyncIterator
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
import httpx
|
|
22
|
+
|
|
23
|
+
from coderouter.adapters.base import (
|
|
24
|
+
AdapterError,
|
|
25
|
+
BaseAdapter,
|
|
26
|
+
ChatRequest,
|
|
27
|
+
ChatResponse,
|
|
28
|
+
ProviderCallOverrides,
|
|
29
|
+
StreamChunk,
|
|
30
|
+
)
|
|
31
|
+
from coderouter.config.loader import resolve_api_key
|
|
32
|
+
from coderouter.logging import (
|
|
33
|
+
get_logger,
|
|
34
|
+
log_capability_degraded,
|
|
35
|
+
log_output_filter_applied,
|
|
36
|
+
)
|
|
37
|
+
from coderouter.output_filters import OutputFilterChain
|
|
38
|
+
|
|
39
|
+
# Module-level structured logger (helpers live in coderouter.logging).
logger = get_logger(__name__)

# httpx status codes that mean "fall through to next provider"
# - 404: upstream doesn't have the requested model — next provider has a
#   different model so try it
# - 408 / 504: timeouts
# - 425: too early
# - 429: rate limit
# - 5xx: upstream errors
_RETRYABLE_STATUSES = {404, 408, 425, 429, 500, 502, 503, 504}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: bool) -> bool:
|
|
52
|
+
"""Remove non-standard ``reasoning`` keys from a choices list, in place.
|
|
53
|
+
|
|
54
|
+
v0.5-C: Some OpenRouter free models (confirmed on
|
|
55
|
+
``openai/gpt-oss-120b:free`` 2026-04-20) return a ``reasoning`` field
|
|
56
|
+
alongside ``content`` on each choice. The field is not in the OpenAI
|
|
57
|
+
Chat Completions spec and strict clients can reject the unknown key.
|
|
58
|
+
We strip it at the adapter boundary so downstream layers never see it.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
choices: The ``choices`` list from the response body or stream chunk.
|
|
62
|
+
When None (or empty) the function is a no-op.
|
|
63
|
+
delta_key: ``True`` for stream chunks (look in ``choice["delta"]``),
|
|
64
|
+
``False`` for non-streaming responses (look in ``choice["message"]``).
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
True iff at least one ``reasoning`` key was removed. Callers use
|
|
68
|
+
this to decide whether to emit a one-shot log line.
|
|
69
|
+
"""
|
|
70
|
+
if not choices:
|
|
71
|
+
return False
|
|
72
|
+
stripped = False
|
|
73
|
+
inner_key = "delta" if delta_key else "message"
|
|
74
|
+
for choice in choices:
|
|
75
|
+
if not isinstance(choice, dict):
|
|
76
|
+
continue
|
|
77
|
+
inner = choice.get(inner_key)
|
|
78
|
+
if isinstance(inner, dict) and "reasoning" in inner:
|
|
79
|
+
inner.pop("reasoning", None)
|
|
80
|
+
stripped = True
|
|
81
|
+
return stripped
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class OpenAICompatAdapter(BaseAdapter):
    """Talks the OpenAI Chat Completions wire format over httpx."""

    def _headers(self) -> dict[str, str]:
        """Build per-request HTTP headers; injects ``Authorization`` if configured."""
        headers = {
            "Content-Type": "application/json",
            "User-Agent": "CodeRouter/0.1",
        }
        # resolve_api_key returns a falsy value when no key env is set —
        # local servers (llama.cpp, Ollama) then get no Authorization header.
        api_key = resolve_api_key(self.config.api_key_env)
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers

    def _prepare_messages(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> list[dict[str, Any]]:
        """Serialize messages and inject append_system_prompt if configured.

        v0.6-B: profile-level ``append_system_prompt`` (carried in
        ``overrides``) REPLACES the provider's own directive. An explicit
        empty string in the profile clears the provider directive.
        """
        # exclude_none keeps the wire payload minimal (e.g. no "name": null).
        messages = [m.model_dump(exclude_none=True) for m in request.messages]
        directive = self.effective_append_system_prompt(overrides)
        if not directive:
            return messages

        # Augment an existing system message, or add a new one at the front.
        for msg in messages:
            if msg.get("role") == "system":
                existing = msg.get("content", "")
                if isinstance(existing, str):
                    msg["content"] = f"{existing}\n{directive}".strip()
                elif isinstance(existing, list):
                    # multimodal content — append a text block
                    msg["content"] = [*existing, {"type": "text", "text": directive}]
                else:
                    # content was None / unexpected — directive stands alone
                    msg["content"] = directive
                # Only the FIRST system message is augmented.
                return messages

        return [{"role": "system", "content": directive}, *messages]

    def _payload(
        self,
        request: ChatRequest,
        *,
        stream: bool,
        overrides: ProviderCallOverrides | None = None,
    ) -> dict[str, Any]:
        """Assemble the outbound JSON body for ``/v1/chat/completions``.

        The provider's configured ``model`` is always used (client's
        ``request.model`` is ignored by design — routing is a profile
        concern, not a client concern). When streaming, adds
        ``stream_options.include_usage`` so a terminal usage chunk
        arrives for accounting.
        """
        # CodeRouter routing is decided by `profile`, NOT by `request.model`.
        # The OpenAI API requires a `model` field in the body, but here it's
        # always set from the provider config — clients that pass arbitrary
        # placeholder strings (e.g. "anything") would otherwise blow up the
        # upstream with 404 model-not-found.
        #
        # Start from provider's extra_body (e.g. `think: false` for Ollama
        # thinking models) so that fields from the request can override them.
        body: dict[str, Any] = dict(self.config.extra_body)
        body.update(
            {
                "model": self.config.model,
                "messages": self._prepare_messages(request, overrides=overrides),
                "stream": stream,
            }
        )
        # Forward only the fields the client actually set (None = unset).
        for field in ("temperature", "max_tokens", "top_p", "stop", "tools", "tool_choice"):
            value = getattr(request, field, None)
            if value is not None:
                body[field] = value
        if stream:
            # Request a terminal usage chunk. Providers that honor this
            # (OpenAI, OpenRouter, Ollama >=0.x) will send one extra chunk
            # with `choices: []` and `usage: {prompt_tokens, completion_tokens, ...}`
            # at the end of the stream. Providers that don't understand the
            # flag silently ignore it — so it's safe to always send.
            # setdefault: an extra_body-supplied stream_options wins.
            body.setdefault("stream_options", {"include_usage": True})
        return body

    def _url(self) -> str:
        """Build the ``{base_url}/chat/completions`` endpoint URL."""
        # base_url is normalized to OpenAI shape: it should already include /v1
        # We just append /chat/completions.
        base = str(self.config.base_url).rstrip("/")
        return f"{base}/chat/completions"

    async def healthcheck(self) -> bool:
        """GET base_url/models — most OpenAI-compat servers expose this cheaply."""
        base = str(self.config.base_url).rstrip("/")
        url = f"{base}/models"
        try:
            # Short fixed timeout: a health probe should never hang on the
            # provider's (possibly long) generation timeout.
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(url, headers=self._headers())
                # 4xx (e.g. auth required on /models) still means "reachable".
                return resp.status_code < 500
        except httpx.HTTPError:
            return False

    async def generate(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> ChatResponse:
        """Single HTTP POST; raises :class:`AdapterError` on any failure.

        Transport / timeout / non-retryable-parse errors are always
        raised. HTTP 4xx/5xx are raised with ``retryable`` set from
        :data:`_RETRYABLE_STATUSES`. On success, applies the v0.5-C
        ``reasoning`` field strip and the v1.0-A output-filter chain to
        the response body before returning.
        """
        url = self._url()
        payload = self._payload(request, stream=False, overrides=overrides)
        timeout = self.effective_timeout(overrides)
        try:
            # Client is built per call so the per-call timeout override
            # (v0.6-B) takes effect without shared-client state.
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.post(url, json=payload, headers=self._headers())
        except httpx.TimeoutException as exc:
            raise AdapterError(
                f"timeout contacting {url}", provider=self.name, retryable=True
            ) from exc
        except httpx.HTTPError as exc:
            raise AdapterError(
                f"transport error: {exc}", provider=self.name, retryable=True
            ) from exc

        if resp.status_code >= 400:
            # Truncate upstream body to keep the log trail readable.
            raise AdapterError(
                f"{resp.status_code} from upstream: {resp.text[:200]}",
                provider=self.name,
                status_code=resp.status_code,
                retryable=resp.status_code in _RETRYABLE_STATUSES,
            )

        try:
            data = resp.json()
        except json.JSONDecodeError as exc:
            # A 2xx with unparseable JSON is treated as terminal — retrying a
            # different provider won't fix a broken body contract here.
            raise AdapterError(
                f"invalid JSON from upstream: {exc}",
                provider=self.name,
                retryable=False,
            ) from exc

        # v0.5-C: passive strip of non-standard `reasoning` field on choices.
        # No-op when the provider opted into passthrough.
        if not self.config.capabilities.reasoning_passthrough and _strip_reasoning_field(
            data.get("choices"), delta_key=False
        ):
            log_capability_degraded(
                logger,
                provider=self.name,
                dropped=["reasoning"],
                reason="non-standard-field",
            )

        # v1.0-A: apply output_filters chain to each choice's message.content
        # (the non-standard `reasoning` field was already removed above, so
        # we only see the client-visible content). A fresh chain per call
        # keeps state-holding filters (strip_thinking) scoped to this request.
        if self.config.output_filters:
            chain = OutputFilterChain(self.config.output_filters)
            for choice in data.get("choices") or []:
                if not isinstance(choice, dict):
                    continue
                msg = choice.get("message")
                if isinstance(msg, dict):
                    content = msg.get("content")
                    if isinstance(content, str) and content:
                        # eof=True: whole body is present, flush in one pass.
                        msg["content"] = chain.feed(content, eof=True)
            if chain.any_applied:
                log_output_filter_applied(
                    logger,
                    provider=self.name,
                    filters=chain.applied_filters(),
                    streaming=False,
                )

        # Tag the response with which provider answered
        data.setdefault("object", "chat.completion")
        return ChatResponse(coderouter_provider=self.name, **data)

    async def stream(
        self,
        request: ChatRequest,
        *,
        overrides: ProviderCallOverrides | None = None,
    ) -> AsyncIterator[StreamChunk]:
        """Yield :class:`StreamChunk` objects from an SSE response.

        Applies the v0.5-C reasoning strip and the v1.0-A output-filter
        chain incrementally (per SSE chunk). The chain is stateful so
        ``<think>`` / stop markers split across chunk boundaries are
        still recognized; at end-of-stream any held-back safe suffix is
        flushed in a synthesized content-only chunk.
        """
        url = self._url()
        payload = self._payload(request, stream=True, overrides=overrides)
        timeout = self.effective_timeout(overrides)
        # v0.5-C: one-shot dedupe flag for the `reasoning` strip log. We
        # log once per stream request on the first chunk that carried the
        # field, not per chunk — otherwise a long reasoning track would
        # produce dozens of duplicate log lines.
        strip_reasoning = not self.config.capabilities.reasoning_passthrough
        reasoning_logged = False

        # v1.0-A: stateful output_filters chain for the duration of this
        # stream. Handles `<think>...</think>` / stop markers that split
        # across SSE chunk boundaries. One chain instance per request;
        # `output_filter_logged` dedupes the one-shot info log.
        filter_chain: OutputFilterChain | None = (
            OutputFilterChain(self.config.output_filters) if self.config.output_filters else None
        )
        output_filter_logged = False
        # Captured for the closing flush chunk (if any): reuse the last
        # seen chunk's id/model so the flush emission looks native.
        last_chunk_template: dict[str, Any] | None = None
        try:
            async with (
                httpx.AsyncClient(timeout=timeout) as client,
                client.stream("POST", url, json=payload, headers=self._headers()) as resp,
            ):
                if resp.status_code >= 400:
                    # Error bodies must be read explicitly in streaming mode.
                    body = await resp.aread()
                    raise AdapterError(
                        f"{resp.status_code} from upstream: {body[:200]!r}",
                        provider=self.name,
                        status_code=resp.status_code,
                        retryable=resp.status_code in _RETRYABLE_STATUSES,
                    )
                async for line in resp.aiter_lines():
                    if not line:
                        continue
                    # SSE format: lines start with "data: "
                    if line.startswith(":"):
                        continue  # comment / heartbeat
                    if not line.startswith("data:"):
                        continue
                    data_str = line[len("data:") :].strip()
                    if data_str == "[DONE]":
                        break
                    try:
                        payload_obj = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue  # skip malformed chunks rather than abort
                    if strip_reasoning:
                        stripped = _strip_reasoning_field(
                            payload_obj.get("choices"), delta_key=True
                        )
                        if stripped and not reasoning_logged:
                            log_capability_degraded(
                                logger,
                                provider=self.name,
                                dropped=["reasoning"],
                                reason="non-standard-field",
                            )
                            reasoning_logged = True
                    if filter_chain is not None:
                        for choice in payload_obj.get("choices") or []:
                            if not isinstance(choice, dict):
                                continue
                            delta = choice.get("delta")
                            if not isinstance(delta, dict):
                                continue
                            content = delta.get("content")
                            if isinstance(content, str) and content:
                                # No eof here: the chain may hold back a
                                # suffix that could start a filtered tag.
                                delta["content"] = filter_chain.feed(content)
                    last_chunk_template = payload_obj
                    yield StreamChunk(**payload_obj)

                # v1.0-A: flush the chain at end-of-stream. If filters held
                # back a partial-tag suffix that turned out NOT to be a tag,
                # emit one synthetic content-only chunk so the client sees
                # every safe byte. An unmatched `<think>` at EOF is silently
                # dropped (the filter treats the partial block as thinking).
                if filter_chain is not None:
                    tail = filter_chain.feed("", eof=True)
                    if tail and last_chunk_template is not None:
                        flush_chunk: dict[str, Any] = {
                            "id": last_chunk_template.get("id", ""),
                            "object": last_chunk_template.get("object", "chat.completion.chunk"),
                            "created": last_chunk_template.get("created", 0),
                            "model": last_chunk_template.get("model", self.config.model),
                            "choices": [{"index": 0, "delta": {"content": tail}}],
                        }
                        yield StreamChunk(**flush_chunk)
                    if filter_chain.any_applied and not output_filter_logged:
                        log_output_filter_applied(
                            logger,
                            provider=self.name,
                            filters=filter_chain.applied_filters(),
                            streaming=True,
                        )
                        output_filter_logged = True
        except httpx.TimeoutException as exc:
            raise AdapterError(
                f"timeout streaming from {url}", provider=self.name, retryable=True
            ) from exc
        except httpx.HTTPError as exc:
            raise AdapterError(
                f"transport error: {exc}", provider=self.name, retryable=True
            ) from exc
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Adapter factory — maps `kind` strings to adapter classes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from coderouter.adapters.anthropic_native import AnthropicAdapter
|
|
6
|
+
from coderouter.adapters.base import BaseAdapter
|
|
7
|
+
from coderouter.adapters.openai_compat import OpenAICompatAdapter
|
|
8
|
+
from coderouter.config.schemas import ProviderConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def build_adapter(provider: ProviderConfig) -> BaseAdapter:
    """Construct the adapter matching ``provider.kind``.

    Raises:
        ValueError: when ``provider.kind`` names no known adapter.
    """
    kind = provider.kind
    if kind == "anthropic":
        return AnthropicAdapter(provider)
    if kind == "openai_compat":
        return OpenAICompatAdapter(provider)
    raise ValueError(f"Unknown adapter kind: {kind!r}")
|