coderouter-cli 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/__init__.py +17 -0
- coderouter/__main__.py +6 -0
- coderouter/adapters/__init__.py +23 -0
- coderouter/adapters/anthropic_native.py +502 -0
- coderouter/adapters/base.py +220 -0
- coderouter/adapters/openai_compat.py +395 -0
- coderouter/adapters/registry.py +17 -0
- coderouter/cli.py +345 -0
- coderouter/cli_stats.py +751 -0
- coderouter/config/__init__.py +10 -0
- coderouter/config/capability_registry.py +339 -0
- coderouter/config/env_file.py +295 -0
- coderouter/config/loader.py +73 -0
- coderouter/config/schemas.py +515 -0
- coderouter/data/__init__.py +7 -0
- coderouter/data/model-capabilities.yaml +86 -0
- coderouter/doctor.py +1596 -0
- coderouter/env_security.py +434 -0
- coderouter/errors.py +29 -0
- coderouter/ingress/__init__.py +5 -0
- coderouter/ingress/anthropic_routes.py +205 -0
- coderouter/ingress/app.py +144 -0
- coderouter/ingress/dashboard_routes.py +493 -0
- coderouter/ingress/metrics_routes.py +92 -0
- coderouter/ingress/openai_routes.py +153 -0
- coderouter/logging.py +315 -0
- coderouter/metrics/__init__.py +39 -0
- coderouter/metrics/collector.py +471 -0
- coderouter/metrics/prometheus.py +221 -0
- coderouter/output_filters.py +407 -0
- coderouter/routing/__init__.py +13 -0
- coderouter/routing/auto_router.py +244 -0
- coderouter/routing/capability.py +285 -0
- coderouter/routing/fallback.py +611 -0
- coderouter/translation/__init__.py +57 -0
- coderouter/translation/anthropic.py +204 -0
- coderouter/translation/convert.py +1291 -0
- coderouter/translation/tool_repair.py +236 -0
- coderouter_cli-1.7.0.dist-info/METADATA +509 -0
- coderouter_cli-1.7.0.dist-info/RECORD +43 -0
- coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
- coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
- coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
"""v1.0-A: Declarative output filter chain.
|
|
2
|
+
|
|
3
|
+
Context
|
|
4
|
+
-------
|
|
5
|
+
Quantized local models (and some cloud families) leak harness-internal
|
|
6
|
+
markers into the assistant text stream that spec-strict clients either
|
|
7
|
+
display verbatim or outright reject:
|
|
8
|
+
|
|
9
|
+
``<think>...</think>`` — Qwen3 / DeepSeek-R1-Distill thinking track.
|
|
10
|
+
``<|turn|>`` / ``<|end|>`` — Gemma-ish turn separators.
|
|
11
|
+
``<|python_tag|>`` — Llama 3.x tool-use marker.
|
|
12
|
+
``<|im_end|>`` — ChatML / Qwen end-of-turn.
|
|
13
|
+
``<|eot_id|>`` — Llama 3.x end-of-turn.
|
|
14
|
+
``<|channel>thought`` — OpenAI-harmony channel marker.
|
|
15
|
+
|
|
16
|
+
``capabilities.reasoning_passthrough`` (v0.5-C) only governs the
|
|
17
|
+
non-standard ``message.reasoning`` field; markers embedded in the
|
|
18
|
+
assistant *content* slip past it. v1.0-A adds a declarative, per-
|
|
19
|
+
provider opt-in chain that scrubs them at the adapter boundary.
|
|
20
|
+
|
|
21
|
+
Design decisions
|
|
22
|
+
----------------
|
|
23
|
+
- Opt-in per provider via ``output_filters: [strip_thinking, ...]``.
|
|
24
|
+
Empty by default — the existing v0.5-C passive strip remains
|
|
25
|
+
orthogonal, and providers that WANT the thinking track (e.g.
|
|
26
|
+
CodeRouter fronting a reasoning-aware downstream) stay untouched.
|
|
27
|
+
- Stateful streaming: a filter instance holds state across ``feed()``
|
|
28
|
+
calls so a ``<think>...</think>`` block spanning multiple SSE deltas
|
|
29
|
+
is coalesced. Safe-to-emit suffix management is explicit — callers
|
|
30
|
+
never see partial tags.
|
|
31
|
+
- Non-streaming convenience: ``apply_output_filters(names, text)``
|
|
32
|
+
creates a chain, feeds with ``eof=True``, returns the scrubbed text.
|
|
33
|
+
- Unknown filter names raise ``ValueError`` at chain construction, so
|
|
34
|
+
``schemas.ProviderConfig`` wires this through a ``model_validator``
|
|
35
|
+
and bad configs fail at load time rather than on first request.
|
|
36
|
+
- Pairs with ``coderouter doctor`` (v0.7-B): the reasoning-leak probe
|
|
37
|
+
is extended in this same sub-release to detect content-embedded
|
|
38
|
+
``<think>`` and suggest ``output_filters: [strip_thinking]``.
|
|
39
|
+
|
|
40
|
+
Reference: plan.md §10.2 "出力クリーニング" / docs/retrospectives/v0.7.md
|
|
41
|
+
"transformation には probe が伴う".
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
from __future__ import annotations
|
|
45
|
+
|
|
46
|
+
from typing import Protocol
|
|
47
|
+
|
|
48
|
+
__all__ = [
|
|
49
|
+
"DEFAULT_STOP_MARKERS",
|
|
50
|
+
"KNOWN_FILTERS",
|
|
51
|
+
"OutputFilter",
|
|
52
|
+
"OutputFilterChain",
|
|
53
|
+
"StripStopMarkersFilter",
|
|
54
|
+
"StripThinkingFilter",
|
|
55
|
+
"apply_output_filters",
|
|
56
|
+
"validate_output_filters",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# Public constants
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Markers are literal strings (not regexes); matching is exact and
# case-sensitive. Tuple order does not affect output — the filter excises
# whichever marker occurs earliest in the buffer.
DEFAULT_STOP_MARKERS: tuple[str, ...] = (
    "<|turn|>",
    "<|end|>",
    "<|python_tag|>",
    "<|im_end|>",
    "<|eot_id|>",
    "<|channel>thought",
)
"""Default stop/harness markers stripped by ``strip_stop_markers``.

Covers Llama 3.x (``<|python_tag|>``, ``<|eot_id|>``), ChatML / Qwen
(``<|im_end|>``, ``<|end|>``), Gemma-ish (``<|turn|>``) and OpenAI-
harmony (``<|channel>thought``). Extending this tuple is an ABI change
— users who need a bespoke set can add a dedicated filter entry in
a later minor; for v1.0-A the fixed list covers observed leaks.
"""
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
# Protocol
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class OutputFilter(Protocol):
    """Stateful streaming text filter.

    Implementations MUST:
    - Tolerate arbitrary chunking: partial tags at the end of a
      ``feed`` input must be buffered and re-examined on the next
      call. The caller will invoke ``feed(..., eof=True)`` exactly
      once at the end to flush any remaining buffer.
    - Set ``modified`` to True the first time any character in the
      input stream would be suppressed or rewritten (regardless of
      whether that specific ``feed`` call produced visible output).
    - Be cheap to construct — one instance per request/stream.
    """

    # Stable registry key (e.g. "strip_thinking") — used as the
    # KNOWN_FILTERS dict key and in log payloads.
    name: str
    # Latched flag: True once any input character has been suppressed
    # or rewritten; never reset for the lifetime of the instance.
    modified: bool

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Consume ``text`` and return the portion safe to emit now."""
        ...
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
# Helper: find how much of the trailing buffer could be a prefix of `needle`
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _max_suffix_overlap(buffer: str, needle: str) -> int:
|
|
116
|
+
"""Return the longest N where ``buffer[-N:]`` equals ``needle[:N]``.
|
|
117
|
+
|
|
118
|
+
Used to decide how much of the trailing buffer to hold back so a
|
|
119
|
+
partial tag spanning chunk boundaries is not prematurely emitted.
|
|
120
|
+
Zero means the buffer ends on a character that cannot be the start
|
|
121
|
+
of ``needle``, so every byte is safe to release.
|
|
122
|
+
"""
|
|
123
|
+
max_k = min(len(buffer), len(needle) - 1)
|
|
124
|
+
for k in range(max_k, 0, -1):
|
|
125
|
+
if buffer.endswith(needle[:k]):
|
|
126
|
+
return k
|
|
127
|
+
return 0
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _max_suffix_overlap_multi(buffer: str, needles: tuple[str, ...]) -> int:
    """``_max_suffix_overlap`` lifted over a tuple of needles — take the max."""
    return max(
        (_max_suffix_overlap(buffer, needle) for needle in needles),
        default=0,
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# ---------------------------------------------------------------------------
|
|
141
|
+
# strip_thinking
|
|
142
|
+
# ---------------------------------------------------------------------------
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Qwen3 / DeepSeek-R1-style thinking-track delimiters. Matched literally
# (case-sensitive); ``StripThinkingFilter`` suppresses everything between
# an open tag and the first subsequent close tag.
_THINK_OPEN = "<think>"
_THINK_CLOSE = "</think>"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class StripThinkingFilter:
    """Remove ``<think>...</think>`` blocks from assistant content.

    State spans ``feed`` calls so a block split across SSE chunks (or
    across a single prose paragraph containing both tags) is handled
    correctly. Nesting is not tracked — the first ``</think>`` after a
    ``<think>`` always closes the block. An open tag that is never
    closed by EOF suppresses the entire remainder of the stream.

    ``modified`` latches True on the first ``<think>`` observed in the
    stream (not per suppressed character); the adapter uses it to gate
    a log-once info line.
    """

    name = "strip_thinking"

    def __init__(self) -> None:
        """Start outside any think block with an empty hold-back buffer."""
        self.modified: bool = False
        self._in_think: bool = False
        self._buffer: str = ""

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Append ``text`` to the buffer and return the safe-to-emit prefix.

        A partial tag prefix at the end of the buffer is carried over to
        the next call, so tags split across SSE deltas are still matched.
        With ``eof=True`` the remaining buffer is flushed — or silently
        dropped when an open tag was never closed.
        """
        self._buffer += text
        emitted: list[str] = []

        scanning = True
        while scanning:
            if self._in_think:
                # Inside a block: suppress everything up to the close tag.
                pos = self._buffer.find(_THINK_CLOSE)
                if pos >= 0:
                    self._buffer = self._buffer[pos + len(_THINK_CLOSE) :]
                    self._in_think = False
                else:
                    # Keep only a suffix that could still grow into the
                    # close tag; everything else is thinking — drop it.
                    held = _max_suffix_overlap(self._buffer, _THINK_CLOSE)
                    self._buffer = self._buffer[-held:] if held else ""
                    scanning = False
            else:
                pos = self._buffer.find(_THINK_OPEN)
                if pos >= 0:
                    emitted.append(self._buffer[:pos])
                    self._buffer = self._buffer[pos + len(_THINK_OPEN) :]
                    self._in_think = True
                    self.modified = True
                else:
                    # No open tag — release all but a possible partial one.
                    held = _max_suffix_overlap(self._buffer, _THINK_OPEN)
                    if held:
                        emitted.append(self._buffer[:-held])
                        self._buffer = self._buffer[-held:]
                    else:
                        emitted.append(self._buffer)
                        self._buffer = ""
                    scanning = False

        if eof:
            if not self._in_think:
                # Remaining buffer is known-safe once no more input comes.
                emitted.append(self._buffer)
            # An unclosed <think> swallows the rest of the stream.
            self._buffer = ""
        return "".join(emitted)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# ---------------------------------------------------------------------------
|
|
222
|
+
# strip_stop_markers
|
|
223
|
+
# ---------------------------------------------------------------------------
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class StripStopMarkersFilter:
    """Remove harness/turn markers (``<|python_tag|>``, ``<|eot_id|>``, ...).

    Unlike ``strip_thinking`` this performs point deletions rather than
    block suppression. The streaming concern is identical: a chunk
    boundary can fall inside a marker, so a trailing partial prefix is
    withheld until the next ``feed``.

    The marker list defaults to :data:`DEFAULT_STOP_MARKERS` — fixed
    for v1.0-A.
    """

    name = "strip_stop_markers"

    def __init__(self, markers: tuple[str, ...] = DEFAULT_STOP_MARKERS) -> None:
        """Initialize with an optional custom marker set.

        The default :data:`DEFAULT_STOP_MARKERS` covers the observed
        Llama 3.x / ChatML / Qwen / Gemma / harmony leaks. Tests and
        future extensions may pass a bespoke tuple; v1.0-A does not
        expose this knob via providers.yaml.
        """
        self.modified: bool = False
        self._buffer: str = ""
        self._markers: tuple[str, ...] = markers

    def _earliest_match(self, buffer: str) -> tuple[int, str] | None:
        """Return (position, marker) of the earliest marker in ``buffer``."""
        # Strict ``<`` keeps the first marker in tuple order when two
        # markers happen to start at the same index.
        winner: tuple[int, str] | None = None
        for marker in self._markers:
            pos = buffer.find(marker)
            if pos != -1 and (winner is None or pos < winner[0]):
                winner = (pos, marker)
        return winner

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Emit ``text`` minus any marker matches; buffer partial prefixes.

        A complete marker anywhere in the buffer is excised in place.
        A trailing partial prefix that could complete on the next
        :meth:`feed` is held back; at ``eof`` it is flushed verbatim
        (we only hide bytes that are definitively part of a marker).
        """
        self._buffer += text
        emitted: list[str] = []

        # Excise every complete marker currently present.
        while (found := self._earliest_match(self._buffer)) is not None:
            pos, marker = found
            if pos:
                emitted.append(self._buffer[:pos])
            self._buffer = self._buffer[pos + len(marker) :]
            self.modified = True

        # Withhold only a suffix that could still complete into a marker —
        # and only while more input may arrive.
        held = _max_suffix_overlap_multi(self._buffer, self._markers)
        if held and not eof:
            emitted.append(self._buffer[:-held])
            self._buffer = self._buffer[-held:]
        else:
            emitted.append(self._buffer)
            self._buffer = ""

        return "".join(emitted)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
# Registry + chain
|
|
297
|
+
# ---------------------------------------------------------------------------
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# Maps the names accepted in ``output_filters: [...]`` config entries to
# their classes; ``OutputFilterChain`` instantiates one fresh instance of
# each per request/stream.
KNOWN_FILTERS: dict[str, type[OutputFilter]] = {
    StripThinkingFilter.name: StripThinkingFilter,
    StripStopMarkersFilter.name: StripStopMarkersFilter,
}
"""Registry of string-name → filter class.

Declared as a dict rather than a frozen mapping so tests and future
extensions can poke in additional filters without a schema change, but
adapter callers should treat it as read-only.
"""
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def validate_output_filters(names: list[str]) -> None:
    """Raise ``ValueError`` if any name in ``names`` is not registered.

    Called from ``ProviderConfig`` at config-load time so a typo like
    ``output_filters: [strp_thinking]`` fails at startup rather than
    silently no-op'ing forever. The error message lists all known
    filter names so the fix is one line.
    """
    unknown = [name for name in names if name not in KNOWN_FILTERS]
    if not unknown:
        return
    raise ValueError(
        f"Unknown output_filters entries: {unknown}. Known filters: {sorted(KNOWN_FILTERS)}"
    )
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
class OutputFilterChain:
    """Ordered composition of ``OutputFilter`` instances.

    ``feed`` pipes text through every filter in declaration order.
    ``any_applied`` is the OR of per-filter ``modified`` flags; the
    adapter uses it to emit a single ``output-filter-applied`` info log
    the first time a request would have been affected (dedupe mirrors
    the v0.5-C reasoning-strip log-once).

    An empty chain is a legal no-op: ``feed`` returns ``text`` verbatim
    and ``any_applied`` stays False, so adapters can unconditionally
    thread a chain through their hot path without branching on
    ``output_filters == []``.
    """

    def __init__(self, filter_names: list[str]) -> None:
        """Build fresh filter instances for the given names.

        Raises :class:`ValueError` via :func:`validate_output_filters`
        on any unknown name — in practice validation already happened
        at config-load time in ``ProviderConfig``, so a failure here
        means a bad config slipped through to startup.
        """
        validate_output_filters(filter_names)
        self._names: list[str] = list(filter_names)
        self._filters: list[OutputFilter] = [
            KNOWN_FILTERS[name]() for name in filter_names
        ]

    @property
    def names(self) -> list[str]:
        """Ordered list of filter names (for log payloads / debugging)."""
        return list(self._names)

    @property
    def is_empty(self) -> bool:
        """True when no filters were configured — lets callers skip the hot path."""
        return len(self._filters) == 0

    @property
    def any_applied(self) -> bool:
        """True if ANY filter has modified text since construction."""
        return any(flt.modified for flt in self._filters)

    def applied_filters(self) -> list[str]:
        """Names of filters that actually modified text (subset of ``names``).

        Stable order — matches construction order. Useful in the log
        payload so operators can see exactly which filter triggered.
        """
        return [flt.name for flt in self._filters if flt.modified]

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Pipe ``text`` through every filter. ``eof`` flushes at end."""
        for flt in self._filters:
            text = flt.feed(text, eof=eof)
        return text
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
# ---------------------------------------------------------------------------
|
|
385
|
+
# Non-streaming convenience
|
|
386
|
+
# ---------------------------------------------------------------------------
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def apply_output_filters(filter_names: list[str], text: str) -> tuple[str, list[str]]:
    """Run a one-shot chain over a complete text.

    Returns ``(scrubbed_text, applied_filter_names)``. The second
    element is the subset of ``filter_names`` that actually modified
    ``text`` — the adapter passes it into the log-once helper on
    non-streaming paths (streaming paths keep a live chain instead).

    This is equivalent to::

        chain = OutputFilterChain(filter_names)
        out = chain.feed(text, eof=True)
        applied = chain.applied_filters()
    """
    # Empty config short-circuits without constructing a chain.
    if not filter_names:
        return text, []
    chain = OutputFilterChain(filter_names)
    # Left-to-right evaluation: feed() runs before applied_filters().
    return chain.feed(text, eof=True), chain.applied_filters()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Profile-based routing and fallback engine."""
|
|
2
|
+
|
|
3
|
+
from coderouter.routing.fallback import (
|
|
4
|
+
FallbackEngine,
|
|
5
|
+
MidStreamError,
|
|
6
|
+
NoProvidersAvailableError,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"FallbackEngine",
|
|
11
|
+
"MidStreamError",
|
|
12
|
+
"NoProvidersAvailableError",
|
|
13
|
+
]
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""v1.6-A: task-aware auto routing — request-body inspection → profile name.
|
|
2
|
+
|
|
3
|
+
Slots into the v0.6-D precedence chain below the mode header and above
|
|
4
|
+
``default_profile``::
|
|
5
|
+
|
|
6
|
+
body.profile
|
|
7
|
+
> X-CodeRouter-Profile
|
|
8
|
+
> X-CodeRouter-Mode
|
|
9
|
+
> auto_router (fires only when default_profile == "auto")
|
|
10
|
+
> default_profile
|
|
11
|
+
|
|
12
|
+
The classifier is **rule-based** — no ML, no external calls, no small-LLM
|
|
13
|
+
pre-pass. Each rule is a matcher + target profile; first match wins. If
|
|
14
|
+
no rule matches, ``default_rule_profile`` is used.
|
|
15
|
+
|
|
16
|
+
Design reference: ``docs/designs/v1.6-auto-router.md``.
|
|
17
|
+
|
|
18
|
+
Pydantic schemas (:class:`RuleMatcher`, :class:`AutoRouteRule`,
|
|
19
|
+
:class:`AutoRouterConfig`) live in ``coderouter.config.schemas`` to keep
|
|
20
|
+
the routing package free of circular imports with the config loader;
|
|
21
|
+
they are re-exported here for call-site ergonomics.
|
|
22
|
+
|
|
23
|
+
Public surface:
|
|
24
|
+
|
|
25
|
+
- :data:`BUNDLED_RULES` — the zero-config default ruleset (image →
|
|
26
|
+
multi / dense-code → coding). Falls through to ``writing`` via
|
|
27
|
+
:data:`BUNDLED_DEFAULT_RULE_PROFILE`.
|
|
28
|
+
- :data:`BUNDLED_REQUIRED_PROFILES` — the three profile names the
|
|
29
|
+
bundled ruleset needs present in ``profiles[]`` (validated at load).
|
|
30
|
+
- :data:`RESERVED_PROFILE_NAME` — ``"auto"``. Not allowed as a
|
|
31
|
+
user-defined profile name.
|
|
32
|
+
- :func:`classify` — the classifier entry point.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import logging
|
|
38
|
+
import re
|
|
39
|
+
from typing import TYPE_CHECKING, Any
|
|
40
|
+
|
|
41
|
+
from coderouter.config.schemas import AutoRouterConfig, AutoRouteRule, RuleMatcher
|
|
42
|
+
|
|
43
|
+
if TYPE_CHECKING:
|
|
44
|
+
from coderouter.config.schemas import CodeRouterConfig
|
|
45
|
+
|
|
46
|
+
logger = logging.getLogger("coderouter.routing.auto_router")
|
|
47
|
+
|
|
48
|
+
RESERVED_PROFILE_NAME = "auto"
|
|
49
|
+
BUNDLED_DEFAULT_RULE_PROFILE = "writing"
|
|
50
|
+
BUNDLED_REQUIRED_PROFILES: tuple[str, ...] = ("multi", "coding", "writing")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
# Bundled ruleset
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Zero-config default ruleset — first match wins, so image detection takes
# precedence over code density. Fallthrough (no rule matches) is handled
# in ``classify`` via BUNDLED_DEFAULT_RULE_PROFILE ("writing").
BUNDLED_RULES: list[AutoRouteRule] = [
    AutoRouteRule(
        id="builtin:image-attachment",
        profile="multi",
        match=RuleMatcher(has_image=True),
    ),
    AutoRouteRule(
        id="builtin:code-fence-dense",
        profile="coding",
        # 30%+ of the latest user message inside ``` fences → coding.
        match=RuleMatcher(code_fence_ratio_min=0.3),
    ),
]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Classifier
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
_FENCE_RE = re.compile(r"```[\s\S]*?```")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _latest_user_message(body: dict[str, Any]) -> dict[str, Any] | None:
|
|
81
|
+
"""Return the most recent ``role: user`` message, or None."""
|
|
82
|
+
messages = body.get("messages")
|
|
83
|
+
if not isinstance(messages, list):
|
|
84
|
+
return None
|
|
85
|
+
for msg in reversed(messages):
|
|
86
|
+
if isinstance(msg, dict) and msg.get("role") == "user":
|
|
87
|
+
return msg
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _has_image(message: dict[str, Any]) -> bool:
|
|
92
|
+
"""True iff the message has any image content block.
|
|
93
|
+
|
|
94
|
+
Handles both OpenAI format (``type: image_url``) and Anthropic format
|
|
95
|
+
(``type: image``) plus the top-level ``input_image`` extension.
|
|
96
|
+
"""
|
|
97
|
+
content = message.get("content")
|
|
98
|
+
if isinstance(content, list):
|
|
99
|
+
for block in content:
|
|
100
|
+
if not isinstance(block, dict):
|
|
101
|
+
continue
|
|
102
|
+
btype = block.get("type")
|
|
103
|
+
if btype in ("image_url", "image", "input_image"):
|
|
104
|
+
return True
|
|
105
|
+
return False
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _extract_text(message: dict[str, Any]) -> str:
|
|
109
|
+
"""Concatenate all text content from a message into one string.
|
|
110
|
+
|
|
111
|
+
String content stays verbatim. List content (OpenAI / Anthropic
|
|
112
|
+
multimodal format) contributes only the ``text`` of text-type blocks.
|
|
113
|
+
"""
|
|
114
|
+
content = message.get("content")
|
|
115
|
+
if isinstance(content, str):
|
|
116
|
+
return content
|
|
117
|
+
if isinstance(content, list):
|
|
118
|
+
pieces: list[str] = []
|
|
119
|
+
for block in content:
|
|
120
|
+
if not isinstance(block, dict):
|
|
121
|
+
continue
|
|
122
|
+
if block.get("type") == "text":
|
|
123
|
+
text = block.get("text")
|
|
124
|
+
if isinstance(text, str):
|
|
125
|
+
pieces.append(text)
|
|
126
|
+
elif "text" in block and isinstance(block["text"], str):
|
|
127
|
+
pieces.append(block["text"])
|
|
128
|
+
return "\n".join(pieces)
|
|
129
|
+
return ""
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _code_fence_ratio(text: str) -> float:
    """Return the fraction of ``text`` that lies inside ``` ``` fences.

    0.0 if the text is empty or has no fences. Fenced regions include
    their opening and closing triple backticks so the math stays stable
    regardless of language hints (```` ```python ```` vs ```` ``` ````).
    """
    if not text:
        return 0.0
    covered = 0
    for match in _FENCE_RE.finditer(text):
        covered += len(match.group(0))
    return covered / len(text)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _match_rule(rule: AutoRouteRule, message: dict[str, Any], text: str) -> bool:
    """Evaluate ``rule``'s single matcher against the message/text pair.

    The schema guarantees exactly one matcher field is set, so the
    checks below are mutually exclusive in practice; they are tried in
    a fixed priority order.
    """
    matcher = rule.match
    if matcher.has_image is True:
        return _has_image(message)
    if matcher.code_fence_ratio_min is not None:
        return _code_fence_ratio(text) >= matcher.code_fence_ratio_min
    if matcher.content_contains is not None:
        return matcher.content_contains in text
    if matcher.content_regex is not None:
        return re.search(matcher.content_regex, text) is not None
    return False  # pragma: no cover — _exactly_one guards against this
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def classify(body: dict[str, Any], config: CodeRouterConfig) -> str:
    """Resolve an incoming request body to a profile name.

    Rule order (first match wins) comes from ``config.auto_router.rules``
    when ``auto_router`` is set with a non-empty ``rules`` list, else
    from :data:`BUNDLED_RULES`.

    Fallthrough (no rule matches, no user message, or ``disabled`` is
    True) resolves to ``config.auto_router.default_rule_profile`` when
    configured, else :data:`BUNDLED_DEFAULT_RULE_PROFILE`.

    Emits one of two log events: ``auto-router-resolved`` on match, or
    ``auto-router-fallthrough`` on default-rule fall.
    """
    latest = _latest_user_message(body)
    text = "" if latest is None else _extract_text(latest)

    cfg = config.auto_router
    # Explicitly disabled: always the configured default, logged as such.
    if cfg is not None and cfg.disabled:
        _emit_fallthrough(cfg.default_rule_profile, text, disabled=True)
        return cfg.default_rule_profile

    if cfg is None:
        rules = BUNDLED_RULES
        fallback = BUNDLED_DEFAULT_RULE_PROFILE
    else:
        # An empty user rules list falls back to the bundled ruleset.
        rules = cfg.rules or BUNDLED_RULES
        fallback = cfg.default_rule_profile

    if latest is not None:
        for rule in rules:
            if _match_rule(rule, latest, text):
                _emit_resolved(rule, latest, text)
                return rule.profile

    _emit_fallthrough(fallback, text)
    return fallback
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _emit_resolved(
    rule: AutoRouteRule, message: dict[str, Any], text: str
) -> None:
    """Log a single ``auto-router-resolved`` event for a matched rule."""
    # Signals are recomputed here (cheap) so the log is self-contained.
    signals = {
        "has_image": _has_image(message),
        "code_fence_ratio": round(_code_fence_ratio(text), 3),
        "content_len": len(text),
    }
    extra = {
        "rule_id": rule.id,
        "resolved_profile": rule.profile,
        "signals": signals,
    }
    logger.info("auto-router-resolved", extra=extra)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _emit_fallthrough(
    profile: str, text: str, disabled: bool = False
) -> None:
    """Log a single ``auto-router-fallthrough`` event.

    Emitted when no rule matched, no user message was found, or the
    auto-router is disabled (``disabled=True`` distinguishes the latter
    in the log payload).
    """
    signals = {
        "code_fence_ratio": round(_code_fence_ratio(text), 3),
        "content_len": len(text),
        "disabled": disabled,
    }
    logger.info(
        "auto-router-fallthrough",
        extra={"resolved_profile": profile, "signals": signals},
    )
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
__all__ = [
|
|
236
|
+
"BUNDLED_DEFAULT_RULE_PROFILE",
|
|
237
|
+
"BUNDLED_REQUIRED_PROFILES",
|
|
238
|
+
"BUNDLED_RULES",
|
|
239
|
+
"RESERVED_PROFILE_NAME",
|
|
240
|
+
"AutoRouteRule",
|
|
241
|
+
"AutoRouterConfig",
|
|
242
|
+
"RuleMatcher",
|
|
243
|
+
"classify",
|
|
244
|
+
]
|