coderouter-cli 2.3.0a4__py3-none-any.whl → 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +31 -0
- coderouter/config/schemas.py +157 -0
- coderouter/guards/__init__.py +2 -0
- coderouter/guards/_fingerprint.py +125 -0
- coderouter/guards/drift_detection.py +55 -0
- coderouter/ingress/app.py +11 -0
- coderouter/ingress/dashboard_routes.py +1 -0
- coderouter/ingress/launcher_routes.py +1176 -0
- coderouter/routing/fallback.py +33 -3
- coderouter/state/__init__.py +15 -11
- coderouter/state/suggest_rules.py +413 -0
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/METADATA +36 -4
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/RECORD +16 -13
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/licenses/LICENSE +0 -0
coderouter/routing/fallback.py
CHANGED
|
@@ -1283,6 +1283,7 @@ class FallbackEngine:
|
|
|
1283
1283
|
stop_reason: str | None = None,
|
|
1284
1284
|
is_error: bool = False,
|
|
1285
1285
|
stream: bool = False,
|
|
1286
|
+
response_fingerprint: str | None = None,
|
|
1286
1287
|
) -> DriftVerdict | None:
|
|
1287
1288
|
"""v2.0-G (L4): record an observation and check for drift.
|
|
1288
1289
|
|
|
@@ -1294,9 +1295,18 @@ class FallbackEngine:
|
|
|
1294
1295
|
- Emits ``drift-detected`` log.
|
|
1295
1296
|
- If action is ``promote`` or ``reload``, demotes the provider
|
|
1296
1297
|
via the adaptive rank machinery.
|
|
1298
|
+
|
|
1299
|
+
Parameters
|
|
1300
|
+
----------
|
|
1301
|
+
response_fingerprint:
|
|
1302
|
+
P1-4: compact content fingerprint from
|
|
1303
|
+
:func:`coderouter.guards._fingerprint.fingerprint_response`.
|
|
1304
|
+
When set, enables the ``goal_progress_stall`` signal.
|
|
1305
|
+
Pass ``None`` (default) to skip that signal.
|
|
1297
1306
|
"""
|
|
1298
1307
|
from coderouter.guards.drift_detection import (
|
|
1299
1308
|
SENSITIVITY_PRESETS,
|
|
1309
|
+
THRESHOLDS_GOAL,
|
|
1300
1310
|
ResponseObservation,
|
|
1301
1311
|
detect_drift,
|
|
1302
1312
|
)
|
|
@@ -1322,6 +1332,7 @@ class FallbackEngine:
|
|
|
1322
1332
|
stop_reason=stop_reason,
|
|
1323
1333
|
is_error=is_error,
|
|
1324
1334
|
stream=stream,
|
|
1335
|
+
response_fingerprint=response_fingerprint,
|
|
1325
1336
|
)
|
|
1326
1337
|
self._drift_window.record(obs)
|
|
1327
1338
|
|
|
@@ -1344,10 +1355,15 @@ class FallbackEngine:
|
|
|
1344
1355
|
return None
|
|
1345
1356
|
|
|
1346
1357
|
# Run detection
|
|
1358
|
+
# P1-5: goal_mode overrides the sensitivity preset with the tighter
|
|
1359
|
+
# THRESHOLDS_GOAL regardless of drift_detection_sensitivity setting.
|
|
1347
1360
|
window = self._drift_window.get_window(provider)
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1361
|
+
if getattr(chain_cfg, "goal_mode", False):
|
|
1362
|
+
thresholds = THRESHOLDS_GOAL
|
|
1363
|
+
else:
|
|
1364
|
+
thresholds = SENSITIVITY_PRESETS.get(
|
|
1365
|
+
chain_cfg.drift_detection_sensitivity, SENSITIVITY_PRESETS["normal"]
|
|
1366
|
+
)
|
|
1351
1367
|
verdict = detect_drift(window, thresholds)
|
|
1352
1368
|
|
|
1353
1369
|
if not verdict.drifted:
|
|
@@ -2083,6 +2099,13 @@ class FallbackEngine:
|
|
|
2083
2099
|
adapter.name, profile=request.profile
|
|
2084
2100
|
)
|
|
2085
2101
|
# v2.0-G (L4): drift detection observation (success path).
|
|
2102
|
+
# P1-4: compute response fingerprint for goal_progress_stall.
|
|
2103
|
+
_fp_text = " ".join(
|
|
2104
|
+
getattr(b, "text", "") or (b.get("text", "") if isinstance(b, dict) else "")
|
|
2105
|
+
for b in (resp.content or [])
|
|
2106
|
+
if (getattr(b, "type", None) or (b.get("type") if isinstance(b, dict) else None)) == "text"
|
|
2107
|
+
)
|
|
2108
|
+
from coderouter.guards._fingerprint import fingerprint_response as _fp
|
|
2086
2109
|
self._observe_drift_signal(
|
|
2087
2110
|
adapter.name,
|
|
2088
2111
|
profile=request.profile,
|
|
@@ -2093,6 +2116,7 @@ class FallbackEngine:
|
|
|
2093
2116
|
request_had_tools=bool(request.tools),
|
|
2094
2117
|
stop_reason=resp.stop_reason,
|
|
2095
2118
|
stream=False,
|
|
2119
|
+
response_fingerprint=_fp(_fp_text) if _fp_text else None,
|
|
2096
2120
|
)
|
|
2097
2121
|
# v1.9-A: pair every successful Anthropic response with a
|
|
2098
2122
|
# cache-observed log line. Native Anthropic / LM Studio
|
|
@@ -2312,6 +2336,11 @@ class FallbackEngine:
|
|
|
2312
2336
|
adapter.name, exc, partial_content=acc.partial_content
|
|
2313
2337
|
) from exc
|
|
2314
2338
|
# v2.0-G (L4): drift detection observation (stream success).
|
|
2339
|
+
# P1-4: compute response fingerprint for goal_progress_stall.
|
|
2340
|
+
_stream_fp_text = " ".join(
|
|
2341
|
+
b.get("text", "") for b in acc.partial_content if b.get("type") == "text"
|
|
2342
|
+
)
|
|
2343
|
+
from coderouter.guards._fingerprint import fingerprint_response as _fp_s
|
|
2315
2344
|
self._observe_drift_signal(
|
|
2316
2345
|
adapter.name,
|
|
2317
2346
|
profile=request.profile,
|
|
@@ -2320,6 +2349,7 @@ class FallbackEngine:
|
|
|
2320
2349
|
request_had_tools=bool(request.tools),
|
|
2321
2350
|
stop_reason=acc.stop_reason,
|
|
2322
2351
|
stream=True,
|
|
2352
|
+
response_fingerprint=_fp_s(_stream_fp_text) if _stream_fp_text else None,
|
|
2323
2353
|
)
|
|
2324
2354
|
# v1.9-B2: pair the successful stream with a cache-observed
|
|
2325
2355
|
# log line carrying the aggregated usage counters that the
|
coderouter/state/__init__.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
"""Persistent state layer (v2.0-K).
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Five modules:
|
|
4
4
|
|
|
5
|
-
* :mod:`coderouter.state.store`
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
* :mod:`coderouter.state.audit_log`
|
|
9
|
-
|
|
10
|
-
* :mod:`coderouter.state.request_log`
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
* :mod:`coderouter.state.replay`
|
|
14
|
-
|
|
5
|
+
* :mod:`coderouter.state.store` — sqlite3 KV store for operational
|
|
6
|
+
metadata (budget totals, health
|
|
7
|
+
state, self-healing exclusions).
|
|
8
|
+
* :mod:`coderouter.state.audit_log` — JSONL structured event log with
|
|
9
|
+
rotation and CLI reader.
|
|
10
|
+
* :mod:`coderouter.state.request_log` — JSONL request metadata journal
|
|
11
|
+
(per-request token counts, cost,
|
|
12
|
+
provider — no request body).
|
|
13
|
+
* :mod:`coderouter.state.replay` — Statistical A/B analysis engine
|
|
14
|
+
over request journal entries.
|
|
15
|
+
* :mod:`coderouter.state.suggest_rules` — P1-6 rule suggestion engine:
|
|
16
|
+
analyses WindowSummary and emits
|
|
17
|
+
copy-paste YAML snippets for
|
|
18
|
+
routing optimisation.
|
|
15
19
|
"""
|
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""Rule suggestion engine for ``coderouter replay --suggest-rules`` (P1-6).
|
|
2
|
+
|
|
3
|
+
Analyses the request journal statistics produced by
|
|
4
|
+
:func:`coderouter.state.replay.summarize_window` and emits a list of
|
|
5
|
+
:class:`RuleSuggestion` objects — each containing a plain-English
|
|
6
|
+
description, a copy-paste YAML snippet, and the numeric evidence that
|
|
7
|
+
drove the recommendation.
|
|
8
|
+
|
|
9
|
+
Design
|
|
10
|
+
------
|
|
11
|
+
Pure statistical analysis — no LLM required. Rules are applied in
|
|
12
|
+
priority order; each rule is independently evaluated so multiple
|
|
13
|
+
suggestions can fire for the same provider.
|
|
14
|
+
|
|
15
|
+
Rules (v1.0)
|
|
16
|
+
------------
|
|
17
|
+
|
|
18
|
+
1. **provider_reorder** — If provider B costs less per request than
|
|
19
|
+
provider A *and* B has meaningful traffic, suggest moving B earlier
|
|
20
|
+
in the fallback chain.
|
|
21
|
+
|
|
22
|
+
2. **enable_prompt_cache** — If a provider has a large average input
|
|
23
|
+
token count (> ``CACHE_INPUT_THRESHOLD``) and a low cache-hit ratio
|
|
24
|
+
(< ``CACHE_HIT_RATIO_THRESHOLD``), suggest enabling
|
|
25
|
+
``capabilities.prompt_cache: true``.
|
|
26
|
+
|
|
27
|
+
3. **enable_drift_detection** — If any provider has a non-trivial
|
|
28
|
+
request volume and no drift-detection configuration is visible in
|
|
29
|
+
the stats (proxy: we see the provider at all), emit a reminder to
|
|
30
|
+
set ``drift_detection_action: promote``.
|
|
31
|
+
|
|
32
|
+
4. **low_sensitivity_small_window** — If a provider has a low request count
|
|
33
|
+
(< ``SMALL_WINDOW_THRESHOLD``) and drift detection would fire early,
|
|
34
|
+
suggest setting ``drift_detection_sensitivity: low`` to avoid false
|
|
35
|
+
positives.
|
|
36
|
+
|
|
37
|
+
5. **goal_profile** — If there is more than one provider with
|
|
38
|
+
significant traffic and average output tokens differ substantially,
|
|
39
|
+
suggest creating a ``goal`` profile with ``goal_mode: true`` that
|
|
40
|
+
routes to the highest-output provider.
|
|
41
|
+
|
|
42
|
+
Confidence levels
|
|
43
|
+
-----------------
|
|
44
|
+
``high`` — clear numeric evidence, low false-positive risk
|
|
45
|
+
``medium`` — heuristic, may need operator judgement
|
|
46
|
+
``low`` — informational / reminder
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
from __future__ import annotations
|
|
50
|
+
|
|
51
|
+
from dataclasses import dataclass, field
|
|
52
|
+
|
|
53
|
+
from coderouter.state.replay import ProviderSummary, WindowSummary
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Thresholds (module-level constants for easy tuning)
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
# Minimum requests per provider before we emit cost-based suggestions.
|
|
60
|
+
_MIN_TRAFFIC: int = 5
|
|
61
|
+
|
|
62
|
+
# Prompt-cache opportunity: avg input tokens above this → suggest caching.
|
|
63
|
+
_CACHE_INPUT_THRESHOLD: int = 2_000
|
|
64
|
+
|
|
65
|
+
# Prompt-cache opportunity: cache hit ratio below this → suggest enabling.
|
|
66
|
+
_CACHE_HIT_RATIO_THRESHOLD: float = 0.10
|
|
67
|
+
|
|
68
|
+
# Cost reorder: provider B is this fraction cheaper than A → suggest reorder.
|
|
69
|
+
_COST_REORDER_THRESHOLD: float = 0.20 # 20% cheaper
|
|
70
|
+
|
|
71
|
+
# Small-window guard: fewer requests than this → suggest low sensitivity.
|
|
72
|
+
_SMALL_WINDOW_THRESHOLD: int = 10
|
|
73
|
+
|
|
74
|
+
# Goal profile split: relative std-dev of avg output tokens across providers.
|
|
75
|
+
_OUTPUT_DIVERGENCE_THRESHOLD: float = 0.40
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
# Data model
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class RuleSuggestion:
    """A single recommendation derived from request-journal statistics.

    Fields
    ------
    rule:
        Machine-readable identifier of the rule that fired
        (for example ``"provider_reorder"``).
    title:
        One-line, human-readable headline.
    description:
        Prose explaining the observation and the reason for the
        recommended change.
    yaml_snippet:
        Configuration fragment the operator can copy and paste. It may be
        multi-line and is intended to be valid YAML on its own.
    evidence:
        Mapping of metric name to the value that triggered the rule.
        Defaults to a fresh empty dict per instance.
    confidence:
        One of ``"high"``, ``"medium"`` or ``"low"``; defaults to
        ``"medium"``.
    providers_involved:
        Names of the providers the suggestion refers to. Defaults to a
        fresh empty list per instance.
    """

    # Required fields come first; the three optional fields below use
    # default factories so instances never share mutable state.
    rule: str
    title: str
    description: str
    yaml_snippet: str
    evidence: dict[str, object] = field(default_factory=dict)
    confidence: str = "medium"
    providers_involved: list[str] = field(default_factory=list)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
# Rule implementations
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _rule_provider_reorder(
    providers: list[ProviderSummary],
) -> list[RuleSuggestion]:
    """Suggest listing cheaper providers before more expensive ones.

    Providers with fewer than ``_MIN_TRAFFIC`` requests are ignored. The
    remaining providers are sorted by ``avg_cost_usd`` and every
    (cheaper, pricier) pair is compared; a suggestion is emitted when the
    relative saving reaches ``_COST_REORDER_THRESHOLD``.

    Parameters
    ----------
    providers:
        Per-provider summaries; each must expose ``provider``,
        ``request_count`` and ``avg_cost_usd``.

    Returns
    -------
    One ``provider_reorder`` suggestion per qualifying pair, in ascending
    cost order of the pair. Empty when fewer than two providers have
    enough traffic.
    """
    from itertools import combinations

    # Free providers (cost == 0) are kept on purpose: they are the best
    # candidates for the "cheap" side of a pair.
    active = [p for p in providers if p.request_count >= _MIN_TRAFFIC]
    if len(active) < 2:
        return []

    by_cost = sorted(active, key=lambda p: p.avg_cost_usd)

    suggestions: list[RuleSuggestion] = []
    # combinations() over the sorted list yields exactly the (i, j) pairs
    # with i < j, so ``cheap`` never costs more than ``expensive``.
    for cheap, expensive in combinations(by_cost, 2):
        if expensive.avg_cost_usd <= 0:
            # Both sides are free: nothing to gain, and the division
            # below would be by zero.
            continue
        saving_pct = (
            expensive.avg_cost_usd - cheap.avg_cost_usd
        ) / expensive.avg_cost_usd
        if saving_pct < _COST_REORDER_THRESHOLD:
            continue
        suggestions.append(RuleSuggestion(
            rule="provider_reorder",
            title=f"Move {cheap.provider!r} before {expensive.provider!r}",
            description=(
                f"{cheap.provider!r} costs ${cheap.avg_cost_usd:.4f}/req on average, "
                f"{saving_pct * 100:.0f}% cheaper than {expensive.provider!r} "
                f"(${expensive.avg_cost_usd:.4f}/req). "
                f"Listing the cheaper provider earlier in the fallback chain "
                f"reduces cost without changing availability."
            ),
            # Indentation matters here: nested keys must sit under the
            # ``- name:`` list item for the snippet to parse as YAML.
            yaml_snippet=(
                f"# In your profile's providers list, move {cheap.provider!r} earlier:\n"
                "profiles:\n"
                "  - name: default  # or your active profile\n"
                "    providers:\n"
                f"      - {cheap.provider}\n"
                f"      - {expensive.provider}"
            ),
            evidence={
                "cheap_provider": cheap.provider,
                "cheap_avg_cost_usd": round(cheap.avg_cost_usd, 6),
                "expensive_provider": expensive.provider,
                "expensive_avg_cost_usd": round(expensive.avg_cost_usd, 6),
                "saving_pct": round(saving_pct * 100, 1),
            },
            confidence="high" if saving_pct >= 0.40 else "medium",
            providers_involved=[cheap.provider, expensive.provider],
        ))
    return suggestions
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _rule_enable_prompt_cache(
    providers: list[ProviderSummary],
) -> list[RuleSuggestion]:
    """Suggest ``capabilities.prompt_cache`` for large-input, low-hit providers.

    A provider qualifies when it has at least ``_MIN_TRAFFIC`` requests,
    its ``avg_input_tokens`` is not below ``_CACHE_INPUT_THRESHOLD`` and
    its ``cache_hit_ratio`` is below ``_CACHE_HIT_RATIO_THRESHOLD``.

    Parameters
    ----------
    providers:
        Per-provider summaries; each must expose ``provider``,
        ``request_count``, ``avg_input_tokens`` and ``cache_hit_ratio``.

    Returns
    -------
    One ``enable_prompt_cache`` suggestion per qualifying provider, in
    input order.
    """
    suggestions: list[RuleSuggestion] = []
    for p in providers:
        if p.request_count < _MIN_TRAFFIC:
            continue
        if p.avg_input_tokens < _CACHE_INPUT_THRESHOLD:
            continue
        if p.cache_hit_ratio >= _CACHE_HIT_RATIO_THRESHOLD:
            continue
        suggestions.append(RuleSuggestion(
            rule="enable_prompt_cache",
            title=f"Enable prompt_cache for {p.provider!r}",
            description=(
                f"{p.provider!r} averages {p.avg_input_tokens:.0f} input tokens/req "
                f"but has only a {p.cache_hit_ratio * 100:.1f}% cache-hit ratio. "
                f"Enabling prompt caching can significantly reduce input token costs "
                f"on repeated system prompts (Anthropic models: ~10% cache-read price)."
            ),
            # Nested keys are indented under the list item so the snippet
            # parses as YAML when pasted on its own.
            yaml_snippet=(
                "providers:\n"
                f"  - name: {p.provider}\n"
                "    capabilities:\n"
                "      prompt_cache: true"
            ),
            evidence={
                "provider": p.provider,
                "avg_input_tokens": round(p.avg_input_tokens, 0),
                "cache_hit_ratio_pct": round(p.cache_hit_ratio * 100, 1),
                "requests": p.request_count,
            },
            confidence="high" if p.avg_input_tokens > 5_000 else "medium",
            providers_involved=[p.provider],
        ))
    return suggestions
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _rule_enable_drift_detection(
    providers: list[ProviderSummary],
    window_summary: WindowSummary,
) -> list[RuleSuggestion]:
    """Emit a low-confidence reminder to enable L4 drift detection.

    The reminder fires when at least one provider has
    ``_MIN_TRAFFIC * 2`` or more requests. The statistics do not reveal
    whether drift detection is already configured, which is why the
    suggestion is always ``"low"`` confidence.

    Parameters
    ----------
    providers:
        Per-provider summaries; each must expose ``provider`` and
        ``request_count``.
    window_summary:
        Window summary; only ``total_requests`` is read.

    Returns
    -------
    A single-element list with the reminder, or an empty list when no
    provider has enough traffic.
    """
    active = [p for p in providers if p.request_count >= _MIN_TRAFFIC * 2]
    if not active:
        return []

    active_names = [p.provider for p in active]
    names = ", ".join(f"{name!r}" for name in active_names)
    return [RuleSuggestion(
        rule="enable_drift_detection",
        title="Consider enabling L4 drift detection",
        description=(
            f"You have {window_summary.total_requests} requests across {names}. "
            "The L4 drift detector catches quality degradation in long-running "
            "agent sessions (empty responses, length collapse, tool silence). "
            "If not already configured, set drift_detection_action: promote to "
            "auto-demote providers that are silently degrading."
        ),
        # Profile keys are indented under ``- name:`` so the snippet
        # parses as YAML when pasted on its own.
        yaml_snippet=(
            "# Add to your profile in providers.yaml:\n"
            "profiles:\n"
            "  - name: default\n"
            "    providers: [...]  # your provider list\n"
            "    drift_detection_action: promote\n"
            "    drift_detection_sensitivity: normal\n"
            "    drift_detection_window_size: 20\n"
            "    drift_detection_cooldown_s: 300"
        ),
        evidence={
            "total_requests": window_summary.total_requests,
            "active_providers": list(active_names),
        },
        confidence="low",
        providers_involved=list(active_names),
    )]
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _rule_small_window_low_sensitivity(
    providers: list[ProviderSummary],
) -> list[RuleSuggestion]:
    """Suggest ``drift_detection_sensitivity: low`` for sparse-traffic providers.

    A provider qualifies when its ``request_count`` is strictly between 0
    and ``_SMALL_WINDOW_THRESHOLD``; providers with no requests at all
    are skipped.

    Parameters
    ----------
    providers:
        Per-provider summaries; each must expose ``provider`` and
        ``request_count``.

    Returns
    -------
    One ``low_sensitivity_small_window`` suggestion per qualifying
    provider, in input order.
    """
    suggestions: list[RuleSuggestion] = []
    for p in providers:
        if not (0 < p.request_count < _SMALL_WINDOW_THRESHOLD):
            continue
        suggestions.append(RuleSuggestion(
            rule="low_sensitivity_small_window",
            title=f"Use low drift sensitivity for {p.provider!r} (sparse traffic)",
            description=(
                f"{p.provider!r} has only {p.request_count} requests in the journal window. "
                f"With sparse traffic the drift detector's rolling window fills slowly, "
                f"which can cause false-positives. Setting drift_detection_sensitivity: low "
                f"requires more evidence before promoting the provider."
            ),
            # Profile keys are indented under ``- name:`` so the snippet
            # parses as YAML when pasted on its own.
            yaml_snippet=(
                "profiles:\n"
                "  - name: default\n"
                "    drift_detection_sensitivity: low  # was: normal or high\n"
                "    drift_detection_window_size: 30  # larger window = more stable"
            ),
            evidence={
                "provider": p.provider,
                "request_count": p.request_count,
                "threshold": _SMALL_WINDOW_THRESHOLD,
            },
            confidence="medium",
            providers_involved=[p.provider],
        ))
    return suggestions
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _rule_goal_profile(
    providers: list[ProviderSummary],
) -> list[RuleSuggestion]:
    """Suggest a dedicated ``goal`` profile when output lengths diverge.

    Only providers with at least ``_MIN_TRAFFIC`` requests and a positive
    ``avg_output_tokens`` are considered. If the relative standard
    deviation (sample stdev / mean) of their average output tokens
    reaches ``_OUTPUT_DIVERGENCE_THRESHOLD``, the provider with the
    highest average output is proposed for a ``goal_mode: true`` profile.

    Parameters
    ----------
    providers:
        Per-provider summaries; each must expose ``provider``,
        ``request_count`` and ``avg_output_tokens``.

    Returns
    -------
    A single-element list with the ``goal_profile`` suggestion, or an
    empty list when fewer than two providers qualify or the divergence is
    below the threshold.
    """
    import statistics as _stats

    active = [
        p for p in providers
        if p.request_count >= _MIN_TRAFFIC and p.avg_output_tokens > 0
    ]
    if len(active) < 2:
        return []

    output_values = [p.avg_output_tokens for p in active]
    # Every value is > 0 and there are at least two of them, so the mean
    # is strictly positive and the sample stdev is always defined.
    mean_out = _stats.mean(output_values)
    rel_stdev = _stats.stdev(output_values) / mean_out
    if rel_stdev < _OUTPUT_DIVERGENCE_THRESHOLD:
        return []

    # Heuristic: the provider that emits the most tokens is taken as the
    # best fit for long goal/agent sessions.
    best = max(active, key=lambda p: p.avg_output_tokens)
    return [RuleSuggestion(
        rule="goal_profile",
        title=f"Create a 'goal' profile with goal_mode: true → {best.provider!r}",
        description=(
            f"Output token lengths vary significantly across providers "
            f"(relative std-dev {rel_stdev * 100:.0f}%). "
            f"{best.provider!r} produces the most tokens on average "
            f"({best.avg_output_tokens:.0f} tokens/req), making it the "
            f"best candidate for a dedicated goal/agent profile. "
            f"goal_mode: true activates tighter drift thresholds and "
            f"the goal_progress_stall signal for repetition detection."
        ),
        # Profile keys are indented under ``- name:`` so the snippet
        # parses as YAML when pasted on its own.
        yaml_snippet=(
            "profiles:\n"
            "  - name: goal\n"
            "    providers:\n"
            f"      - {best.provider}\n"
            "    goal_mode: true  # P1-5: tighter thresholds\n"
            "    drift_detection_action: promote\n"
            "    drift_detection_sensitivity: high  # overridden by goal_mode\n"
            "    drift_detection_window_size: 15\n"
            "    drift_detection_cooldown_s: 180"
        ),
        evidence={
            "best_provider": best.provider,
            "best_avg_output_tokens": round(best.avg_output_tokens, 0),
            "mean_output_tokens": round(mean_out, 0),
            "relative_stdev_pct": round(rel_stdev * 100, 1),
        },
        confidence="medium",
        providers_involved=[p.provider for p in active],
    )]
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# ---------------------------------------------------------------------------
|
|
345
|
+
# Public API
|
|
346
|
+
# ---------------------------------------------------------------------------
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def suggest_rules(summary: WindowSummary) -> list[RuleSuggestion]:
    """Run every rule against *summary* and return the combined suggestions.

    Parameters
    ----------
    summary:
        Window summary whose ``providers`` mapping supplies the
        per-provider statistics (the values are used; keys are ignored).

    Returns
    -------
    All suggestions from the five rules, sorted by confidence
    (``high``, then ``medium``, then ``low``; unknown values last) and
    then by rule identifier. The sort is stable, so suggestions that tie
    on both keys keep the order in which the rules produced them.
    """
    per_provider = list(summary.providers.values())

    # Rule evaluation order only matters for ties in the final sort.
    collected: list[RuleSuggestion] = [
        *_rule_provider_reorder(per_provider),
        *_rule_enable_prompt_cache(per_provider),
        *_rule_enable_drift_detection(per_provider, summary),
        *_rule_small_window_low_sensitivity(per_provider),
        *_rule_goal_profile(per_provider),
    ]

    rank = {"high": 0, "medium": 1, "low": 2}

    def _sort_key(item: RuleSuggestion) -> tuple[int, str]:
        return (rank.get(item.confidence, 9), item.rule)

    return sorted(collected, key=_sort_key)
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def format_suggestions(suggestions: list[RuleSuggestion]) -> str:
    """Render *suggestions* as a plain-text terminal report.

    Parameters
    ----------
    suggestions:
        Suggestions to render, in display order. Each item must expose
        ``confidence``, ``title``, ``description``, ``evidence`` and
        ``yaml_snippet``.

    Returns
    -------
    A fixed "nothing to suggest" sentence when *suggestions* is empty;
    otherwise a report with a count header followed, per suggestion, by a
    numbered title with a confidence badge, the description, an optional
    ``Evidence:`` line (omitted when ``evidence`` is empty), the YAML
    snippet line by line, and a dashed separator between consecutive
    suggestions.
    """
    if not suggestions:
        return "No routing rule suggestions — current configuration looks healthy."

    # Every badge, including the fallback for unrecognised confidence
    # values, is six characters wide so titles line up in one column.
    badges = {"high": "[HIGH]", "medium": "[MED] ", "low": "[LOW] "}
    unknown_badge = "[?]   "

    total = len(suggestions)
    lines: list[str] = [f"Found {total} suggestion(s):\n"]

    for i, s in enumerate(suggestions, 1):
        conf_badge = badges.get(s.confidence, unknown_badge)
        lines.append(f" {i}. {conf_badge} {s.title}")
        lines.append(f" {s.description}")
        if s.evidence:
            evidence_str = ", ".join(f"{k}={v}" for k, v in s.evidence.items())
            lines.append(f" Evidence: {evidence_str}")
        lines.append("")
        lines.append(" YAML:")
        lines.extend(f" {yaml_line}" for yaml_line in s.yaml_snippet.splitlines())
        lines.append("")
        if i < total:
            # Separator only between suggestions, never after the last.
            lines.append(" " + "-" * 68)
            lines.append("")

    return "\n".join(lines)
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
__all__ = [
|
|
410
|
+
"RuleSuggestion",
|
|
411
|
+
"format_suggestions",
|
|
412
|
+
"suggest_rules",
|
|
413
|
+
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.5.0
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -47,7 +47,7 @@ Description-Content-Type: text/markdown
|
|
|
47
47
|
|
|
48
48
|
<p align="center">
|
|
49
49
|
<a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
|
|
50
|
-
<a href=""><img src="https://img.shields.io/
|
|
50
|
+
<a href="https://pypi.org/project/coderouter-cli/"><img src="https://img.shields.io/pypi/v/coderouter-cli?include_prereleases&color=blue&label=pypi" alt="pypi"></a>
|
|
51
51
|
<a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
|
|
52
52
|
<a href=""><img src="https://img.shields.io/badge/deps-5-brightgreen" alt="deps"></a>
|
|
53
53
|
<a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
|
|
@@ -141,7 +141,7 @@ ANTHROPIC_BASE_URL=http://localhost:8088 ANTHROPIC_AUTH_TOKEN=dummy claude
|
|
|
141
141
|
| ガード | 何から守るか |
|
|
142
142
|
|---|---|
|
|
143
143
|
| **Context Budget** | メッセージが溜まりすぎて context window 溢れ → 自動 trim |
|
|
144
|
-
| **Drift Detection** | モデルの応答品質が徐々に劣化 → 別 provider に切替 or KV cache flush |
|
|
144
|
+
| **Drift Detection** | モデルの応答品質が徐々に劣化 → 別 provider に切替 or KV cache flush (6 シグナル、`goal_mode` で目標達成停滞も検知) |
|
|
145
145
|
| **Self-healing** | backend が落ちた → 自動除外 + restart + 回復 probe で自動復帰 |
|
|
146
146
|
| **Tool Loop Guard** | 同じツールを無限に呼び続ける → 検知して停止 |
|
|
147
147
|
| **Memory Pressure** | GPU メモリ不足を検知 → 軽量モデルに切替 |
|
|
@@ -154,9 +154,40 @@ ANTHROPIC_BASE_URL=http://localhost:8088 ANTHROPIC_AUTH_TOKEN=dummy claude
|
|
|
154
154
|
| **`coderouter doctor`** | プロバイダの問題を 6 プローブで即診断 + 修正パッチ出力 |
|
|
155
155
|
| **`/dashboard`** | ブラウザで今何が起きてるかリアルタイム確認 |
|
|
156
156
|
| **`coderouter audit`** | guard 発火履歴を検索 |
|
|
157
|
-
| **`coderouter replay`** | provider 切替の効果を統計比較 (A/B 分析) |
|
|
157
|
+
| **`coderouter replay`** | provider 切替の効果を統計比較 (A/B 分析) / `--suggest-rules` でルール最適化提案 |
|
|
158
158
|
| **Continuous Probe** | idle 時も定期的に backend を監視 |
|
|
159
159
|
|
|
160
|
+
### Launcher — llama.cpp / vllm 起動 UI
|
|
161
|
+
|
|
162
|
+
`http://localhost:8088/launcher` で開けるブラウザ UI。llama.cpp や vllm を GUI で起動・管理できます。
|
|
163
|
+
|
|
164
|
+
| 機能 | 詳細 |
|
|
165
|
+
|---|---|
|
|
166
|
+
| **モデルスキャン** | `model_dirs` に指定したフォルダを再帰スキャンして `.gguf` / `.safetensors` をリスト化 |
|
|
167
|
+
| **オプションプロファイル** | `providers.yaml` に名前付きプリセットを定義 → ドロップダウンで選択するだけ |
|
|
168
|
+
| **複数プロセス管理** | llama.cpp と vllm を同時に起動し、ポートごとに独立管理 |
|
|
169
|
+
| **ログビューア** | 各プロセスの stdout/stderr をブラウザ内でリアルタイム確認 |
|
|
170
|
+
|
|
171
|
+
```yaml
|
|
172
|
+
# providers.yaml に追記するだけで有効になる
|
|
173
|
+
launcher:
|
|
174
|
+
model_dirs:
|
|
175
|
+
- ~/models
|
|
176
|
+
option_profiles:
|
|
177
|
+
llama.cpp:
|
|
178
|
+
- name: "GPU フル活用"
|
|
179
|
+
args:
|
|
180
|
+
"-ngl": 99
|
|
181
|
+
"--ctx-size": 4096
|
|
182
|
+
vllm:
|
|
183
|
+
- name: "標準"
|
|
184
|
+
args:
|
|
185
|
+
"--dtype": "auto"
|
|
186
|
+
"--max-model-len": 4096
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
詳細 → [Launcher ガイド](./docs/launcher.md)
|
|
190
|
+
|
|
160
191
|
---
|
|
161
192
|
|
|
162
193
|
## 設定例 (最小)
|
|
@@ -193,6 +224,7 @@ providers:
|
|
|
193
224
|
| すぐ動かす | [Quickstart](./docs/quickstart.md) |
|
|
194
225
|
| 使いこなす | [利用ガイド](./docs/usage-guide.md) |
|
|
195
226
|
| 無料で回す | [無料枠ガイド](./docs/free-tier-guide.md) |
|
|
227
|
+
| llama.cpp / vllm を GUI で起動 | [Launcher ガイド](./docs/launcher.md) |
|
|
196
228
|
| 詰まった | [トラブルシューティング](./docs/troubleshooting.md) |
|
|
197
229
|
| 設計を知りたい | [アーキテクチャ詳細](./docs/architecture.md) |
|
|
198
230
|
| 全リリース履歴 | [CHANGELOG](./CHANGELOG.md) |
|