coderouter-cli 2.3.0a3__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +31 -0
- coderouter/config/schemas.py +22 -0
- coderouter/guards/__init__.py +2 -0
- coderouter/guards/_fingerprint.py +125 -0
- coderouter/guards/drift_detection.py +55 -0
- coderouter/plugins/__init__.py +5 -8
- coderouter/routing/fallback.py +52 -4
- coderouter/state/__init__.py +15 -11
- coderouter/state/suggest_rules.py +413 -0
- {coderouter_cli-2.3.0a3.dist-info → coderouter_cli-2.4.0.dist-info}/METADATA +2 -2
- {coderouter_cli-2.3.0a3.dist-info → coderouter_cli-2.4.0.dist-info}/RECORD +14 -12
- {coderouter_cli-2.3.0a3.dist-info → coderouter_cli-2.4.0.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.3.0a3.dist-info → coderouter_cli-2.4.0.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.3.0a3.dist-info → coderouter_cli-2.4.0.dist-info}/licenses/LICENSE +0 -0
coderouter/cli.py
CHANGED
|
@@ -293,6 +293,18 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
293
293
|
metavar="N",
|
|
294
294
|
help="Use only the last N entries (applied after --since and --provider filters).",
|
|
295
295
|
)
|
|
296
|
+
# P1-6: --suggest-rules — statistical analysis → routing rule proposals.
|
|
297
|
+
replay.add_argument(
|
|
298
|
+
"--suggest-rules",
|
|
299
|
+
action="store_true",
|
|
300
|
+
help=(
|
|
301
|
+
"P1-6: analyse the request journal and print actionable routing "
|
|
302
|
+
"rule suggestions as copy-paste YAML snippets. Suggestions cover "
|
|
303
|
+
"provider reordering by cost, prompt_cache enablement, drift "
|
|
304
|
+
"detection configuration, and goal profile creation. "
|
|
305
|
+
"Can be combined with --since / --limit to scope the analysis window."
|
|
306
|
+
),
|
|
307
|
+
)
|
|
296
308
|
|
|
297
309
|
return parser
|
|
298
310
|
|
|
@@ -684,6 +696,25 @@ def _run_replay(args: argparse.Namespace) -> int:
|
|
|
684
696
|
print("replay: no matching entries found.")
|
|
685
697
|
return 0
|
|
686
698
|
|
|
699
|
+
if getattr(args, "suggest_rules", False):
|
|
700
|
+
# P1-6: statistical rule suggestion mode.
|
|
701
|
+
# Always compute a full window summary (ignores --compare / --provider).
|
|
702
|
+
from coderouter.state.suggest_rules import format_suggestions, suggest_rules
|
|
703
|
+
from coderouter.state.replay import summarize_window as _sw
|
|
704
|
+
|
|
705
|
+
# Re-read without provider filter so we see all providers.
|
|
706
|
+
all_entries = read_request_log(log_path, since=args.since)
|
|
707
|
+
if args.limit is not None and args.limit > 0:
|
|
708
|
+
all_entries = all_entries[-args.limit:]
|
|
709
|
+
full_summary = _sw(all_entries)
|
|
710
|
+
suggestions = suggest_rules(full_summary)
|
|
711
|
+
print(f"Request journal: {len(all_entries)} entries analysed")
|
|
712
|
+
print(f" Window: {full_summary.first_ts} → {full_summary.last_ts}")
|
|
713
|
+
print(f" Providers: {', '.join(sorted(full_summary.providers))}")
|
|
714
|
+
print()
|
|
715
|
+
print(format_suggestions(suggestions))
|
|
716
|
+
return 0
|
|
717
|
+
|
|
687
718
|
if args.compare:
|
|
688
719
|
provider_a, provider_b = args.compare
|
|
689
720
|
comparison = compare_providers(entries, provider_a, provider_b)
|
coderouter/config/schemas.py
CHANGED
|
@@ -658,6 +658,28 @@ class FallbackChain(BaseModel):
|
|
|
658
658
|
),
|
|
659
659
|
)
|
|
660
660
|
|
|
661
|
+
# --- P1-5: goal_mode — tighter drift thresholds for /goal sessions -------
|
|
662
|
+
#
|
|
663
|
+
# When True, the drift detector automatically switches to the
|
|
664
|
+
# ``THRESHOLDS_GOAL`` preset regardless of ``drift_detection_sensitivity``,
|
|
665
|
+
# and lowers ``min_window_fill`` to 4 so stall detection fires faster.
|
|
666
|
+
#
|
|
667
|
+
# Intended for profiles routed by the ``/goal`` meta-command where
|
|
668
|
+
# the agent is expected to make steady forward progress. Repetition and
|
|
669
|
+
# length collapse are much more meaningful signals in that context than
|
|
670
|
+
# in a general-purpose chat session.
|
|
671
|
+
goal_mode: bool = Field(
|
|
672
|
+
default=False,
|
|
673
|
+
description=(
|
|
674
|
+
"P1-5: when True, automatically applies the ``goal`` drift "
|
|
675
|
+
"threshold preset (stricter thresholds, lower ``min_window_fill`` "
|
|
676
|
+
"of 4) for this profile. Overrides ``drift_detection_sensitivity`` "
|
|
677
|
+
"when drift_detection_action is not ``off``. Designed for "
|
|
678
|
+
"agent/goal sessions where forward-progress stalls are more "
|
|
679
|
+
"actionable than in ad-hoc chat."
|
|
680
|
+
),
|
|
681
|
+
)
|
|
682
|
+
|
|
661
683
|
# --- v2.0-H (L6): Mid-stream partial stitching --------------------------
|
|
662
684
|
# * ``off`` — discard partial content on mid-stream failure (legacy).
|
|
663
685
|
# * ``surface`` — return partial content as a truncated-but-valid response.
|
coderouter/guards/__init__.py
CHANGED
|
@@ -12,6 +12,8 @@ to hit:
|
|
|
12
12
|
* :mod:`coderouter.guards.self_healing` — v2.0-J auto-exclude +
|
|
13
13
|
restart + recovery probe
|
|
14
14
|
* :mod:`coderouter.guards.continuous_probe` — v2.0-I background probing
|
|
15
|
+
* :mod:`coderouter.guards._fingerprint` — P1-4 response fingerprinting
|
|
16
|
+
for goal_progress_stall signal
|
|
15
17
|
|
|
16
18
|
Each guard is a pure-functional / single-class module that the engine
|
|
17
19
|
consults at the appropriate dispatch point. Guards never block the
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Response fingerprinting for goal_progress_stall detection (P1-4).
|
|
2
|
+
|
|
3
|
+
A "fingerprint" is a compact, order-independent signature of the *content*
|
|
4
|
+
of an assistant response — independent of surface variation (filler phrases,
|
|
5
|
+
minor rewordings). Two responses with the same fingerprint are considered
|
|
6
|
+
semantically repetitive for stall-detection purposes.
|
|
7
|
+
|
|
8
|
+
Algorithm
|
|
9
|
+
---------
|
|
10
|
+
1. Normalise: lowercase, strip punctuation, collapse whitespace.
|
|
11
|
+
2. Extract the N most-frequent content words (excluding a small stop-list).
|
|
12
|
+
3. Sort alphabetically, join with '|', SHA-256 → 12-hex prefix.
|
|
13
|
+
|
|
14
|
+
The 12-hex prefix gives 281 trillion distinct values — collision probability
|
|
15
|
+
across any 20-response window is negligible (< 1 in 10^15).
|
|
16
|
+
|
|
17
|
+
Why top-N content words instead of full hash?
|
|
18
|
+
----------------------------------------------
|
|
19
|
+
A verbatim hash would fail to catch "I cannot do X. Let me try Y" vs
|
|
20
|
+
"Let me try Y as I cannot do X" — same stall, different hash. By
|
|
21
|
+
extracting the dominant vocabulary we get useful fuzzy equality without
|
|
22
|
+
the overhead of embedding models.
|
|
23
|
+
|
|
24
|
+
Usage
|
|
25
|
+
-----
|
|
26
|
+
from coderouter.guards._fingerprint import fingerprint_response
|
|
27
|
+
|
|
28
|
+
fp = fingerprint_response(response_text)
|
|
29
|
+
obs = ResponseObservation(..., response_fingerprint=fp)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import hashlib
|
|
35
|
+
import re
|
|
36
|
+
import unicodedata
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Stop-word list (English + common LLM filler)
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
_STOP_WORDS: frozenset[str] = frozenset(
|
|
43
|
+
{
|
|
44
|
+
# English function words
|
|
45
|
+
"a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "to",
|
|
46
|
+
"for", "of", "with", "by", "from", "as", "is", "it", "its", "be",
|
|
47
|
+
"was", "are", "were", "been", "has", "have", "had", "do", "does",
|
|
48
|
+
"did", "will", "would", "could", "should", "may", "might", "shall",
|
|
49
|
+
"this", "that", "these", "those", "i", "you", "he", "she", "we",
|
|
50
|
+
"they", "me", "him", "her", "us", "them", "my", "your", "his",
|
|
51
|
+
"their", "our", "what", "which", "who", "how", "when", "where",
|
|
52
|
+
"why", "not", "no", "so", "up", "out", "into", "about", "than",
|
|
53
|
+
"then", "there", "here", "also", "just", "can", "get", "all",
|
|
54
|
+
# Common LLM assistant filler
|
|
55
|
+
"certainly", "sure", "absolutely", "great", "happy", "help",
|
|
56
|
+
"please", "let", "know", "feel", "free", "answer", "question",
|
|
57
|
+
"response", "following", "based", "provide", "using",
|
|
58
|
+
}
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
# Number of top content words to include in the fingerprint
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
_TOP_N: int = 12
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# Public API
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def fingerprint_response(text: str, *, top_n: int = _TOP_N) -> str:
|
|
74
|
+
"""Return a 12-hex fingerprint string for *text*.
|
|
75
|
+
|
|
76
|
+
Parameters
|
|
77
|
+
----------
|
|
78
|
+
text:
|
|
79
|
+
Raw assistant response text (plain text, not JSON).
|
|
80
|
+
top_n:
|
|
81
|
+
Number of most-frequent content words to include in the signature.
|
|
82
|
+
Defaults to ``_TOP_N`` (12). Lower values are more fuzzy; higher
|
|
83
|
+
values are more precise.
|
|
84
|
+
|
|
85
|
+
Returns
|
|
86
|
+
-------
|
|
87
|
+
A 12-character lowercase hexadecimal string, e.g. ``"a3f7b2c091de"``.
|
|
88
|
+
Returns ``""`` for empty / whitespace-only input.
|
|
89
|
+
"""
|
|
90
|
+
if not text or not text.strip():
|
|
91
|
+
return ""
|
|
92
|
+
|
|
93
|
+
# 1. Unicode normalisation + lowercase
|
|
94
|
+
normalised = unicodedata.normalize("NFKC", text).lower()
|
|
95
|
+
|
|
96
|
+
# 2. Strip punctuation / digits, collapse whitespace
|
|
97
|
+
normalised = re.sub(r"[^\w\s]", " ", normalised)
|
|
98
|
+
normalised = re.sub(r"\d+", " ", normalised)
|
|
99
|
+
normalised = re.sub(r"\s+", " ", normalised).strip()
|
|
100
|
+
|
|
101
|
+
# 3. Tokenise and filter stop words (also skip very short tokens)
|
|
102
|
+
tokens = [w for w in normalised.split() if len(w) > 2 and w not in _STOP_WORDS]
|
|
103
|
+
|
|
104
|
+
if not tokens:
|
|
105
|
+
return ""
|
|
106
|
+
|
|
107
|
+
# 4. Count frequencies, take top-N
|
|
108
|
+
freq: dict[str, int] = {}
|
|
109
|
+
for tok in tokens:
|
|
110
|
+
freq[tok] = freq.get(tok, 0) + 1
|
|
111
|
+
|
|
112
|
+
# Require at least 3 distinct content words; single-word or near-empty
|
|
113
|
+
# responses (e.g. "xxxxx..." test stubs, error codes, bare ACKs) produce
|
|
114
|
+
# the same fingerprint every time and would falsely inflate the repetition
|
|
115
|
+
# rate. Returning "" marks these as "not fingerprinted" so detect_drift
|
|
116
|
+
# skips them entirely.
|
|
117
|
+
if len(freq) < 3:
|
|
118
|
+
return ""
|
|
119
|
+
|
|
120
|
+
top_words = sorted(freq, key=lambda w: (-freq[w], w))[:top_n]
|
|
121
|
+
|
|
122
|
+
# 5. Sort alphabetically → stable join → hash
|
|
123
|
+
signature = "|".join(sorted(top_words))
|
|
124
|
+
digest = hashlib.sha256(signature.encode()).hexdigest()
|
|
125
|
+
return digest[:12]
|
|
@@ -34,6 +34,10 @@ Signals
|
|
|
34
34
|
* ``stop_anomaly_rate`` — fraction of responses with unexpected stop_reason
|
|
35
35
|
(not "end_turn" / "tool_use" / "max_tokens")
|
|
36
36
|
* ``error_rate`` — fraction of attempts that ended in failure
|
|
37
|
+
* ``goal_progress_stall`` (P1-4) — fraction of fingerprinted responses
|
|
38
|
+
whose fingerprint matches a previously-seen fingerprint in the window,
|
|
39
|
+
indicating the model is repeating itself without making progress.
|
|
40
|
+
Only fires when ``response_fingerprint`` is populated on observations.
|
|
37
41
|
|
|
38
42
|
Thresholds are bundled as :class:`DriftThresholds` with three presets
|
|
39
43
|
(``low`` / ``normal`` / ``high`` sensitivity).
|
|
@@ -71,6 +75,15 @@ class ResponseObservation:
|
|
|
71
75
|
is_error: bool = False
|
|
72
76
|
"""True if the attempt ended in provider-failed / provider-failed-midstream."""
|
|
73
77
|
stream: bool = False
|
|
78
|
+
response_fingerprint: str | None = None
|
|
79
|
+
"""P1-4: compact content fingerprint of the response text.
|
|
80
|
+
|
|
81
|
+
When set, used by the ``goal_progress_stall`` signal to detect
|
|
82
|
+
repetition: the same fingerprint appearing multiple times in the
|
|
83
|
+
window indicates the model is not making progress. Computed by
|
|
84
|
+
:func:`coderouter.guards._fingerprint.fingerprint_response`.
|
|
85
|
+
Pass ``None`` (default) to opt-out — the signal is silently skipped.
|
|
86
|
+
"""
|
|
74
87
|
|
|
75
88
|
|
|
76
89
|
# ---------------------------------------------------------------------------
|
|
@@ -100,6 +113,12 @@ class DriftThresholds:
|
|
|
100
113
|
length_collapse_ratio: float = 0.5
|
|
101
114
|
"""If recent half median is < 50% of earlier half median → collapse."""
|
|
102
115
|
|
|
116
|
+
# P1-4: repetition/stall threshold
|
|
117
|
+
repetition_rate_threshold: float = 0.4
|
|
118
|
+
"""P1-4: fraction of fingerprinted responses whose fingerprint has
|
|
119
|
+
appeared before in the window. Above this rate → goal_progress_stall
|
|
120
|
+
signal fires (mild). Default 0.4 = 2 out of 5 responses are repeats."""
|
|
121
|
+
|
|
103
122
|
# Minimum observations before detection fires
|
|
104
123
|
min_window_fill: int = 6
|
|
105
124
|
"""Don't trigger until at least this many observations in the window."""
|
|
@@ -112,6 +131,7 @@ THRESHOLDS_LOW = DriftThresholds(
|
|
|
112
131
|
tool_silence_rate=0.8,
|
|
113
132
|
stop_anomaly_rate=0.6,
|
|
114
133
|
error_rate=0.4,
|
|
134
|
+
repetition_rate_threshold=0.6,
|
|
115
135
|
min_window_fill=10,
|
|
116
136
|
)
|
|
117
137
|
|
|
@@ -123,6 +143,19 @@ THRESHOLDS_HIGH = DriftThresholds(
|
|
|
123
143
|
tool_silence_rate=0.5,
|
|
124
144
|
stop_anomaly_rate=0.3,
|
|
125
145
|
error_rate=0.15,
|
|
146
|
+
repetition_rate_threshold=0.25,
|
|
147
|
+
min_window_fill=4,
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
# P1-5: goal-mode preset — tighter thresholds + lower min_window_fill.
|
|
151
|
+
# Applied automatically when the profile has goal_mode=True.
|
|
152
|
+
THRESHOLDS_GOAL = DriftThresholds(
|
|
153
|
+
empty_response_rate=0.2,
|
|
154
|
+
length_collapse_ratio=0.6,
|
|
155
|
+
tool_silence_rate=0.5,
|
|
156
|
+
stop_anomaly_rate=0.3,
|
|
157
|
+
error_rate=0.15,
|
|
158
|
+
repetition_rate_threshold=0.2,
|
|
126
159
|
min_window_fill=4,
|
|
127
160
|
)
|
|
128
161
|
|
|
@@ -130,6 +163,7 @@ SENSITIVITY_PRESETS: dict[str, DriftThresholds] = {
|
|
|
130
163
|
"low": THRESHOLDS_LOW,
|
|
131
164
|
"normal": THRESHOLDS_NORMAL,
|
|
132
165
|
"high": THRESHOLDS_HIGH,
|
|
166
|
+
"goal": THRESHOLDS_GOAL,
|
|
133
167
|
}
|
|
134
168
|
|
|
135
169
|
|
|
@@ -244,6 +278,27 @@ def detect_drift(
|
|
|
244
278
|
if error_rate > thresholds.error_rate:
|
|
245
279
|
mild_flags.append(f"error_rate={error_rate:.2f}")
|
|
246
280
|
|
|
281
|
+
# --- Signal 6: Goal progress stall (P1-4) ---
|
|
282
|
+
# Only active when at least some observations have a fingerprint.
|
|
283
|
+
# Computes: how many fingerprinted responses repeat a fingerprint
|
|
284
|
+
# already seen earlier in the window. High repetition → stall.
|
|
285
|
+
fingerprinted = [
|
|
286
|
+
obs for obs in window if obs.response_fingerprint # excludes None and ""
|
|
287
|
+
]
|
|
288
|
+
if len(fingerprinted) >= 3:
|
|
289
|
+
seen: set[str] = set()
|
|
290
|
+
repeat_count = 0
|
|
291
|
+
for obs in fingerprinted:
|
|
292
|
+
fp = obs.response_fingerprint # guaranteed non-empty by filter above
|
|
293
|
+
if fp in seen:
|
|
294
|
+
repeat_count += 1
|
|
295
|
+
else:
|
|
296
|
+
seen.add(fp)
|
|
297
|
+
repetition_rate = repeat_count / len(fingerprinted)
|
|
298
|
+
signals["goal_progress_stall"] = round(repetition_rate, 3)
|
|
299
|
+
if repetition_rate > thresholds.repetition_rate_threshold:
|
|
300
|
+
mild_flags.append(f"goal_progress_stall={repetition_rate:.2f}")
|
|
301
|
+
|
|
247
302
|
# --- Severity synthesis ---
|
|
248
303
|
if severe_flags:
|
|
249
304
|
severity: Literal["none", "mild", "severe"] = "severe"
|
coderouter/plugins/__init__.py
CHANGED
|
@@ -43,17 +43,14 @@ from coderouter.plugins.loader import (
|
|
|
43
43
|
from coderouter.plugins.registry import PluginRegistry
|
|
44
44
|
|
|
45
45
|
__all__ = [
|
|
46
|
-
|
|
47
|
-
"
|
|
48
|
-
"
|
|
49
|
-
# Future hooks (Protocol-only, no engine integration yet)
|
|
46
|
+
"PLUGIN_GROUPS_FUTURE",
|
|
47
|
+
"PLUGIN_GROUPS_V2_3",
|
|
48
|
+
"Adapter",
|
|
50
49
|
"Frontend",
|
|
51
50
|
"Guard",
|
|
51
|
+
"InputFilter",
|
|
52
|
+
"Observer",
|
|
52
53
|
"OutputFilter",
|
|
53
|
-
"Adapter",
|
|
54
|
-
# Discovery + container
|
|
55
54
|
"PluginRegistry",
|
|
56
55
|
"discover_and_load",
|
|
57
|
-
"PLUGIN_GROUPS_V2_3",
|
|
58
|
-
"PLUGIN_GROUPS_FUTURE",
|
|
59
56
|
]
|
coderouter/routing/fallback.py
CHANGED
|
@@ -838,6 +838,12 @@ class FallbackEngine:
|
|
|
838
838
|
# so tests that build the engine via ``FallbackEngine.__new__``
|
|
839
839
|
# see an empty registry instead of AttributeError.
|
|
840
840
|
self._plugin_registry: PluginRegistry = plugins or PluginRegistry.empty()
|
|
841
|
+
# v2.3.0: holds strong refs to in-flight Observer fanout tasks
|
|
842
|
+
# so the asyncio event loop's weak-ref bookkeeping doesn't GC
|
|
843
|
+
# them mid-flight (RUF006). Tasks remove themselves on done
|
|
844
|
+
# via ``add_done_callback(_observer_tasks.discard)`` in
|
|
845
|
+
# :meth:`_fanout_observers`.
|
|
846
|
+
self._observer_tasks: set[asyncio.Task[None]] = set()
|
|
841
847
|
# Cache adapters so we don't re-instantiate per request
|
|
842
848
|
self._adapters: dict[str, BaseAdapter] = {
|
|
843
849
|
p.name: build_adapter(p) for p in config.providers
|
|
@@ -1277,6 +1283,7 @@ class FallbackEngine:
|
|
|
1277
1283
|
stop_reason: str | None = None,
|
|
1278
1284
|
is_error: bool = False,
|
|
1279
1285
|
stream: bool = False,
|
|
1286
|
+
response_fingerprint: str | None = None,
|
|
1280
1287
|
) -> DriftVerdict | None:
|
|
1281
1288
|
"""v2.0-G (L4): record an observation and check for drift.
|
|
1282
1289
|
|
|
@@ -1288,9 +1295,18 @@ class FallbackEngine:
|
|
|
1288
1295
|
- Emits ``drift-detected`` log.
|
|
1289
1296
|
- If action is ``promote`` or ``reload``, demotes the provider
|
|
1290
1297
|
via the adaptive rank machinery.
|
|
1298
|
+
|
|
1299
|
+
Parameters
|
|
1300
|
+
----------
|
|
1301
|
+
response_fingerprint:
|
|
1302
|
+
P1-4: compact content fingerprint from
|
|
1303
|
+
:func:`coderouter.guards._fingerprint.fingerprint_response`.
|
|
1304
|
+
When set, enables the ``goal_progress_stall`` signal.
|
|
1305
|
+
Pass ``None`` (default) to skip that signal.
|
|
1291
1306
|
"""
|
|
1292
1307
|
from coderouter.guards.drift_detection import (
|
|
1293
1308
|
SENSITIVITY_PRESETS,
|
|
1309
|
+
THRESHOLDS_GOAL,
|
|
1294
1310
|
ResponseObservation,
|
|
1295
1311
|
detect_drift,
|
|
1296
1312
|
)
|
|
@@ -1316,6 +1332,7 @@ class FallbackEngine:
|
|
|
1316
1332
|
stop_reason=stop_reason,
|
|
1317
1333
|
is_error=is_error,
|
|
1318
1334
|
stream=stream,
|
|
1335
|
+
response_fingerprint=response_fingerprint,
|
|
1319
1336
|
)
|
|
1320
1337
|
self._drift_window.record(obs)
|
|
1321
1338
|
|
|
@@ -1338,10 +1355,15 @@ class FallbackEngine:
|
|
|
1338
1355
|
return None
|
|
1339
1356
|
|
|
1340
1357
|
# Run detection
|
|
1358
|
+
# P1-5: goal_mode overrides the sensitivity preset with the tighter
|
|
1359
|
+
# THRESHOLDS_GOAL regardless of drift_detection_sensitivity setting.
|
|
1341
1360
|
window = self._drift_window.get_window(provider)
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1361
|
+
if getattr(chain_cfg, "goal_mode", False):
|
|
1362
|
+
thresholds = THRESHOLDS_GOAL
|
|
1363
|
+
else:
|
|
1364
|
+
thresholds = SENSITIVITY_PRESETS.get(
|
|
1365
|
+
chain_cfg.drift_detection_sensitivity, SENSITIVITY_PRESETS["normal"]
|
|
1366
|
+
)
|
|
1345
1367
|
verdict = detect_drift(window, thresholds)
|
|
1346
1368
|
|
|
1347
1369
|
if not verdict.drifted:
|
|
@@ -1881,10 +1903,22 @@ class FallbackEngine:
|
|
|
1881
1903
|
observers = self.plugins.observers
|
|
1882
1904
|
if not observers:
|
|
1883
1905
|
return
|
|
1906
|
+
# Lazy-init the task set for engines built via ``__new__`` —
|
|
1907
|
+
# mirrors the lazy ``plugins`` property pattern so legacy
|
|
1908
|
+
# tests that bypass __init__ don't crash here.
|
|
1909
|
+
if not hasattr(self, "_observer_tasks"):
|
|
1910
|
+
self._observer_tasks = set()
|
|
1884
1911
|
for obs in observers:
|
|
1885
|
-
asyncio.create_task(
|
|
1912
|
+
task = asyncio.create_task(
|
|
1886
1913
|
self._safe_observe(obs, event_type, payload)
|
|
1887
1914
|
)
|
|
1915
|
+
# Strong-ref keeps the task alive past the loop iteration;
|
|
1916
|
+
# ``discard`` cleans up after the task completes (success
|
|
1917
|
+
# or exception). Avoids the RUF006 footgun where
|
|
1918
|
+
# asyncio.create_task's weakref-only bookkeeping can let
|
|
1919
|
+
# the loop GC a fanout-in-progress task.
|
|
1920
|
+
self._observer_tasks.add(task)
|
|
1921
|
+
task.add_done_callback(self._observer_tasks.discard)
|
|
1888
1922
|
|
|
1889
1923
|
async def _safe_observe(
|
|
1890
1924
|
self,
|
|
@@ -2065,6 +2099,13 @@ class FallbackEngine:
|
|
|
2065
2099
|
adapter.name, profile=request.profile
|
|
2066
2100
|
)
|
|
2067
2101
|
# v2.0-G (L4): drift detection observation (success path).
|
|
2102
|
+
# P1-4: compute response fingerprint for goal_progress_stall.
|
|
2103
|
+
_fp_text = " ".join(
|
|
2104
|
+
getattr(b, "text", "") or (b.get("text", "") if isinstance(b, dict) else "")
|
|
2105
|
+
for b in (resp.content or [])
|
|
2106
|
+
if (getattr(b, "type", None) or (b.get("type") if isinstance(b, dict) else None)) == "text"
|
|
2107
|
+
)
|
|
2108
|
+
from coderouter.guards._fingerprint import fingerprint_response as _fp
|
|
2068
2109
|
self._observe_drift_signal(
|
|
2069
2110
|
adapter.name,
|
|
2070
2111
|
profile=request.profile,
|
|
@@ -2075,6 +2116,7 @@ class FallbackEngine:
|
|
|
2075
2116
|
request_had_tools=bool(request.tools),
|
|
2076
2117
|
stop_reason=resp.stop_reason,
|
|
2077
2118
|
stream=False,
|
|
2119
|
+
response_fingerprint=_fp(_fp_text) if _fp_text else None,
|
|
2078
2120
|
)
|
|
2079
2121
|
# v1.9-A: pair every successful Anthropic response with a
|
|
2080
2122
|
# cache-observed log line. Native Anthropic / LM Studio
|
|
@@ -2294,6 +2336,11 @@ class FallbackEngine:
|
|
|
2294
2336
|
adapter.name, exc, partial_content=acc.partial_content
|
|
2295
2337
|
) from exc
|
|
2296
2338
|
# v2.0-G (L4): drift detection observation (stream success).
|
|
2339
|
+
# P1-4: compute response fingerprint for goal_progress_stall.
|
|
2340
|
+
_stream_fp_text = " ".join(
|
|
2341
|
+
b.get("text", "") for b in acc.partial_content if b.get("type") == "text"
|
|
2342
|
+
)
|
|
2343
|
+
from coderouter.guards._fingerprint import fingerprint_response as _fp_s
|
|
2297
2344
|
self._observe_drift_signal(
|
|
2298
2345
|
adapter.name,
|
|
2299
2346
|
profile=request.profile,
|
|
@@ -2302,6 +2349,7 @@ class FallbackEngine:
|
|
|
2302
2349
|
request_had_tools=bool(request.tools),
|
|
2303
2350
|
stop_reason=acc.stop_reason,
|
|
2304
2351
|
stream=True,
|
|
2352
|
+
response_fingerprint=_fp_s(_stream_fp_text) if _stream_fp_text else None,
|
|
2305
2353
|
)
|
|
2306
2354
|
# v1.9-B2: pair the successful stream with a cache-observed
|
|
2307
2355
|
# log line carrying the aggregated usage counters that the
|
coderouter/state/__init__.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
"""Persistent state layer (v2.0-K).
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Five modules:
|
|
4
4
|
|
|
5
|
-
* :mod:`coderouter.state.store`
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
* :mod:`coderouter.state.audit_log`
|
|
9
|
-
|
|
10
|
-
* :mod:`coderouter.state.request_log`
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
* :mod:`coderouter.state.replay`
|
|
14
|
-
|
|
5
|
+
* :mod:`coderouter.state.store` — sqlite3 KV store for operational
|
|
6
|
+
metadata (budget totals, health
|
|
7
|
+
state, self-healing exclusions).
|
|
8
|
+
* :mod:`coderouter.state.audit_log` — JSONL structured event log with
|
|
9
|
+
rotation and CLI reader.
|
|
10
|
+
* :mod:`coderouter.state.request_log` — JSONL request metadata journal
|
|
11
|
+
(per-request token counts, cost,
|
|
12
|
+
provider — no request body).
|
|
13
|
+
* :mod:`coderouter.state.replay` — Statistical A/B analysis engine
|
|
14
|
+
over request journal entries.
|
|
15
|
+
* :mod:`coderouter.state.suggest_rules` — P1-6 rule suggestion engine:
|
|
16
|
+
analyses WindowSummary and emits
|
|
17
|
+
copy-paste YAML snippets for
|
|
18
|
+
routing optimisation.
|
|
15
19
|
"""
|
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""Rule suggestion engine for ``coderouter replay --suggest-rules`` (P1-6).
|
|
2
|
+
|
|
3
|
+
Analyses the request journal statistics produced by
|
|
4
|
+
:func:`coderouter.state.replay.summarize_window` and emits a list of
|
|
5
|
+
:class:`RuleSuggestion` objects — each containing a plain-English
|
|
6
|
+
description, a copy-paste YAML snippet, and the numeric evidence that
|
|
7
|
+
drove the recommendation.
|
|
8
|
+
|
|
9
|
+
Design
|
|
10
|
+
------
|
|
11
|
+
Pure statistical analysis — no LLM required. Rules are applied in
|
|
12
|
+
priority order; each rule is independently evaluated so multiple
|
|
13
|
+
suggestions can fire for the same provider.
|
|
14
|
+
|
|
15
|
+
Rules (v1.0)
|
|
16
|
+
------------
|
|
17
|
+
|
|
18
|
+
1. **provider_reorder** — If provider B costs less per request than
|
|
19
|
+
provider A *and* B has meaningful traffic, suggest moving B earlier
|
|
20
|
+
in the fallback chain.
|
|
21
|
+
|
|
22
|
+
2. **enable_prompt_cache** — If a provider has a large average input
|
|
23
|
+
token count (> ``CACHE_INPUT_THRESHOLD``) and a low cache-hit ratio
|
|
24
|
+
(< ``CACHE_HIT_RATIO_THRESHOLD``), suggest enabling
|
|
25
|
+
``capabilities.prompt_cache: true``.
|
|
26
|
+
|
|
27
|
+
3. **enable_drift_detection** — If any provider has a non-trivial
|
|
28
|
+
request volume and no drift-detection configuration is visible in
|
|
29
|
+
the stats (proxy: we see the provider at all), emit a reminder to
|
|
30
|
+
set ``drift_detection_action: promote``.
|
|
31
|
+
|
|
32
|
+
4. **raise_min_window_fill** — If a provider has a low request count
|
|
33
|
+
(< ``SMALL_WINDOW_THRESHOLD``) and drift detection would fire early,
|
|
34
|
+
suggest raising ``drift_detection_sensitivity: low`` to avoid false
|
|
35
|
+
positives.
|
|
36
|
+
|
|
37
|
+
5. **split_goal_profile** — If there is more than one provider with
|
|
38
|
+
significant traffic and average output tokens differ substantially,
|
|
39
|
+
suggest creating a ``goal`` profile with ``goal_mode: true`` that
|
|
40
|
+
routes to the highest-output provider.
|
|
41
|
+
|
|
42
|
+
Confidence levels
|
|
43
|
+
-----------------
|
|
44
|
+
``high`` — clear numeric evidence, low false-positive risk
|
|
45
|
+
``medium`` — heuristic, may need operator judgement
|
|
46
|
+
``low`` — informational / reminder
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
from __future__ import annotations
|
|
50
|
+
|
|
51
|
+
from dataclasses import dataclass, field
|
|
52
|
+
|
|
53
|
+
from coderouter.state.replay import ProviderSummary, WindowSummary
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Thresholds (module-level constants for easy tuning)
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
# Minimum requests per provider before we emit cost-based suggestions.
|
|
60
|
+
_MIN_TRAFFIC: int = 5
|
|
61
|
+
|
|
62
|
+
# Prompt-cache opportunity: avg input tokens above this → suggest caching.
|
|
63
|
+
_CACHE_INPUT_THRESHOLD: int = 2_000
|
|
64
|
+
|
|
65
|
+
# Prompt-cache opportunity: cache hit ratio below this → suggest enabling.
|
|
66
|
+
_CACHE_HIT_RATIO_THRESHOLD: float = 0.10
|
|
67
|
+
|
|
68
|
+
# Cost reorder: provider B is this fraction cheaper than A → suggest reorder.
|
|
69
|
+
_COST_REORDER_THRESHOLD: float = 0.20 # 20% cheaper
|
|
70
|
+
|
|
71
|
+
# Small-window guard: fewer requests than this → suggest low sensitivity.
|
|
72
|
+
_SMALL_WINDOW_THRESHOLD: int = 10
|
|
73
|
+
|
|
74
|
+
# Goal profile split: relative std-dev of avg output tokens across providers.
|
|
75
|
+
_OUTPUT_DIVERGENCE_THRESHOLD: float = 0.40
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
# Data model
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
|
|
84
|
+
class RuleSuggestion:
|
|
85
|
+
"""One actionable suggestion derived from request journal statistics.
|
|
86
|
+
|
|
87
|
+
Attributes
|
|
88
|
+
----------
|
|
89
|
+
rule:
|
|
90
|
+
Internal rule identifier, e.g. ``"provider_reorder"``.
|
|
91
|
+
title:
|
|
92
|
+
Short human-readable title for the suggestion.
|
|
93
|
+
description:
|
|
94
|
+
Plain-English explanation of what was observed and why the
|
|
95
|
+
change is recommended.
|
|
96
|
+
yaml_snippet:
|
|
97
|
+
Copy-paste YAML fragment showing the recommended change.
|
|
98
|
+
May span multiple lines; always valid YAML in isolation.
|
|
99
|
+
evidence:
|
|
100
|
+
Dict of metric name → value that drove this suggestion.
|
|
101
|
+
confidence:
|
|
102
|
+
``"high"`` / ``"medium"`` / ``"low"``
|
|
103
|
+
providers_involved:
|
|
104
|
+
Provider names mentioned in this suggestion.
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
rule: str
|
|
108
|
+
title: str
|
|
109
|
+
description: str
|
|
110
|
+
yaml_snippet: str
|
|
111
|
+
evidence: dict[str, object] = field(default_factory=dict)
|
|
112
|
+
confidence: str = "medium"
|
|
113
|
+
providers_involved: list[str] = field(default_factory=list)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
# Rule implementations
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _rule_provider_reorder(
|
|
122
|
+
providers: list[ProviderSummary],
|
|
123
|
+
) -> list[RuleSuggestion]:
|
|
124
|
+
"""Suggest reordering providers by cost-per-request."""
|
|
125
|
+
suggestions: list[RuleSuggestion] = []
|
|
126
|
+
# Only consider providers with meaningful traffic.
|
|
127
|
+
# Include free providers (cost=0) — they are the best candidates.
|
|
128
|
+
active = [p for p in providers if p.request_count >= _MIN_TRAFFIC]
|
|
129
|
+
if len(active) < 2:
|
|
130
|
+
return []
|
|
131
|
+
|
|
132
|
+
# Sort by avg cost ascending (cheapest / free first)
|
|
133
|
+
active_by_cost = sorted(active, key=lambda p: p.avg_cost_usd)
|
|
134
|
+
|
|
135
|
+
# Compare every pair where the expensive provider costs something.
|
|
136
|
+
for i in range(len(active_by_cost)):
|
|
137
|
+
for j in range(i + 1, len(active_by_cost)):
|
|
138
|
+
cheap = active_by_cost[i]
|
|
139
|
+
expensive = active_by_cost[j]
|
|
140
|
+
if expensive.avg_cost_usd <= 0:
|
|
141
|
+
continue # both free — no cost advantage to reorder
|
|
142
|
+
saving_pct = (expensive.avg_cost_usd - cheap.avg_cost_usd) / expensive.avg_cost_usd
|
|
143
|
+
if saving_pct >= _COST_REORDER_THRESHOLD:
|
|
144
|
+
suggestions.append(RuleSuggestion(
|
|
145
|
+
rule="provider_reorder",
|
|
146
|
+
title=f"Move {cheap.provider!r} before {expensive.provider!r}",
|
|
147
|
+
description=(
|
|
148
|
+
f"{cheap.provider!r} costs ${cheap.avg_cost_usd:.4f}/req on average, "
|
|
149
|
+
f"{saving_pct * 100:.0f}% cheaper than {expensive.provider!r} "
|
|
150
|
+
f"(${expensive.avg_cost_usd:.4f}/req). "
|
|
151
|
+
f"Listing the cheaper provider earlier in the fallback chain "
|
|
152
|
+
f"reduces cost without changing availability."
|
|
153
|
+
),
|
|
154
|
+
yaml_snippet=(
|
|
155
|
+
f"# In your profile's providers list, move {cheap.provider!r} earlier:\n"
|
|
156
|
+
f"profiles:\n"
|
|
157
|
+
f" - name: default # or your active profile\n"
|
|
158
|
+
f" providers:\n"
|
|
159
|
+
f" - {cheap.provider}\n"
|
|
160
|
+
f" - {expensive.provider}"
|
|
161
|
+
),
|
|
162
|
+
evidence={
|
|
163
|
+
"cheap_provider": cheap.provider,
|
|
164
|
+
"cheap_avg_cost_usd": round(cheap.avg_cost_usd, 6),
|
|
165
|
+
"expensive_provider": expensive.provider,
|
|
166
|
+
"expensive_avg_cost_usd": round(expensive.avg_cost_usd, 6),
|
|
167
|
+
"saving_pct": round(saving_pct * 100, 1),
|
|
168
|
+
},
|
|
169
|
+
confidence="high" if saving_pct >= 0.40 else "medium",
|
|
170
|
+
providers_involved=[cheap.provider, expensive.provider],
|
|
171
|
+
))
|
|
172
|
+
return suggestions
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _rule_enable_prompt_cache(
    providers: list[ProviderSummary],
) -> list[RuleSuggestion]:
    """Propose enabling prompt_cache for large-prompt, low-hit providers.

    A provider qualifies when all three gates hold: it has enough journal
    traffic, its average input size is at or above the cache threshold,
    and its cache-hit ratio is still below the configured minimum.
    """

    def _qualifies(summary: ProviderSummary) -> bool:
        # Every gate must pass; failing any one disqualifies the provider.
        return (
            summary.request_count >= _MIN_TRAFFIC
            and summary.avg_input_tokens >= _CACHE_INPUT_THRESHOLD
            and summary.cache_hit_ratio < _CACHE_HIT_RATIO_THRESHOLD
        )

    def _build(summary: ProviderSummary) -> RuleSuggestion:
        # Very large prompts (>5k tokens) make caching a near-certain win.
        return RuleSuggestion(
            rule="enable_prompt_cache",
            title=f"Enable prompt_cache for {summary.provider!r}",
            description=(
                f"{summary.provider!r} averages {summary.avg_input_tokens:.0f} input tokens/req "
                f"but has only a {summary.cache_hit_ratio * 100:.1f}% cache-hit ratio. "
                f"Enabling prompt caching can significantly reduce input token costs "
                f"on repeated system prompts (Anthropic models: ~10% cache-read price)."
            ),
            yaml_snippet=(
                f"providers:\n"
                f"  - name: {summary.provider}\n"
                f"    capabilities:\n"
                f"      prompt_cache: true"
            ),
            evidence={
                "provider": summary.provider,
                "avg_input_tokens": round(summary.avg_input_tokens, 0),
                "cache_hit_ratio_pct": round(summary.cache_hit_ratio * 100, 1),
                "requests": summary.request_count,
            },
            confidence="high" if summary.avg_input_tokens > 5_000 else "medium",
            providers_involved=[summary.provider],
        )

    return [_build(summary) for summary in providers if _qualifies(summary)]
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _rule_enable_drift_detection(
    providers: list[ProviderSummary],
    window_summary: WindowSummary,
) -> list[RuleSuggestion]:
    """Remind the operator to turn on L4 drift detection.

    Emitted only when at least one provider carries meaningful traffic
    (twice the minimum-traffic floor).  Journal statistics cannot reveal
    whether drift detection is already configured, so the suggestion is
    always reported at ``low`` confidence — it is a reminder, not a finding.
    """
    busy = [summary for summary in providers if summary.request_count >= _MIN_TRAFFIC * 2]
    if not busy:
        return []

    names = ", ".join(repr(summary.provider) for summary in busy)
    reminder = RuleSuggestion(
        rule="enable_drift_detection",
        title="Consider enabling L4 drift detection",
        description=(
            f"You have {window_summary.total_requests} requests across {names}. "
            f"The L4 drift detector catches quality degradation in long-running "
            f"agent sessions (empty responses, length collapse, tool silence). "
            f"If not already configured, set drift_detection_action: promote to "
            f"auto-demote providers that are silently degrading."
        ),
        yaml_snippet=(
            "# Add to your profile in providers.yaml:\n"
            "profiles:\n"
            "  - name: default\n"
            "    providers: [...]  # your provider list\n"
            "    drift_detection_action: promote\n"
            "    drift_detection_sensitivity: normal\n"
            "    drift_detection_window_size: 20\n"
            "    drift_detection_cooldown_s: 300"
        ),
        evidence={
            "total_requests": window_summary.total_requests,
            "active_providers": [summary.provider for summary in busy],
        },
        confidence="low",
        providers_involved=[summary.provider for summary in busy],
    )
    return [reminder]
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _rule_small_window_low_sensitivity(
    providers: list[ProviderSummary],
) -> list[RuleSuggestion]:
    """Recommend ``drift_detection_sensitivity: low`` for sparse providers.

    Applies to every provider that appears in the journal (at least one
    request) yet has fewer requests than the small-window threshold —
    with so little traffic the drift detector's rolling window fills
    slowly and is prone to false positives.
    """

    def _build(summary: ProviderSummary) -> RuleSuggestion:
        return RuleSuggestion(
            rule="low_sensitivity_small_window",
            title=f"Use low drift sensitivity for {summary.provider!r} (sparse traffic)",
            description=(
                f"{summary.provider!r} has only {summary.request_count} requests in the journal window. "
                f"With sparse traffic the drift detector's rolling window fills slowly, "
                f"which can cause false-positives. Setting drift_detection_sensitivity: low "
                f"requires more evidence before promoting the provider."
            ),
            yaml_snippet=(
                "profiles:\n"
                "  - name: default\n"
                "    drift_detection_sensitivity: low  # was: normal or high\n"
                "    drift_detection_window_size: 30  # larger window = more stable"
            ),
            evidence={
                "provider": summary.provider,
                "request_count": summary.request_count,
                "threshold": _SMALL_WINDOW_THRESHOLD,
            },
            confidence="medium",
            providers_involved=[summary.provider],
        )

    return [
        _build(summary)
        for summary in providers
        if 0 < summary.request_count < _SMALL_WINDOW_THRESHOLD
    ]
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _rule_goal_profile(
    providers: list[ProviderSummary],
) -> list[RuleSuggestion]:
    """Suggest a dedicated goal profile when output lengths diverge.

    Considers providers with enough traffic and a non-zero average output.
    If the relative standard deviation of their average output-token
    counts reaches ``_OUTPUT_DIVERGENCE_THRESHOLD``, the provider with
    the longest average output is nominated for a ``goal_mode: true``
    profile (tighter drift thresholds, goal_progress_stall signal).

    Parameters
    ----------
    providers:
        Per-provider journal summaries.

    Returns
    -------
    A single-element list with the suggestion, or an empty list when
    fewer than two providers qualify or divergence is below threshold.
    """
    # Local import keeps module import time light; statistics is stdlib.
    import statistics as _stats

    active = [p for p in providers if p.request_count >= _MIN_TRAFFIC and p.avg_output_tokens > 0]
    if len(active) < 2:
        return []

    output_values = [p.avg_output_tokens for p in active]
    # Here len(output_values) >= 2 and every value is > 0, so the mean is
    # strictly positive and the sample stdev is well-defined.  (The previous
    # `mean_out == 0` and single-sample guards were unreachable and have
    # been removed.)
    mean_out = _stats.mean(output_values)
    rel_stdev = _stats.stdev(output_values) / mean_out

    if rel_stdev < _OUTPUT_DIVERGENCE_THRESHOLD:
        return []

    # Highest-output provider is probably best for goal sessions.
    best = max(active, key=lambda p: p.avg_output_tokens)
    return [RuleSuggestion(
        rule="goal_profile",
        title=f"Create a 'goal' profile with goal_mode: true → {best.provider!r}",
        description=(
            f"Output token lengths vary significantly across providers "
            f"(relative std-dev {rel_stdev * 100:.0f}%). "
            f"{best.provider!r} produces the most tokens on average "
            f"({best.avg_output_tokens:.0f} tokens/req), making it the "
            f"best candidate for a dedicated goal/agent profile. "
            f"goal_mode: true activates tighter drift thresholds and "
            f"the goal_progress_stall signal for repetition detection."
        ),
        yaml_snippet=(
            "profiles:\n"
            f"  - name: goal\n"
            f"    providers:\n"
            f"      - {best.provider}\n"
            f"    goal_mode: true  # P1-5: tighter thresholds\n"
            f"    drift_detection_action: promote\n"
            f"    drift_detection_sensitivity: high  # overridden by goal_mode\n"
            f"    drift_detection_window_size: 15\n"
            f"    drift_detection_cooldown_s: 180"
        ),
        evidence={
            "best_provider": best.provider,
            "best_avg_output_tokens": round(best.avg_output_tokens, 0),
            "mean_output_tokens": round(mean_out, 0),
            "relative_stdev_pct": round(rel_stdev * 100, 1),
        },
        confidence="medium",
        providers_involved=[p.provider for p in active],
    )]
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# ---------------------------------------------------------------------------
|
|
345
|
+
# Public API
|
|
346
|
+
# ---------------------------------------------------------------------------
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def suggest_rules(summary: WindowSummary) -> list[RuleSuggestion]:
    """Analyse a :class:`WindowSummary` and return a list of rule suggestions.

    Runs every rule analyser (provider reorder, prompt-cache, drift
    detection, small-window sensitivity, goal profile) over the same
    provider summaries and merges their output.

    Parameters
    ----------
    summary:
        Output of :func:`coderouter.state.replay.summarize_window`.

    Returns
    -------
    List of :class:`RuleSuggestion` objects, ordered by confidence
    (``high`` first) then rule name.
    """
    providers = list(summary.providers.values())

    suggestions: list[RuleSuggestion] = []
    suggestions.extend(_rule_provider_reorder(providers))
    suggestions.extend(_rule_enable_prompt_cache(providers))
    suggestions.extend(_rule_enable_drift_detection(providers, summary))
    suggestions.extend(_rule_small_window_low_sensitivity(providers))
    suggestions.extend(_rule_goal_profile(providers))

    # lower_snake_case: this is a per-call local, not a module constant
    # (PEP 8).  Unknown confidence values sort last via the 9 fallback.
    confidence_rank = {"high": 0, "medium": 1, "low": 2}
    suggestions.sort(key=lambda s: (confidence_rank.get(s.confidence, 9), s.rule))
    return suggestions
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def format_suggestions(suggestions: list[RuleSuggestion]) -> str:
    """Render suggestions as a human-readable terminal report.

    Returns a plain-text string with section headers, descriptions,
    and copy-paste YAML snippets.
    """
    if not suggestions:
        return "No routing rule suggestions — current configuration looks healthy."

    # Fixed-width badges so titles line up across confidence levels.
    badges = {"high": "[HIGH]", "medium": "[MED] ", "low": "[LOW] "}
    total = len(suggestions)

    out: list[str] = [f"Found {total} suggestion(s):\n"]
    for index, item in enumerate(suggestions, 1):
        badge = badges.get(item.confidence, "[?] ")
        out.append(f" {index}. {badge} {item.title}")
        out.append(f" {item.description}")
        if item.evidence:
            pairs = ", ".join(f"{key}={value}" for key, value in item.evidence.items())
            out.append(f" Evidence: {pairs}")
        out.append("")
        out.append(" YAML:")
        out.extend(f" {snippet_line}" for snippet_line in item.yaml_snippet.splitlines())
        out.append("")
        # Horizontal rule between entries, but not after the last one.
        if index < total:
            out.append(" " + "-" * 68)
            out.append("")

    return "\n".join(out)
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
# Public API of this module; controls `from ... import *` and documents
# the supported surface (class + the two entry-point functions).
__all__ = [
    "RuleSuggestion",
    "format_suggestions",
    "suggest_rules",
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.4.0
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -47,7 +47,7 @@ Description-Content-Type: text/markdown
|
|
|
47
47
|
|
|
48
48
|
<p align="center">
|
|
49
49
|
<a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
|
|
50
|
-
<a href=""><img src="https://img.shields.io/
|
|
50
|
+
<a href="https://pypi.org/project/coderouter-cli/"><img src="https://img.shields.io/pypi/v/coderouter-cli?include_prereleases&color=blue&label=pypi" alt="pypi"></a>
|
|
51
51
|
<a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
|
|
52
52
|
<a href=""><img src="https://img.shields.io/badge/deps-5-brightgreen" alt="deps"></a>
|
|
53
53
|
<a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
coderouter/__init__.py,sha256=ghdjPrLtnRzY8fyQ4CJZI1UJKADyNTLtA3G7se8H7Ns,696
|
|
2
2
|
coderouter/__main__.py,sha256=-LCgxJnvgUV240HjQKv7ly-mn2NuKHpC4nCpvTHjeSU,130
|
|
3
|
-
coderouter/cli.py,sha256=
|
|
3
|
+
coderouter/cli.py,sha256=lSrQBj7iYz39EbPfBNm8l-ZGe-dPIHUd_wG0KPTvpiQ,28154
|
|
4
4
|
coderouter/cli_stats.py,sha256=CCjzc1G4hTRHZ2gG1XhxhDpUkJnnl3NXbcbp1T18jpg,29894
|
|
5
5
|
coderouter/cost.py,sha256=jf70XTTjEfWddHkOohErqSo5TfmIG42zQpBT_5faPNA,5881
|
|
6
6
|
coderouter/doctor.py,sha256=2luNk6BHSRvpQStJnHcqzNvNi-SKdOuKV0WZdorZhVk,82854
|
|
@@ -19,15 +19,16 @@ coderouter/config/__init__.py,sha256=FODEn74fN-qZnt4INPSHswqhOlEgpL6-_onxsitSx8g
|
|
|
19
19
|
coderouter/config/capability_registry.py,sha256=F6DetVL5oM03R4QeK1g6h_Q_zrXH0opnYDp3duZmkN4,15808
|
|
20
20
|
coderouter/config/env_file.py,sha256=CoMK27fuAXm-NtoLzXb8yN2E-wDFjHQuFwiIlmgTBQw,10356
|
|
21
21
|
coderouter/config/loader.py,sha256=FUEe8m4Tnmj_aul0vSctD8vKvNW-oLRoMRbTpSKqSmc,4077
|
|
22
|
-
coderouter/config/schemas.py,sha256=
|
|
22
|
+
coderouter/config/schemas.py,sha256=1vTYTYIe_vbjbgBKJ8hbrpU12K5l3XCKXAJE85HWebA,55853
|
|
23
23
|
coderouter/data/__init__.py,sha256=uNyfD9jaCvTWsBAWtaw1Fr25OSxzv3psGMfBjT1z0Cc,328
|
|
24
24
|
coderouter/data/model-capabilities.yaml,sha256=2ztY4PUbGN_cWG-UUB-iPy-baeVFnGV8OcZHJUfZE7c,19290
|
|
25
|
-
coderouter/guards/__init__.py,sha256=
|
|
25
|
+
coderouter/guards/__init__.py,sha256=5qliYBqygvVPneej7nx0uSjxDKsz7t8VzvrDgVBJlvU,1170
|
|
26
|
+
coderouter/guards/_fingerprint.py,sha256=qsgNzIq9jv3FHrKL39nGJARp0cMenpN_QmWoJu87vU4,4835
|
|
26
27
|
coderouter/guards/backend_health.py,sha256=Xx5OpX1x7atxghmBNDVxtwGg62zQIOsk6FmrQV4ILa4,9113
|
|
27
28
|
coderouter/guards/context_budget.py,sha256=moWulVr5NtVci13vXxS0ucV4EvX2b7tbA1W1d9eQnkw,13281
|
|
28
29
|
coderouter/guards/continuous_probe.py,sha256=AKNMbJ7hUJG-FDoU160BCbSEQQUyw0hBxFYMTaBZg84,11681
|
|
29
30
|
coderouter/guards/drift_actions.py,sha256=A6pY5CR480Ct5rCVyjlBvjPFVc93eu_r5qcUpK9mWKc,3602
|
|
30
|
-
coderouter/guards/drift_detection.py,sha256=
|
|
31
|
+
coderouter/guards/drift_detection.py,sha256=vlepNw_GjvmpHZHTkMS5JM7XKxHaRxzjj_GfopRa1M0,13489
|
|
31
32
|
coderouter/guards/memory_pressure.py,sha256=mul1KXO9oE1i424cs92Sk6uzoRrV6Seck2Lk3bu-w68,7903
|
|
32
33
|
coderouter/guards/self_healing.py,sha256=_fT_EJvTTp5VSi-qAP93J_1LkgPK5jkzsyrUHdKC45A,13853
|
|
33
34
|
coderouter/guards/tool_loop.py,sha256=EzeMcmU7BLeTW2jsRVevU81l5rhWcn1oUr7EpzgXjVM,15209
|
|
@@ -40,7 +41,7 @@ coderouter/ingress/openai_routes.py,sha256=Zw1efPw9DI6GgV8ZcLrzS6Cda0KLrFkKn2GBZ
|
|
|
40
41
|
coderouter/metrics/__init__.py,sha256=7Es351DPS7yLM0yVF_F0eesmiD83n7Zzhie44chht38,1465
|
|
41
42
|
coderouter/metrics/collector.py,sha256=Q0_CY0orX8_i0EICBME5sYW2RqL2VD4SpNs8qfCnBM0,47432
|
|
42
43
|
coderouter/metrics/prometheus.py,sha256=YRqyT931s40zVkIj07D-M2UNfDhIEElVFRz3izdJcnQ,24419
|
|
43
|
-
coderouter/plugins/__init__.py,sha256=
|
|
44
|
+
coderouter/plugins/__init__.py,sha256=76hMLe5dV_ilripHXzWn3HSYoIALjzlw4EJVyI-GyIM,1974
|
|
44
45
|
coderouter/plugins/base.py,sha256=n9hsck2NCSqi6oeHIumKC5zhQ8JGwCXUz7J5AZQCQss,5772
|
|
45
46
|
coderouter/plugins/loader.py,sha256=xAIf6bIuth0QXCzwxO_ja6aSUlLzIqZNbrbQNJDgSE8,6841
|
|
46
47
|
coderouter/plugins/registry.py,sha256=Tx0QHJHozZ5LTUliGylBdNVcdzHTBV0nedCUwGlbLMM,3236
|
|
@@ -49,18 +50,19 @@ coderouter/routing/adaptive.py,sha256=G2o377twGSjbUh65wiIFx6klnpFGjsD_nI3oDvcBwh
|
|
|
49
50
|
coderouter/routing/auto_router.py,sha256=4_sQR0ztSED9FgQSvQqgqSiydyQVY_qOSRvwyZ5BfRc,12909
|
|
50
51
|
coderouter/routing/budget.py,sha256=A3_i44tmS3SrqVNnoGkLKMsiYwI_Ug6m5-3gitVoQSM,8452
|
|
51
52
|
coderouter/routing/capability.py,sha256=ziIDuE5keH_jxYDlXSKufRVxxSYOAvUxJ6Rw5QkYDDU,18436
|
|
52
|
-
coderouter/routing/fallback.py,sha256=
|
|
53
|
-
coderouter/state/__init__.py,sha256=
|
|
53
|
+
coderouter/routing/fallback.py,sha256=P3f6Yna1EGnLAT-ZS5ADrrZ-qRWc-M5xvwEuan4rmcs,104568
|
|
54
|
+
coderouter/state/__init__.py,sha256=XoGcPmmBQSiZWML2S0juSveQ78xfhtdeCliNnVyzu7E,1088
|
|
54
55
|
coderouter/state/audit_log.py,sha256=JwGd0OkkDlkh0Fdc6SmnuyViwKzEaFA7Ux_VqHzakWE,8358
|
|
55
56
|
coderouter/state/replay.py,sha256=Z_YHKroTKZdrL8qObFxcoLOAQWWXZvXFdLfxzvBhEJg,11230
|
|
56
57
|
coderouter/state/request_log.py,sha256=bR814sOn--U_sKVtbezwS3bkZaNt4FGnboX75_2LLiU,5908
|
|
57
58
|
coderouter/state/store.py,sha256=h-rsMJq8GILsOfCP94nI40cuHaj4Vqycsm9UNN77REI,7445
|
|
59
|
+
coderouter/state/suggest_rules.py,sha256=z9a9dTUm-l7mvRV48G7_tWtV2p24F0bQo2ijz8kf_2k,16674
|
|
58
60
|
coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8IYOtG8,1788
|
|
59
61
|
coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
|
|
60
62
|
coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
|
|
61
63
|
coderouter/translation/tool_repair.py,sha256=Ok2PF947Liegc5oaytfptv5MWMkpfJYQie-zdP1y3cY,9946
|
|
62
|
-
coderouter_cli-2.
|
|
63
|
-
coderouter_cli-2.
|
|
64
|
-
coderouter_cli-2.
|
|
65
|
-
coderouter_cli-2.
|
|
66
|
-
coderouter_cli-2.
|
|
64
|
+
coderouter_cli-2.4.0.dist-info/METADATA,sha256=nCXZKj29kM_o53ajIWgOI9PaGx_k4nBtIw-WvO_6MsU,10067
|
|
65
|
+
coderouter_cli-2.4.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
66
|
+
coderouter_cli-2.4.0.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
|
|
67
|
+
coderouter_cli-2.4.0.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
|
|
68
|
+
coderouter_cli-2.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|