@heytherevibin/skillforge 0.7.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/CONTRIBUTING.md +30 -19
- package/README.md +248 -198
- package/RELEASING.md +19 -7
- package/SECURITY.md +61 -13
- package/STRATEGY.md +40 -14
- package/bin/cli.js +112 -5
- package/ci/bundle-gate.json +4 -0
- package/lib/host-setup.js +312 -0
- package/lib/templates/claude-code-skillforge-global.md +19 -0
- package/lib/templates/cursor-skillforge-global.md +16 -0
- package/package.json +3 -2
- package/python/app/eval_cli.py +133 -0
- package/python/app/feedback_meta.py +96 -0
- package/python/app/health_cli.py +160 -0
- package/python/app/main.py +502 -26
- package/python/app/materialize.py +72 -4
- package/python/app/mcp_contract.py +13 -1
- package/python/app/mcp_server.py +344 -25
- package/python/app/route_cli.py +32 -13
- package/python/app/route_eval_harness.py +98 -0
- package/python/app/route_policies.py +243 -0
- package/python/app/route_quality.py +99 -0
- package/python/app/routing_signals.py +155 -0
- package/python/app/weights_cli.py +152 -0
- package/python/fixtures/route_eval/smoke.json +18 -0
- package/python/requirements.txt +1 -0
- package/python/tests/test_feedback_weights.py +77 -0
- package/python/tests/test_materialize.py +51 -0
- package/python/tests/test_mcp_contract.py +117 -0
- package/python/tests/test_route_eval_harness.py +45 -0
- package/python/tests/test_route_policies.py +115 -0
- package/python/tests/test_route_quality.py +120 -0
- package/python/tests/test_routing_overlay.py +55 -0
- package/python/tests/test_routing_signals.py +112 -0
package/python/app/route_cli.py
CHANGED
|
@@ -34,6 +34,11 @@ def _parse_args(argv: list[str] | None) -> argparse.Namespace:
|
|
|
34
34
|
)
|
|
35
35
|
p.add_argument("--session-id", default="", help="Stable session id (reuse across turns for reroute stats).")
|
|
36
36
|
p.add_argument("--user-id", default="", help="Logical user id for weights/sessions/events.")
|
|
37
|
+
p.add_argument(
|
|
38
|
+
"--picked-names",
|
|
39
|
+
default="",
|
|
40
|
+
help="Comma-separated catalog skill ids (host pick). Skips auto router/Haiku; same as MCP picked_names.",
|
|
41
|
+
)
|
|
37
42
|
p.add_argument("--json-meta", action="store_true", help="Print routing metadata as JSON on stderr after output.")
|
|
38
43
|
p.add_argument(
|
|
39
44
|
"--include-project-rag",
|
|
@@ -55,11 +60,16 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
55
60
|
return 2
|
|
56
61
|
db_path = resolve_orchestrator_db(pr)
|
|
57
62
|
con = init_db(db_path)
|
|
63
|
+
db_disp = redact_display_path(db_path) if redaction_enabled() else str(db_path)
|
|
58
64
|
|
|
59
65
|
router, skills = await asyncio.to_thread(build_router_and_skills, log=True, log_prefix="[skillforge-route]")
|
|
60
66
|
session_id = args.session_id.strip() or None
|
|
61
67
|
user_id = args.user_id.strip()
|
|
62
68
|
|
|
69
|
+
picked_raw = (args.picked_names or "").strip()
|
|
70
|
+
picked_supplied = bool(picked_raw)
|
|
71
|
+
picked_list = [x.strip() for x in picked_raw.split(",") if x.strip()] if picked_raw else []
|
|
72
|
+
|
|
63
73
|
try:
|
|
64
74
|
result = await run_route_turn(
|
|
65
75
|
con,
|
|
@@ -70,6 +80,8 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
70
80
|
session_id=session_id,
|
|
71
81
|
project_root=pr,
|
|
72
82
|
include_project_rag=bool(args.include_project_rag),
|
|
83
|
+
picked_names_from_host=picked_list if picked_supplied else None,
|
|
84
|
+
picked_names_from_host_supplied=picked_supplied,
|
|
73
85
|
)
|
|
74
86
|
finally:
|
|
75
87
|
con.close()
|
|
@@ -95,24 +107,28 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
95
107
|
"context_mode": router.context_mode,
|
|
96
108
|
"context_items_count": len(context_items),
|
|
97
109
|
"project_rag_items_count": (result.get("event") or {}).get("project_rag_items_count", 0),
|
|
110
|
+
"host_pick_shortlist": bool(result.get("host_pick_shortlist")),
|
|
98
111
|
}
|
|
99
112
|
(d / "last_route.json").write_text(json.dumps(snap, indent=2), encoding="utf-8")
|
|
100
113
|
except OSError:
|
|
101
114
|
pass
|
|
102
115
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
+
if result.get("host_pick_shortlist"):
|
|
117
|
+
response_text = ((result.get("host_pick_markdown") or "").strip() + f"\n\n---\n_session_id:_ `{sid}` · _DB:_ `{db_disp}`")
|
|
118
|
+
print(response_text.strip())
|
|
119
|
+
else:
|
|
120
|
+
blocks = [
|
|
121
|
+
f"# Skillforge — routed {len(picked_names)} skill(s); context=`{router.context_mode}`",
|
|
122
|
+
f"_DB:_ `{db_disp}`",
|
|
123
|
+
f"_Reasoning: {reasoning}_" if reasoning else "",
|
|
124
|
+
"",
|
|
125
|
+
]
|
|
126
|
+
if context_items:
|
|
127
|
+
blocks.append(format_context_items_markdown(context_items))
|
|
128
|
+
elif not picked_names:
|
|
129
|
+
blocks.append("_No skills matched this prompt closely enough to load._")
|
|
130
|
+
response_text = "\n".join(b for b in blocks if b is not None)
|
|
131
|
+
print(response_text)
|
|
116
132
|
|
|
117
133
|
if args.json_meta:
|
|
118
134
|
meta = build_route_skills_meta(
|
|
@@ -126,6 +142,9 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
126
142
|
fusion=(result.get("event") or {}).get("context_fusion"),
|
|
127
143
|
context_redaction=(result.get("event") or {}).get("context_redaction"),
|
|
128
144
|
)
|
|
145
|
+
if result.get("host_pick_shortlist"):
|
|
146
|
+
meta["host_pick_shortlist"] = True
|
|
147
|
+
meta["host_pick_candidates"] = result.get("host_pick_candidates") or []
|
|
129
148
|
print(json.dumps(meta, indent=2), file=sys.stderr)
|
|
130
149
|
|
|
131
150
|
return 0
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Pure helpers for route evaluation fixtures (embedding-first, no LLM)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_eval_fixture(path: Path) -> dict[str, Any]:
    """Read a route-eval fixture file and return its parsed JSON root.

    The file is decoded as UTF-8. The root must be a JSON object whose
    ``cases`` entry is a non-empty array.

    Raises:
        ValueError: the root is not an object, or ``cases`` is missing,
            not a list, or empty.
    """
    fixture = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(fixture, dict):
        raise ValueError("fixture root must be a JSON object")
    case_list = fixture.get("cases")
    if isinstance(case_list, list) and case_list:
        return fixture
    raise ValueError("fixture must contain a non-empty cases array")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _window(case: dict[str, Any], defaults: dict[str, Any]) -> int:
    """Return how many leading candidates a case inspects (always >= 1).

    A per-case ``candidate_window`` wins; otherwise the value from
    ``defaults`` is used, falling back to 25. Raises ``TypeError`` or
    ``ValueError`` when the configured value cannot be converted by ``int()``.
    """
    w = case.get("candidate_window")
    if w is None:
        w = defaults.get("candidate_window", 25)
    return max(1, int(w))


def evaluate_case_result(
    result: dict[str, Any],
    case: dict[str, Any],
    *,
    defaults: dict[str, Any] | None = None,
) -> list[str]:
    """Check one routing result against a fixture case.

    Args:
        result: Routing result; ``candidates``, ``picked_names`` and
            ``host_pick_shortlist`` are read from it.
        case: Fixture case. Recognized keys: ``id``/``name``,
            ``candidate_window``, ``expect_in_candidates``,
            ``expect_candidates_contain``, ``expect_picked_any``,
            ``expect_picked_all``.
        defaults: Optional fixture-level defaults (``candidate_window``).

    Returns:
        Human-readable error strings; an empty list means the case passed.
        Malformed expectations (including a non-integer ``candidate_window``)
        are reported as errors rather than raised.
    """
    defaults = defaults or {}
    errs: list[str] = []
    case_id = case.get("id") or case.get("name") or "?"

    # A host-pick shortlist carries no picks to evaluate, so it fails the case.
    if result.get("host_pick_shortlist"):
        errs.append(f"{case_id}: host shortlist result — use embedding router mode for eval")
        return errs

    # A bad window is a fixture error; report it like the other malformed
    # expectations instead of letting int() raise out of the evaluator.
    try:
        window = _window(case, defaults)
    except (TypeError, ValueError, OverflowError):
        errs.append(f"{case_id}: candidate_window must be an integer")
        return errs

    # Candidates are either tuples whose first element has a ``name``
    # attribute, or dicts with a ``name`` key; anything else is ignored.
    cand_names: list[str] = []
    for item in result.get("candidates") or []:
        if isinstance(item, tuple) and len(item) >= 1:
            name = getattr(item[0], "name", None)
            if name:
                cand_names.append(str(name))
        elif isinstance(item, dict) and item.get("name"):
            cand_names.append(str(item["name"]))

    head = cand_names[:window]
    head_set = set(head)

    # Both labels are accepted and checked the same way.
    for label in ("expect_in_candidates", "expect_candidates_contain"):
        need = case.get(label)
        if not need:
            continue
        if not isinstance(need, list):
            errs.append(f"{case_id}: {label} must be a list")
            continue
        for skill_id in need:
            sid = str(skill_id)
            if sid not in head_set:
                errs.append(
                    f"{case_id}: expected {sid!r} in first {window} candidates "
                    f"(have {head[:8]}{'…' if len(head) > 8 else ''})"
                )

    picked = list(result.get("picked_names") or [])
    picked_set = set(picked)

    if case.get("expect_picked_any"):
        need = case["expect_picked_any"]
        if not isinstance(need, list):
            errs.append(f"{case_id}: expect_picked_any must be a list")
        elif not (picked_set & {str(x) for x in need}):
            errs.append(
                f"{case_id}: expected at least one of {need!r} in picked_names {picked!r}"
            )

    if case.get("expect_picked_all"):
        need = case["expect_picked_all"]
        if not isinstance(need, list):
            errs.append(f"{case_id}: expect_picked_all must be a list")
        else:
            for sid in need:
                if str(sid) not in picked_set:
                    errs.append(
                        f"{case_id}: expected picked_names to include {sid!r} (have {picked!r})"
                    )

    return errs
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""Pluggable route policies: regex on prompt → force-include skill names.
|
|
2
|
+
|
|
3
|
+
Load order (a non-empty inline env value always wins, even when it fails to parse; otherwise the first existing file that parses as JSON wins):
|
|
4
|
+
|
|
5
|
+
1. ``SKILLFORGE_ROUTE_POLICIES`` — JSON object inline (e.g. ``{\"rules\":[...]}``).
|
|
6
|
+
2. ``SKILLFORGE_ROUTE_POLICIES_FILE`` — path to a JSON file.
|
|
7
|
+
3. ``<project_root>/.skillforge/policies.json``
|
|
8
|
+
4. ``<project_root>/skillforge-policies.json``
|
|
9
|
+
|
|
10
|
+
Rule shape::
|
|
11
|
+
|
|
12
|
+
{
|
|
13
|
+
"rules": [
|
|
14
|
+
{
|
|
15
|
+
"if_text_matches": "(?i)(auth|oauth|jwt|password)",
|
|
16
|
+
"include": ["security-review"]
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
``if_text_matches`` is passed to ``re.search`` (``re.DOTALL``). ``include`` is a skill
|
|
22
|
+
name or list of names. Forced skills are appended after router picks until
|
|
23
|
+
``MAX_ACTIVE_SKILLS`` is reached.
|
|
24
|
+
|
|
25
|
+
Optional **project routing overlay** (same JSON object):
|
|
26
|
+
|
|
27
|
+
- ``exclude_skills`` / ``host_exclude`` / ``denylist`` — skill ids excluded from the embedding
|
|
28
|
+
shortlist (hard filter).
|
|
29
|
+
- ``routing_boosts`` / ``skill_boosts`` — object mapping skill id → numeric delta added to the
|
|
30
|
+
routing score after learned weights (clamped to ±2).
|
|
31
|
+
- ``project_notes`` / ``routing_notes`` / ``rag_notes`` — free text prepended to the internal
|
|
32
|
+
routing query when **project_root** is set (stack/context hints for embedding).
|
|
33
|
+
|
|
34
|
+
``project_notes`` are **not** applied without ``project_root`` to avoid global prompt injection
|
|
35
|
+
from shared policy files.
|
|
36
|
+
"""
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import json
|
|
40
|
+
import os
|
|
41
|
+
import re
|
|
42
|
+
import sqlite3
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
from typing import Any
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def load_route_policies_config(project_root: str | None) -> dict[str, Any]:
    """Load the route-policy configuration object.

    Sources are consulted in this order:

    1. ``SKILLFORGE_ROUTE_POLICIES`` — inline JSON. When set to a non-blank
       value it is authoritative: a parse failure or a non-object value
       yields empty rules and no file is consulted.
    2. ``SKILLFORGE_ROUTE_POLICIES_FILE`` — path to a JSON file.
    3. ``<project_root>/.skillforge/policies.json``
    4. ``<project_root>/skillforge-policies.json``

    Files that are missing, unreadable, not valid UTF-8, or not valid JSON
    are skipped and the next path is tried. The first file that parses wins,
    even when its root is not an object (which yields empty rules).

    Returns:
        The parsed JSON object as-is (callers must not assume a ``rules`` key
        is present), or ``{"rules": []}`` when nothing usable is configured.
    """
    raw_env = os.getenv("SKILLFORGE_ROUTE_POLICIES", "").strip()
    if raw_env:
        try:
            data = json.loads(raw_env)
        except json.JSONDecodeError:
            return {"rules": []}
        return data if isinstance(data, dict) else {"rules": []}

    paths: list[Path] = []
    path_env = os.getenv("SKILLFORGE_ROUTE_POLICIES_FILE", "").strip()
    if path_env:
        paths.append(Path(path_env).expanduser())
    if project_root:
        pr = Path(project_root).expanduser().resolve()
        paths.append(pr / ".skillforge" / "policies.json")
        paths.append(pr / "skillforge-policies.json")

    for p in paths:
        if not p.is_file():
            continue
        try:
            data = json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised for a file that is not valid UTF-8.
            continue
        return data if isinstance(data, dict) else {"rules": []}
    return {"rules": []}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def parse_routing_overlay(
    policies: dict[str, Any] | None,
    *,
    by_name: dict[str, Any] | None = None,
    audit_out: list[dict[str, Any]] | None = None,
) -> tuple[frozenset[str], dict[str, float], str]:
    """Parse the exclude list, per-skill score boosts, and project notes.

    Args:
        policies: Policy object; ``None`` is treated as empty.
        by_name: Optional catalog mapping. When non-empty, skill names that
            are not keys of it are dropped and reported to ``audit_out``.
        audit_out: Optional list that receives one row per rejected entry
            (``unknown_skill`` or ``invalid_value``).

    Returns:
        ``(exclude, boosts, notes)``: the excluded skill names, the boosts
        clamped to ±2.0, and the first non-blank value among
        ``project_notes`` / ``routing_notes`` / ``rag_notes`` (stripped), or
        ``""``.
    """
    import math  # local: only this function needs it

    policies = policies or {}
    by_name = by_name or {}
    boost_cap = 2.0

    raw_ex = policies.get("exclude_skills") or policies.get("host_exclude") or policies.get("denylist") or []
    if isinstance(raw_ex, str):
        raw_ex = [raw_ex]
    exclude: set[str] = set()
    if isinstance(raw_ex, list):
        for x in raw_ex:
            if not isinstance(x, str) or not x.strip():
                continue
            name = x.strip()
            if by_name and name not in by_name:
                if audit_out is not None:
                    audit_out.append({"kind": "exclude", "skill": name, "effect": "unknown_skill"})
                continue
            exclude.add(name)

    raw_boost = policies.get("routing_boosts") or policies.get("skill_boosts") or {}
    boosts: dict[str, float] = {}
    if isinstance(raw_boost, dict):
        for k, v in raw_boost.items():
            if not isinstance(k, str) or not k.strip():
                continue
            name = k.strip()
            if by_name and name not in by_name:
                if audit_out is not None:
                    audit_out.append({"kind": "boost", "skill": name, "effect": "unknown_skill"})
                continue
            try:
                b = float(v)
            except (TypeError, ValueError):
                b = math.nan
            # NaN must be rejected explicitly: max(-cap, min(cap, nan))
            # evaluates to +cap, which would turn a bad value into the
            # maximum boost. Infinities are still clamped to the cap.
            if math.isnan(b):
                if audit_out is not None:
                    audit_out.append({"kind": "boost", "skill": name, "effect": "invalid_value"})
                continue
            boosts[name] = max(-boost_cap, min(boost_cap, b))

    notes = ""
    for key in ("project_notes", "routing_notes", "rag_notes"):
        raw = policies.get(key)
        if isinstance(raw, str) and raw.strip():
            notes = raw.strip()
            break

    return frozenset(exclude), boosts, notes
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def merge_project_notes_into_route_query(
    route_query: str,
    notes: str,
    project_root: str | None,
    *,
    max_chars: int | None = None,
) -> str:
    """Prefix the routing query with project notes when ``project_root`` is set.

    Args:
        route_query: Query text used for routing.
        notes: Free-text project notes; blank notes leave the query unchanged.
        project_root: Notes are applied only when this is non-blank.
        max_chars: Cap on the notes length. ``None`` reads
            ``SKILLFORGE_PROJECT_NOTES_MAX_CHARS`` (default 1200; a malformed
            value falls back to the default). A cap of 0 or less disables
            the prefix.

    Returns:
        ``route_query`` unchanged, or the query preceded by a
        ``Project routing notes:`` header and the (possibly clipped) notes.
    """
    default_cap = 1200
    notes = (notes or "").strip()
    pr = (project_root or "").strip()
    if not notes or not pr:
        return route_query

    mc = max_chars
    if mc is None:
        try:
            mc = int(os.getenv("SKILLFORGE_PROJECT_NOTES_MAX_CHARS", str(default_cap)))
        except ValueError:
            # A typo in the environment must not break routing.
            mc = default_cap
    if mc <= 0:
        # No room for notes: skip the header rather than emit a lone ellipsis.
        return route_query

    # Reserve one character for the ellipsis so the result stays within mc.
    clipped = notes if len(notes) <= mc else notes[: mc - 1] + "…"
    return f"Project routing notes:\n{clipped}\n\n{route_query}"
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def build_routing_overlay_payload(
    *,
    project_root: str,
    exclude_skills: frozenset[str],
    routing_boosts: dict[str, float],
    project_notes_applied: bool,
    project_notes_len: int,
    audit: list[dict[str, Any]],
) -> dict[str, Any] | None:
    """Summarize the applied routing overlay for telemetry / MCP meta.

    Returns ``None`` when there are no excludes, no boosts, no applied notes
    and no audit rows; otherwise a ``routing_overlay/1`` dict with sorted
    excludes, boosts rounded to 4 decimals in key order, and a copy of the
    audit rows.
    """
    has_content = bool(exclude_skills or routing_boosts or project_notes_applied or audit)
    if not has_content:
        return None

    rounded_boosts: dict[str, float] = {}
    for skill in sorted(routing_boosts):
        rounded_boosts[skill] = round(float(routing_boosts[skill]), 4)

    root_text = (project_root or "").strip()
    payload: dict[str, Any] = {
        "schema": "routing_overlay/1",
        "project_root_set": bool(root_text),
        "exclude_skills": sorted(exclude_skills),
        "routing_boosts": rounded_boosts,
        "project_notes_applied": project_notes_applied,
        "project_notes_len": int(project_notes_len),
        "audit": list(audit),
    }
    return payload
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def merge_policy_includes(
    prompt: str,
    picked_names: list[str],
    policies: dict[str, Any],
    by_name: dict[str, Any],
    con: sqlite3.Connection,
    user_id: str,
    *,
    max_active: int,
) -> tuple[list[str], list[dict[str, Any]]]:
    """Append policy-driven skills after ``picked_names`` without duplicates.

    Each rule's pattern (``if_text_matches`` or ``pattern``) is applied to
    ``prompt`` with ``re.search`` and ``re.DOTALL``. For every matching rule,
    each ``include`` name is appended unless it is unknown to ``by_name``,
    reported disabled by ``get_skill_weight``, already handled, or the list
    already holds ``max_active`` entries.

    Returns:
        ``(merged_pick_list, audit_rows)``. An ``"added"`` row is written only
        for a skill that was actually appended; every skill dropped by the
        cap gets its own ``"skipped_max_active"`` row.
    """
    # Local import avoids circular import at module load time.
    from app.main import get_skill_weight

    rules = policies.get("rules") if isinstance(policies, dict) else None
    if not isinstance(rules, list):
        rules = []

    audit: list[dict[str, Any]] = []
    merged = list(picked_names)
    # Names already dropped by the cap, so a repeat in a later rule is
    # reported as a duplicate instead of being skipped a second time.
    capped: set[str] = set()

    for rule in rules:
        if not isinstance(rule, dict):
            continue
        pat = rule.get("if_text_matches") or rule.get("pattern") or ""
        if not isinstance(pat, str) or not pat.strip():
            continue
        try:
            matched = bool(re.search(pat, prompt, flags=re.DOTALL))
        except re.error:
            audit.append({"pattern": pat, "effect": "invalid_regex"})
            continue
        if not matched:
            continue

        inc = rule.get("include")
        if isinstance(inc, str):
            inc = [inc]
        if not isinstance(inc, list):
            continue

        for name in inc:
            if not isinstance(name, str) or not name.strip():
                continue
            name = name.strip()
            if name not in by_name:
                audit.append({"pattern": pat, "skill": name, "effect": "unknown_skill"})
                continue
            _w, disabled = get_skill_weight(con, name, user_id=user_id)
            if disabled:
                audit.append({"pattern": pat, "skill": name, "effect": "disabled"})
                continue
            if name in merged or name in capped:
                audit.append({"pattern": pat, "skill": name, "effect": "already_in_list"})
                continue
            # Apply the cap before recording "added", so the audit trail
            # never claims a skill that did not make it into the result.
            if len(merged) >= max_active:
                capped.add(name)
                audit.append({"skill": name, "effect": "skipped_max_active", "max": max_active})
                continue
            merged.append(name)
            audit.append({"pattern": pat, "skill": name, "effect": "added"})

    return merged, audit
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Calibration metrics for route_skills MCP _meta and route events (local, no extra network)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import math
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def coerce_route_float(x: Any, *, default: float = 0.0) -> float:
    """Coerce ``x`` to a finite float for routing telemetry.

    Returns ``default`` when ``x`` cannot be converted (wrong type,
    unparsable string, integer too large for a float) or converts to NaN or
    an infinity.
    """
    try:
        v = float(x)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() rejects integers beyond the float range.
        return default
    return v if math.isfinite(v) else default
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def policy_includes_added_count(audit: list[dict[str, Any]] | None) -> int:
    """Count audit rows whose ``effect`` is ``"added"``.

    ``None`` or an empty list yields 0; rows that are not dicts are ignored.
    """
    added = 0
    for entry in audit or ():
        if isinstance(entry, dict) and entry.get("effect") == "added":
            added += 1
    return added
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def top1_cosine_vs_routing_agreement(facets: list[dict[str, Any]]) -> bool | None:
    """Report whether the first facet is also the one with the highest cosine.

    ``facets[0]`` is taken as the routing winner. Returns ``None`` when there
    are fewer than two facets or either name is missing/empty; otherwise
    ``True`` when the first facet's name equals the name of the facet with
    the largest ``cosine_similarity`` (the earliest facet wins ties).
    """
    if len(facets) < 2:
        return None

    routed_first = facets[0].get("name")

    # Manual arg-max; strict ">" keeps the earliest facet on ties.
    leader = facets[0]
    leader_cos = coerce_route_float(leader.get("cosine_similarity"))
    for facet in facets[1:]:
        cos = coerce_route_float(facet.get("cosine_similarity"))
        if cos > leader_cos:
            leader, leader_cos = facet, cos
    densest = leader.get("name")

    if routed_first and densest:
        return routed_first == densest
    return None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_route_quality(
    *,
    facet_list: list[dict[str, Any]],
    router_mode: str,
    router_hybrid: str,
    picked_names: list[str],
    rerouted: bool,
    change: float,
    policy_rules_loaded: int,
    policy_audit: list[dict[str, Any]] | None,
    host_picked: bool,
    host_shortlist_only: bool = False,
    haiku_rerank_applied: bool = False,
    pick_path: str,
) -> dict[str, Any]:
    """Assemble the ``route_quality/1`` calibration payload (JSON-serializable).

    The payload has four sections — ``shortlist`` (size, top/second cosine,
    their margin, top routing score, hybrid agreement), ``router``,
    ``session`` (reroute flag and change as ratio and percent) and ``policy``
    — plus ``picked_count``. Cosine fields stay ``None`` when the shortlist
    is too short to provide them.
    """
    shortlist_size = len(facet_list)

    top_cos: float | None = None
    second_cos: float | None = None
    margin: float | None = None
    top_routing_score: float | None = None
    if shortlist_size >= 1:
        leader = facet_list[0]
        top_cos = round(coerce_route_float(leader.get("cosine_similarity")), 6)
        top_routing_score = round(coerce_route_float(leader.get("routing_score")), 6)
    if shortlist_size >= 2:
        runner_up = facet_list[1]
        second_cos = round(coerce_route_float(runner_up.get("cosine_similarity")), 6)
        margin = round(float(top_cos - second_cos), 6)

    # The agreement diagnostic is only computed when hybrid scoring is on.
    if router_hybrid in ("", "off", None):
        agree = None
    else:
        agree = top1_cosine_vs_routing_agreement(facet_list)

    try:
        rules_loaded = max(0, int(policy_rules_loaded))
    except (TypeError, ValueError):
        rules_loaded = 0

    change_ratio = coerce_route_float(change)

    shortlist_section = {
        "size": shortlist_size,
        "top_cosine_similarity": top_cos,
        "second_cosine_similarity": second_cos,
        "cosine_margin": margin,
        "top_routing_score": top_routing_score,
        "hybrid_mode": router_hybrid or "off",
        "top1_dense_and_fused_agree": agree,
    }
    router_section = {
        "mode": router_mode,
        "pick_path": pick_path,
        "host_picked": host_picked,
        "host_shortlist_only": host_shortlist_only,
        "haiku_rerank_applied": haiku_rerank_applied,
    }
    session_section = {
        "rerouted": rerouted,
        "change_jaccard": round(change_ratio, 4),
        "change_pct": round(change_ratio * 100.0, 1),
    }
    policy_section = {
        "rules_loaded": rules_loaded,
        "includes_added": policy_includes_added_count(policy_audit),
        "audit_size": len(policy_audit or []),
    }
    return {
        "schema": "route_quality/1",
        "shortlist": shortlist_section,
        "router": router_section,
        "session": session_section,
        "policy": policy_section,
        "picked_count": len(picked_names),
    }
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Conversation-aware routing text, skill routing cards, and sparse retrieval signals."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any, Protocol
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from app.route_quality import coerce_route_float
|
|
11
|
+
|
|
12
|
+
_TOKEN_RE = re.compile(r"[a-z0-9][a-z0-9_\-./]{2,}", re.I)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class _SkillCard(Protocol):
    """Structural type for the objects ``skill_routing_card`` accepts."""

    # Read directly as attributes by skill_routing_card.
    title: str
    description: str
    # Read through getattr with a None fallback by skill_routing_card.
    triggers: str
    anti_triggers: str
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def build_route_query_text(
    prompt: str,
    conversation: list[Any] | None,
    *,
    max_turns: int | None = None,
    max_chars_per_msg: int | None = None,
) -> str:
    """Merge recent turns with the current user message for embedding shortlist / hybrid scores.

    When ``SKILLFORGE_ROUTER_CONV_MAX_TURNS`` is 0 (default), returns ``prompt`` only (legacy behavior).

    Args:
        prompt: Current user message; surrounding whitespace is stripped.
        conversation: Prior messages. Only dict entries with non-empty
            ``content`` are used; ``role`` defaults to ``"user"``.
        max_turns: Number of trailing messages to include. ``None`` reads
            ``SKILLFORGE_ROUTER_CONV_MAX_TURNS`` (default 0).
        max_chars_per_msg: Per-message character cap; longer content is cut
            and suffixed with an ellipsis. ``None`` reads
            ``SKILLFORGE_ROUTER_CONV_MSG_CHARS`` (default 320). Negative
            values are treated as 0.

    Malformed environment values fall back to the defaults instead of raising.
    """
    conv = conversation or []

    mt = max_turns
    if mt is None:
        try:
            mt = int(os.getenv("SKILLFORGE_ROUTER_CONV_MAX_TURNS", "0"))
        except ValueError:
            mt = 0

    mc = max_chars_per_msg
    if mc is None:
        try:
            mc = int(os.getenv("SKILLFORGE_ROUTER_CONV_MSG_CHARS", "320"))
        except ValueError:
            mc = 320
    # A negative cap would make content[:mc] slice from the end of the
    # message rather than limit its length.
    mc = max(0, mc)

    prompt = (prompt or "").strip()
    if mt <= 0 or not conv:
        return prompt

    parts: list[str] = []
    for m in conv[-mt:]:
        if not isinstance(m, dict):
            continue
        role = str(m.get("role") or "user")
        content = str(m.get("content") or "").strip()
        if not content:
            continue
        if len(content) > mc:
            content = content[:mc] + "…"
        parts.append(f"{role}: {content}")

    if not parts:
        return prompt
    return "Conversation context:\n" + "\n".join(parts) + "\n\nCurrent user message:\n" + prompt
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def skill_routing_card(s: _SkillCard) -> str:
    """Render a skill as routing text: ``title: description`` plus optional lines.

    ``Triggers:`` and ``Anti-triggers:`` lines are appended only when the
    corresponding attribute exists and is non-blank. All fields are stripped;
    ``None`` is treated as empty.
    """

    def _clean(value: Any) -> str:
        return (value or "").strip()

    lines = [f"{_clean(s.title)}: {_clean(s.description)}"]
    for label, attr in (("Triggers", "triggers"), ("Anti-triggers", "anti_triggers")):
        text = _clean(getattr(s, attr, None))
        if text:
            lines.append(f"{label}: {text}")
    return "\n".join(lines)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def tokenize_skills_query(text: str) -> list[str]:
    """Return the lower-cased ``_TOKEN_RE`` matches in ``text``, in order.

    ``None`` or an empty string yields an empty list.
    """
    source = text if text else ""
    return [match.group(0).lower() for match in _TOKEN_RE.finditer(source)]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def normalize_minmax(arr: np.ndarray) -> np.ndarray:
    """Flatten ``arr`` to float64 and rescale it linearly onto [0, 1].

    An empty input is returned as an empty array; when the maximum does not
    exceed the minimum the result is all zeros.
    """
    flat = np.asarray(arr, dtype=np.float64).reshape(-1)
    if flat.size == 0:
        return flat
    lowest = float(flat.min())
    highest = float(flat.max())
    if highest <= lowest:
        return np.zeros_like(flat)
    span = highest - lowest
    return (flat - lowest) / span
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def keyword_overlap_scores(route_query: str, skill_cards: list[str]) -> np.ndarray:
    """Count, per skill card, the distinct tokens it shares with the query.

    Returns a float64 array with one unnormalized count per card; all zeros
    when the query yields no tokens.
    """
    query_tokens = frozenset(tokenize_skills_query(route_query))
    card_count = len(skill_cards)
    if not query_tokens:
        return np.zeros(card_count, dtype=np.float64)

    scores = np.empty(card_count, dtype=np.float64)
    for idx, card in enumerate(skill_cards):
        shared = query_tokens.intersection(tokenize_skills_query(card))
        scores[idx] = float(len(shared))
    return scores
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def host_pick_shortlist_lines(
    *,
    prompt: str,
    route_query: str,
    facet_rows: list[dict[str, Any]],
    max_candidates: int | None = None,
    line_chars: int | None = None,
) -> tuple[str, list[dict[str, Any]]]:
    """Tight numbered list + structured rows for MCP host-pick phase (no in-process LLM).

    Args:
        prompt: User task; collapsed to one line and clipped for the header.
        route_query: Retrieval query, appended (clipped to 400 chars) when
            non-blank.
        facet_rows: Ranked candidate facets; only the first ``max_candidates``
            are listed.
        max_candidates: Cap on listed candidates. ``None`` reads
            ``SKILLFORGE_HOST_PICK_MAX`` (default 12, floor 3). Negative
            values are treated as 0.
        line_chars: Cap on each candidate's one-line card. ``None`` reads
            ``SKILLFORGE_HOST_PICK_LINE_CHARS`` (default 120). Values below 1
            are treated as 1.

    Returns:
        ``(markdown, rows)``: the instruction text with one numbered line per
        candidate, and one dict per candidate carrying the same data.

    Malformed environment values fall back to the defaults instead of raising.
    """
    mc = max_candidates
    if mc is None:
        try:
            env_max = int(os.getenv("SKILLFORGE_HOST_PICK_MAX", "12"))
        except ValueError:
            env_max = 12
        mc = max(3, env_max)
    # A negative cap would make facet_rows[:mc] drop rows from the end
    # instead of limiting the list.
    mc = max(0, mc)

    lc = line_chars
    if lc is None:
        try:
            lc = int(os.getenv("SKILLFORGE_HOST_PICK_LINE_CHARS", "120"))
        except ValueError:
            lc = 120
    # Keep lc >= 1 so card[: lc - 1] never turns into a negative-index slice.
    lc = max(1, lc)

    prompt_one = (prompt or "").strip().replace("\n", " ")
    if len(prompt_one) > 160:
        prompt_one = prompt_one[:157] + "…"

    rows_out: list[dict[str, Any]] = []
    lines: list[str] = [
        "# Host pick — choose skill names only from this list",
        "",
        f"Task: {prompt_one}",
        "",
        "Reply with JSON only:",
        '{"picked": ["exact-skill-id", ...], "reasoning": "one line"}',
        f"Use 0–{mc} names from the numbered lines only (empty picked is allowed). Copy names exactly.",
        "",
        "```",
    ]
    for i, f in enumerate(facet_rows[:mc], start=1):
        name = str(f.get("name") or "")
        cos = coerce_route_float(f.get("cosine_similarity"))
        # str() keeps a non-string preview from breaking the slice.
        preview = str(f.get("description_preview") or "")[:lc]
        card = f"{f.get('title') or name}: {preview}".replace("\n", " ").strip()
        if len(card) > lc:
            card = card[: lc - 1] + "…"
        lines.append(f"{i:>2}. {name} | cos={cos:.3f} | {card}")
        rows_out.append({
            "id": name,
            "rank": i,
            "name": name,
            "cosine_similarity": round(cos, 6),
            "routing_score": f.get("routing_score"),
            "sparse_signal": f.get("sparse_signal"),
            "learned_weight": f.get("learned_weight"),
            "router_hybrid": f.get("router_hybrid"),
            "source": f.get("source"),
            "one_liner": card,
            "rationale_one_liner": card,
        })
    lines.append("```")

    rq = (route_query or "").strip()
    if len(rq) > 400:
        rq = rq[:397] + "…"
    if rq:
        lines.extend(["", f"_Retrieval query:_ {rq}"])
    return "\n".join(lines), rows_out
|