@heytherevibin/skillforge 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/CONTRIBUTING.md +30 -19
- package/README.md +242 -234
- package/RELEASING.md +19 -7
- package/SECURITY.md +61 -13
- package/STRATEGY.md +40 -14
- package/bin/cli.js +112 -5
- package/ci/bundle-gate.json +4 -0
- package/lib/host-setup.js +312 -0
- package/lib/templates/claude-code-skillforge-global.md +19 -0
- package/lib/templates/cursor-skillforge-global.md +16 -0
- package/package.json +3 -2
- package/python/app/eval_cli.py +133 -0
- package/python/app/feedback_meta.py +96 -0
- package/python/app/health_cli.py +160 -0
- package/python/app/main.py +261 -22
- package/python/app/materialize.py +72 -4
- package/python/app/mcp_contract.py +13 -1
- package/python/app/mcp_server.py +124 -27
- package/python/app/route_cli.py +32 -13
- package/python/app/route_eval_harness.py +98 -0
- package/python/app/route_policies.py +110 -0
- package/python/app/route_quality.py +99 -0
- package/python/app/routing_signals.py +60 -0
- package/python/app/weights_cli.py +152 -0
- package/python/fixtures/route_eval/smoke.json +18 -0
- package/python/tests/test_feedback_weights.py +77 -0
- package/python/tests/test_materialize.py +51 -0
- package/python/tests/test_mcp_contract.py +117 -0
- package/python/tests/test_route_eval_harness.py +45 -0
- package/python/tests/test_route_quality.py +120 -0
- package/python/tests/test_routing_overlay.py +55 -0
- package/python/tests/test_routing_signals.py +35 -0
package/python/app/mcp_server.py
CHANGED
|
@@ -27,6 +27,7 @@ from app.db_paths import resolve_orchestrator_db
|
|
|
27
27
|
from app.main import (
|
|
28
28
|
TOP_K_CANDIDATES,
|
|
29
29
|
MAX_ACTIVE_SKILLS,
|
|
30
|
+
SKILLFORGE_ROUTER_MODE,
|
|
30
31
|
build_router_and_skills,
|
|
31
32
|
format_context_items_markdown,
|
|
32
33
|
init_db,
|
|
@@ -41,7 +42,13 @@ from app.main import (
|
|
|
41
42
|
from app.materialize import materialize_project_files
|
|
42
43
|
from app.mcp_contract import MCP_RESPONSE_SCHEMA_VERSION, build_route_skills_meta
|
|
43
44
|
from app.redaction import redaction_enabled, redact_display_path
|
|
44
|
-
from app.route_policies import
|
|
45
|
+
from app.route_policies import (
|
|
46
|
+
build_routing_overlay_payload,
|
|
47
|
+
load_route_policies_config,
|
|
48
|
+
merge_policy_includes,
|
|
49
|
+
merge_project_notes_into_route_query,
|
|
50
|
+
parse_routing_overlay,
|
|
51
|
+
)
|
|
45
52
|
from app.routing_signals import build_route_query_text
|
|
46
53
|
|
|
47
54
|
|
|
@@ -189,7 +196,7 @@ class MCPServer:
|
|
|
189
196
|
return {
|
|
190
197
|
"protocolVersion": "2024-11-05",
|
|
191
198
|
"capabilities": caps,
|
|
192
|
-
"serverInfo": {"name": "skillforge", "version": "0.
|
|
199
|
+
"serverInfo": {"name": "skillforge", "version": "0.10.0"},
|
|
193
200
|
}
|
|
194
201
|
|
|
195
202
|
def handle_tools_list(self, params):
|
|
@@ -198,20 +205,25 @@ class MCPServer:
|
|
|
198
205
|
{
|
|
199
206
|
"name": "route_skills",
|
|
200
207
|
"description": (
|
|
201
|
-
"
|
|
202
|
-
"
|
|
203
|
-
"
|
|
204
|
-
"
|
|
205
|
-
"
|
|
206
|
-
"
|
|
207
|
-
f"{MCP_RESPONSE_SCHEMA_VERSION}), sources[] (kind skill or file), "
|
|
208
|
-
"budget (chars_skill_bodies, chars_project_chunks), fusion (MMR when combined index+RAG), "
|
|
209
|
-
"candidates_preview, context_items_count."
|
|
208
|
+
"Two-step when SKILLFORGE_ROUTER_MODE=host (no in-process router LLM): (1) call with prompt "
|
|
209
|
+
"only — returns a tight numbered shortlist + session_id; (2) call again with the same prompt "
|
|
210
|
+
"and picked_names (JSON array of exact catalog ids from the list) to load SKILL.md chunks. "
|
|
211
|
+
"With auto router modes, one call returns context. Optional conversation, project_root, "
|
|
212
|
+
"include_project_rag. picked_names may also be passed in embedding/full mode to skip "
|
|
213
|
+
"auto-pick and use the host-provided list."
|
|
210
214
|
),
|
|
211
215
|
"inputSchema": {
|
|
212
216
|
"type": "object",
|
|
213
217
|
"properties": {
|
|
214
218
|
"prompt": {"type": "string", "description": "The user's prompt or task description"},
|
|
219
|
+
"picked_names": {
|
|
220
|
+
"type": "array",
|
|
221
|
+
"items": {"type": "string"},
|
|
222
|
+
"description": (
|
|
223
|
+
"Host-chosen skill ids from the shortlist (same prompt as step 1). "
|
|
224
|
+
"Omit on first host-mode call; required for finalize after shortlist."
|
|
225
|
+
),
|
|
226
|
+
},
|
|
215
227
|
"project_root": {
|
|
216
228
|
"type": "string",
|
|
217
229
|
"description": "Repo/workspace root — stores orchestrator state in .skillforge/",
|
|
@@ -375,6 +387,8 @@ class MCPServer:
|
|
|
375
387
|
"name": "materialize_project",
|
|
376
388
|
"description": (
|
|
377
389
|
"Write project-local Skillforge files: .cursor/rules/skillforge.mdc, "
|
|
390
|
+
".cursor/commands/skillforge.md (Cursor /skillforge), "
|
|
391
|
+
".claude/commands/skillforge.md (Claude Code /skillforge), "
|
|
378
392
|
"docs/SKILLFORGE-PRD.md, and a CLAUDE.md section. "
|
|
379
393
|
"Pass project_root (workspace path) and skill_names from route_skills. "
|
|
380
394
|
"Hosts must supply project_root; MCP does not infer cwd."
|
|
@@ -390,7 +404,11 @@ class MCPServer:
|
|
|
390
404
|
},
|
|
391
405
|
"merge": {
|
|
392
406
|
"type": "boolean",
|
|
393
|
-
"description":
|
|
407
|
+
"description": (
|
|
408
|
+
"If false and .cursor/rules/skillforge.mdc, "
|
|
409
|
+
".cursor/commands/skillforge.md, or "
|
|
410
|
+
".claude/commands/skillforge.md exists, skip overwriting those files"
|
|
411
|
+
),
|
|
394
412
|
"default": True,
|
|
395
413
|
},
|
|
396
414
|
},
|
|
@@ -474,6 +492,16 @@ class MCPServer:
|
|
|
474
492
|
),
|
|
475
493
|
}
|
|
476
494
|
|
|
495
|
+
picked_names_from_host_supplied = "picked_names" in args
|
|
496
|
+
if picked_names_from_host_supplied:
|
|
497
|
+
raw_pn = args.get("picked_names")
|
|
498
|
+
if isinstance(raw_pn, list):
|
|
499
|
+
picked_names_from_host = [str(x) for x in raw_pn if x is not None]
|
|
500
|
+
else:
|
|
501
|
+
picked_names_from_host = []
|
|
502
|
+
else:
|
|
503
|
+
picked_names_from_host = None
|
|
504
|
+
|
|
477
505
|
con = self._get_con(args)
|
|
478
506
|
result = await run_route_turn(
|
|
479
507
|
con,
|
|
@@ -484,6 +512,8 @@ class MCPServer:
|
|
|
484
512
|
session_id=session_id,
|
|
485
513
|
project_root=pr,
|
|
486
514
|
include_project_rag=self._include_project_rag_from_args(args),
|
|
515
|
+
picked_names_from_host=picked_names_from_host,
|
|
516
|
+
picked_names_from_host_supplied=picked_names_from_host_supplied,
|
|
487
517
|
)
|
|
488
518
|
picked_names = result["picked_names"]
|
|
489
519
|
reasoning = result["reasoning"]
|
|
@@ -503,22 +533,29 @@ class MCPServer:
|
|
|
503
533
|
"context_mode": self.router.context_mode,
|
|
504
534
|
"context_items_count": len(context_items),
|
|
505
535
|
"project_rag_items_count": (result.get("event") or {}).get("project_rag_items_count", 0),
|
|
536
|
+
"host_pick_shortlist": bool(result.get("host_pick_shortlist")),
|
|
506
537
|
}
|
|
507
538
|
(d / "last_route.json").write_text(json.dumps(snap, indent=2), encoding="utf-8")
|
|
508
539
|
except OSError:
|
|
509
540
|
pass
|
|
510
541
|
|
|
511
542
|
db_disp = redact_display_path(db_path) if redaction_enabled() else str(db_path)
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
543
|
+
if result.get("host_pick_shortlist"):
|
|
544
|
+
response_text = (result.get("host_pick_markdown") or "").strip() + (
|
|
545
|
+
f"\n\n---\n_session_id:_ `{result['session_id']}` · _orchestrator:_ `{db_disp}`"
|
|
546
|
+
)
|
|
547
|
+
blocks = [response_text]
|
|
548
|
+
else:
|
|
549
|
+
blocks = [
|
|
550
|
+
f"# Skillforge — routed {len(picked_names)} skill(s); context=`{self.router.context_mode}`",
|
|
551
|
+
f"_DB:_ `{db_disp}`",
|
|
552
|
+
f"_Reasoning: {reasoning}_" if reasoning else "",
|
|
553
|
+
"",
|
|
554
|
+
]
|
|
555
|
+
if context_items:
|
|
556
|
+
blocks.append(format_context_items_markdown(context_items))
|
|
557
|
+
elif not picked_names:
|
|
558
|
+
blocks.append("_No skills matched this prompt closely enough to load._")
|
|
522
559
|
response_text = "\n".join(b for b in blocks if b is not None)
|
|
523
560
|
meta = build_route_skills_meta(
|
|
524
561
|
result=result,
|
|
@@ -531,6 +568,9 @@ class MCPServer:
|
|
|
531
568
|
fusion=(result.get("event") or {}).get("context_fusion"),
|
|
532
569
|
context_redaction=(result.get("event") or {}).get("context_redaction"),
|
|
533
570
|
)
|
|
571
|
+
if result.get("host_pick_shortlist"):
|
|
572
|
+
meta["host_pick_shortlist"] = True
|
|
573
|
+
meta["host_pick_candidates"] = result.get("host_pick_candidates") or []
|
|
534
574
|
return {
|
|
535
575
|
"content": [{"type": "text", "text": response_text}],
|
|
536
576
|
"_meta": meta,
|
|
@@ -552,7 +592,22 @@ class MCPServer:
|
|
|
552
592
|
limit = TOP_K_CANDIDATES
|
|
553
593
|
limit = max(1, min(limit, 50))
|
|
554
594
|
con = self._get_con(args)
|
|
555
|
-
|
|
595
|
+
policies_cfg = load_route_policies_config(pr)
|
|
596
|
+
overlay_audit = []
|
|
597
|
+
exclude_skills, routing_boosts, project_notes = parse_routing_overlay(
|
|
598
|
+
policies_cfg,
|
|
599
|
+
by_name=self.router._by_name,
|
|
600
|
+
audit_out=overlay_audit,
|
|
601
|
+
)
|
|
602
|
+
q2 = merge_project_notes_into_route_query(query, project_notes, pr)
|
|
603
|
+
facets = self.router.shortlist_with_facets(
|
|
604
|
+
q2,
|
|
605
|
+
con,
|
|
606
|
+
k=limit,
|
|
607
|
+
user_id=user_id,
|
|
608
|
+
exclude_skills=exclude_skills,
|
|
609
|
+
routing_boosts=routing_boosts,
|
|
610
|
+
)
|
|
556
611
|
lines = ["# search_skills — embedding shortlist", ""]
|
|
557
612
|
for f in facets:
|
|
558
613
|
lines.append(
|
|
@@ -588,14 +643,38 @@ class MCPServer:
|
|
|
588
643
|
limit = TOP_K_CANDIDATES
|
|
589
644
|
limit = max(1, min(limit, 50))
|
|
590
645
|
con = self._get_con(args)
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
646
|
+
policies_cfg = load_route_policies_config(pr)
|
|
647
|
+
overlay_audit = []
|
|
648
|
+
exclude_skills, routing_boosts, project_notes = parse_routing_overlay(
|
|
649
|
+
policies_cfg,
|
|
650
|
+
by_name=self.router._by_name,
|
|
651
|
+
audit_out=overlay_audit,
|
|
652
|
+
)
|
|
653
|
+
route_query = merge_project_notes_into_route_query(
|
|
654
|
+
build_route_query_text(prompt, conversation),
|
|
655
|
+
project_notes,
|
|
656
|
+
pr,
|
|
657
|
+
)
|
|
658
|
+
facets = self.router.shortlist_with_facets(
|
|
659
|
+
route_query,
|
|
660
|
+
con,
|
|
661
|
+
k=limit,
|
|
662
|
+
user_id=user_id,
|
|
663
|
+
exclude_skills=exclude_skills,
|
|
664
|
+
routing_boosts=routing_boosts,
|
|
665
|
+
)
|
|
666
|
+
candidates = self.router.shortlist(
|
|
667
|
+
route_query,
|
|
668
|
+
con,
|
|
669
|
+
limit,
|
|
670
|
+
user_id,
|
|
671
|
+
exclude_skills=exclude_skills,
|
|
672
|
+
routing_boosts=routing_boosts,
|
|
673
|
+
)
|
|
594
674
|
candidates = await self.router.rerank_candidates_haiku(route_query, conversation, candidates)
|
|
595
675
|
picked, reasoning = await self.router.pick_final(
|
|
596
676
|
prompt, conversation, candidates, route_query=route_query
|
|
597
677
|
)
|
|
598
|
-
policies_cfg = load_route_policies_config(pr)
|
|
599
678
|
merged, policy_audit = merge_policy_includes(
|
|
600
679
|
prompt,
|
|
601
680
|
list(picked),
|
|
@@ -606,6 +685,15 @@ class MCPServer:
|
|
|
606
685
|
max_active=MAX_ACTIVE_SKILLS,
|
|
607
686
|
)
|
|
608
687
|
router_mode = "full" if self.router.anthropic else "embedding-only"
|
|
688
|
+
notes_effective = bool(project_notes.strip() and (pr or "").strip())
|
|
689
|
+
routing_ov = build_routing_overlay_payload(
|
|
690
|
+
project_root=pr or "",
|
|
691
|
+
exclude_skills=exclude_skills,
|
|
692
|
+
routing_boosts=routing_boosts,
|
|
693
|
+
project_notes_applied=notes_effective,
|
|
694
|
+
project_notes_len=len(project_notes) if project_notes else 0,
|
|
695
|
+
audit=overlay_audit,
|
|
696
|
+
)
|
|
609
697
|
explain = {
|
|
610
698
|
"schema_version": MCP_RESPONSE_SCHEMA_VERSION,
|
|
611
699
|
"tool": "explain_route",
|
|
@@ -622,6 +710,8 @@ class MCPServer:
|
|
|
622
710
|
"audit": policy_audit,
|
|
623
711
|
},
|
|
624
712
|
}
|
|
713
|
+
if routing_ov is not None:
|
|
714
|
+
explain["routing_overlay"] = routing_ov
|
|
625
715
|
lines = [
|
|
626
716
|
"# explain_route — routing diagnostics (no DB writes)",
|
|
627
717
|
"",
|
|
@@ -773,6 +863,13 @@ class MCPServer:
|
|
|
773
863
|
session_id = args.get("session_id") or None
|
|
774
864
|
user_id = self._mcp_user_id(args)
|
|
775
865
|
merge = args.get("merge", True)
|
|
866
|
+
if SKILLFORGE_ROUTER_MODE == "host":
|
|
867
|
+
msg = (
|
|
868
|
+
"skillforge_bootstrap does not support SKILLFORGE_ROUTER_MODE=host (two-step routing). "
|
|
869
|
+
"Set SKILLFORGE_ROUTER_MODE=embedding for one-shot bootstrap, or call route_skills twice "
|
|
870
|
+
"(shortlist then picked_names) and materialize_project yourself."
|
|
871
|
+
)
|
|
872
|
+
return {"content": [{"type": "text", "text": msg}], "isError": True}
|
|
776
873
|
if not prompt.strip():
|
|
777
874
|
return {"content": [{"type": "text", "text": "No prompt provided."}], "isError": True}
|
|
778
875
|
if not root:
|
package/python/app/route_cli.py
CHANGED
|
@@ -34,6 +34,11 @@ def _parse_args(argv: list[str] | None) -> argparse.Namespace:
|
|
|
34
34
|
)
|
|
35
35
|
p.add_argument("--session-id", default="", help="Stable session id (reuse across turns for reroute stats).")
|
|
36
36
|
p.add_argument("--user-id", default="", help="Logical user id for weights/sessions/events.")
|
|
37
|
+
p.add_argument(
|
|
38
|
+
"--picked-names",
|
|
39
|
+
default="",
|
|
40
|
+
help="Comma-separated catalog skill ids (host pick). Skips auto router/Haiku; same as MCP picked_names.",
|
|
41
|
+
)
|
|
37
42
|
p.add_argument("--json-meta", action="store_true", help="Print routing metadata as JSON on stderr after output.")
|
|
38
43
|
p.add_argument(
|
|
39
44
|
"--include-project-rag",
|
|
@@ -55,11 +60,16 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
55
60
|
return 2
|
|
56
61
|
db_path = resolve_orchestrator_db(pr)
|
|
57
62
|
con = init_db(db_path)
|
|
63
|
+
db_disp = redact_display_path(db_path) if redaction_enabled() else str(db_path)
|
|
58
64
|
|
|
59
65
|
router, skills = await asyncio.to_thread(build_router_and_skills, log=True, log_prefix="[skillforge-route]")
|
|
60
66
|
session_id = args.session_id.strip() or None
|
|
61
67
|
user_id = args.user_id.strip()
|
|
62
68
|
|
|
69
|
+
picked_raw = (args.picked_names or "").strip()
|
|
70
|
+
picked_supplied = bool(picked_raw)
|
|
71
|
+
picked_list = [x.strip() for x in picked_raw.split(",") if x.strip()] if picked_raw else []
|
|
72
|
+
|
|
63
73
|
try:
|
|
64
74
|
result = await run_route_turn(
|
|
65
75
|
con,
|
|
@@ -70,6 +80,8 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
70
80
|
session_id=session_id,
|
|
71
81
|
project_root=pr,
|
|
72
82
|
include_project_rag=bool(args.include_project_rag),
|
|
83
|
+
picked_names_from_host=picked_list if picked_supplied else None,
|
|
84
|
+
picked_names_from_host_supplied=picked_supplied,
|
|
73
85
|
)
|
|
74
86
|
finally:
|
|
75
87
|
con.close()
|
|
@@ -95,24 +107,28 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
95
107
|
"context_mode": router.context_mode,
|
|
96
108
|
"context_items_count": len(context_items),
|
|
97
109
|
"project_rag_items_count": (result.get("event") or {}).get("project_rag_items_count", 0),
|
|
110
|
+
"host_pick_shortlist": bool(result.get("host_pick_shortlist")),
|
|
98
111
|
}
|
|
99
112
|
(d / "last_route.json").write_text(json.dumps(snap, indent=2), encoding="utf-8")
|
|
100
113
|
except OSError:
|
|
101
114
|
pass
|
|
102
115
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
+
if result.get("host_pick_shortlist"):
|
|
117
|
+
response_text = ((result.get("host_pick_markdown") or "").strip() + f"\n\n---\n_session_id:_ `{sid}` · _DB:_ `{db_disp}`")
|
|
118
|
+
print(response_text.strip())
|
|
119
|
+
else:
|
|
120
|
+
blocks = [
|
|
121
|
+
f"# Skillforge — routed {len(picked_names)} skill(s); context=`{router.context_mode}`",
|
|
122
|
+
f"_DB:_ `{db_disp}`",
|
|
123
|
+
f"_Reasoning: {reasoning}_" if reasoning else "",
|
|
124
|
+
"",
|
|
125
|
+
]
|
|
126
|
+
if context_items:
|
|
127
|
+
blocks.append(format_context_items_markdown(context_items))
|
|
128
|
+
elif not picked_names:
|
|
129
|
+
blocks.append("_No skills matched this prompt closely enough to load._")
|
|
130
|
+
response_text = "\n".join(b for b in blocks if b is not None)
|
|
131
|
+
print(response_text)
|
|
116
132
|
|
|
117
133
|
if args.json_meta:
|
|
118
134
|
meta = build_route_skills_meta(
|
|
@@ -126,6 +142,9 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
126
142
|
fusion=(result.get("event") or {}).get("context_fusion"),
|
|
127
143
|
context_redaction=(result.get("event") or {}).get("context_redaction"),
|
|
128
144
|
)
|
|
145
|
+
if result.get("host_pick_shortlist"):
|
|
146
|
+
meta["host_pick_shortlist"] = True
|
|
147
|
+
meta["host_pick_candidates"] = result.get("host_pick_candidates") or []
|
|
129
148
|
print(json.dumps(meta, indent=2), file=sys.stderr)
|
|
130
149
|
|
|
131
150
|
return 0
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Pure helpers for route evaluation fixtures (embedding-first, no LLM)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_eval_fixture(path: Path) -> dict[str, Any]:
    """Read a route-eval fixture file and return its decoded JSON object.

    The file is read as UTF-8 text and parsed with ``json.loads``. The
    decoded value must be an object whose ``cases`` entry is a non-empty
    array; the whole object is returned unchanged.

    Raises:
        ValueError: if the root is not a JSON object, or ``cases`` is
            missing, not a list, or empty.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise ValueError("fixture root must be a JSON object")

    case_list = payload.get("cases")
    if isinstance(case_list, list) and case_list:
        return payload
    raise ValueError("fixture must contain a non-empty cases array")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _window(case: dict[str, Any], defaults: dict[str, Any]) -> int:
|
|
21
|
+
w = case.get("candidate_window")
|
|
22
|
+
if w is None:
|
|
23
|
+
w = defaults.get("candidate_window", 25)
|
|
24
|
+
return max(1, int(w))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def evaluate_case_result(
|
|
28
|
+
result: dict[str, Any],
|
|
29
|
+
case: dict[str, Any],
|
|
30
|
+
*,
|
|
31
|
+
defaults: dict[str, Any] | None = None,
|
|
32
|
+
) -> list[str]:
|
|
33
|
+
"""Return human-readable error strings; empty means pass."""
|
|
34
|
+
defaults = defaults or {}
|
|
35
|
+
errs: list[str] = []
|
|
36
|
+
case_id = case.get("id") or case.get("name") or "?"
|
|
37
|
+
|
|
38
|
+
if result.get("host_pick_shortlist"):
|
|
39
|
+
errs.append(f"{case_id}: host shortlist result — use embedding router mode for eval")
|
|
40
|
+
return errs
|
|
41
|
+
|
|
42
|
+
cands = result.get("candidates") or []
|
|
43
|
+
cand_names: list[str] = []
|
|
44
|
+
for item in cands:
|
|
45
|
+
if isinstance(item, tuple) and len(item) >= 1:
|
|
46
|
+
sk = item[0]
|
|
47
|
+
name = getattr(sk, "name", None)
|
|
48
|
+
if name:
|
|
49
|
+
cand_names.append(str(name))
|
|
50
|
+
elif isinstance(item, dict) and item.get("name"):
|
|
51
|
+
cand_names.append(str(item["name"]))
|
|
52
|
+
|
|
53
|
+
window = _window(case, defaults)
|
|
54
|
+
head = cand_names[:window]
|
|
55
|
+
head_set = set(head)
|
|
56
|
+
|
|
57
|
+
for label in (
|
|
58
|
+
"expect_in_candidates",
|
|
59
|
+
"expect_candidates_contain",
|
|
60
|
+
):
|
|
61
|
+
need = case.get(label)
|
|
62
|
+
if not need:
|
|
63
|
+
continue
|
|
64
|
+
if not isinstance(need, list):
|
|
65
|
+
errs.append(f"{case_id}: {label} must be a list")
|
|
66
|
+
continue
|
|
67
|
+
for skill_id in need:
|
|
68
|
+
sid = str(skill_id)
|
|
69
|
+
if sid not in head_set:
|
|
70
|
+
errs.append(
|
|
71
|
+
f"{case_id}: expected {sid!r} in first {window} candidates "
|
|
72
|
+
f"(have {head[:8]}{'…' if len(head) > 8 else ''})"
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
picked = list(result.get("picked_names") or [])
|
|
76
|
+
picked_set = set(picked)
|
|
77
|
+
|
|
78
|
+
if case.get("expect_picked_any"):
|
|
79
|
+
need = case["expect_picked_any"]
|
|
80
|
+
if not isinstance(need, list):
|
|
81
|
+
errs.append(f"{case_id}: expect_picked_any must be a list")
|
|
82
|
+
elif not (picked_set & {str(x) for x in need}):
|
|
83
|
+
errs.append(
|
|
84
|
+
f"{case_id}: expected at least one of {need!r} in picked_names {picked!r}"
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
if case.get("expect_picked_all"):
|
|
88
|
+
need = case["expect_picked_all"]
|
|
89
|
+
if not isinstance(need, list):
|
|
90
|
+
errs.append(f"{case_id}: expect_picked_all must be a list")
|
|
91
|
+
else:
|
|
92
|
+
for sid in need:
|
|
93
|
+
if str(sid) not in picked_set:
|
|
94
|
+
errs.append(
|
|
95
|
+
f"{case_id}: expected picked_names to include {sid!r} (have {picked!r})"
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
return errs
|
|
@@ -21,6 +21,18 @@ Rule shape::
|
|
|
21
21
|
``if_text_matches`` is passed to ``re.search`` (``re.DOTALL``). ``include`` is a skill
|
|
22
22
|
name or list of names. Forced skills are appended after router picks until
|
|
23
23
|
``MAX_ACTIVE_SKILLS`` is reached.
|
|
24
|
+
|
|
25
|
+
Optional **project routing overlay** (same JSON object):
|
|
26
|
+
|
|
27
|
+
- ``exclude_skills`` / ``host_exclude`` / ``denylist`` — skill ids excluded from the embedding
|
|
28
|
+
shortlist (hard filter).
|
|
29
|
+
- ``routing_boosts`` / ``skill_boosts`` — object mapping skill id → numeric delta added to the
|
|
30
|
+
routing score after learned weights (clamped to ±2).
|
|
31
|
+
- ``project_notes`` / ``routing_notes`` / ``rag_notes`` — free text prepended to the internal
|
|
32
|
+
routing query when **project_root** is set (stack/context hints for embedding).
|
|
33
|
+
|
|
34
|
+
``project_notes`` are **not** applied without ``project_root`` to avoid global prompt injection
|
|
35
|
+
from shared policy files.
|
|
24
36
|
"""
|
|
25
37
|
from __future__ import annotations
|
|
26
38
|
|
|
@@ -61,6 +73,104 @@ def load_route_policies_config(project_root: str | None) -> dict[str, Any]:
|
|
|
61
73
|
return {"rules": []}
|
|
62
74
|
|
|
63
75
|
|
|
76
|
+
def parse_routing_overlay(
|
|
77
|
+
policies: dict[str, Any] | None,
|
|
78
|
+
*,
|
|
79
|
+
by_name: dict[str, Any] | None = None,
|
|
80
|
+
audit_out: list[dict[str, Any]] | None = None,
|
|
81
|
+
) -> tuple[frozenset[str], dict[str, float], str]:
|
|
82
|
+
"""Parse exclude list, per-skill score boosts, and project notes from policies dict."""
|
|
83
|
+
policies = policies or {}
|
|
84
|
+
by_name = by_name or {}
|
|
85
|
+
boost_cap = 2.0
|
|
86
|
+
|
|
87
|
+
raw_ex = policies.get("exclude_skills") or policies.get("host_exclude") or policies.get("denylist") or []
|
|
88
|
+
if isinstance(raw_ex, str):
|
|
89
|
+
raw_ex = [raw_ex]
|
|
90
|
+
exclude: set[str] = set()
|
|
91
|
+
if isinstance(raw_ex, list):
|
|
92
|
+
for x in raw_ex:
|
|
93
|
+
if not isinstance(x, str) or not x.strip():
|
|
94
|
+
continue
|
|
95
|
+
name = x.strip()
|
|
96
|
+
if by_name and name not in by_name:
|
|
97
|
+
if audit_out is not None:
|
|
98
|
+
audit_out.append({"kind": "exclude", "skill": name, "effect": "unknown_skill"})
|
|
99
|
+
continue
|
|
100
|
+
exclude.add(name)
|
|
101
|
+
|
|
102
|
+
raw_boost = policies.get("routing_boosts") or policies.get("skill_boosts") or {}
|
|
103
|
+
boosts: dict[str, float] = {}
|
|
104
|
+
if isinstance(raw_boost, dict):
|
|
105
|
+
for k, v in raw_boost.items():
|
|
106
|
+
if not isinstance(k, str) or not k.strip():
|
|
107
|
+
continue
|
|
108
|
+
name = k.strip()
|
|
109
|
+
if by_name and name not in by_name:
|
|
110
|
+
if audit_out is not None:
|
|
111
|
+
audit_out.append({"kind": "boost", "skill": name, "effect": "unknown_skill"})
|
|
112
|
+
continue
|
|
113
|
+
try:
|
|
114
|
+
b = float(v)
|
|
115
|
+
except (TypeError, ValueError):
|
|
116
|
+
if audit_out is not None:
|
|
117
|
+
audit_out.append({"kind": "boost", "skill": name, "effect": "invalid_value"})
|
|
118
|
+
continue
|
|
119
|
+
boosts[name] = max(-boost_cap, min(boost_cap, b))
|
|
120
|
+
|
|
121
|
+
notes = ""
|
|
122
|
+
for key in ("project_notes", "routing_notes", "rag_notes"):
|
|
123
|
+
raw = policies.get(key)
|
|
124
|
+
if isinstance(raw, str) and raw.strip():
|
|
125
|
+
notes = raw.strip()
|
|
126
|
+
break
|
|
127
|
+
|
|
128
|
+
return frozenset(exclude), boosts, notes
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def merge_project_notes_into_route_query(
|
|
132
|
+
route_query: str,
|
|
133
|
+
notes: str,
|
|
134
|
+
project_root: str | None,
|
|
135
|
+
*,
|
|
136
|
+
max_chars: int | None = None,
|
|
137
|
+
) -> str:
|
|
138
|
+
"""Prefix routing query with project notes when ``project_root`` is set."""
|
|
139
|
+
notes = (notes or "").strip()
|
|
140
|
+
pr = (project_root or "").strip()
|
|
141
|
+
if not notes or not pr:
|
|
142
|
+
return route_query
|
|
143
|
+
mc = max_chars
|
|
144
|
+
if mc is None:
|
|
145
|
+
mc = int(os.getenv("SKILLFORGE_PROJECT_NOTES_MAX_CHARS", "1200"))
|
|
146
|
+
mc = max(0, mc)
|
|
147
|
+
clipped = notes if len(notes) <= mc else notes[: max(0, mc - 1)] + "…"
|
|
148
|
+
return f"Project routing notes:\n{clipped}\n\n{route_query}"
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def build_routing_overlay_payload(
|
|
152
|
+
*,
|
|
153
|
+
project_root: str,
|
|
154
|
+
exclude_skills: frozenset[str],
|
|
155
|
+
routing_boosts: dict[str, float],
|
|
156
|
+
project_notes_applied: bool,
|
|
157
|
+
project_notes_len: int,
|
|
158
|
+
audit: list[dict[str, Any]],
|
|
159
|
+
) -> dict[str, Any] | None:
|
|
160
|
+
"""Telemetry / MCP meta; omit when nothing configured."""
|
|
161
|
+
if not exclude_skills and not routing_boosts and not project_notes_applied and not audit:
|
|
162
|
+
return None
|
|
163
|
+
return {
|
|
164
|
+
"schema": "routing_overlay/1",
|
|
165
|
+
"project_root_set": bool((project_root or "").strip()),
|
|
166
|
+
"exclude_skills": sorted(exclude_skills),
|
|
167
|
+
"routing_boosts": {k: round(float(v), 4) for k, v in sorted(routing_boosts.items())},
|
|
168
|
+
"project_notes_applied": project_notes_applied,
|
|
169
|
+
"project_notes_len": int(project_notes_len),
|
|
170
|
+
"audit": list(audit),
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
|
|
64
174
|
def merge_policy_includes(
|
|
65
175
|
prompt: str,
|
|
66
176
|
picked_names: list[str],
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Calibration metrics for route_skills MCP _meta and route events (local, no extra network)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import math
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def coerce_route_float(x: Any, *, default: float = 0.0) -> float:
    """Coerce to float for routing telemetry; never raises; maps NaN/inf to default.

    Args:
        x: Any value; converted with ``float()``.
        default: Returned when ``x`` cannot be converted or is not finite.

    Returns:
        The finite float value of ``x``, or ``default``.
    """
    try:
        v = float(x)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an int too large to represent as a float.
        return default
    return v if math.isfinite(v) else default
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def policy_includes_added_count(audit: list[dict[str, Any]] | None) -> int:
|
|
18
|
+
if not audit:
|
|
19
|
+
return 0
|
|
20
|
+
return sum(1 for row in audit if isinstance(row, dict) and row.get("effect") == "added")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def top1_cosine_vs_routing_agreement(facets: list[dict[str, Any]]) -> bool | None:
|
|
24
|
+
"""Whether the #1 by routing_score matches the skill with max cosine (hybrid diagnostic)."""
|
|
25
|
+
if len(facets) < 2:
|
|
26
|
+
return None
|
|
27
|
+
top_route = facets[0].get("name")
|
|
28
|
+
best_cos_name = max(facets, key=lambda f: coerce_route_float(f.get("cosine_similarity"))).get("name")
|
|
29
|
+
if not top_route or not best_cos_name:
|
|
30
|
+
return None
|
|
31
|
+
return top_route == best_cos_name
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_route_quality(
    *,
    facet_list: list[dict[str, Any]],
    router_mode: str,
    router_hybrid: str,
    picked_names: list[str],
    rerouted: bool,
    change: float,
    policy_rules_loaded: int,
    policy_audit: list[dict[str, Any]] | None,
    host_picked: bool,
    host_shortlist_only: bool = False,
    haiku_rerank_applied: bool = False,
    pick_path: str,
) -> dict[str, Any]:
    """Build the ``route_quality/1`` diagnostics object for one routing turn.

    The result groups shortlist statistics (size, top/second cosine, their
    margin, top routing score, hybrid agreement), router flags, session
    change figures and policy counters, plus the number of picked skills.
    Cosine and score values are rounded to 6 decimals; the agreement flag is
    only computed when ``router_hybrid`` is not ``""``, ``"off"`` or ``None``.
    """
    shortlist_size = len(facet_list)

    def _metric(facet: dict[str, Any], key: str) -> float:
        # Every shortlist metric is coerced and rounded the same way.
        return round(coerce_route_float(facet.get(key)), 6)

    top_cos = second_cos = margin = top_routing_score = None
    if facet_list:
        first = facet_list[0]
        top_cos = _metric(first, "cosine_similarity")
        top_routing_score = _metric(first, "routing_score")
        if len(facet_list) > 1:
            second_cos = _metric(facet_list[1], "cosine_similarity")
            margin = round(float(top_cos - second_cos), 6)

    if router_hybrid in ("", "off", None):
        agree = None
    else:
        agree = top1_cosine_vs_routing_agreement(facet_list)

    # Non-numeric rule counts collapse to 0; negatives are floored at 0.
    try:
        rules_loaded = max(0, int(policy_rules_loaded))
    except (TypeError, ValueError):
        rules_loaded = 0

    shortlist_stats = {
        "size": shortlist_size,
        "top_cosine_similarity": top_cos,
        "second_cosine_similarity": second_cos,
        "cosine_margin": margin,
        "top_routing_score": top_routing_score,
        "hybrid_mode": router_hybrid or "off",
        "top1_dense_and_fused_agree": agree,
    }
    router_stats = {
        "mode": router_mode,
        "pick_path": pick_path,
        "host_picked": host_picked,
        "host_shortlist_only": host_shortlist_only,
        "haiku_rerank_applied": haiku_rerank_applied,
    }
    change_value = coerce_route_float(change)
    session_stats = {
        "rerouted": rerouted,
        "change_jaccard": round(change_value, 4),
        "change_pct": round(change_value * 100.0, 1),
    }
    policy_stats = {
        "rules_loaded": rules_loaded,
        "includes_added": policy_includes_added_count(policy_audit),
        "audit_size": len(policy_audit or []),
    }
    return {
        "schema": "route_quality/1",
        "shortlist": shortlist_stats,
        "router": router_stats,
        "session": session_stats,
        "policy": policy_stats,
        "picked_count": len(picked_names),
    }
|