@heytherevibin/skillforge 0.7.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,6 +34,11 @@ def _parse_args(argv: list[str] | None) -> argparse.Namespace:
34
34
  )
35
35
  p.add_argument("--session-id", default="", help="Stable session id (reuse across turns for reroute stats).")
36
36
  p.add_argument("--user-id", default="", help="Logical user id for weights/sessions/events.")
37
+ p.add_argument(
38
+ "--picked-names",
39
+ default="",
40
+ help="Comma-separated catalog skill ids (host pick). Skips auto router/Haiku; same as MCP picked_names.",
41
+ )
37
42
  p.add_argument("--json-meta", action="store_true", help="Print routing metadata as JSON on stderr after output.")
38
43
  p.add_argument(
39
44
  "--include-project-rag",
@@ -55,11 +60,16 @@ async def _run(args: argparse.Namespace) -> int:
55
60
  return 2
56
61
  db_path = resolve_orchestrator_db(pr)
57
62
  con = init_db(db_path)
63
+ db_disp = redact_display_path(db_path) if redaction_enabled() else str(db_path)
58
64
 
59
65
  router, skills = await asyncio.to_thread(build_router_and_skills, log=True, log_prefix="[skillforge-route]")
60
66
  session_id = args.session_id.strip() or None
61
67
  user_id = args.user_id.strip()
62
68
 
69
+ picked_raw = (args.picked_names or "").strip()
70
+ picked_supplied = bool(picked_raw)
71
+ picked_list = [x.strip() for x in picked_raw.split(",") if x.strip()] if picked_raw else []
72
+
63
73
  try:
64
74
  result = await run_route_turn(
65
75
  con,
@@ -70,6 +80,8 @@ async def _run(args: argparse.Namespace) -> int:
70
80
  session_id=session_id,
71
81
  project_root=pr,
72
82
  include_project_rag=bool(args.include_project_rag),
83
+ picked_names_from_host=picked_list if picked_supplied else None,
84
+ picked_names_from_host_supplied=picked_supplied,
73
85
  )
74
86
  finally:
75
87
  con.close()
@@ -95,24 +107,28 @@ async def _run(args: argparse.Namespace) -> int:
95
107
  "context_mode": router.context_mode,
96
108
  "context_items_count": len(context_items),
97
109
  "project_rag_items_count": (result.get("event") or {}).get("project_rag_items_count", 0),
110
+ "host_pick_shortlist": bool(result.get("host_pick_shortlist")),
98
111
  }
99
112
  (d / "last_route.json").write_text(json.dumps(snap, indent=2), encoding="utf-8")
100
113
  except OSError:
101
114
  pass
102
115
 
103
- db_disp = redact_display_path(db_path) if redaction_enabled() else str(db_path)
104
- blocks = [
105
- f"# Skillforge — routed {len(picked_names)} skill(s); context=`{router.context_mode}`",
106
- f"_DB:_ `{db_disp}`",
107
- f"_Reasoning: {reasoning}_" if reasoning else "",
108
- "",
109
- ]
110
- if context_items:
111
- blocks.append(format_context_items_markdown(context_items))
112
- elif not picked_names:
113
- blocks.append("_No skills matched this prompt closely enough to load._")
114
- response_text = "\n".join(b for b in blocks if b is not None)
115
- print(response_text)
116
+ if result.get("host_pick_shortlist"):
117
+ response_text = ((result.get("host_pick_markdown") or "").strip() + f"\n\n---\n_session_id:_ `{sid}` · _DB:_ `{db_disp}`")
118
+ print(response_text.strip())
119
+ else:
120
+ blocks = [
121
+ f"# Skillforge — routed {len(picked_names)} skill(s); context=`{router.context_mode}`",
122
+ f"_DB:_ `{db_disp}`",
123
+ f"_Reasoning: {reasoning}_" if reasoning else "",
124
+ "",
125
+ ]
126
+ if context_items:
127
+ blocks.append(format_context_items_markdown(context_items))
128
+ elif not picked_names:
129
+ blocks.append("_No skills matched this prompt closely enough to load._")
130
+ response_text = "\n".join(b for b in blocks if b is not None)
131
+ print(response_text)
116
132
 
117
133
  if args.json_meta:
118
134
  meta = build_route_skills_meta(
@@ -126,6 +142,9 @@ async def _run(args: argparse.Namespace) -> int:
126
142
  fusion=(result.get("event") or {}).get("context_fusion"),
127
143
  context_redaction=(result.get("event") or {}).get("context_redaction"),
128
144
  )
145
+ if result.get("host_pick_shortlist"):
146
+ meta["host_pick_shortlist"] = True
147
+ meta["host_pick_candidates"] = result.get("host_pick_candidates") or []
129
148
  print(json.dumps(meta, indent=2), file=sys.stderr)
130
149
 
131
150
  return 0
@@ -0,0 +1,98 @@
1
+ """Pure helpers for route evaluation fixtures (embedding-first, no LLM)."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
def load_eval_fixture(path: Path) -> dict[str, Any]:
    """Read a route-eval fixture from ``path`` and validate its top-level shape.

    The file is decoded as UTF-8 and parsed as JSON. The root must be a JSON
    object whose ``cases`` entry is a non-empty list.

    Returns:
        The parsed fixture object, unmodified.

    Raises:
        ValueError: If the root is not an object, or ``cases`` is missing, not
            a list, or empty. Malformed JSON also surfaces as a ``ValueError``
            (``json.JSONDecodeError`` is a subclass).
        OSError: If the file cannot be read.
    """
    fixture = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(fixture, dict):
        raise ValueError("fixture root must be a JSON object")

    case_list = fixture.get("cases")
    if isinstance(case_list, list) and case_list:
        return fixture
    raise ValueError("fixture must contain a non-empty cases array")
18
+
19
+
20
def _window(case: dict[str, Any], defaults: dict[str, Any]) -> int:
    """Return how many leading candidates a case's expectations are checked against.

    The case's own ``candidate_window`` wins; otherwise the fixture-level
    default is used, and 25 applies when neither provides a value. The result
    is never smaller than 1.

    Raises:
        TypeError, ValueError: If the configured value cannot be converted by
            ``int()`` (e.g. a non-numeric string or a list).
    """
    size = case.get("candidate_window")
    if size is None:
        size = defaults.get("candidate_window")
    if size is None:
        # Covers both a missing key and an explicit JSON ``null`` in defaults;
        # the latter used to reach int(None) and raise TypeError.
        size = 25
    return max(1, int(size))
25
+
26
+
27
def evaluate_case_result(
    result: dict[str, Any],
    case: dict[str, Any],
    *,
    defaults: dict[str, Any] | None = None,
) -> list[str]:
    """Check one routing ``result`` against a fixture ``case``.

    Args:
        result: Route output. Read keys: ``host_pick_shortlist``,
            ``candidates`` (tuples whose first element has a ``name``
            attribute, or dicts with a ``name`` key) and ``picked_names``.
        case: Fixture case. Read keys: ``id`` / ``name`` (for messages),
            ``candidate_window``, ``expect_in_candidates``,
            ``expect_candidates_contain``, ``expect_picked_any`` and
            ``expect_picked_all``.
        defaults: Fixture-level defaults (only ``candidate_window`` is used).

    Returns:
        Human-readable failure messages; an empty list means the case passed.
    """
    fallback = defaults or {}
    problems: list[str] = []
    label_for_case = case.get("id") or case.get("name") or "?"

    # A host-pick shortlist carries no router decision to evaluate.
    if result.get("host_pick_shortlist"):
        problems.append(
            f"{label_for_case}: host shortlist result — use embedding router mode for eval"
        )
        return problems

    # Collect candidate names in rank order, skipping unnamed/unknown shapes.
    ranked: list[str] = []
    for entry in result.get("candidates") or []:
        if isinstance(entry, tuple) and len(entry) >= 1:
            found = getattr(entry[0], "name", None)
        elif isinstance(entry, dict):
            found = entry.get("name")
        else:
            found = None
        if found:
            ranked.append(str(found))

    window = _window(case, fallback)
    head = ranked[:window]
    visible = set(head)

    # Both keys are treated identically.
    for key in ("expect_in_candidates", "expect_candidates_contain"):
        wanted = case.get(key)
        if not wanted:
            continue
        if not isinstance(wanted, list):
            problems.append(f"{label_for_case}: {key} must be a list")
            continue
        for raw_id in wanted:
            sid = str(raw_id)
            if sid in visible:
                continue
            problems.append(
                f"{label_for_case}: expected {sid!r} in first {window} candidates "
                f"(have {head[:8]}{'…' if len(head) > 8 else ''})"
            )

    picked = list(result.get("picked_names") or [])
    chosen = set(picked)

    any_of = case.get("expect_picked_any")
    if any_of:
        if not isinstance(any_of, list):
            problems.append(f"{label_for_case}: expect_picked_any must be a list")
        elif chosen.isdisjoint({str(x) for x in any_of}):
            problems.append(
                f"{label_for_case}: expected at least one of {any_of!r} in picked_names {picked!r}"
            )

    all_of = case.get("expect_picked_all")
    if all_of:
        if not isinstance(all_of, list):
            problems.append(f"{label_for_case}: expect_picked_all must be a list")
        else:
            problems.extend(
                f"{label_for_case}: expected picked_names to include {sid!r} (have {picked!r})"
                for sid in all_of
                if str(sid) not in chosen
            )

    return problems
@@ -0,0 +1,243 @@
1
+ """Pluggable route policies: regex on prompt → force-include skill names.
2
+
3
+ Load order (the first configured source wins; a malformed env value yields empty rules, while an unreadable or malformed file is skipped):
4
+
5
+ 1. ``SKILLFORGE_ROUTE_POLICIES`` — JSON object inline (e.g. ``{\"rules\":[...]}``).
6
+ 2. ``SKILLFORGE_ROUTE_POLICIES_FILE`` — path to a JSON file.
7
+ 3. ``<project_root>/.skillforge/policies.json``
8
+ 4. ``<project_root>/skillforge-policies.json``
9
+
10
+ Rule shape::
11
+
12
+ {
13
+ "rules": [
14
+ {
15
+ "if_text_matches": "(?i)(auth|oauth|jwt|password)",
16
+ "include": ["security-review"]
17
+ }
18
+ ]
19
+ }
20
+
21
+ ``if_text_matches`` is passed to ``re.search`` (``re.DOTALL``). ``include`` is a skill
22
+ name or list of names. Forced skills are appended after router picks until
23
+ ``MAX_ACTIVE_SKILLS`` is reached.
24
+
25
+ Optional **project routing overlay** (same JSON object):
26
+
27
+ - ``exclude_skills`` / ``host_exclude`` / ``denylist`` — skill ids excluded from the embedding
28
+ shortlist (hard filter).
29
+ - ``routing_boosts`` / ``skill_boosts`` — object mapping skill id → numeric delta added to the
30
+ routing score after learned weights (clamped to ±2).
31
+ - ``project_notes`` / ``routing_notes`` / ``rag_notes`` — free text prepended to the internal
32
+ routing query when **project_root** is set (stack/context hints for embedding).
33
+
34
+ ``project_notes`` are **not** applied without ``project_root`` to avoid global prompt injection
35
+ from shared policy files.
36
+ """
37
+ from __future__ import annotations
38
+
39
+ import json
40
+ import os
41
+ import re
42
+ import sqlite3
43
+ from pathlib import Path
44
+ from typing import Any
45
+
46
+
47
def load_route_policies_config(project_root: str | None) -> dict[str, Any]:
    """Load the route-policy configuration and guarantee a ``rules`` list.

    Sources, in order:

    1. ``SKILLFORGE_ROUTE_POLICIES`` — inline JSON. When set it is the only
       source consulted; malformed JSON yields empty rules.
    2. ``SKILLFORGE_ROUTE_POLICIES_FILE`` — path to a JSON file.
    3. ``<project_root>/.skillforge/policies.json``
    4. ``<project_root>/skillforge-policies.json``

    For files, the first one that exists and parses wins; files that cannot be
    read, decoded as UTF-8, or parsed as JSON are skipped.

    Args:
        project_root: Project directory used for the two project-local paths;
            ``None``/empty disables them.

    Returns:
        The parsed policy object. Its ``rules`` entry is always a list: a
        missing or non-list ``rules`` is replaced by ``[]`` (other keys are
        kept), and a non-object JSON root yields ``{"rules": []}``.
    """

    def _with_rules_list(data: Any) -> dict[str, Any]:
        # Enforce the documented contract without mutating the parsed object.
        if not isinstance(data, dict):
            return {"rules": []}
        if isinstance(data.get("rules"), list):
            return data
        return {**data, "rules": []}

    raw_env = os.getenv("SKILLFORGE_ROUTE_POLICIES", "").strip()
    if raw_env:
        try:
            return _with_rules_list(json.loads(raw_env))
        except json.JSONDecodeError:
            return {"rules": []}

    paths: list[Path] = []
    path_env = os.getenv("SKILLFORGE_ROUTE_POLICIES_FILE", "").strip()
    if path_env:
        paths.append(Path(path_env).expanduser())
    if project_root:
        pr = Path(project_root).expanduser().resolve()
        paths.append(pr / ".skillforge" / "policies.json")
        paths.append(pr / "skillforge-policies.json")

    for p in paths:
        if not p.is_file():
            continue
        try:
            parsed = json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so
            # a non-UTF-8 file is skipped like any other unreadable file.
            continue
        return _with_rules_list(parsed)
    return {"rules": []}
74
+
75
+
76
def parse_routing_overlay(
    policies: dict[str, Any] | None,
    *,
    by_name: dict[str, Any] | None = None,
    audit_out: list[dict[str, Any]] | None = None,
) -> tuple[frozenset[str], dict[str, float], str]:
    """Parse the exclude list, per-skill score boosts, and project notes.

    Args:
        policies: Policy object. Read keys (first truthy alias wins):
            ``exclude_skills`` / ``host_exclude`` / ``denylist``;
            ``routing_boosts`` / ``skill_boosts``;
            ``project_notes`` / ``routing_notes`` / ``rag_notes``.
        by_name: Known skills keyed by name. When non-empty, names that are
            not keys are dropped and reported as ``unknown_skill``; when empty
            or ``None`` no name validation happens.
        audit_out: Optional list that receives one row per rejected entry
            (``kind`` is ``"exclude"`` or ``"boost"``).

    Returns:
        ``(excluded_names, boosts, notes)``. Boosts are clamped to ±2.0.
        Values that are not numbers — including NaN — are rejected with an
        ``invalid_value`` audit row. ``notes`` is the first non-blank notes
        string, stripped, or ``""``.
    """
    # Local import: this module does not import ``math`` at top level.
    import math

    policies = policies or {}
    by_name = by_name or {}
    boost_cap = 2.0

    def _report(kind: str, skill: str, effect: str) -> None:
        if audit_out is not None:
            audit_out.append({"kind": kind, "skill": skill, "effect": effect})

    raw_ex = (
        policies.get("exclude_skills")
        or policies.get("host_exclude")
        or policies.get("denylist")
        or []
    )
    if isinstance(raw_ex, str):
        raw_ex = [raw_ex]
    exclude: set[str] = set()
    if isinstance(raw_ex, list):
        for x in raw_ex:
            if not isinstance(x, str) or not x.strip():
                continue
            name = x.strip()
            if by_name and name not in by_name:
                _report("exclude", name, "unknown_skill")
                continue
            exclude.add(name)

    raw_boost = policies.get("routing_boosts") or policies.get("skill_boosts") or {}
    boosts: dict[str, float] = {}
    if isinstance(raw_boost, dict):
        for k, v in raw_boost.items():
            if not isinstance(k, str) or not k.strip():
                continue
            name = k.strip()
            if by_name and name not in by_name:
                _report("boost", name, "unknown_skill")
                continue
            try:
                b = float(v)
            except (TypeError, ValueError):
                _report("boost", name, "invalid_value")
                continue
            if math.isnan(b):
                # NaN compares false with everything, so the clamp below would
                # silently turn it into +boost_cap (the maximum boost).
                _report("boost", name, "invalid_value")
                continue
            boosts[name] = max(-boost_cap, min(boost_cap, b))

    notes = ""
    for key in ("project_notes", "routing_notes", "rag_notes"):
        raw = policies.get(key)
        if isinstance(raw, str) and raw.strip():
            notes = raw.strip()
            break

    return frozenset(exclude), boosts, notes
129
+
130
+
131
def merge_project_notes_into_route_query(
    route_query: str,
    notes: str,
    project_root: str | None,
    *,
    max_chars: int | None = None,
) -> str:
    """Prefix the routing query with project notes when ``project_root`` is set.

    Args:
        route_query: Query text to be prefixed.
        notes: Free-text project notes; blank notes leave the query untouched.
        project_root: Notes are applied only when this is non-blank.
        max_chars: Maximum length of the notes text. ``None`` reads
            ``SKILLFORGE_PROJECT_NOTES_MAX_CHARS`` (default 1200; a
            non-integer value falls back to 1200 instead of raising).

    Returns:
        ``route_query`` unchanged when there is nothing to add or the limit is
        zero or negative; otherwise the query prefixed with a
        ``Project routing notes:`` section. Notes longer than the limit are
        cut so that, including the trailing ``…``, they fit the limit.
    """
    default_limit = 1200
    notes = (notes or "").strip()
    if not notes or not (project_root or "").strip():
        return route_query

    limit = max_chars
    if limit is None:
        try:
            limit = int(os.getenv("SKILLFORGE_PROJECT_NOTES_MAX_CHARS", str(default_limit)))
        except ValueError:
            # A malformed env value should not break routing.
            limit = default_limit
    if limit <= 0:
        # No room for any notes: previously this still injected a header
        # containing only "…", which exceeded the limit and added noise.
        return route_query

    clipped = notes if len(notes) <= limit else notes[: limit - 1] + "…"
    return f"Project routing notes:\n{clipped}\n\n{route_query}"
149
+
150
+
151
def build_routing_overlay_payload(
    *,
    project_root: str,
    exclude_skills: frozenset[str],
    routing_boosts: dict[str, float],
    project_notes_applied: bool,
    project_notes_len: int,
    audit: list[dict[str, Any]],
) -> dict[str, Any] | None:
    """Summarise the applied routing overlay as a JSON-friendly dict.

    Returns ``None`` when there are no excludes, no boosts, no applied notes
    and no audit rows. Otherwise the payload carries the sorted exclude list,
    the boosts (keys sorted, values rounded to 4 decimals), the notes flags,
    a copy of the audit rows, and whether ``project_root`` was non-blank.
    """
    anything_configured = bool(
        exclude_skills or routing_boosts or project_notes_applied or audit
    )
    if not anything_configured:
        return None

    rounded_boosts: dict[str, float] = {}
    for skill in sorted(routing_boosts):
        rounded_boosts[skill] = round(float(routing_boosts[skill]), 4)

    payload: dict[str, Any] = {"schema": "routing_overlay/1"}
    payload["project_root_set"] = bool((project_root or "").strip())
    payload["exclude_skills"] = sorted(exclude_skills)
    payload["routing_boosts"] = rounded_boosts
    payload["project_notes_applied"] = project_notes_applied
    payload["project_notes_len"] = int(project_notes_len)
    payload["audit"] = list(audit)
    return payload
172
+
173
+
174
def merge_policy_includes(
    prompt: str,
    picked_names: list[str],
    policies: dict[str, Any],
    by_name: dict[str, Any],
    con: sqlite3.Connection,
    user_id: str,
    *,
    max_active: int,
) -> tuple[list[str], list[dict[str, Any]]]:
    """Append policy-forced skills to ``picked_names`` without duplicates.

    Each rule's pattern (``if_text_matches``, or ``pattern`` as a fallback key)
    is run against ``prompt`` with ``re.search`` and ``re.DOTALL``. For matching
    rules, every name in ``include`` (a string or list) is queued unless it is
    unknown to ``by_name``, disabled per ``get_skill_weight``, or already
    present. Queued names are then appended while the list is shorter than
    ``max_active``.

    Returns:
        ``(merged_pick_list, audit_rows)``. Audit ``effect`` values are
        ``invalid_regex``, ``unknown_skill``, ``disabled``, ``already_in_list``,
        ``added`` and ``skipped_max_active``. ``added`` is recorded when a name
        is queued, i.e. before the ``max_active`` cap is applied, and only the
        first name that hits the cap gets a ``skipped_max_active`` row.
    """
    # Imported lazily to avoid a circular import at module load time.
    from app.main import get_skill_weight

    rule_list = policies.get("rules") if isinstance(policies, dict) else None
    if not isinstance(rule_list, list):
        rule_list = []

    trail: list[dict[str, Any]] = []
    combined = list(picked_names)
    queued: list[str] = []

    for entry in rule_list:
        if not isinstance(entry, dict):
            continue
        regex = entry.get("if_text_matches") or entry.get("pattern") or ""
        if not (isinstance(regex, str) and regex.strip()):
            continue
        try:
            hit = re.search(regex, prompt, flags=re.DOTALL)
        except re.error:
            trail.append({"pattern": regex, "effect": "invalid_regex"})
            continue
        if hit is None:
            continue

        targets = entry.get("include")
        if isinstance(targets, str):
            targets = [targets]
        if not isinstance(targets, list):
            continue

        for candidate in targets:
            if not isinstance(candidate, str):
                continue
            skill = candidate.strip()
            if not skill:
                continue
            if skill not in by_name:
                outcome = "unknown_skill"
            else:
                _weight, is_disabled = get_skill_weight(con, skill, user_id=user_id)
                if is_disabled:
                    outcome = "disabled"
                elif skill in combined or skill in queued:
                    outcome = "already_in_list"
                else:
                    queued.append(skill)
                    outcome = "added"
            trail.append({"pattern": regex, "skill": skill, "effect": outcome})

    for skill in queued:
        if len(combined) >= max_active:
            # Stop at the first overflow; later queued names are dropped silently.
            trail.append({"skill": skill, "effect": "skipped_max_active", "max": max_active})
            break
        if skill not in combined:
            combined.append(skill)

    return combined, trail
@@ -0,0 +1,99 @@
1
+ """Calibration metrics for route_skills MCP _meta and route events (local, no extra network)."""
2
+ from __future__ import annotations
3
+
4
+ import math
5
+ from typing import Any
6
+
7
+
8
def coerce_route_float(x: Any, *, default: float = 0.0) -> float:
    """Coerce ``x`` to a finite float for routing telemetry.

    Returns ``default`` when ``x`` cannot be converted (wrong type, unparsable
    string, or an integer too large for a float) or when the converted value
    is NaN or infinite.
    """
    try:
        v = float(x)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an int beyond the float range, e.g. 10**400.
        return default
    return v if math.isfinite(v) else default
15
+
16
+
17
def policy_includes_added_count(audit: list[dict[str, Any]] | None) -> int:
    """Count the audit rows whose ``effect`` is ``"added"``.

    Non-dict rows are ignored; ``None`` or an empty audit counts as 0.
    """
    if not audit:
        return 0
    added_rows = [
        row for row in audit if isinstance(row, dict) and row.get("effect") == "added"
    ]
    return len(added_rows)
21
+
22
+
23
def top1_cosine_vs_routing_agreement(facets: list[dict[str, Any]]) -> bool | None:
    """Report whether the first facet is also the one with the highest cosine.

    ``facets[0]`` is taken as the top entry by routing score. Returns ``None``
    when fewer than two facets are given or either compared name is missing or
    empty; otherwise whether the two names are equal.
    """
    if len(facets) < 2:
        return None

    routed_first = facets[0].get("name")
    densest = max(
        facets,
        key=lambda facet: coerce_route_float(facet.get("cosine_similarity")),
    )
    densest_name = densest.get("name")

    if routed_first and densest_name:
        return routed_first == densest_name
    return None
32
+
33
+
34
def build_route_quality(
    *,
    facet_list: list[dict[str, Any]],
    router_mode: str,
    router_hybrid: str,
    picked_names: list[str],
    rerouted: bool,
    change: float,
    policy_rules_loaded: int,
    policy_audit: list[dict[str, Any]] | None,
    host_picked: bool,
    host_shortlist_only: bool = False,
    haiku_rerank_applied: bool = False,
    pick_path: str,
) -> dict[str, Any]:
    """Assemble the ``route_quality/1`` payload from one routing decision.

    The result groups signals into ``shortlist`` (size, top/second cosine,
    their margin, top routing score, hybrid mode and top-1 agreement),
    ``router`` (mode and pick-path flags), ``session`` (reroute flag and change
    as a fraction and a percentage) and ``policy`` (rules loaded, includes
    added, audit size), plus ``picked_count``. Cosine and score fields stay
    ``None`` when the shortlist is too short to provide them; the agreement
    flag is only computed when ``router_hybrid`` is not ``""``, ``"off"`` or
    ``None``.
    """
    top_cos: float | None = None
    second_cos: float | None = None
    margin: float | None = None
    top_routing_score: float | None = None

    if facet_list:
        leader = facet_list[0]
        top_cos = round(coerce_route_float(leader.get("cosine_similarity")), 6)
        top_routing_score = round(coerce_route_float(leader.get("routing_score")), 6)
        if len(facet_list) > 1:
            runner_up = facet_list[1]
            second_cos = round(coerce_route_float(runner_up.get("cosine_similarity")), 6)
            margin = round(float(top_cos - second_cos), 6)

    if router_hybrid in ("", "off", None):
        agree = None
    else:
        agree = top1_cosine_vs_routing_agreement(facet_list)

    # Tolerate a non-integer rules count and never report a negative one.
    try:
        rules_loaded = max(0, int(policy_rules_loaded))
    except (TypeError, ValueError):
        rules_loaded = 0

    change_fraction = coerce_route_float(change)

    shortlist = {
        "size": len(facet_list),
        "top_cosine_similarity": top_cos,
        "second_cosine_similarity": second_cos,
        "cosine_margin": margin,
        "top_routing_score": top_routing_score,
        "hybrid_mode": router_hybrid or "off",
        "top1_dense_and_fused_agree": agree,
    }
    router = {
        "mode": router_mode,
        "pick_path": pick_path,
        "host_picked": host_picked,
        "host_shortlist_only": host_shortlist_only,
        "haiku_rerank_applied": haiku_rerank_applied,
    }
    session = {
        "rerouted": rerouted,
        "change_jaccard": round(change_fraction, 4),
        "change_pct": round(change_fraction * 100.0, 1),
    }
    policy = {
        "rules_loaded": rules_loaded,
        "includes_added": policy_includes_added_count(policy_audit),
        "audit_size": len(policy_audit or []),
    }
    return {
        "schema": "route_quality/1",
        "shortlist": shortlist,
        "router": router,
        "session": session,
        "policy": policy,
        "picked_count": len(picked_names),
    }
@@ -0,0 +1,155 @@
1
+ """Conversation-aware routing text, skill routing cards, and sparse retrieval signals."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import re
6
+ from typing import Any, Protocol
7
+
8
+ import numpy as np
9
+
10
+ from app.route_quality import coerce_route_float
11
+
12
+ _TOKEN_RE = re.compile(r"[a-z0-9][a-z0-9_\-./]{2,}", re.I)
13
+
14
+
15
class _SkillCard(Protocol):
    """Structural type for objects that can be rendered as a routing card.

    Any object exposing these four string attributes satisfies the protocol;
    no inheritance is required.
    """

    # Display name of the skill.
    title: str
    # Prose description of the skill.
    description: str
    # Text describing when the skill applies.
    triggers: str
    # Text describing when the skill should not apply.
    anti_triggers: str
20
+
21
+
22
def build_route_query_text(
    prompt: str,
    conversation: list[Any] | None,
    *,
    max_turns: int | None = None,
    max_chars_per_msg: int | None = None,
) -> str:
    """Combine recent conversation turns with the current prompt for routing.

    Args:
        prompt: Current user message; it is stripped before use.
        conversation: Prior messages. Only dict entries with non-blank
            ``content`` are used; a missing ``role`` is shown as ``user``.
        max_turns: How many trailing messages to consider. ``None`` reads
            ``SKILLFORGE_ROUTER_CONV_MAX_TURNS`` (default ``0``).
        max_chars_per_msg: Per-message cut-off, after which ``…`` is appended.
            ``None`` reads ``SKILLFORGE_ROUTER_CONV_MSG_CHARS`` (default ``320``).

    Returns:
        The stripped prompt alone when the turn limit is zero or negative or no
        usable message exists; otherwise a ``Conversation context:`` section
        followed by the prompt under ``Current user message:``.
    """
    history = conversation or []
    turn_limit = (
        int(os.getenv("SKILLFORGE_ROUTER_CONV_MAX_TURNS", "0"))
        if max_turns is None
        else max_turns
    )
    char_limit = (
        int(os.getenv("SKILLFORGE_ROUTER_CONV_MSG_CHARS", "320"))
        if max_chars_per_msg is None
        else max_chars_per_msg
    )
    prompt = (prompt or "").strip()
    if not history or turn_limit <= 0:
        return prompt

    rendered: list[str] = []
    for message in history[-turn_limit:]:
        if not isinstance(message, dict):
            continue
        text = str(message.get("content") or "").strip()
        if not text:
            continue
        speaker = str(message.get("role") or "user")
        if len(text) > char_limit:
            text = text[:char_limit] + "…"
        rendered.append(f"{speaker}: {text}")

    if rendered:
        context = "\n".join(rendered)
        return f"Conversation context:\n{context}\n\nCurrent user message:\n{prompt}"
    return prompt
58
+
59
+
60
def skill_routing_card(s: _SkillCard) -> str:
    """Render a skill as newline-separated routing-card text.

    The first line is ``"<title>: <description>"``. ``Triggers:`` and
    ``Anti-triggers:`` lines follow only when those attributes are present
    and non-blank. All values are stripped; ``None`` is treated as empty.
    """
    heading = (s.title or "").strip()
    summary = (s.description or "").strip()
    card_lines = [f"{heading}: {summary}"]

    # triggers / anti_triggers may be absent on the object entirely.
    positive = (getattr(s, "triggers", None) or "").strip()
    negative = (getattr(s, "anti_triggers", None) or "").strip()
    if positive:
        card_lines.append(f"Triggers: {positive}")
    if negative:
        card_lines.append(f"Anti-triggers: {negative}")
    return "\n".join(card_lines)
72
+
73
+
74
def tokenize_skills_query(text: str) -> list[str]:
    """Return the lower-cased ``_TOKEN_RE`` matches found in ``text``.

    ``None`` or empty input yields an empty list.
    """
    source = text or ""
    tokens: list[str] = []
    for raw in _TOKEN_RE.findall(source):
        tokens.append(raw.lower())
    return tokens
76
+
77
+
78
def normalize_minmax(arr: np.ndarray) -> np.ndarray:
    """Min-max scale ``arr`` into ``[0, 1]`` as a flat float64 array.

    The input is flattened first. An empty input comes back empty, and an
    input whose maximum does not exceed its minimum yields all zeros.
    """
    flat = np.asarray(arr, dtype=np.float64).reshape(-1)
    if flat.size == 0:
        return flat

    low = float(np.min(flat))
    high = float(np.max(flat))
    if high <= low:
        # Constant input: there is no spread to scale by.
        return np.zeros_like(flat)
    spread = high - low
    return (flat - low) / spread
86
+
87
+
88
def keyword_overlap_scores(route_query: str, skill_cards: list[str]) -> np.ndarray:
    """Count, per skill card, the distinct tokens it shares with the query.

    Returns a float64 array with one unnormalised count per card, in card
    order. When the query produces no tokens every count is zero.
    """
    query_tokens = set(tokenize_skills_query(route_query))
    if not query_tokens:
        return np.zeros(len(skill_cards), dtype=np.float64)

    shared_counts = [
        float(len(query_tokens & set(tokenize_skills_query(card))))
        for card in skill_cards
    ]
    return np.array(shared_counts, dtype=np.float64)
98
+
99
+
100
def host_pick_shortlist_lines(
    *,
    prompt: str,
    route_query: str,
    facet_rows: list[dict[str, Any]],
    max_candidates: int | None = None,
    line_chars: int | None = None,
) -> tuple[str, list[dict[str, Any]]]:
    """Render the host-pick shortlist as markdown plus structured rows.

    Args:
        prompt: User task; shown on one line, cut to 157 characters plus ``…``
            when longer than 160.
        route_query: Retrieval query; appended (cut at 397 characters plus
            ``…`` when longer than 400) only when non-blank.
        facet_rows: Ranked candidate dicts. Only the first ``max_candidates``
            are listed.
        max_candidates: List length. ``None`` reads ``SKILLFORGE_HOST_PICK_MAX``
            (default 12) with a floor of 3.
        line_chars: Width of each candidate summary. ``None`` reads
            ``SKILLFORGE_HOST_PICK_LINE_CHARS`` (default 120).

    Returns:
        ``(markdown, rows)``: the instruction text with a fenced, numbered
        candidate list, and one dict per listed candidate.
    """
    cap = max_candidates
    if cap is None:
        cap = max(3, int(os.getenv("SKILLFORGE_HOST_PICK_MAX", "12")))
    if line_chars is None:
        width = int(os.getenv("SKILLFORGE_HOST_PICK_LINE_CHARS", "120"))
    else:
        width = line_chars

    task = (prompt or "").strip().replace("\n", " ")
    if len(task) > 160:
        task = task[:157] + "…"

    structured: list[dict[str, Any]] = []
    out_lines: list[str] = []
    out_lines.append("# Host pick — choose skill names only from this list")
    out_lines.append("")
    out_lines.append(f"Task: {task}")
    out_lines.append("")
    out_lines.append("Reply with JSON only:")
    out_lines.append('{"picked": ["exact-skill-id", ...], "reasoning": "one line"}')
    out_lines.append(
        f"Use 0–{cap} names from the numbered lines only (empty picked is allowed). Copy names exactly."
    )
    out_lines.append("")
    out_lines.append("```")

    rank = 0
    for row in facet_rows[:cap]:
        rank += 1
        ident = str(row.get("name") or "")
        similarity = coerce_route_float(row.get("cosine_similarity"))
        heading = row.get("title") or ident
        preview = (row.get("description_preview") or "")[:width]
        summary = f"{heading}: {preview}".replace("\n", " ").strip()
        if len(summary) > width:
            summary = summary[: width - 1] + "…"
        out_lines.append(f"{rank:>2}. {ident} | cos={similarity:.3f} | {summary}")
        structured.append(
            {
                "id": ident,
                "rank": rank,
                "name": ident,
                "cosine_similarity": round(similarity, 6),
                "routing_score": row.get("routing_score"),
                "sparse_signal": row.get("sparse_signal"),
                "learned_weight": row.get("learned_weight"),
                "router_hybrid": row.get("router_hybrid"),
                "source": row.get("source"),
                "one_liner": summary,
                "rationale_one_liner": summary,
            }
        )
    out_lines.append("```")

    retrieval = (route_query or "").strip()
    if len(retrieval) > 400:
        retrieval = retrieval[:397] + "…"
    if retrieval:
        out_lines.append("")
        out_lines.append(f"_Retrieval query:_ {retrieval}")

    return "\n".join(out_lines), structured