@heytherevibin/skillforge 0.2.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,31 +4,42 @@ skillforge — skill orchestrator co-tool for Claude (MCP-first).
4
4
  Primary surface: MCP stdio — route_skills and related tools for hosts
5
5
  (Claude Desktop, Cursor, Claude Code).
6
6
 
7
- Optional: headless HTTP API (POST /chat, /events, …) for integrations.
8
7
  Live usage: `skillforge events --watch` (terminal).
9
8
  """
10
9
  from __future__ import annotations
11
10
 
12
- import asyncio
13
11
  import json
14
12
  import os
15
13
  import sqlite3
16
14
  import sys
17
15
  import time
18
16
  import uuid
19
- from contextlib import asynccontextmanager
20
17
  from dataclasses import dataclass
21
18
  from pathlib import Path
22
19
  from typing import Any, Optional
23
20
 
24
21
  import numpy as np
25
22
  from anthropic import AsyncAnthropic
26
- from fastapi import FastAPI, Request
27
- from fastapi.responses import StreamingResponse
28
- from pydantic import BaseModel
29
23
  from sentence_transformers import SentenceTransformer
30
24
 
31
25
  from app.db_paths import global_db_path, resolve_orchestrator_db
26
+ from app.chunking import SkillChunk, chunk_max_chars, chunk_overlap_chars, chunk_skill_body
27
+ from app.context_fusion import mmr_select
28
+ from app.project_index import (
29
+ ensure_project_index_schema,
30
+ load_project_fusion_pool,
31
+ project_rag_max_chars,
32
+ retrieve_project_context_items,
33
+ )
34
+ from app.redaction import redaction_enabled, redact_secret_patterns, sanitize_context_items
35
+ from app.route_policies import load_route_policies_config, merge_policy_includes
36
+ from app.routing_signals import (
37
+ build_route_query_text,
38
+ keyword_overlap_scores,
39
+ normalize_minmax,
40
+ skill_routing_card,
41
+ tokenize_skills_query,
42
+ )
32
43
 
33
44
  # ---------- Config (env-driven so the Node wrapper controls paths) ----------
34
45
  BUNDLED_SKILLS = Path(os.getenv("SKILLFORGE_BUNDLED_SKILLS", "./skills"))
@@ -40,12 +51,43 @@ DB_PATH = global_db_path()
40
51
 
41
52
  EMBED_MODEL = os.getenv("SKILLFORGE_EMBED_MODEL", "all-MiniLM-L6-v2")
42
53
  ROUTER_MODEL = os.getenv("SKILLFORGE_ROUTER_MODEL", "claude-haiku-4-5-20251001")
43
- ANSWER_MODEL = os.getenv("SKILLFORGE_ANSWER_MODEL", "claude-opus-4-7")
44
54
  TOP_K_CANDIDATES = int(os.getenv("SKILLFORGE_TOP_K", "15"))
45
55
  MAX_ACTIVE_SKILLS = int(os.getenv("SKILLFORGE_MAX_ACTIVE", "7"))
46
56
  REROUTE_THRESHOLD = float(os.getenv("SKILLFORGE_REROUTE_THRESHOLD", "0.4"))
47
57
  # "" | "full" | "embedding" — embedding skips Haiku and takes top skills from the shortlist only.
48
58
  SKILLFORGE_ROUTER_MODE = os.getenv("SKILLFORGE_ROUTER_MODE", "").strip().lower()
59
+ # chunks: RAG-style line-bounded chunks from picked skills. full_body: inject entire SKILL.md per pick (legacy).
60
+ SKILLFORGE_CONTEXT_MODE = os.getenv("SKILLFORGE_CONTEXT_MODE", "chunks").strip().lower()
61
+ ROUTE_MAX_CONTEXT_CHARS = int(os.getenv("SKILLFORGE_ROUTE_MAX_CHARS", "60000"))
62
+ CONTEXT_FUSION = os.getenv("SKILLFORGE_CONTEXT_FUSION", "1").strip().lower() not in ("0", "false", "no", "")
63
+ CONTEXT_MMR_LAMBDA = max(0.0, min(1.0, float(os.getenv("SKILLFORGE_CONTEXT_MMR_LAMBDA", "0.7"))))
64
+ FUSION_POOL_SKILL = max(8, int(os.getenv("SKILLFORGE_FUSION_POOL_SKILL", "96")))
65
+ FUSION_POOL_PROJECT = max(8, int(os.getenv("SKILLFORGE_FUSION_POOL_PROJECT", "96")))
66
+ FUSION_FULL_BODY_PREVIEW_CHARS = max(400, int(os.getenv("SKILLFORGE_FUSION_FULL_BODY_PREVIEW_CHARS", "4000")))
67
+ CONTEXT_OVERHEAD_SKILL = 48
68
+ CONTEXT_OVERHEAD_FILE = 56
69
+
70
+ ROUTER_HYBRID_MODE = os.getenv("SKILLFORGE_ROUTER_HYBRID", "off").strip().lower()
71
+ ROUTER_HYBRID_ALPHA = max(0.0, min(1.0, float(os.getenv("SKILLFORGE_ROUTER_HYBRID_ALPHA", "0.72"))))
72
+ ROUTER_PROMPT_HISTORY_MSGS = max(1, int(os.getenv("SKILLFORGE_ROUTER_PROMPT_HISTORY_MSGS", "8")))
73
+ ROUTER_PROMPT_HISTORY_CHARS = max(80, int(os.getenv("SKILLFORGE_ROUTER_PROMPT_HISTORY_CHARS", "360")))
74
+ ROUTER_CATALOG_PREVIEW_CHARS = max(80, int(os.getenv("SKILLFORGE_ROUTER_CATALOG_PREVIEW_CHARS", "280")))
75
+ HAIKU_RERANK_MAX = max(3, int(os.getenv("SKILLFORGE_HAIKU_RERANK_MAX", str(TOP_K_CANDIDATES))))
76
+
77
+
78
+ def _hybrid_mode_active(mode: str) -> bool:
79
+ return mode not in ("", "off", "0", "false", "no")
80
+
81
+
82
+ def _env_truthy(name: str, default: str = "0") -> bool:
83
+ return os.getenv(name, default).strip().lower() not in ("0", "false", "no", "")
84
+
85
+
86
+ def _context_budget_unified() -> int:
87
+ raw = os.getenv("SKILLFORGE_CONTEXT_BUDGET_CHARS", "").strip()
88
+ if raw:
89
+ return max(4000, int(raw))
90
+ return ROUTE_MAX_CONTEXT_CHARS + int(project_rag_max_chars())
49
91
 
50
92
 
51
93
  def build_router_and_skills(
@@ -103,6 +145,8 @@ class Skill:
103
145
  source: str # "bundled" | "user"
104
146
  disabled: bool = False
105
147
  embedding: np.ndarray | None = None
148
+ triggers: str = ""
149
+ anti_triggers: str = ""
106
150
 
107
151
 
108
152
  def parse_skill_md(path: Path, source: str) -> Skill | None:
@@ -118,6 +162,8 @@ def parse_skill_md(path: Path, source: str) -> Skill | None:
118
162
  name = path.parent.name
119
163
  title = name.replace("-", " ").title()
120
164
  description = ""
165
+ triggers = ""
166
+ anti_triggers = ""
121
167
  body = text
122
168
  if text.startswith("---"):
123
169
  end = text.find("---", 3)
@@ -147,6 +193,10 @@ def parse_skill_md(path: Path, source: str) -> Skill | None:
147
193
  title = v
148
194
  elif k == "description":
149
195
  description = v
196
+ elif k in ("triggers", "trigger"):
197
+ triggers = v
198
+ elif k in ("anti_triggers", "anti-triggers"):
199
+ anti_triggers = v
150
200
  i += 1
151
201
  if not description:
152
202
  for chunk in body.split("\n\n"):
@@ -154,7 +204,15 @@ def parse_skill_md(path: Path, source: str) -> Skill | None:
154
204
  if chunk and not chunk.startswith("#"):
155
205
  description = chunk[:500]
156
206
  break
157
- return Skill(name=name, title=title, description=description, body=body, source=source)
207
+ return Skill(
208
+ name=name,
209
+ title=title,
210
+ description=description,
211
+ body=body,
212
+ source=source,
213
+ triggers=triggers,
214
+ anti_triggers=anti_triggers,
215
+ )
158
216
 
159
217
 
160
218
  def load_all_skills() -> list[Skill]:
@@ -235,6 +293,7 @@ def init_db(db_file: Path | None = None):
235
293
  con.execute(f"ALTER TABLE {table} ADD COLUMN user_id TEXT DEFAULT ''")
236
294
  except sqlite3.OperationalError:
237
295
  pass # already exists
296
+ ensure_project_index_schema(con)
238
297
  con.commit()
239
298
  return con
240
299
 
@@ -299,21 +358,101 @@ class Router:
299
358
  self.skills = skills
300
359
  self.embed_model = embed_model
301
360
  self.anthropic = anthropic
302
- texts = [f"{s.title}: {s.description}" for s in skills]
303
- print(f"[skillforge] Embedding {len(skills)} skills...")
361
+ self.context_mode = SKILLFORGE_CONTEXT_MODE if SKILLFORGE_CONTEXT_MODE in (
362
+ "chunks",
363
+ "full_body",
364
+ ) else "chunks"
365
+ self._by_name: dict[str, Skill] = {s.name: s for s in skills}
366
+ self._hybrid_mode = ROUTER_HYBRID_MODE
367
+ self._hybrid_alpha = ROUTER_HYBRID_ALPHA
368
+ self._routing_cards = [skill_routing_card(s) for s in skills]
369
+ self._bm25 = None
370
+ if self._hybrid_mode == "bm25" and skills:
371
+ try:
372
+ from rank_bm25 import BM25Okapi
373
+
374
+ toks = [tokenize_skills_query(c) for c in self._routing_cards]
375
+ if any(toks):
376
+ self._bm25 = BM25Okapi(toks)
377
+ except ImportError:
378
+ print(
379
+ "[skillforge] SKILLFORGE_ROUTER_HYBRID=bm25 but rank-bm25 is not installed; "
380
+ "using keyword overlap for sparse signal.",
381
+ file=sys.stderr,
382
+ )
383
+
384
+ texts = self._routing_cards
385
+ print(f"[skillforge] Embedding {len(skills)} skills (summary cards)...", file=sys.stderr)
304
386
  embeddings = embed_model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
305
387
  for s, e in zip(skills, embeddings):
306
388
  s.embedding = e / np.linalg.norm(e)
307
389
  self.matrix = np.stack([s.embedding for s in skills]) if skills else np.zeros((0, 0))
308
- print(f"[skillforge] Ready. {len(skills)} skills, matrix shape: {self.matrix.shape}")
309
390
 
310
- def shortlist(self, prompt, con, k=TOP_K_CANDIDATES, user_id=""):
391
+ # Chunk index for CONTEXT_MODE=chunks
392
+ self._chunk_meta: list[tuple[str, SkillChunk]] = []
393
+ edim = int(embed_model.get_sentence_embedding_dimension())
394
+ self._chunk_embeddings: np.ndarray = np.zeros((0, edim))
395
+ if self.context_mode == "chunks" and skills:
396
+ flat_texts: list[str] = []
397
+ self._chunk_meta = []
398
+ mc = chunk_max_chars()
399
+ oc = chunk_overlap_chars()
400
+ for s in skills:
401
+ for ch in chunk_skill_body(s.body, max_chars=mc, overlap=oc):
402
+ # Embed with in-chunk disambiguation
403
+ flat_texts.append(f"{s.title} — {s.name}\n{ch.text}")
404
+ self._chunk_meta.append((s.name, ch))
405
+ if flat_texts:
406
+ print(f"[skillforge] Embedding {len(flat_texts)} skill chunks...", file=sys.stderr)
407
+ ce = embed_model.encode(
408
+ flat_texts, show_progress_bar=False, convert_to_numpy=True
409
+ )
410
+ ce = ce / np.linalg.norm(ce, axis=1, keepdims=True)
411
+ self._chunk_embeddings = ce
412
+ print(
413
+ f"[skillforge] Ready. {len(skills)} skills; chunk matrix {self._chunk_embeddings.shape}; "
414
+ f"context_mode={self.context_mode}; router_hybrid={self._hybrid_mode}",
415
+ file=sys.stderr,
416
+ )
417
+ else:
418
+ print(
419
+ f"[skillforge] Ready. {len(skills)} skills, matrix shape: {self.matrix.shape}; "
420
+ f"context_mode={self.context_mode}; router_hybrid={self._hybrid_mode}",
421
+ file=sys.stderr,
422
+ )
423
+
424
def _sparse_scores(self, route_query: str) -> np.ndarray:
    """Return one sparse (lexical) relevance score per skill for *route_query*.

    * hybrid mode disabled: a float64 zero vector, one entry per skill;
    * mode ``"bm25"`` with a BM25 index built: BM25 scores as float64, or
      zeros when the query tokenizes to nothing;
    * every other active mode (including ``"bm25"`` without an index):
      ``keyword_overlap_scores`` over the routing cards.
    """
    mode = self._hybrid_mode
    if not _hybrid_mode_active(mode):
        return np.zeros(len(self.skills), dtype=np.float64)
    if mode == "bm25" and self._bm25 is not None:
        query_tokens = tokenize_skills_query(route_query)
        if query_tokens:
            return np.asarray(self._bm25.get_scores(query_tokens), dtype=np.float64)
        return np.zeros(len(self.skills), dtype=np.float64)
    # "keyword", bm25-without-index, and unknown active modes share this path.
    return keyword_overlap_scores(route_query, self._routing_cards)
437
+
438
def _base_routing_scores(self, route_query: str, q: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Return ``(dense, ranking)`` score vectors for the query embedding *q*.

    ``dense`` is ``self.matrix @ q`` flattened. With hybrid routing disabled
    the same array is returned for both slots. Otherwise ``ranking`` is
    ``alpha * normalize_minmax(dense) + (1 - alpha) * normalize_minmax(sparse)``
    with ``alpha = self._hybrid_alpha``.
    """
    dense = (self.matrix @ q).flatten()
    if _hybrid_mode_active(self._hybrid_mode):
        sparse = self._sparse_scores(route_query)
        alpha = self._hybrid_alpha
        dense_scaled = normalize_minmax(dense)
        sparse_scaled = normalize_minmax(sparse)
        return dense, alpha * dense_scaled + (1.0 - alpha) * sparse_scaled
    return dense, dense
448
+
449
+ def shortlist(self, route_query, con, k=TOP_K_CANDIDATES, user_id=""):
311
450
  if len(self.skills) == 0:
312
451
  return []
313
- q = self.embed_model.encode(prompt, convert_to_numpy=True)
452
+ q = self.embed_model.encode(route_query, convert_to_numpy=True)
314
453
  q = q / np.linalg.norm(q)
315
- sims = self.matrix @ q
316
- biased = sims.copy()
454
+ sims, rank_scores = self._base_routing_scores(route_query, q)
455
+ biased = rank_scores.copy()
317
456
  for i, s in enumerate(self.skills):
318
457
  w, disabled = get_skill_weight(con, s.name, user_id=user_id)
319
458
  if disabled:
@@ -323,6 +462,294 @@ class Router:
323
462
  top_idx = np.argsort(-biased)[:k]
324
463
  return [(self.skills[i], float(sims[i])) for i in top_idx if biased[i] > -100]
325
464
 
465
def shortlist_with_facets(
    self,
    route_query: str,
    con: sqlite3.Connection,
    *,
    k: int | None = None,
    user_id: str = "",
) -> list[dict[str, Any]]:
    """Embedding shortlist with cosine sim, learned weight, and routing score (no LLM).

    Args:
        route_query: Text that is embedded and scored against every skill.
        con: Connection handed to ``get_skill_weight`` for per-skill weights.
        k: Maximum number of rows to return; ``TOP_K_CANDIDATES`` when None.
        user_id: Passed through to ``get_skill_weight``.

    Returns:
        Dicts ordered by descending routing score (base ranking score plus
        learned weight). Disabled skills are excluded. Each dict carries
        ``name``, ``title``, ``description_preview``, ``cosine_similarity``,
        ``sparse_signal``, ``learned_weight``, ``routing_score``, ``source``
        and ``router_hybrid``.
    """
    limit = k if k is not None else TOP_K_CANDIDATES
    if len(self.skills) == 0:
        return []
    q = self.embed_model.encode(route_query, convert_to_numpy=True)
    q = q / np.linalg.norm(q)
    sims, rank_scores = self._base_routing_scores(route_query, q)
    if _hybrid_mode_active(self._hybrid_mode):
        sparse_full = self._sparse_scores(route_query)
    else:
        sparse_full = np.zeros(len(self.skills), dtype=np.float64)

    biased = rank_scores.copy()
    # Keep each weight from this single pass so the output loop below does
    # not query the database a second time per selected skill, and so the
    # reported learned_weight is the exact value folded into routing_score.
    weights: list[Any] = []
    for i, s in enumerate(self.skills):
        w, disabled = get_skill_weight(con, s.name, user_id=user_id)
        weights.append(w)
        if disabled:
            biased[i] = -999.0  # sentinel: pushed to the bottom and filtered out below
        else:
            biased[i] += w

    top_idx = np.argsort(-biased)[:limit]
    out: list[dict[str, Any]] = []
    for i in top_idx:
        if biased[i] <= -100:
            continue  # disabled skill
        s = self.skills[i]
        out.append({
            "name": s.name,
            "title": s.title,
            "description_preview": (s.description or "")[:280],
            "cosine_similarity": round(float(sims[i]), 6),
            "sparse_signal": round(float(sparse_full[i]), 6),
            "learned_weight": round(float(weights[int(i)]), 4),
            "routing_score": round(float(biased[i]), 6),
            "source": s.source,
            "router_hybrid": self._hybrid_mode,
        })
    return out
511
+
512
def build_context_items(
    self,
    prompt: str,
    skill_names: list[str],
    max_total_chars: int | None = None,
) -> list[dict[str, Any]]:
    """Return ordered context dicts: skill, path, line_start, line_end, text, score.

    In ``full_body`` mode every known skill in *skill_names* contributes its
    whole body with score 1.0 and no line range; the character cap is not
    applied. In chunk mode the chunks of the named skills are ranked by dot
    product with the normalized prompt embedding and taken greedily, best
    first. A chunk whose length plus ``CONTEXT_OVERHEAD_SKILL`` would exceed
    the cap is skipped while later, smaller chunks may still be taken.
    """
    cap = max_total_chars if max_total_chars is not None else ROUTE_MAX_CONTEXT_CHARS

    if self.context_mode == "full_body":
        resolved = ((name, self._by_name.get(name)) for name in skill_names)
        return [
            {
                "skill": name,
                "path": None,
                "line_start": None,
                "line_end": None,
                "text": skill.body,
                "score": 1.0,
            }
            for name, skill in resolved
            if skill
        ]

    if not skill_names or self._chunk_embeddings.shape[0] == 0:
        return []
    wanted = set(skill_names)
    indices = [i for i, (owner, _) in enumerate(self._chunk_meta) if owner in wanted]
    if not indices:
        return []

    qv = self.embed_model.encode(prompt, convert_to_numpy=True)
    qv = qv / np.linalg.norm(qv)
    scores = (self._chunk_embeddings[indices] @ qv).flatten()

    selected: list[dict[str, Any]] = []
    used = 0
    for ranked in np.argsort(-scores):
        pos = int(ranked)
        owner, chunk = self._chunk_meta[indices[pos]]
        cost = len(chunk.text) + CONTEXT_OVERHEAD_SKILL
        if used + cost <= cap:
            selected.append({
                "skill": owner,
                "path": None,
                "line_start": chunk.line_start,
                "line_end": chunk.line_end,
                "text": chunk.text,
                "score": float(scores[pos]),
            })
            used += cost
    return selected
565
+
566
def build_fusion_skill_pool(
    self,
    prompt: str,
    skill_names: list[str],
    pool_limit: int,
) -> tuple[list[dict[str, Any]], np.ndarray, np.ndarray]:
    """Candidate skill chunks (or one row per skill in full_body) with embeddings for MMR.

    Args:
        prompt: Text embedded as the relevance query.
        skill_names: Skills whose content may enter the pool.
        pool_limit: Maximum number of candidates to return. Values <= 0 give
            an empty pool (there is nothing to stack, and a negative slice
            bound would silently drop candidates from the end).

    Returns:
        ``(items, embeddings, relevance)`` where ``items[i]`` is a context
        dict tagged ``"source": "skill"``, ``embeddings[i]`` is its
        unit-normalized vector and ``relevance[i]`` its dot product with the
        normalized prompt embedding. Rows are ordered by descending
        relevance. An empty pool is ``([], zeros((0, dim)), empty float32)``.
    """
    edim = int(self.embed_model.get_sentence_embedding_dimension())
    if not skill_names or pool_limit <= 0:
        return [], np.zeros((0, edim)), np.array([], dtype=np.float32)

    qv = self.embed_model.encode(prompt, convert_to_numpy=True)
    qv = np.asarray(qv, dtype=np.float32).reshape(-1)
    qv = qv / max(float(np.linalg.norm(qv)), 1e-12)  # guard against a zero vector

    # full_body mode, and chunk mode with no chunk index, both use one row per
    # skill built from a body preview; the helper implements exactly that.
    if self.context_mode == "full_body" or self._chunk_embeddings.shape[0] == 0:
        return self._fusion_skill_pool_fallback_bodies(skill_names, qv, pool_limit)

    allowed = set(skill_names)
    indices = [i for i, (sn, _) in enumerate(self._chunk_meta) if sn in allowed]
    if not indices:
        return self._fusion_skill_pool_fallback_bodies(skill_names, qv, pool_limit)

    sub = self._chunk_embeddings[indices]
    scores = (sub @ qv).flatten()
    order = np.argsort(-scores)[: min(pool_limit, len(indices))]

    items: list[dict[str, Any]] = []
    em_rows: list[np.ndarray] = []
    rel_out: list[float] = []
    for o in order:
        pos = int(o)
        sn, ch = self._chunk_meta[indices[pos]]
        items.append({
            "skill": sn,
            "path": None,
            "line_start": ch.line_start,
            "line_end": ch.line_end,
            "text": ch.text,
            "score": float(scores[pos]),
            "source": "skill",
        })
        em_rows.append(sub[pos])
        rel_out.append(float(scores[pos]))
    return items, np.stack(em_rows), np.asarray(rel_out, dtype=np.float32)
642
+
643
+ def _fusion_skill_pool_fallback_bodies(
644
+ self,
645
+ skill_names: list[str],
646
+ qv: np.ndarray,
647
+ pool_limit: int,
648
+ ) -> tuple[list[dict[str, Any]], np.ndarray, np.ndarray]:
649
+ ordered = [n for n in skill_names if n in self._by_name]
650
+ edim = int(self.embed_model.get_sentence_embedding_dimension())
651
+ if not ordered:
652
+ return [], np.zeros((0, edim)), np.array([], dtype=np.float32)
653
+ texts = [
654
+ f"{self._by_name[n].title} — {n}\n{(self._by_name[n].body or '')[:FUSION_FULL_BODY_PREVIEW_CHARS]}"
655
+ for n in ordered
656
+ ]
657
+ em = self.embed_model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
658
+ em = np.asarray(em, dtype=np.float32)
659
+ em = em / np.maximum(np.linalg.norm(em, axis=1, keepdims=True), 1e-12)
660
+ rel = (em @ qv).flatten()
661
+ order = np.argsort(-rel)[: min(pool_limit, em.shape[0])]
662
+ items = []
663
+ em_rows = []
664
+ rel_out = []
665
+ for o in order:
666
+ i = int(o)
667
+ n = ordered[i]
668
+ s = self._by_name[n]
669
+ items.append({
670
+ "skill": n,
671
+ "path": None,
672
+ "line_start": None,
673
+ "line_end": None,
674
+ "text": s.body,
675
+ "score": float(rel[i]),
676
+ "source": "skill",
677
+ })
678
+ em_rows.append(em[i])
679
+ rel_out.append(float(rel[i]))
680
+ return items, np.stack(em_rows), np.asarray(rel_out, dtype=np.float32)
681
+
682
async def rerank_candidates_haiku(
    self,
    route_query: str,
    conversation: list | None,
    candidates: list[tuple[Skill, float]],
) -> list[tuple[Skill, float]]:
    """Optionally reorder the head of the shortlist with an LLM call.

    The call is made only when there are candidates, an Anthropic client is
    configured, and ``SKILLFORGE_HAIKU_RERANK`` is truthy; otherwise
    *candidates* is returned unchanged. At most ``HAIKU_RERANK_MAX`` leading
    candidates are sent; the rest keep their position after the reordered
    head. Names the model omits are appended in their original order, and
    unknown or repeated names are ignored. This is best-effort: on any
    failure the original list is returned and a note is written to stderr.

    Args:
        route_query: Routing text shown to the model.
        conversation: Recent messages; only dict entries with non-empty
            ``content`` are quoted, truncated per message.
        candidates: ``(skill, score)`` pairs in shortlist order.

    Returns:
        The same pairs, possibly reordered.
    """
    if (
        not candidates
        or self.anthropic is None
        or not _env_truthy("SKILLFORGE_HAIKU_RERANK", "0")
    ):
        return candidates

    cap = max(3, min(HAIKU_RERANK_MAX, len(candidates)))
    head = candidates[:cap]
    tail = candidates[cap:]
    by_name = {s.name: (s, sc) for s, sc in head}

    lines: list[str] = []
    for idx, (s, _sc) in enumerate(head, start=1):
        preview = skill_routing_card(s)[:220].replace("\n", " ")
        lines.append(f"{idx}. {s.name} — {preview}")

    hist = ""
    if conversation:
        parts: list[str] = []
        for m in conversation[-ROUTER_PROMPT_HISTORY_MSGS:]:
            if not isinstance(m, dict):
                continue
            role = str(m.get("role") or "user")
            c = str(m.get("content") or "").strip()
            if not c:
                continue
            parts.append(f"{role}: {c[:ROUTER_PROMPT_HISTORY_CHARS]}")
        if parts:
            hist = "\n\nConversation (recent):\n" + "\n".join(parts)

    # Named system_prompt (not `sys`) so the imported `sys` module stays
    # usable in this function for the stderr diagnostic below.
    system_prompt = (
        "You reorder skill candidates by relevance to the user's task. "
        "Output ONLY JSON: {\"order\": [\"skill_name\", ...]} with each candidate "
        "skill name appearing exactly once, best match first. No extra keys."
    )
    user = (
        f"Routing focus:\n{route_query}{hist}\n\nCandidates:\n" + "\n".join(lines)
    )
    try:
        rerank_model = os.getenv("SKILLFORGE_HAIKU_RERANK_MODEL", "").strip() or ROUTER_MODEL
        resp = await self.anthropic.messages.create(
            model=rerank_model,
            max_tokens=500,
            system=system_prompt,
            messages=[{"role": "user", "content": user}],
        )
        text = resp.content[0].text.strip()
        if text.startswith("```"):
            # Strip a Markdown code fence and an optional "json" language tag.
            text = text.split("```")[1]
            if text.startswith("json"):
                text = text[4:]
        data = json.loads(text.strip())
        order = data.get("order") if isinstance(data, dict) else None
        if not isinstance(order, list):
            order = []
        ordered: list[tuple[Skill, float]] = []
        seen: set[str] = set()
        for n in order:
            if isinstance(n, str) and n in by_name and n not in seen:
                ordered.append(by_name[n])
                seen.add(n)
        # Anything the model left out keeps its original relative order.
        for s, sc in head:
            if s.name not in seen:
                ordered.append((s, sc))
        return ordered + tail
    except Exception as exc:
        # Best-effort boundary: routing must not fail because reranking did.
        print(
            f"[skillforge] Haiku rerank failed ({type(exc).__name__}); keeping shortlist order.",
            file=sys.stderr,
        )
        return candidates
752
+
326
753
  def pick_final_embedding_only(self, candidates):
327
754
  """Pick up to MAX_ACTIVE_SKILLS from the shortlist order (similarity + weights). No LLM call."""
328
755
  if not candidates:
@@ -332,26 +759,46 @@ class Router:
332
759
  "embedding-only: top candidates by similarity and learned weights"
333
760
  )
334
761
 
335
- async def pick_final(self, prompt, conversation, candidates):
762
+ async def pick_final(
763
+ self,
764
+ prompt,
765
+ conversation,
766
+ candidates,
767
+ route_query: str | None = None,
768
+ ):
769
+ rq = (route_query if route_query is not None else prompt) or ""
336
770
  if self.anthropic is None:
337
771
  return self.pick_final_embedding_only(candidates)
338
772
  if not candidates:
339
773
  return [], "no candidates available"
340
774
  catalog = "\n".join(
341
- f"- {s.name}: {s.description[:200]}" for s, _ in candidates
775
+ f"- {s.name}: {skill_routing_card(s)[:ROUTER_CATALOG_PREVIEW_CHARS]}"
776
+ for s, _ in candidates
342
777
  )
343
778
  recent = ""
344
779
  if conversation:
345
- recent = "\n\nRecent conversation:\n" + "\n".join(
346
- f"{m['role']}: {m['content'][:200]}" for m in conversation[-4:]
347
- )
780
+ msgs = conversation[-ROUTER_PROMPT_HISTORY_MSGS:]
781
+ parts: list[str] = []
782
+ for m in msgs:
783
+ if not isinstance(m, dict):
784
+ continue
785
+ role = str(m.get("role") or "user")
786
+ c = str(m.get("content") or "").strip()
787
+ if not c:
788
+ continue
789
+ parts.append(f"{role}: {c[:ROUTER_PROMPT_HISTORY_CHARS]}")
790
+ if parts:
791
+ recent = "\n\nRecent conversation:\n" + "\n".join(parts)
348
792
  sys = (
349
793
  "You are a skill router. Given a user prompt and a candidate list of skills, "
350
794
  f"pick 0 to {MAX_ACTIVE_SKILLS} skills that would genuinely help answer this prompt. "
351
795
  "Be ruthless — only include a skill if it directly applies. Empty list is valid. "
352
796
  'Respond ONLY in JSON: {"skills": ["name1","name2"], "reasoning": "one sentence"}'
353
797
  )
354
- user = f"User prompt:\n{prompt}{recent}\n\nCandidate skills:\n{catalog}"
798
+ user = (
799
+ f"User prompt:\n{prompt}\n\nRouting context (retrieval query):\n{rq}{recent}"
800
+ f"\n\nCandidate skills:\n{catalog}"
801
+ )
355
802
  try:
356
803
  resp = await self.anthropic.messages.create(
357
804
  model=ROUTER_MODEL,
@@ -381,6 +828,23 @@ def jaccard_change(old, new):
381
828
  return 1.0 - (inter / union)
382
829
 
383
830
 
831
def format_context_items_markdown(context_items: list[dict[str, Any]]) -> str:
    """Render context items as Markdown sections joined by newlines.

    An item with a truthy ``path`` gets a ``### File: `<path>``` heading; any
    other item gets ``### Skill: <skill>``. The heading is followed by
    ``(lines A-B)`` when both ``line_start`` and ``line_end`` are present,
    else ``(full document)``, then a blank line and the item's ``text``.
    """

    def _render(item: dict[str, Any]) -> str:
        start, end = item.get("line_start"), item.get("line_end")
        if start is None or end is None:
            location = " (full document)"
        else:
            location = f" (lines {start}-{end})"
        path = item.get("path")
        heading = f"### File: `{path}`" if path else f"### Skill: {item['skill']}"
        return f"{heading}{location}\n\n{item['text']}\n"

    return "\n".join(_render(entry) for entry in context_items)
846
+
847
+
384
848
  async def run_route_turn(
385
849
  con: sqlite3.Connection,
386
850
  router: Router,
@@ -388,15 +852,33 @@ async def run_route_turn(
388
852
  conversation: list,
389
853
  user_id: str = "",
390
854
  session_id: str | None = None,
855
+ *,
856
+ project_root: str | None = None,
857
+ include_project_rag: bool = False,
391
858
  ) -> dict[str, Any]:
392
- """Shared routing + session + telemetry for HTTP /chat and MCP route_skills.
859
+ """Shared routing + session + telemetry for MCP route_skills and ``skillforge route``.
393
860
 
394
861
  Updates sessions, skill usage stats, and writes a route row to events.
395
862
  """
396
863
  sid = session_id or str(uuid.uuid4())
397
864
  t0 = time.time()
398
- candidates = router.shortlist(prompt, con, user_id=user_id)
399
- picked_names, reasoning = await router.pick_final(prompt, conversation, candidates)
865
+ route_query = build_route_query_text(prompt, conversation)
866
+ candidates = router.shortlist(route_query, con, user_id=user_id)
867
+ candidates = await router.rerank_candidates_haiku(route_query, conversation, candidates)
868
+ picked_names, reasoning = await router.pick_final(
869
+ prompt, conversation, candidates, route_query=route_query
870
+ )
871
+ pr = (project_root or "").strip()
872
+ policies_cfg = load_route_policies_config(pr or None)
873
+ picked_names, policy_audit = merge_policy_includes(
874
+ prompt,
875
+ picked_names,
876
+ policies_cfg,
877
+ router._by_name,
878
+ con,
879
+ user_id,
880
+ max_active=MAX_ACTIVE_SKILLS,
881
+ )
400
882
  route_ms = (time.time() - t0) * 1000
401
883
 
402
884
  prev_active: set[str] = set()
@@ -410,6 +892,108 @@ async def run_route_turn(
410
892
  change = jaccard_change(prev_active, set(picked_names))
411
893
  rerouted = change >= REROUTE_THRESHOLD and bool(prev_active)
412
894
 
895
+ want_fusion = CONTEXT_FUSION and include_project_rag and bool(pr)
896
+ context_fusion: dict[str, Any] | None = None
897
+ context_items: list[dict[str, Any]] = []
898
+
899
+ proj_pool: list[dict[str, Any]] = []
900
+ proj_emb = np.zeros((0, int(router.embed_model.get_sentence_embedding_dimension())))
901
+ proj_rel = np.array([], dtype=np.float32)
902
+
903
+ if want_fusion:
904
+ try:
905
+ proj_pool, proj_emb, proj_rel = load_project_fusion_pool(
906
+ con, router.embed_model, prompt, FUSION_POOL_PROJECT
907
+ )
908
+ except Exception:
909
+ proj_pool = []
910
+ proj_emb = np.zeros((0, int(router.embed_model.get_sentence_embedding_dimension())))
911
+ proj_rel = np.array([], dtype=np.float32)
912
+
913
+ if want_fusion and proj_pool:
914
+ skill_pool, skill_emb, skill_rel = router.build_fusion_skill_pool(
915
+ prompt, picked_names, FUSION_POOL_SKILL
916
+ )
917
+ n_skill = len(skill_pool)
918
+ n_proj = len(proj_pool)
919
+ pool = skill_pool + proj_pool
920
+ if n_skill and n_proj:
921
+ em = np.vstack([skill_emb, proj_emb])
922
+ rel = np.concatenate([skill_rel, proj_rel])
923
+ elif n_skill:
924
+ em = skill_emb
925
+ rel = skill_rel
926
+ else:
927
+ em = proj_emb
928
+ rel = proj_rel
929
+ lens = np.array([len(c["text"]) for c in pool], dtype=np.int64)
930
+ ovh = np.array([
931
+ CONTEXT_OVERHEAD_SKILL if not c.get("path") else CONTEXT_OVERHEAD_FILE
932
+ for c in pool
933
+ ], dtype=np.int64)
934
+ budget = _context_budget_unified()
935
+ order, mmr_trace = mmr_select(
936
+ em,
937
+ rel,
938
+ lens,
939
+ char_budget=budget,
940
+ overhead_per_chunk=ovh,
941
+ lambda_mult=CONTEXT_MMR_LAMBDA,
942
+ )
943
+ for rank, idx in enumerate(order, start=1):
944
+ item = dict(pool[idx])
945
+ item.pop("source", None)
946
+ tr = mmr_trace[rank - 1]
947
+ item["mmr_rank"] = rank
948
+ item["mmr_score"] = tr["mmr"]
949
+ item["retrieval_relevance"] = tr["relevance"]
950
+ item["max_sim_to_prior"] = tr["max_sim_to_selected"]
951
+ context_items.append(item)
952
+ context_fusion = {
953
+ "enabled": True,
954
+ "lambda": CONTEXT_MMR_LAMBDA,
955
+ "budget_chars": budget,
956
+ "pool_skill": n_skill,
957
+ "pool_project": n_proj,
958
+ "selected_count": len(context_items),
959
+ "mmr_trace": mmr_trace,
960
+ }
961
+ else:
962
+ context_items = router.build_context_items(prompt, picked_names)
963
+ if picked_names and not context_items:
964
+ context_items = [
965
+ {
966
+ "skill": n,
967
+ "path": None,
968
+ "line_start": None,
969
+ "line_end": None,
970
+ "text": router._by_name[n].body,
971
+ "score": 1.0,
972
+ }
973
+ for n in picked_names
974
+ if n in router._by_name
975
+ ]
976
+ project_add: list[dict[str, Any]] = []
977
+ if include_project_rag and pr:
978
+ try:
979
+ project_add = retrieve_project_context_items(con, router.embed_model, prompt)
980
+ except Exception:
981
+ project_add = []
982
+ context_items = [*context_items, *project_add]
983
+ context_fusion = {"enabled": False}
984
+
985
+ project_rag_items_count = sum(1 for c in context_items if c.get("path"))
986
+
987
+ reasoning_out = reasoning
988
+ safe_prompt_snip = prompt[:300]
989
+ context_redaction_stats: dict[str, Any] = {"enabled": False, "secret_hits": 0, "path_hits": 0}
990
+ if redaction_enabled():
991
+ safe_prompt_snip, _ = redact_secret_patterns(prompt[:300])
992
+ sh, ph = sanitize_context_items(context_items)
993
+ context_redaction_stats = {"enabled": True, "secret_hits": sh, "path_hits": ph}
994
+ if reasoning_out:
995
+ reasoning_out, _ = redact_secret_patterns(reasoning_out)
996
+
413
997
  con.execute(
414
998
  """INSERT INTO sessions (id, user_id, created_at, active_skills, turn_count) VALUES (?, ?, ?, ?, 1)
415
999
  ON CONFLICT(id) DO UPDATE SET active_skills = ?, turn_count = turn_count + 1""",
@@ -423,225 +1007,44 @@ async def run_route_turn(
423
1007
  "type": "route",
424
1008
  "session_id": sid,
425
1009
  "user_id": user_id,
426
- "prompt": prompt[:300],
1010
+ "prompt": safe_prompt_snip,
427
1011
  "candidates": [{"name": s.name, "score": sc} for s, sc in candidates[:10]],
428
1012
  "picked": picked_names,
429
- "reasoning": reasoning,
1013
+ "reasoning": reasoning_out,
430
1014
  "rerouted": rerouted,
431
1015
  "change_pct": round(change * 100, 1),
432
1016
  "route_ms": round(route_ms, 1),
433
1017
  "ts": time.time(),
1018
+ "context_mode": router.context_mode,
1019
+ "context_items_count": len(context_items),
1020
+ "project_rag_items_count": project_rag_items_count,
1021
+ "include_project_rag": bool(include_project_rag and pr),
1022
+ "context_fusion": context_fusion,
1023
+ "context_redaction": context_redaction_stats,
1024
+ "policy": {
1025
+ "rules_loaded": len(policies_cfg.get("rules") or []) if isinstance(policies_cfg.get("rules"), list) else 0,
1026
+ "audit": policy_audit,
1027
+ },
1028
+ "chunk_sources_preview": [
1029
+ {
1030
+ "skill": c.get("skill"),
1031
+ "path": c.get("path"),
1032
+ "line_start": c.get("line_start"),
1033
+ "line_end": c.get("line_end"),
1034
+ "mmr_rank": c.get("mmr_rank"),
1035
+ }
1036
+ for c in context_items[:24]
1037
+ ],
434
1038
  }
435
1039
  log_event(con, sid, "route", event, user_id=user_id)
436
1040
  return {
437
1041
  "session_id": sid,
438
1042
  "picked_names": picked_names,
439
- "reasoning": reasoning,
1043
+ "reasoning": reasoning_out,
440
1044
  "candidates": candidates,
441
1045
  "route_ms": route_ms,
442
1046
  "rerouted": rerouted,
443
1047
  "change": change,
444
1048
  "event": event,
445
- }
446
-
447
-
448
- # ---------- App ----------
449
- app_state: dict[str, Any] = {}
450
-
451
-
452
- @asynccontextmanager
453
- async def lifespan(app: FastAPI):
454
- print(f"[skillforge] Loading skills from {BUNDLED_SKILLS} + {USER_SKILLS}")
455
- skills = load_all_skills()
456
- print(f"[skillforge] Loaded {len(skills)} skills")
457
- if not skills:
458
- print("[skillforge] WARNING: no skills found")
459
- embed_model = SentenceTransformer(EMBED_MODEL)
460
- anthropic = AsyncAnthropic()
461
- router_anthropic = None if SKILLFORGE_ROUTER_MODE == "embedding" else anthropic
462
- if router_anthropic is None:
463
- print("[skillforge] Router mode: embedding-only (Haiku step skipped; /chat still uses ANSWER model)")
464
- print("[skillforge] Live usage (terminal): skillforge events --watch")
465
- router = Router(skills, embed_model, router_anthropic)
466
- con = init_db()
467
- app_state.update(
468
- skills={s.name: s for s in skills},
469
- router=router,
470
- anthropic=anthropic,
471
- con=con,
472
- )
473
- yield
474
- con.close()
475
-
476
-
477
- app = FastAPI(lifespan=lifespan, title="skillforge")
478
-
479
-
480
- class ChatRequest(BaseModel):
481
- prompt: str
482
- session_id: str | None = None
483
- conversation: list[dict] = []
484
-
485
-
486
- class FeedbackRequest(BaseModel):
487
- session_id: str
488
- skill_name: str
489
- thumbs: int
490
-
491
-
492
- class DisableRequest(BaseModel):
493
- skill_name: str
494
- disabled: bool
495
-
496
-
497
- @app.post("/chat")
498
- async def chat(req: ChatRequest, request: Request):
499
- from app.auth import resolve_user
500
- user_id = resolve_user(request)
501
- router: Router = app_state["router"]
502
- con = app_state["con"]
503
- anthropic: AsyncAnthropic = app_state["anthropic"]
504
-
505
- result = await run_route_turn(
506
- con,
507
- router,
508
- req.prompt,
509
- req.conversation,
510
- user_id=user_id,
511
- session_id=req.session_id,
512
- )
513
- session_id = result["session_id"]
514
- picked_names = result["picked_names"]
515
-
516
- skills_map = app_state["skills"]
517
- skill_blocks = []
518
- for n in picked_names:
519
- s = skills_map.get(n)
520
- if s:
521
- skill_blocks.append(f'<skill name="{s.name}">\n{s.body}\n</skill>')
522
- system_prompt = (
523
- "You are a helpful assistant. The following skills have been dynamically loaded "
524
- "for this turn based on the user's request. Use them when relevant; ignore them when not.\n\n"
525
- + "\n\n".join(skill_blocks)
526
- ) if skill_blocks else "You are a helpful assistant."
527
-
528
- messages = req.conversation + [{"role": "user", "content": req.prompt}]
529
-
530
- async def stream():
531
- full_text = []
532
- try:
533
- async with anthropic.messages.stream(
534
- model=ANSWER_MODEL,
535
- max_tokens=4096,
536
- system=system_prompt,
537
- messages=messages,
538
- ) as s:
539
- async for chunk in s.text_stream:
540
- full_text.append(chunk)
541
- yield f"data: {json.dumps({'delta': chunk})}\n\n"
542
- except Exception as e:
543
- yield f"data: {json.dumps({'error': str(e)})}\n\n"
544
- return
545
- response_text = "".join(full_text)
546
- for n in picked_names:
547
- s = skills_map.get(n)
548
- if not s:
549
- continue
550
- keywords = [w for w in s.body.split()[:50] if len(w) > 6][:5]
551
- hits = sum(1 for kw in keywords if kw.lower() in response_text.lower())
552
- if hits >= 2 or s.name in response_text.lower():
553
- update_skill_stat(con, n, "referenced", 1, user_id=user_id)
554
- yield f"data: {json.dumps({'done': True, 'session_id': session_id, 'picked': picked_names})}\n\n"
555
-
556
- return StreamingResponse(stream(), media_type="text/event-stream")
557
-
558
-
559
- @app.post("/feedback")
560
- def feedback(req: FeedbackRequest, request: Request):
561
- from app.auth import resolve_user
562
- user_id = resolve_user(request)
563
- con = app_state["con"]
564
- field = "thumbs_up" if req.thumbs > 0 else "thumbs_down"
565
- update_skill_stat(con, req.skill_name, field, 1, user_id=user_id)
566
- log_event(con, req.session_id, "feedback",
567
- {"skill": req.skill_name, "thumbs": req.thumbs},
568
- user_id=user_id)
569
- return {"ok": True}
570
-
571
-
572
- @app.post("/skills/disable")
573
- def disable(req: DisableRequest, request: Request):
574
- from app.auth import resolve_user
575
- user_id = resolve_user(request)
576
- con = app_state["con"]
577
- set_skill_disabled(con, req.skill_name, req.disabled, user_id=user_id)
578
- return {"ok": True}
579
-
580
-
581
- @app.get("/skills")
582
- def list_skills(request: Request):
583
- from app.auth import resolve_user
584
- user_id = resolve_user(request)
585
- con = app_state["con"]
586
- skills_map = app_state["skills"]
587
- out = []
588
- for name, s in skills_map.items():
589
- cur = con.execute(
590
- "SELECT weight, uses, referenced, thumbs_up, thumbs_down, disabled FROM skill_weights WHERE user_id = ? AND skill_name = ?",
591
- (user_id, name),
592
- )
593
- row = cur.fetchone()
594
- weight, uses, ref, up, down, disabled = row if row else (0.0, 0, 0, 0, 0, 0)
595
- out.append({
596
- "name": name,
597
- "title": s.title,
598
- "description": s.description[:200],
599
- "source": s.source,
600
- "weight": weight,
601
- "uses": uses,
602
- "referenced": ref,
603
- "thumbs_up": up,
604
- "thumbs_down": down,
605
- "disabled": bool(disabled),
606
- })
607
- out.sort(key=lambda x: -x["uses"])
608
- return out
609
-
610
-
611
- @app.get("/events")
612
- def recent_events(request: Request, limit: int = 50):
613
- from app.auth import resolve_user, auth_enabled
614
- user_id = resolve_user(request)
615
- con = app_state["con"]
616
- if auth_enabled():
617
- cur = con.execute(
618
- "SELECT ts, session_id, event_type, payload FROM events WHERE user_id = ? ORDER BY ts DESC LIMIT ?",
619
- (user_id, limit),
620
- )
621
- else:
622
- cur = con.execute(
623
- "SELECT ts, session_id, event_type, payload FROM events ORDER BY ts DESC LIMIT ?",
624
- (limit,),
625
- )
626
- return [
627
- {"ts": ts, "session_id": sid, "type": et, "payload": json.loads(p)}
628
- for ts, sid, et, p in cur.fetchall()
629
- ]
630
-
631
-
632
- @app.get("/")
633
- def root():
634
- return {
635
- "service": "skillforge",
636
- "docs": "POST /chat, GET /events, GET /skills, GET /healthz",
637
- "live_log": "skillforge events --watch",
638
- }
639
-
640
-
641
- @app.get("/healthz")
642
- def health():
643
- return {
644
- "skills_loaded": len(app_state.get("skills", {})),
645
- "ok": True,
646
- "live_log": "skillforge events --watch",
1049
+ "context_items": context_items,
647
1050
  }