@heytherevibin/skillforge 0.2.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -0
- package/README.md +89 -56
- package/RELEASING.md +1 -1
- package/SECURITY.md +2 -2
- package/STRATEGY.md +1 -3
- package/bin/cli.js +32 -138
- package/package.json +2 -2
- package/python/app/chunking.py +116 -0
- package/python/app/context_fusion.py +77 -0
- package/python/app/events_cli.py +1 -1
- package/python/app/index_cli.py +89 -0
- package/python/app/main.py +632 -229
- package/python/app/mcp_contract.py +121 -0
- package/python/app/mcp_server.py +304 -30
- package/python/app/project_index.py +600 -0
- package/python/app/redaction.py +128 -0
- package/python/app/route_cli.py +42 -19
- package/python/app/route_policies.py +133 -0
- package/python/app/routing_signals.py +95 -0
- package/python/requirements.txt +1 -4
- package/python/tests/test_chunking.py +34 -0
- package/python/tests/test_context_fusion.py +45 -0
- package/python/tests/test_mcp_contract.py +137 -0
- package/python/tests/test_project_index.py +76 -0
- package/python/tests/test_redaction.py +51 -0
- package/python/tests/test_route_policies.py +115 -0
- package/python/tests/test_routing_signals.py +77 -0
- package/python/app/auth.py +0 -63
- package/python/app/cli.py +0 -78
package/python/app/main.py
CHANGED
|
@@ -4,31 +4,42 @@ skillforge — skill orchestrator co-tool for Claude (MCP-first).
|
|
|
4
4
|
Primary surface: MCP stdio — route_skills and related tools for hosts
|
|
5
5
|
(Claude Desktop, Cursor, Claude Code).
|
|
6
6
|
|
|
7
|
-
Optional: headless HTTP API (POST /chat, /events, …) for integrations.
|
|
8
7
|
Live usage: `skillforge events --watch` (terminal).
|
|
9
8
|
"""
|
|
10
9
|
from __future__ import annotations
|
|
11
10
|
|
|
12
|
-
import asyncio
|
|
13
11
|
import json
|
|
14
12
|
import os
|
|
15
13
|
import sqlite3
|
|
16
14
|
import sys
|
|
17
15
|
import time
|
|
18
16
|
import uuid
|
|
19
|
-
from contextlib import asynccontextmanager
|
|
20
17
|
from dataclasses import dataclass
|
|
21
18
|
from pathlib import Path
|
|
22
19
|
from typing import Any, Optional
|
|
23
20
|
|
|
24
21
|
import numpy as np
|
|
25
22
|
from anthropic import AsyncAnthropic
|
|
26
|
-
from fastapi import FastAPI, Request
|
|
27
|
-
from fastapi.responses import StreamingResponse
|
|
28
|
-
from pydantic import BaseModel
|
|
29
23
|
from sentence_transformers import SentenceTransformer
|
|
30
24
|
|
|
31
25
|
from app.db_paths import global_db_path, resolve_orchestrator_db
|
|
26
|
+
from app.chunking import SkillChunk, chunk_max_chars, chunk_overlap_chars, chunk_skill_body
|
|
27
|
+
from app.context_fusion import mmr_select
|
|
28
|
+
from app.project_index import (
|
|
29
|
+
ensure_project_index_schema,
|
|
30
|
+
load_project_fusion_pool,
|
|
31
|
+
project_rag_max_chars,
|
|
32
|
+
retrieve_project_context_items,
|
|
33
|
+
)
|
|
34
|
+
from app.redaction import redaction_enabled, redact_secret_patterns, sanitize_context_items
|
|
35
|
+
from app.route_policies import load_route_policies_config, merge_policy_includes
|
|
36
|
+
from app.routing_signals import (
|
|
37
|
+
build_route_query_text,
|
|
38
|
+
keyword_overlap_scores,
|
|
39
|
+
normalize_minmax,
|
|
40
|
+
skill_routing_card,
|
|
41
|
+
tokenize_skills_query,
|
|
42
|
+
)
|
|
32
43
|
|
|
33
44
|
# ---------- Config (env-driven so the Node wrapper controls paths) ----------
|
|
34
45
|
BUNDLED_SKILLS = Path(os.getenv("SKILLFORGE_BUNDLED_SKILLS", "./skills"))
|
|
@@ -40,12 +51,43 @@ DB_PATH = global_db_path()
|
|
|
40
51
|
|
|
41
52
|
EMBED_MODEL = os.getenv("SKILLFORGE_EMBED_MODEL", "all-MiniLM-L6-v2")
|
|
42
53
|
ROUTER_MODEL = os.getenv("SKILLFORGE_ROUTER_MODEL", "claude-haiku-4-5-20251001")
|
|
43
|
-
ANSWER_MODEL = os.getenv("SKILLFORGE_ANSWER_MODEL", "claude-opus-4-7")
|
|
44
54
|
TOP_K_CANDIDATES = int(os.getenv("SKILLFORGE_TOP_K", "15"))
|
|
45
55
|
MAX_ACTIVE_SKILLS = int(os.getenv("SKILLFORGE_MAX_ACTIVE", "7"))
|
|
46
56
|
REROUTE_THRESHOLD = float(os.getenv("SKILLFORGE_REROUTE_THRESHOLD", "0.4"))
|
|
47
57
|
# "" | "full" | "embedding" — embedding skips Haiku and takes top skills from the shortlist only.
|
|
48
58
|
SKILLFORGE_ROUTER_MODE = os.getenv("SKILLFORGE_ROUTER_MODE", "").strip().lower()
|
|
59
|
+
# chunks: RAG-style line-bounded chunks from picked skills. full_body: inject entire SKILL.md per pick (legacy).
# Router falls back to "chunks" when the value is neither "chunks" nor "full_body".
SKILLFORGE_CONTEXT_MODE = os.getenv("SKILLFORGE_CONTEXT_MODE", "chunks").strip().lower()
# Default character cap for skill context (used when build_context_items gets no explicit cap).
ROUTE_MAX_CONTEXT_CHARS = int(os.getenv("SKILLFORGE_ROUTE_MAX_CHARS", "60000"))
# Fusion of skill + project context is on unless the env value is "0", "false", "no" or empty.
CONTEXT_FUSION = os.getenv("SKILLFORGE_CONTEXT_FUSION", "1").strip().lower() not in ("0", "false", "no", "")
# Clamped to [0.0, 1.0]. NOTE(review): presumably the MMR relevance/diversity trade-off — confirm at the mmr_select call.
CONTEXT_MMR_LAMBDA = max(0.0, min(1.0, float(os.getenv("SKILLFORGE_CONTEXT_MMR_LAMBDA", "0.7"))))
# Candidate-pool sizes handed to the skill / project fusion pool builders (never below 8).
FUSION_POOL_SKILL = max(8, int(os.getenv("SKILLFORGE_FUSION_POOL_SKILL", "96")))
FUSION_POOL_PROJECT = max(8, int(os.getenv("SKILLFORGE_FUSION_POOL_PROJECT", "96")))
# How many leading characters of a skill body are embedded when a whole skill is one pool row (never below 400).
FUSION_FULL_BODY_PREVIEW_CHARS = max(400, int(os.getenv("SKILLFORGE_FUSION_FULL_BODY_PREVIEW_CHARS", "4000")))
# Fixed per-item character overhead added to the text length when budgeting context:
# items without a "path" use the skill value, items with a "path" use the file value.
CONTEXT_OVERHEAD_SKILL = 48
CONTEXT_OVERHEAD_FILE = 56

# Sparse-signal mode for routing: "bm25" and "keyword" are handled explicitly;
# "", "off", "0", "false" and "no" disable hybrid scoring (see _hybrid_mode_active).
ROUTER_HYBRID_MODE = os.getenv("SKILLFORGE_ROUTER_HYBRID", "off").strip().lower()
# Weight of the normalized dense score in the fused score; the sparse score gets (1 - alpha). Clamped to [0, 1].
ROUTER_HYBRID_ALPHA = max(0.0, min(1.0, float(os.getenv("SKILLFORGE_ROUTER_HYBRID_ALPHA", "0.72"))))
# Number of trailing conversation messages included in router prompts (at least 1).
ROUTER_PROMPT_HISTORY_MSGS = max(1, int(os.getenv("SKILLFORGE_ROUTER_PROMPT_HISTORY_MSGS", "8")))
# Per-message character truncation applied to that history (at least 80).
ROUTER_PROMPT_HISTORY_CHARS = max(80, int(os.getenv("SKILLFORGE_ROUTER_PROMPT_HISTORY_CHARS", "360")))
# Characters of each skill routing card shown in the router's candidate catalog (at least 80).
ROUTER_CATALOG_PREVIEW_CHARS = max(80, int(os.getenv("SKILLFORGE_ROUTER_CATALOG_PREVIEW_CHARS", "280")))
# Upper bound on how many leading candidates are sent to the rerank call; defaults to TOP_K_CANDIDATES, never below 3.
HAIKU_RERANK_MAX = max(3, int(os.getenv("SKILLFORGE_HAIKU_RERANK_MAX", str(TOP_K_CANDIDATES))))
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _hybrid_mode_active(mode: str) -> bool:
    """Tell whether *mode* selects hybrid (dense + sparse) routing.

    The empty string, "off", "0", "false" and "no" all mean "disabled";
    every other value is treated as an active hybrid mode. The comparison is
    exact, so callers are expected to pass an already normalized string.
    """
    disabled_spellings = ("", "off", "0", "false", "no")
    is_disabled = mode in disabled_spellings
    return not is_disabled
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _env_truthy(name: str, default: str = "0") -> bool:
    """Read environment variable *name* as a boolean flag.

    The value (or *default* when the variable is unset) is stripped and
    lower-cased; "0", "false", "no" and the empty string count as False,
    anything else as True.
    """
    raw_value = os.getenv(name, default)
    normalized = raw_value.strip().lower()
    return normalized not in ("0", "false", "no", "")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _context_budget_unified() -> int:
    """Return the combined character budget for fused skill + project context.

    When SKILLFORGE_CONTEXT_BUDGET_CHARS is set to a non-blank value it is
    parsed as an integer and floored at 4000. Otherwise the budget is the
    skill cap (ROUTE_MAX_CONTEXT_CHARS) plus the project RAG cap.
    """
    override = os.getenv("SKILLFORGE_CONTEXT_BUDGET_CHARS", "").strip()
    if not override:
        # No explicit budget: add the two per-source caps together.
        return ROUTE_MAX_CONTEXT_CHARS + int(project_rag_max_chars())
    return max(4000, int(override))
|
|
49
91
|
|
|
50
92
|
|
|
51
93
|
def build_router_and_skills(
|
|
@@ -103,6 +145,8 @@ class Skill:
|
|
|
103
145
|
source: str # "bundled" | "user"
|
|
104
146
|
disabled: bool = False
|
|
105
147
|
embedding: np.ndarray | None = None
|
|
148
|
+
triggers: str = ""
|
|
149
|
+
anti_triggers: str = ""
|
|
106
150
|
|
|
107
151
|
|
|
108
152
|
def parse_skill_md(path: Path, source: str) -> Skill | None:
|
|
@@ -118,6 +162,8 @@ def parse_skill_md(path: Path, source: str) -> Skill | None:
|
|
|
118
162
|
name = path.parent.name
|
|
119
163
|
title = name.replace("-", " ").title()
|
|
120
164
|
description = ""
|
|
165
|
+
triggers = ""
|
|
166
|
+
anti_triggers = ""
|
|
121
167
|
body = text
|
|
122
168
|
if text.startswith("---"):
|
|
123
169
|
end = text.find("---", 3)
|
|
@@ -147,6 +193,10 @@ def parse_skill_md(path: Path, source: str) -> Skill | None:
|
|
|
147
193
|
title = v
|
|
148
194
|
elif k == "description":
|
|
149
195
|
description = v
|
|
196
|
+
elif k in ("triggers", "trigger"):
|
|
197
|
+
triggers = v
|
|
198
|
+
elif k in ("anti_triggers", "anti-triggers"):
|
|
199
|
+
anti_triggers = v
|
|
150
200
|
i += 1
|
|
151
201
|
if not description:
|
|
152
202
|
for chunk in body.split("\n\n"):
|
|
@@ -154,7 +204,15 @@ def parse_skill_md(path: Path, source: str) -> Skill | None:
|
|
|
154
204
|
if chunk and not chunk.startswith("#"):
|
|
155
205
|
description = chunk[:500]
|
|
156
206
|
break
|
|
157
|
-
return Skill(
|
|
207
|
+
return Skill(
|
|
208
|
+
name=name,
|
|
209
|
+
title=title,
|
|
210
|
+
description=description,
|
|
211
|
+
body=body,
|
|
212
|
+
source=source,
|
|
213
|
+
triggers=triggers,
|
|
214
|
+
anti_triggers=anti_triggers,
|
|
215
|
+
)
|
|
158
216
|
|
|
159
217
|
|
|
160
218
|
def load_all_skills() -> list[Skill]:
|
|
@@ -235,6 +293,7 @@ def init_db(db_file: Path | None = None):
|
|
|
235
293
|
con.execute(f"ALTER TABLE {table} ADD COLUMN user_id TEXT DEFAULT ''")
|
|
236
294
|
except sqlite3.OperationalError:
|
|
237
295
|
pass # already exists
|
|
296
|
+
ensure_project_index_schema(con)
|
|
238
297
|
con.commit()
|
|
239
298
|
return con
|
|
240
299
|
|
|
@@ -299,21 +358,101 @@ class Router:
|
|
|
299
358
|
self.skills = skills
|
|
300
359
|
self.embed_model = embed_model
|
|
301
360
|
self.anthropic = anthropic
|
|
302
|
-
|
|
303
|
-
|
|
361
|
+
self.context_mode = SKILLFORGE_CONTEXT_MODE if SKILLFORGE_CONTEXT_MODE in (
|
|
362
|
+
"chunks",
|
|
363
|
+
"full_body",
|
|
364
|
+
) else "chunks"
|
|
365
|
+
self._by_name: dict[str, Skill] = {s.name: s for s in skills}
|
|
366
|
+
self._hybrid_mode = ROUTER_HYBRID_MODE
|
|
367
|
+
self._hybrid_alpha = ROUTER_HYBRID_ALPHA
|
|
368
|
+
self._routing_cards = [skill_routing_card(s) for s in skills]
|
|
369
|
+
self._bm25 = None
|
|
370
|
+
if self._hybrid_mode == "bm25" and skills:
|
|
371
|
+
try:
|
|
372
|
+
from rank_bm25 import BM25Okapi
|
|
373
|
+
|
|
374
|
+
toks = [tokenize_skills_query(c) for c in self._routing_cards]
|
|
375
|
+
if any(toks):
|
|
376
|
+
self._bm25 = BM25Okapi(toks)
|
|
377
|
+
except ImportError:
|
|
378
|
+
print(
|
|
379
|
+
"[skillforge] SKILLFORGE_ROUTER_HYBRID=bm25 but rank-bm25 is not installed; "
|
|
380
|
+
"using keyword overlap for sparse signal.",
|
|
381
|
+
file=sys.stderr,
|
|
382
|
+
)
|
|
383
|
+
|
|
384
|
+
texts = self._routing_cards
|
|
385
|
+
print(f"[skillforge] Embedding {len(skills)} skills (summary cards)...", file=sys.stderr)
|
|
304
386
|
embeddings = embed_model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
|
|
305
387
|
for s, e in zip(skills, embeddings):
|
|
306
388
|
s.embedding = e / np.linalg.norm(e)
|
|
307
389
|
self.matrix = np.stack([s.embedding for s in skills]) if skills else np.zeros((0, 0))
|
|
308
|
-
print(f"[skillforge] Ready. {len(skills)} skills, matrix shape: {self.matrix.shape}")
|
|
309
390
|
|
|
310
|
-
|
|
391
|
+
# Chunk index for CONTEXT_MODE=chunks
|
|
392
|
+
self._chunk_meta: list[tuple[str, SkillChunk]] = []
|
|
393
|
+
edim = int(embed_model.get_sentence_embedding_dimension())
|
|
394
|
+
self._chunk_embeddings: np.ndarray = np.zeros((0, edim))
|
|
395
|
+
if self.context_mode == "chunks" and skills:
|
|
396
|
+
flat_texts: list[str] = []
|
|
397
|
+
self._chunk_meta = []
|
|
398
|
+
mc = chunk_max_chars()
|
|
399
|
+
oc = chunk_overlap_chars()
|
|
400
|
+
for s in skills:
|
|
401
|
+
for ch in chunk_skill_body(s.body, max_chars=mc, overlap=oc):
|
|
402
|
+
# Embed with in-chunk disambiguation
|
|
403
|
+
flat_texts.append(f"{s.title} — {s.name}\n{ch.text}")
|
|
404
|
+
self._chunk_meta.append((s.name, ch))
|
|
405
|
+
if flat_texts:
|
|
406
|
+
print(f"[skillforge] Embedding {len(flat_texts)} skill chunks...", file=sys.stderr)
|
|
407
|
+
ce = embed_model.encode(
|
|
408
|
+
flat_texts, show_progress_bar=False, convert_to_numpy=True
|
|
409
|
+
)
|
|
410
|
+
ce = ce / np.linalg.norm(ce, axis=1, keepdims=True)
|
|
411
|
+
self._chunk_embeddings = ce
|
|
412
|
+
print(
|
|
413
|
+
f"[skillforge] Ready. {len(skills)} skills; chunk matrix {self._chunk_embeddings.shape}; "
|
|
414
|
+
f"context_mode={self.context_mode}; router_hybrid={self._hybrid_mode}",
|
|
415
|
+
file=sys.stderr,
|
|
416
|
+
)
|
|
417
|
+
else:
|
|
418
|
+
print(
|
|
419
|
+
f"[skillforge] Ready. {len(skills)} skills, matrix shape: {self.matrix.shape}; "
|
|
420
|
+
f"context_mode={self.context_mode}; router_hybrid={self._hybrid_mode}",
|
|
421
|
+
file=sys.stderr,
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
def _sparse_scores(self, route_query: str) -> np.ndarray:
    """Return one sparse (lexical) relevance score per skill for *route_query*.

    Hybrid routing off gives all zeros. Mode "bm25" with a built BM25 index
    gives BM25 scores, or zeros when the query tokenizes to nothing. Every
    other active mode, including "bm25" without an index, falls back to
    keyword-overlap scores against the routing cards.
    """
    mode = self._hybrid_mode
    if not _hybrid_mode_active(mode):
        return np.zeros(len(self.skills), dtype=np.float64)

    if mode == "bm25" and self._bm25 is not None:
        query_tokens = tokenize_skills_query(route_query)
        if query_tokens:
            return np.asarray(self._bm25.get_scores(query_tokens), dtype=np.float64)
        return np.zeros(len(self.skills), dtype=np.float64)

    # "keyword", "bm25" without an index, and any unrecognized active mode.
    return keyword_overlap_scores(route_query, self._routing_cards)
|
|
437
|
+
|
|
438
|
+
def _base_routing_scores(self, route_query: str, q: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Compute dense similarities and the scores used for ranking.

    Returns ``(sims, rank_scores)``. ``sims`` is the product of the skill
    matrix with the query vector *q*. With hybrid routing off, ``rank_scores``
    is that same array; otherwise it is the alpha-weighted blend of the
    min-max normalized dense and sparse scores.
    """
    dense = (self.matrix @ q).flatten()
    if _hybrid_mode_active(self._hybrid_mode):
        lexical = self._sparse_scores(route_query)
        dense_scaled = normalize_minmax(dense)
        lexical_scaled = normalize_minmax(lexical)
        blended = self._hybrid_alpha * dense_scaled + (1.0 - self._hybrid_alpha) * lexical_scaled
        return dense, blended
    return dense, dense
|
|
448
|
+
|
|
449
|
+
def shortlist(self, route_query, con, k=TOP_K_CANDIDATES, user_id=""):
|
|
311
450
|
if len(self.skills) == 0:
|
|
312
451
|
return []
|
|
313
|
-
q = self.embed_model.encode(
|
|
452
|
+
q = self.embed_model.encode(route_query, convert_to_numpy=True)
|
|
314
453
|
q = q / np.linalg.norm(q)
|
|
315
|
-
sims = self.
|
|
316
|
-
biased =
|
|
454
|
+
sims, rank_scores = self._base_routing_scores(route_query, q)
|
|
455
|
+
biased = rank_scores.copy()
|
|
317
456
|
for i, s in enumerate(self.skills):
|
|
318
457
|
w, disabled = get_skill_weight(con, s.name, user_id=user_id)
|
|
319
458
|
if disabled:
|
|
@@ -323,6 +462,294 @@ class Router:
|
|
|
323
462
|
top_idx = np.argsort(-biased)[:k]
|
|
324
463
|
return [(self.skills[i], float(sims[i])) for i in top_idx if biased[i] > -100]
|
|
325
464
|
|
|
465
|
+
def shortlist_with_facets(
    self,
    route_query: str,
    con: sqlite3.Connection,
    *,
    k: int | None = None,
    user_id: str = "",
) -> list[dict[str, Any]]:
    """Embedding shortlist with cosine sim, learned weight, and routing score (no LLM).

    Args:
        route_query: Text that is embedded and scored against every skill.
        con: Database connection handed to ``get_skill_weight``.
        k: Maximum number of rows to return; ``TOP_K_CANDIDATES`` when None.
        user_id: User whose learned weights / disabled flags are applied.

    Returns:
        Up to ``k`` dicts, best routing score first, each with the skill's
        name, title, description preview, cosine similarity, sparse signal,
        learned weight, routing score, source and the active hybrid mode.
        Disabled skills are left out.
    """
    limit = k if k is not None else TOP_K_CANDIDATES
    if len(self.skills) == 0:
        return []
    q = self.embed_model.encode(route_query, convert_to_numpy=True)
    q = q / np.linalg.norm(q)
    sims, rank_scores = self._base_routing_scores(route_query, q)
    if _hybrid_mode_active(self._hybrid_mode):
        sparse_full = self._sparse_scores(route_query)
    else:
        sparse_full = np.zeros(len(self.skills), dtype=np.float64)

    biased = rank_scores.copy()
    # Keep the weight read for each enabled skill so the output loop below
    # does not have to query the database a second time for the same row.
    weights: dict[int, float] = {}
    for i, s in enumerate(self.skills):
        w, disabled = get_skill_weight(con, s.name, user_id=user_id)
        if disabled:
            biased[i] = -999.0  # sentinel: pushed below the -100 cut-off
        else:
            biased[i] += w
            weights[i] = w

    top_idx = np.argsort(-biased)[:limit]
    out: list[dict[str, Any]] = []
    for raw_idx in top_idx:
        i = int(raw_idx)
        if biased[i] <= -100:
            continue  # disabled (or weighted out of contention)
        s = self.skills[i]
        out.append({
            "name": s.name,
            "title": s.title,
            "description_preview": (s.description or "")[:280],
            "cosine_similarity": round(float(sims[i]), 6),
            "sparse_signal": round(float(sparse_full[i]), 6),
            "learned_weight": round(float(weights[i]), 4),
            "routing_score": round(float(biased[i]), 6),
            "source": s.source,
            "router_hybrid": self._hybrid_mode,
        })
    return out
|
|
511
|
+
|
|
512
|
+
def build_context_items(
    self,
    prompt: str,
    skill_names: list[str],
    max_total_chars: int | None = None,
) -> list[dict[str, Any]]:
    """Return ordered context dicts: skill, path, line_start, line_end, text, score.

    In ``full_body`` mode every known skill in *skill_names* contributes its
    whole body with score 1.0, in the order given; the character cap is not
    applied in that mode. In ``chunks`` mode the chunks of the picked skills
    are ranked by similarity to *prompt* and added greedily, best first,
    while they fit under the cap. A chunk that does not fit is skipped and
    smaller, lower-ranked chunks may still be added.

    Args:
        prompt: Text embedded to rank chunks (unused in ``full_body`` mode).
        skill_names: Names of the skills whose content may be returned.
        max_total_chars: Character cap; ``ROUTE_MAX_CONTEXT_CHARS`` when None.
    """
    cap = max_total_chars if max_total_chars is not None else ROUTE_MAX_CONTEXT_CHARS
    if self.context_mode == "full_body":
        out: list[dict[str, Any]] = []
        for n in skill_names:
            s = self._by_name.get(n)
            if not s:
                continue
            out.append({
                "skill": n,
                "path": None,
                "line_start": None,
                "line_end": None,
                "text": s.body,
                "score": 1.0,
            })
        return out
    if not skill_names or self._chunk_embeddings.shape[0] == 0:
        return []
    allowed = set(skill_names)
    indices = [i for i, (sn, _) in enumerate(self._chunk_meta) if sn in allowed]
    if not indices:
        return []
    qv = self.embed_model.encode(prompt, convert_to_numpy=True)
    # Guard the normalization the same way build_fusion_skill_pool does, so a
    # zero query vector yields zero scores instead of NaN.
    qv = qv / max(float(np.linalg.norm(qv)), 1e-12)
    sub = self._chunk_embeddings[indices]
    scores = (sub @ qv).flatten()
    order = np.argsort(-scores)
    out = []
    total = 0
    overhead = CONTEXT_OVERHEAD_SKILL
    for o in order:
        pos = int(o)
        sn, ch = self._chunk_meta[indices[pos]]
        piece_len = len(ch.text) + overhead
        if total + piece_len > cap:
            continue  # too big for the remaining budget; try smaller chunks
        out.append({
            "skill": sn,
            "path": None,
            "line_start": ch.line_start,
            "line_end": ch.line_end,
            "text": ch.text,
            "score": float(scores[pos]),
        })
        total += piece_len
    return out
|
|
565
|
+
|
|
566
|
+
def build_fusion_skill_pool(
    self,
    prompt: str,
    skill_names: list[str],
    pool_limit: int,
) -> tuple[list[dict[str, Any]], np.ndarray, np.ndarray]:
    """Candidate skill chunks (or one row per skill in full_body) with embeddings for MMR.

    Args:
        prompt: Text embedded as the query vector.
        skill_names: Names of the picked skills to draw candidates from.
        pool_limit: Maximum number of candidates to return.

    Returns:
        ``(items, embeddings, relevance)``: the candidate context dicts, best
        first, one embedding row per item, and the float32 similarity of each
        item to the prompt. With no skill names the items list is empty and
        the arrays have zero rows.
    """
    edim = int(self.embed_model.get_sentence_embedding_dimension())
    if not skill_names:
        return [], np.zeros((0, edim)), np.array([], dtype=np.float32)
    qv = self.embed_model.encode(prompt, convert_to_numpy=True)
    qv = np.asarray(qv, dtype=np.float32).reshape(-1)
    qv = qv / max(float(np.linalg.norm(qv)), 1e-12)  # avoid dividing by zero

    # full_body mode, or no chunk index at all: one row per skill. That logic
    # lives in the fallback helper; it used to be duplicated here verbatim.
    if self.context_mode == "full_body" or self._chunk_embeddings.shape[0] == 0:
        return self._fusion_skill_pool_fallback_bodies(skill_names, qv, pool_limit)

    allowed = set(skill_names)
    indices = [i for i, (sn, _) in enumerate(self._chunk_meta) if sn in allowed]
    if not indices:
        # The picked skills produced no chunks; fall back to whole bodies.
        return self._fusion_skill_pool_fallback_bodies(skill_names, qv, pool_limit)

    sub = self._chunk_embeddings[indices]
    scores = (sub @ qv).flatten()
    order = np.argsort(-scores)[: min(pool_limit, len(indices))]
    items: list[dict[str, Any]] = []
    em_rows: list[np.ndarray] = []
    rel_out: list[float] = []
    for o in order:
        pos = int(o)
        sn, ch = self._chunk_meta[indices[pos]]
        items.append({
            "skill": sn,
            "path": None,
            "line_start": ch.line_start,
            "line_end": ch.line_end,
            "text": ch.text,
            "score": float(scores[pos]),
            "source": "skill",
        })
        em_rows.append(sub[pos])
        rel_out.append(float(scores[pos]))
    return items, np.stack(em_rows), np.asarray(rel_out, dtype=np.float32)
|
|
642
|
+
|
|
643
|
+
def _fusion_skill_pool_fallback_bodies(
    self,
    skill_names: list[str],
    qv: np.ndarray,
    pool_limit: int,
) -> tuple[list[dict[str, Any]], np.ndarray, np.ndarray]:
    """Build a fusion pool with one whole-body row per known skill.

    Each known skill in *skill_names* is embedded from its title, name and
    the first FUSION_FULL_BODY_PREVIEW_CHARS characters of its body, then
    ranked by similarity to the query vector *qv*. The returned items carry
    the full body text. Returns ``(items, embeddings, relevance)`` limited to
    *pool_limit* rows; all three are empty when no name is known.
    """
    known = [name for name in skill_names if name in self._by_name]
    dim = int(self.embed_model.get_sentence_embedding_dimension())
    if not known:
        return [], np.zeros((0, dim)), np.array([], dtype=np.float32)

    previews = []
    for name in known:
        skill = self._by_name[name]
        previews.append(
            f"{skill.title} — {name}\n{(skill.body or '')[:FUSION_FULL_BODY_PREVIEW_CHARS]}"
        )
    vectors = np.asarray(
        self.embed_model.encode(previews, show_progress_bar=False, convert_to_numpy=True),
        dtype=np.float32,
    )
    # Row-normalize, guarding against all-zero rows.
    vectors = vectors / np.maximum(np.linalg.norm(vectors, axis=1, keepdims=True), 1e-12)
    relevance = (vectors @ qv).flatten()

    keep = min(pool_limit, vectors.shape[0])
    ranked = [int(pos) for pos in np.argsort(-relevance)[:keep]]
    items = [
        {
            "skill": known[pos],
            "path": None,
            "line_start": None,
            "line_end": None,
            "text": self._by_name[known[pos]].body,
            "score": float(relevance[pos]),
            "source": "skill",
        }
        for pos in ranked
    ]
    picked_rows = [vectors[pos] for pos in ranked]
    picked_rel = [float(relevance[pos]) for pos in ranked]
    return items, np.stack(picked_rows), np.asarray(picked_rel, dtype=np.float32)
|
|
681
|
+
|
|
682
|
+
async def rerank_candidates_haiku(
    self,
    route_query: str,
    conversation: list | None,
    candidates: list[tuple[Skill, float]],
) -> list[tuple[Skill, float]]:
    """Optionally reorder the head of *candidates* with one LLM call.

    Does nothing (returns *candidates* as given) when the list is empty, no
    Anthropic client is configured, or SKILLFORGE_HAIKU_RERANK is not truthy.
    Otherwise the leading candidates (at most HAIKU_RERANK_MAX) are sent with
    the routing query and recent conversation; the model is asked for a JSON
    ``{"order": [...]}`` of skill names. Names it returns come first, head
    candidates it left out keep their relative order after them, and the
    un-reranked tail is appended. Any failure of the call or of parsing falls
    back to the original order.
    """
    if not candidates:
        return candidates
    if self.anthropic is None:
        return candidates
    if not _env_truthy("SKILLFORGE_HAIKU_RERANK", "0"):
        return candidates

    window = max(3, min(HAIKU_RERANK_MAX, len(candidates)))
    head, tail = candidates[:window], candidates[window:]
    lookup = {skill.name: (skill, score) for skill, score in head}

    # Numbered one-line summaries of the candidates shown to the model.
    numbered: list[str] = []
    for position, (skill, _score) in enumerate(head, start=1):
        summary = skill_routing_card(skill)[:220].replace("\n", " ")
        numbered.append(f"{position}. {skill.name} — {summary}")

    history_block = ""
    if conversation:
        snippets: list[str] = []
        for message in conversation[-ROUTER_PROMPT_HISTORY_MSGS:]:
            if not isinstance(message, dict):
                continue
            speaker = str(message.get("role") or "user")
            content = str(message.get("content") or "").strip()
            if content:
                snippets.append(f"{speaker}: {content[:ROUTER_PROMPT_HISTORY_CHARS]}")
        if snippets:
            history_block = "\n\nConversation (recent):\n" + "\n".join(snippets)

    system_prompt = (
        "You reorder skill candidates by relevance to the user's task. "
        "Output ONLY JSON: {\"order\": [\"skill_name\", ...]} with each candidate "
        "skill name appearing exactly once, best match first. No extra keys."
    )
    user_prompt = (
        f"Routing focus:\n{route_query}{history_block}\n\nCandidates:\n" + "\n".join(numbered)
    )

    try:
        model_name = os.getenv("SKILLFORGE_HAIKU_RERANK_MODEL", "").strip() or ROUTER_MODEL
        response = await self.anthropic.messages.create(
            model=model_name,
            max_tokens=500,
            system=system_prompt,
            messages=[{"role": "user", "content": user_prompt}],
        )
        raw = response.content[0].text.strip()
        if raw.startswith("```"):
            # Strip a Markdown code fence and an optional "json" language tag.
            raw = raw.split("```")[1]
            if raw.startswith("json"):
                raw = raw[4:]
        payload = json.loads(raw.strip())
        requested = payload.get("order") or []

        reordered: list[tuple[Skill, float]] = []
        placed: set[str] = set()
        for name in requested:
            if isinstance(name, str) and name in lookup and name not in placed:
                reordered.append(lookup[name])
                placed.add(name)
        # Head candidates the model omitted keep their original relative order.
        reordered.extend((skill, score) for skill, score in head if skill.name not in placed)
        return reordered + tail
    except Exception:
        # Best effort: a failed call or unparsable reply keeps the original order.
        return candidates
|
|
752
|
+
|
|
326
753
|
def pick_final_embedding_only(self, candidates):
|
|
327
754
|
"""Pick up to MAX_ACTIVE_SKILLS from the shortlist order (similarity + weights). No LLM call."""
|
|
328
755
|
if not candidates:
|
|
@@ -332,26 +759,46 @@ class Router:
|
|
|
332
759
|
"embedding-only: top candidates by similarity and learned weights"
|
|
333
760
|
)
|
|
334
761
|
|
|
335
|
-
async def pick_final(
|
|
762
|
+
async def pick_final(
|
|
763
|
+
self,
|
|
764
|
+
prompt,
|
|
765
|
+
conversation,
|
|
766
|
+
candidates,
|
|
767
|
+
route_query: str | None = None,
|
|
768
|
+
):
|
|
769
|
+
rq = (route_query if route_query is not None else prompt) or ""
|
|
336
770
|
if self.anthropic is None:
|
|
337
771
|
return self.pick_final_embedding_only(candidates)
|
|
338
772
|
if not candidates:
|
|
339
773
|
return [], "no candidates available"
|
|
340
774
|
catalog = "\n".join(
|
|
341
|
-
f"- {s.name}: {s
|
|
775
|
+
f"- {s.name}: {skill_routing_card(s)[:ROUTER_CATALOG_PREVIEW_CHARS]}"
|
|
776
|
+
for s, _ in candidates
|
|
342
777
|
)
|
|
343
778
|
recent = ""
|
|
344
779
|
if conversation:
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
780
|
+
msgs = conversation[-ROUTER_PROMPT_HISTORY_MSGS:]
|
|
781
|
+
parts: list[str] = []
|
|
782
|
+
for m in msgs:
|
|
783
|
+
if not isinstance(m, dict):
|
|
784
|
+
continue
|
|
785
|
+
role = str(m.get("role") or "user")
|
|
786
|
+
c = str(m.get("content") or "").strip()
|
|
787
|
+
if not c:
|
|
788
|
+
continue
|
|
789
|
+
parts.append(f"{role}: {c[:ROUTER_PROMPT_HISTORY_CHARS]}")
|
|
790
|
+
if parts:
|
|
791
|
+
recent = "\n\nRecent conversation:\n" + "\n".join(parts)
|
|
348
792
|
sys = (
|
|
349
793
|
"You are a skill router. Given a user prompt and a candidate list of skills, "
|
|
350
794
|
f"pick 0 to {MAX_ACTIVE_SKILLS} skills that would genuinely help answer this prompt. "
|
|
351
795
|
"Be ruthless — only include a skill if it directly applies. Empty list is valid. "
|
|
352
796
|
'Respond ONLY in JSON: {"skills": ["name1","name2"], "reasoning": "one sentence"}'
|
|
353
797
|
)
|
|
354
|
-
user =
|
|
798
|
+
user = (
|
|
799
|
+
f"User prompt:\n{prompt}\n\nRouting context (retrieval query):\n{rq}{recent}"
|
|
800
|
+
f"\n\nCandidate skills:\n{catalog}"
|
|
801
|
+
)
|
|
355
802
|
try:
|
|
356
803
|
resp = await self.anthropic.messages.create(
|
|
357
804
|
model=ROUTER_MODEL,
|
|
@@ -381,6 +828,23 @@ def jaccard_change(old, new):
|
|
|
381
828
|
return 1.0 - (inter / union)
|
|
382
829
|
|
|
383
830
|
|
|
831
|
+
def format_context_items_markdown(context_items: list[dict[str, Any]]) -> str:
    """Render context items (skills and optional project files) as Markdown.

    Each item becomes a ``###`` heading followed by its text. Items with a
    truthy ``path`` are headed "File", all others "Skill". The heading notes
    the line range when both ``line_start`` and ``line_end`` are present and
    "(full document)" otherwise. Blocks are joined with a newline; an empty
    input gives an empty string.
    """

    def render(item: dict[str, Any]) -> str:
        first, last = item.get("line_start"), item.get("line_end")
        if first is None or last is None:
            span = " (full document)"
        else:
            span = f" (lines {first}-{last})"
        file_path = item.get("path")
        if file_path:
            heading = f"### File: `{file_path}`{span}"
        else:
            heading = f"### Skill: {item['skill']}{span}"
        return f"{heading}\n\n{item['text']}\n"

    return "\n".join(render(item) for item in context_items)
|
|
846
|
+
|
|
847
|
+
|
|
384
848
|
async def run_route_turn(
|
|
385
849
|
con: sqlite3.Connection,
|
|
386
850
|
router: Router,
|
|
@@ -388,15 +852,33 @@ async def run_route_turn(
|
|
|
388
852
|
conversation: list,
|
|
389
853
|
user_id: str = "",
|
|
390
854
|
session_id: str | None = None,
|
|
855
|
+
*,
|
|
856
|
+
project_root: str | None = None,
|
|
857
|
+
include_project_rag: bool = False,
|
|
391
858
|
) -> dict[str, Any]:
|
|
392
|
-
"""Shared routing + session + telemetry for
|
|
859
|
+
"""Shared routing + session + telemetry for MCP route_skills and ``skillforge route``.
|
|
393
860
|
|
|
394
861
|
Updates sessions, skill usage stats, and writes a route row to events.
|
|
395
862
|
"""
|
|
396
863
|
sid = session_id or str(uuid.uuid4())
|
|
397
864
|
t0 = time.time()
|
|
398
|
-
|
|
399
|
-
|
|
865
|
+
route_query = build_route_query_text(prompt, conversation)
|
|
866
|
+
candidates = router.shortlist(route_query, con, user_id=user_id)
|
|
867
|
+
candidates = await router.rerank_candidates_haiku(route_query, conversation, candidates)
|
|
868
|
+
picked_names, reasoning = await router.pick_final(
|
|
869
|
+
prompt, conversation, candidates, route_query=route_query
|
|
870
|
+
)
|
|
871
|
+
pr = (project_root or "").strip()
|
|
872
|
+
policies_cfg = load_route_policies_config(pr or None)
|
|
873
|
+
picked_names, policy_audit = merge_policy_includes(
|
|
874
|
+
prompt,
|
|
875
|
+
picked_names,
|
|
876
|
+
policies_cfg,
|
|
877
|
+
router._by_name,
|
|
878
|
+
con,
|
|
879
|
+
user_id,
|
|
880
|
+
max_active=MAX_ACTIVE_SKILLS,
|
|
881
|
+
)
|
|
400
882
|
route_ms = (time.time() - t0) * 1000
|
|
401
883
|
|
|
402
884
|
prev_active: set[str] = set()
|
|
@@ -410,6 +892,108 @@ async def run_route_turn(
|
|
|
410
892
|
change = jaccard_change(prev_active, set(picked_names))
|
|
411
893
|
rerouted = change >= REROUTE_THRESHOLD and bool(prev_active)
|
|
412
894
|
|
|
895
|
+
want_fusion = CONTEXT_FUSION and include_project_rag and bool(pr)
|
|
896
|
+
context_fusion: dict[str, Any] | None = None
|
|
897
|
+
context_items: list[dict[str, Any]] = []
|
|
898
|
+
|
|
899
|
+
proj_pool: list[dict[str, Any]] = []
|
|
900
|
+
proj_emb = np.zeros((0, int(router.embed_model.get_sentence_embedding_dimension())))
|
|
901
|
+
proj_rel = np.array([], dtype=np.float32)
|
|
902
|
+
|
|
903
|
+
if want_fusion:
|
|
904
|
+
try:
|
|
905
|
+
proj_pool, proj_emb, proj_rel = load_project_fusion_pool(
|
|
906
|
+
con, router.embed_model, prompt, FUSION_POOL_PROJECT
|
|
907
|
+
)
|
|
908
|
+
except Exception:
|
|
909
|
+
proj_pool = []
|
|
910
|
+
proj_emb = np.zeros((0, int(router.embed_model.get_sentence_embedding_dimension())))
|
|
911
|
+
proj_rel = np.array([], dtype=np.float32)
|
|
912
|
+
|
|
913
|
+
if want_fusion and proj_pool:
|
|
914
|
+
skill_pool, skill_emb, skill_rel = router.build_fusion_skill_pool(
|
|
915
|
+
prompt, picked_names, FUSION_POOL_SKILL
|
|
916
|
+
)
|
|
917
|
+
n_skill = len(skill_pool)
|
|
918
|
+
n_proj = len(proj_pool)
|
|
919
|
+
pool = skill_pool + proj_pool
|
|
920
|
+
if n_skill and n_proj:
|
|
921
|
+
em = np.vstack([skill_emb, proj_emb])
|
|
922
|
+
rel = np.concatenate([skill_rel, proj_rel])
|
|
923
|
+
elif n_skill:
|
|
924
|
+
em = skill_emb
|
|
925
|
+
rel = skill_rel
|
|
926
|
+
else:
|
|
927
|
+
em = proj_emb
|
|
928
|
+
rel = proj_rel
|
|
929
|
+
lens = np.array([len(c["text"]) for c in pool], dtype=np.int64)
|
|
930
|
+
ovh = np.array([
|
|
931
|
+
CONTEXT_OVERHEAD_SKILL if not c.get("path") else CONTEXT_OVERHEAD_FILE
|
|
932
|
+
for c in pool
|
|
933
|
+
], dtype=np.int64)
|
|
934
|
+
budget = _context_budget_unified()
|
|
935
|
+
order, mmr_trace = mmr_select(
|
|
936
|
+
em,
|
|
937
|
+
rel,
|
|
938
|
+
lens,
|
|
939
|
+
char_budget=budget,
|
|
940
|
+
overhead_per_chunk=ovh,
|
|
941
|
+
lambda_mult=CONTEXT_MMR_LAMBDA,
|
|
942
|
+
)
|
|
943
|
+
for rank, idx in enumerate(order, start=1):
|
|
944
|
+
item = dict(pool[idx])
|
|
945
|
+
item.pop("source", None)
|
|
946
|
+
tr = mmr_trace[rank - 1]
|
|
947
|
+
item["mmr_rank"] = rank
|
|
948
|
+
item["mmr_score"] = tr["mmr"]
|
|
949
|
+
item["retrieval_relevance"] = tr["relevance"]
|
|
950
|
+
item["max_sim_to_prior"] = tr["max_sim_to_selected"]
|
|
951
|
+
context_items.append(item)
|
|
952
|
+
context_fusion = {
|
|
953
|
+
"enabled": True,
|
|
954
|
+
"lambda": CONTEXT_MMR_LAMBDA,
|
|
955
|
+
"budget_chars": budget,
|
|
956
|
+
"pool_skill": n_skill,
|
|
957
|
+
"pool_project": n_proj,
|
|
958
|
+
"selected_count": len(context_items),
|
|
959
|
+
"mmr_trace": mmr_trace,
|
|
960
|
+
}
|
|
961
|
+
else:
|
|
962
|
+
context_items = router.build_context_items(prompt, picked_names)
|
|
963
|
+
if picked_names and not context_items:
|
|
964
|
+
context_items = [
|
|
965
|
+
{
|
|
966
|
+
"skill": n,
|
|
967
|
+
"path": None,
|
|
968
|
+
"line_start": None,
|
|
969
|
+
"line_end": None,
|
|
970
|
+
"text": router._by_name[n].body,
|
|
971
|
+
"score": 1.0,
|
|
972
|
+
}
|
|
973
|
+
for n in picked_names
|
|
974
|
+
if n in router._by_name
|
|
975
|
+
]
|
|
976
|
+
project_add: list[dict[str, Any]] = []
|
|
977
|
+
if include_project_rag and pr:
|
|
978
|
+
try:
|
|
979
|
+
project_add = retrieve_project_context_items(con, router.embed_model, prompt)
|
|
980
|
+
except Exception:
|
|
981
|
+
project_add = []
|
|
982
|
+
context_items = [*context_items, *project_add]
|
|
983
|
+
context_fusion = {"enabled": False}
|
|
984
|
+
|
|
985
|
+
project_rag_items_count = sum(1 for c in context_items if c.get("path"))
|
|
986
|
+
|
|
987
|
+
reasoning_out = reasoning
|
|
988
|
+
safe_prompt_snip = prompt[:300]
|
|
989
|
+
context_redaction_stats: dict[str, Any] = {"enabled": False, "secret_hits": 0, "path_hits": 0}
|
|
990
|
+
if redaction_enabled():
|
|
991
|
+
safe_prompt_snip, _ = redact_secret_patterns(prompt[:300])
|
|
992
|
+
sh, ph = sanitize_context_items(context_items)
|
|
993
|
+
context_redaction_stats = {"enabled": True, "secret_hits": sh, "path_hits": ph}
|
|
994
|
+
if reasoning_out:
|
|
995
|
+
reasoning_out, _ = redact_secret_patterns(reasoning_out)
|
|
996
|
+
|
|
413
997
|
con.execute(
|
|
414
998
|
"""INSERT INTO sessions (id, user_id, created_at, active_skills, turn_count) VALUES (?, ?, ?, ?, 1)
|
|
415
999
|
ON CONFLICT(id) DO UPDATE SET active_skills = ?, turn_count = turn_count + 1""",
|
|
@@ -423,225 +1007,44 @@ async def run_route_turn(
|
|
|
423
1007
|
"type": "route",
|
|
424
1008
|
"session_id": sid,
|
|
425
1009
|
"user_id": user_id,
|
|
426
|
-
"prompt":
|
|
1010
|
+
"prompt": safe_prompt_snip,
|
|
427
1011
|
"candidates": [{"name": s.name, "score": sc} for s, sc in candidates[:10]],
|
|
428
1012
|
"picked": picked_names,
|
|
429
|
-
"reasoning":
|
|
1013
|
+
"reasoning": reasoning_out,
|
|
430
1014
|
"rerouted": rerouted,
|
|
431
1015
|
"change_pct": round(change * 100, 1),
|
|
432
1016
|
"route_ms": round(route_ms, 1),
|
|
433
1017
|
"ts": time.time(),
|
|
1018
|
+
"context_mode": router.context_mode,
|
|
1019
|
+
"context_items_count": len(context_items),
|
|
1020
|
+
"project_rag_items_count": project_rag_items_count,
|
|
1021
|
+
"include_project_rag": bool(include_project_rag and pr),
|
|
1022
|
+
"context_fusion": context_fusion,
|
|
1023
|
+
"context_redaction": context_redaction_stats,
|
|
1024
|
+
"policy": {
|
|
1025
|
+
"rules_loaded": len(policies_cfg.get("rules") or []) if isinstance(policies_cfg.get("rules"), list) else 0,
|
|
1026
|
+
"audit": policy_audit,
|
|
1027
|
+
},
|
|
1028
|
+
"chunk_sources_preview": [
|
|
1029
|
+
{
|
|
1030
|
+
"skill": c.get("skill"),
|
|
1031
|
+
"path": c.get("path"),
|
|
1032
|
+
"line_start": c.get("line_start"),
|
|
1033
|
+
"line_end": c.get("line_end"),
|
|
1034
|
+
"mmr_rank": c.get("mmr_rank"),
|
|
1035
|
+
}
|
|
1036
|
+
for c in context_items[:24]
|
|
1037
|
+
],
|
|
434
1038
|
}
|
|
435
1039
|
log_event(con, sid, "route", event, user_id=user_id)
|
|
436
1040
|
return {
|
|
437
1041
|
"session_id": sid,
|
|
438
1042
|
"picked_names": picked_names,
|
|
439
|
-
"reasoning":
|
|
1043
|
+
"reasoning": reasoning_out,
|
|
440
1044
|
"candidates": candidates,
|
|
441
1045
|
"route_ms": route_ms,
|
|
442
1046
|
"rerouted": rerouted,
|
|
443
1047
|
"change": change,
|
|
444
1048
|
"event": event,
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
# ---------- App ----------
|
|
449
|
-
app_state: dict[str, Any] = {}
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
@asynccontextmanager
|
|
453
|
-
async def lifespan(app: FastAPI):
|
|
454
|
-
print(f"[skillforge] Loading skills from {BUNDLED_SKILLS} + {USER_SKILLS}")
|
|
455
|
-
skills = load_all_skills()
|
|
456
|
-
print(f"[skillforge] Loaded {len(skills)} skills")
|
|
457
|
-
if not skills:
|
|
458
|
-
print("[skillforge] WARNING: no skills found")
|
|
459
|
-
embed_model = SentenceTransformer(EMBED_MODEL)
|
|
460
|
-
anthropic = AsyncAnthropic()
|
|
461
|
-
router_anthropic = None if SKILLFORGE_ROUTER_MODE == "embedding" else anthropic
|
|
462
|
-
if router_anthropic is None:
|
|
463
|
-
print("[skillforge] Router mode: embedding-only (Haiku step skipped; /chat still uses ANSWER model)")
|
|
464
|
-
print("[skillforge] Live usage (terminal): skillforge events --watch")
|
|
465
|
-
router = Router(skills, embed_model, router_anthropic)
|
|
466
|
-
con = init_db()
|
|
467
|
-
app_state.update(
|
|
468
|
-
skills={s.name: s for s in skills},
|
|
469
|
-
router=router,
|
|
470
|
-
anthropic=anthropic,
|
|
471
|
-
con=con,
|
|
472
|
-
)
|
|
473
|
-
yield
|
|
474
|
-
con.close()
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
app = FastAPI(lifespan=lifespan, title="skillforge")
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
class ChatRequest(BaseModel):
|
|
481
|
-
prompt: str
|
|
482
|
-
session_id: str | None = None
|
|
483
|
-
conversation: list[dict] = []
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
class FeedbackRequest(BaseModel):
|
|
487
|
-
session_id: str
|
|
488
|
-
skill_name: str
|
|
489
|
-
thumbs: int
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
class DisableRequest(BaseModel):
|
|
493
|
-
skill_name: str
|
|
494
|
-
disabled: bool
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
@app.post("/chat")
|
|
498
|
-
async def chat(req: ChatRequest, request: Request):
|
|
499
|
-
from app.auth import resolve_user
|
|
500
|
-
user_id = resolve_user(request)
|
|
501
|
-
router: Router = app_state["router"]
|
|
502
|
-
con = app_state["con"]
|
|
503
|
-
anthropic: AsyncAnthropic = app_state["anthropic"]
|
|
504
|
-
|
|
505
|
-
result = await run_route_turn(
|
|
506
|
-
con,
|
|
507
|
-
router,
|
|
508
|
-
req.prompt,
|
|
509
|
-
req.conversation,
|
|
510
|
-
user_id=user_id,
|
|
511
|
-
session_id=req.session_id,
|
|
512
|
-
)
|
|
513
|
-
session_id = result["session_id"]
|
|
514
|
-
picked_names = result["picked_names"]
|
|
515
|
-
|
|
516
|
-
skills_map = app_state["skills"]
|
|
517
|
-
skill_blocks = []
|
|
518
|
-
for n in picked_names:
|
|
519
|
-
s = skills_map.get(n)
|
|
520
|
-
if s:
|
|
521
|
-
skill_blocks.append(f'<skill name="{s.name}">\n{s.body}\n</skill>')
|
|
522
|
-
system_prompt = (
|
|
523
|
-
"You are a helpful assistant. The following skills have been dynamically loaded "
|
|
524
|
-
"for this turn based on the user's request. Use them when relevant; ignore them when not.\n\n"
|
|
525
|
-
+ "\n\n".join(skill_blocks)
|
|
526
|
-
) if skill_blocks else "You are a helpful assistant."
|
|
527
|
-
|
|
528
|
-
messages = req.conversation + [{"role": "user", "content": req.prompt}]
|
|
529
|
-
|
|
530
|
-
async def stream():
|
|
531
|
-
full_text = []
|
|
532
|
-
try:
|
|
533
|
-
async with anthropic.messages.stream(
|
|
534
|
-
model=ANSWER_MODEL,
|
|
535
|
-
max_tokens=4096,
|
|
536
|
-
system=system_prompt,
|
|
537
|
-
messages=messages,
|
|
538
|
-
) as s:
|
|
539
|
-
async for chunk in s.text_stream:
|
|
540
|
-
full_text.append(chunk)
|
|
541
|
-
yield f"data: {json.dumps({'delta': chunk})}\n\n"
|
|
542
|
-
except Exception as e:
|
|
543
|
-
yield f"data: {json.dumps({'error': str(e)})}\n\n"
|
|
544
|
-
return
|
|
545
|
-
response_text = "".join(full_text)
|
|
546
|
-
for n in picked_names:
|
|
547
|
-
s = skills_map.get(n)
|
|
548
|
-
if not s:
|
|
549
|
-
continue
|
|
550
|
-
keywords = [w for w in s.body.split()[:50] if len(w) > 6][:5]
|
|
551
|
-
hits = sum(1 for kw in keywords if kw.lower() in response_text.lower())
|
|
552
|
-
if hits >= 2 or s.name in response_text.lower():
|
|
553
|
-
update_skill_stat(con, n, "referenced", 1, user_id=user_id)
|
|
554
|
-
yield f"data: {json.dumps({'done': True, 'session_id': session_id, 'picked': picked_names})}\n\n"
|
|
555
|
-
|
|
556
|
-
return StreamingResponse(stream(), media_type="text/event-stream")
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
@app.post("/feedback")
|
|
560
|
-
def feedback(req: FeedbackRequest, request: Request):
|
|
561
|
-
from app.auth import resolve_user
|
|
562
|
-
user_id = resolve_user(request)
|
|
563
|
-
con = app_state["con"]
|
|
564
|
-
field = "thumbs_up" if req.thumbs > 0 else "thumbs_down"
|
|
565
|
-
update_skill_stat(con, req.skill_name, field, 1, user_id=user_id)
|
|
566
|
-
log_event(con, req.session_id, "feedback",
|
|
567
|
-
{"skill": req.skill_name, "thumbs": req.thumbs},
|
|
568
|
-
user_id=user_id)
|
|
569
|
-
return {"ok": True}
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
@app.post("/skills/disable")
|
|
573
|
-
def disable(req: DisableRequest, request: Request):
|
|
574
|
-
from app.auth import resolve_user
|
|
575
|
-
user_id = resolve_user(request)
|
|
576
|
-
con = app_state["con"]
|
|
577
|
-
set_skill_disabled(con, req.skill_name, req.disabled, user_id=user_id)
|
|
578
|
-
return {"ok": True}
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
@app.get("/skills")
|
|
582
|
-
def list_skills(request: Request):
|
|
583
|
-
from app.auth import resolve_user
|
|
584
|
-
user_id = resolve_user(request)
|
|
585
|
-
con = app_state["con"]
|
|
586
|
-
skills_map = app_state["skills"]
|
|
587
|
-
out = []
|
|
588
|
-
for name, s in skills_map.items():
|
|
589
|
-
cur = con.execute(
|
|
590
|
-
"SELECT weight, uses, referenced, thumbs_up, thumbs_down, disabled FROM skill_weights WHERE user_id = ? AND skill_name = ?",
|
|
591
|
-
(user_id, name),
|
|
592
|
-
)
|
|
593
|
-
row = cur.fetchone()
|
|
594
|
-
weight, uses, ref, up, down, disabled = row if row else (0.0, 0, 0, 0, 0, 0)
|
|
595
|
-
out.append({
|
|
596
|
-
"name": name,
|
|
597
|
-
"title": s.title,
|
|
598
|
-
"description": s.description[:200],
|
|
599
|
-
"source": s.source,
|
|
600
|
-
"weight": weight,
|
|
601
|
-
"uses": uses,
|
|
602
|
-
"referenced": ref,
|
|
603
|
-
"thumbs_up": up,
|
|
604
|
-
"thumbs_down": down,
|
|
605
|
-
"disabled": bool(disabled),
|
|
606
|
-
})
|
|
607
|
-
out.sort(key=lambda x: -x["uses"])
|
|
608
|
-
return out
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
@app.get("/events")
|
|
612
|
-
def recent_events(request: Request, limit: int = 50):
|
|
613
|
-
from app.auth import resolve_user, auth_enabled
|
|
614
|
-
user_id = resolve_user(request)
|
|
615
|
-
con = app_state["con"]
|
|
616
|
-
if auth_enabled():
|
|
617
|
-
cur = con.execute(
|
|
618
|
-
"SELECT ts, session_id, event_type, payload FROM events WHERE user_id = ? ORDER BY ts DESC LIMIT ?",
|
|
619
|
-
(user_id, limit),
|
|
620
|
-
)
|
|
621
|
-
else:
|
|
622
|
-
cur = con.execute(
|
|
623
|
-
"SELECT ts, session_id, event_type, payload FROM events ORDER BY ts DESC LIMIT ?",
|
|
624
|
-
(limit,),
|
|
625
|
-
)
|
|
626
|
-
return [
|
|
627
|
-
{"ts": ts, "session_id": sid, "type": et, "payload": json.loads(p)}
|
|
628
|
-
for ts, sid, et, p in cur.fetchall()
|
|
629
|
-
]
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
@app.get("/")
|
|
633
|
-
def root():
|
|
634
|
-
return {
|
|
635
|
-
"service": "skillforge",
|
|
636
|
-
"docs": "POST /chat, GET /events, GET /skills, GET /healthz",
|
|
637
|
-
"live_log": "skillforge events --watch",
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
@app.get("/healthz")
|
|
642
|
-
def health():
|
|
643
|
-
return {
|
|
644
|
-
"skills_loaded": len(app_state.get("skills", {})),
|
|
645
|
-
"ok": True,
|
|
646
|
-
"live_log": "skillforge events --watch",
|
|
1049
|
+
"context_items": context_items,
|
|
647
1050
|
}
|