@heytherevibin/skillforge 0.2.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +44 -53
- package/RELEASING.md +1 -1
- package/SECURITY.md +2 -2
- package/STRATEGY.md +1 -3
- package/bin/cli.js +32 -138
- package/package.json +2 -2
- package/python/app/chunking.py +116 -0
- package/python/app/context_fusion.py +77 -0
- package/python/app/events_cli.py +1 -1
- package/python/app/index_cli.py +89 -0
- package/python/app/main.py +380 -214
- package/python/app/mcp_contract.py +121 -0
- package/python/app/mcp_server.py +80 -28
- package/python/app/project_index.py +600 -0
- package/python/app/redaction.py +128 -0
- package/python/app/route_cli.py +42 -19
- package/python/requirements.txt +0 -4
- package/python/tests/test_chunking.py +34 -0
- package/python/tests/test_context_fusion.py +45 -0
- package/python/tests/test_mcp_contract.py +137 -0
- package/python/tests/test_project_index.py +76 -0
- package/python/tests/test_redaction.py +51 -0
- package/python/app/auth.py +0 -63
- package/python/app/cli.py +0 -78
package/python/app/main.py
CHANGED
|
@@ -4,7 +4,6 @@ skillforge — skill orchestrator co-tool for Claude (MCP-first).
|
|
|
4
4
|
Primary surface: MCP stdio — route_skills and related tools for hosts
|
|
5
5
|
(Claude Desktop, Cursor, Claude Code).
|
|
6
6
|
|
|
7
|
-
Optional: headless HTTP API (POST /chat, /events, …) for integrations.
|
|
8
7
|
Live usage: `skillforge events --watch` (terminal).
|
|
9
8
|
"""
|
|
10
9
|
from __future__ import annotations
|
|
@@ -16,19 +15,24 @@ import sqlite3
|
|
|
16
15
|
import sys
|
|
17
16
|
import time
|
|
18
17
|
import uuid
|
|
19
|
-
from contextlib import asynccontextmanager
|
|
20
18
|
from dataclasses import dataclass
|
|
21
19
|
from pathlib import Path
|
|
22
20
|
from typing import Any, Optional
|
|
23
21
|
|
|
24
22
|
import numpy as np
|
|
25
23
|
from anthropic import AsyncAnthropic
|
|
26
|
-
from fastapi import FastAPI, Request
|
|
27
|
-
from fastapi.responses import StreamingResponse
|
|
28
|
-
from pydantic import BaseModel
|
|
29
24
|
from sentence_transformers import SentenceTransformer
|
|
30
25
|
|
|
31
26
|
from app.db_paths import global_db_path, resolve_orchestrator_db
|
|
27
|
+
from app.chunking import SkillChunk, chunk_max_chars, chunk_overlap_chars, chunk_skill_body
|
|
28
|
+
from app.context_fusion import mmr_select
|
|
29
|
+
from app.project_index import (
|
|
30
|
+
ensure_project_index_schema,
|
|
31
|
+
load_project_fusion_pool,
|
|
32
|
+
project_rag_max_chars,
|
|
33
|
+
retrieve_project_context_items,
|
|
34
|
+
)
|
|
35
|
+
from app.redaction import redaction_enabled, redact_secret_patterns, sanitize_context_items
|
|
32
36
|
|
|
33
37
|
# ---------- Config (env-driven so the Node wrapper controls paths) ----------
|
|
34
38
|
BUNDLED_SKILLS = Path(os.getenv("SKILLFORGE_BUNDLED_SKILLS", "./skills"))
|
|
@@ -40,12 +44,28 @@ DB_PATH = global_db_path()
|
|
|
40
44
|
|
|
41
45
|
EMBED_MODEL = os.getenv("SKILLFORGE_EMBED_MODEL", "all-MiniLM-L6-v2")
|
|
42
46
|
ROUTER_MODEL = os.getenv("SKILLFORGE_ROUTER_MODEL", "claude-haiku-4-5-20251001")
|
|
43
|
-
ANSWER_MODEL = os.getenv("SKILLFORGE_ANSWER_MODEL", "claude-opus-4-7")
|
|
44
47
|
TOP_K_CANDIDATES = int(os.getenv("SKILLFORGE_TOP_K", "15"))
|
|
45
48
|
MAX_ACTIVE_SKILLS = int(os.getenv("SKILLFORGE_MAX_ACTIVE", "7"))
|
|
46
49
|
REROUTE_THRESHOLD = float(os.getenv("SKILLFORGE_REROUTE_THRESHOLD", "0.4"))
|
|
47
50
|
# "" | "full" | "embedding" — embedding skips Haiku and takes top skills from the shortlist only.
|
|
48
51
|
SKILLFORGE_ROUTER_MODE = os.getenv("SKILLFORGE_ROUTER_MODE", "").strip().lower()
|
|
52
|
+
# chunks: RAG-style line-bounded chunks from picked skills. full_body: inject entire SKILL.md per pick (legacy).
|
|
53
|
+
SKILLFORGE_CONTEXT_MODE = os.getenv("SKILLFORGE_CONTEXT_MODE", "chunks").strip().lower()
|
|
54
|
+
ROUTE_MAX_CONTEXT_CHARS = int(os.getenv("SKILLFORGE_ROUTE_MAX_CHARS", "60000"))
|
|
55
|
+
CONTEXT_FUSION = os.getenv("SKILLFORGE_CONTEXT_FUSION", "1").strip().lower() not in ("0", "false", "no", "")
|
|
56
|
+
CONTEXT_MMR_LAMBDA = max(0.0, min(1.0, float(os.getenv("SKILLFORGE_CONTEXT_MMR_LAMBDA", "0.7"))))
|
|
57
|
+
FUSION_POOL_SKILL = max(8, int(os.getenv("SKILLFORGE_FUSION_POOL_SKILL", "96")))
|
|
58
|
+
FUSION_POOL_PROJECT = max(8, int(os.getenv("SKILLFORGE_FUSION_POOL_PROJECT", "96")))
|
|
59
|
+
FUSION_FULL_BODY_PREVIEW_CHARS = max(400, int(os.getenv("SKILLFORGE_FUSION_FULL_BODY_PREVIEW_CHARS", "4000")))
|
|
60
|
+
CONTEXT_OVERHEAD_SKILL = 48
|
|
61
|
+
CONTEXT_OVERHEAD_FILE = 56
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _context_budget_unified() -> int:
    """Return the single character budget used when skill and project context are fused.

    ``SKILLFORGE_CONTEXT_BUDGET_CHARS`` takes precedence when it holds an integer
    (floored at 4000). When it is unset, blank, or not an integer, the budget is
    ``ROUTE_MAX_CONTEXT_CHARS`` plus ``project_rag_max_chars()``.
    """
    raw = os.getenv("SKILLFORGE_CONTEXT_BUDGET_CHARS", "").strip()
    if raw:
        try:
            return max(4000, int(raw))
        except ValueError:
            # This is evaluated during a route turn, so a typo in the variable
            # must degrade to the default budget instead of aborting routing.
            print(
                f"[skillforge] Ignoring non-integer SKILLFORGE_CONTEXT_BUDGET_CHARS={raw!r}",
                file=sys.stderr,
            )
    return ROUTE_MAX_CONTEXT_CHARS + int(project_rag_max_chars())
|
|
49
69
|
|
|
50
70
|
|
|
51
71
|
def build_router_and_skills(
|
|
@@ -235,6 +255,7 @@ def init_db(db_file: Path | None = None):
|
|
|
235
255
|
con.execute(f"ALTER TABLE {table} ADD COLUMN user_id TEXT DEFAULT ''")
|
|
236
256
|
except sqlite3.OperationalError:
|
|
237
257
|
pass # already exists
|
|
258
|
+
ensure_project_index_schema(con)
|
|
238
259
|
con.commit()
|
|
239
260
|
return con
|
|
240
261
|
|
|
@@ -299,13 +320,50 @@ class Router:
|
|
|
299
320
|
self.skills = skills
|
|
300
321
|
self.embed_model = embed_model
|
|
301
322
|
self.anthropic = anthropic
|
|
323
|
+
self.context_mode = SKILLFORGE_CONTEXT_MODE if SKILLFORGE_CONTEXT_MODE in (
|
|
324
|
+
"chunks",
|
|
325
|
+
"full_body",
|
|
326
|
+
) else "chunks"
|
|
327
|
+
self._by_name: dict[str, Skill] = {s.name: s for s in skills}
|
|
302
328
|
texts = [f"{s.title}: {s.description}" for s in skills]
|
|
303
|
-
print(f"[skillforge] Embedding {len(skills)} skills...")
|
|
329
|
+
print(f"[skillforge] Embedding {len(skills)} skills (summary)...", file=sys.stderr)
|
|
304
330
|
embeddings = embed_model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
|
|
305
331
|
for s, e in zip(skills, embeddings):
|
|
306
332
|
s.embedding = e / np.linalg.norm(e)
|
|
307
333
|
self.matrix = np.stack([s.embedding for s in skills]) if skills else np.zeros((0, 0))
|
|
308
|
-
|
|
334
|
+
|
|
335
|
+
# Chunk index for CONTEXT_MODE=chunks
|
|
336
|
+
self._chunk_meta: list[tuple[str, SkillChunk]] = []
|
|
337
|
+
edim = int(embed_model.get_sentence_embedding_dimension())
|
|
338
|
+
self._chunk_embeddings: np.ndarray = np.zeros((0, edim))
|
|
339
|
+
if self.context_mode == "chunks" and skills:
|
|
340
|
+
flat_texts: list[str] = []
|
|
341
|
+
self._chunk_meta = []
|
|
342
|
+
mc = chunk_max_chars()
|
|
343
|
+
oc = chunk_overlap_chars()
|
|
344
|
+
for s in skills:
|
|
345
|
+
for ch in chunk_skill_body(s.body, max_chars=mc, overlap=oc):
|
|
346
|
+
# Embed with in-chunk disambiguation
|
|
347
|
+
flat_texts.append(f"{s.title} — {s.name}\n{ch.text}")
|
|
348
|
+
self._chunk_meta.append((s.name, ch))
|
|
349
|
+
if flat_texts:
|
|
350
|
+
print(f"[skillforge] Embedding {len(flat_texts)} skill chunks...", file=sys.stderr)
|
|
351
|
+
ce = embed_model.encode(
|
|
352
|
+
flat_texts, show_progress_bar=False, convert_to_numpy=True
|
|
353
|
+
)
|
|
354
|
+
ce = ce / np.linalg.norm(ce, axis=1, keepdims=True)
|
|
355
|
+
self._chunk_embeddings = ce
|
|
356
|
+
print(
|
|
357
|
+
f"[skillforge] Ready. {len(skills)} skills; chunk matrix {self._chunk_embeddings.shape}; "
|
|
358
|
+
f"context_mode={self.context_mode}",
|
|
359
|
+
file=sys.stderr,
|
|
360
|
+
)
|
|
361
|
+
else:
|
|
362
|
+
print(
|
|
363
|
+
f"[skillforge] Ready. {len(skills)} skills, matrix shape: {self.matrix.shape}; "
|
|
364
|
+
f"context_mode={self.context_mode}",
|
|
365
|
+
file=sys.stderr,
|
|
366
|
+
)
|
|
309
367
|
|
|
310
368
|
def shortlist(self, prompt, con, k=TOP_K_CANDIDATES, user_id=""):
|
|
311
369
|
if len(self.skills) == 0:
|
|
@@ -323,6 +381,176 @@ class Router:
|
|
|
323
381
|
top_idx = np.argsort(-biased)[:k]
|
|
324
382
|
return [(self.skills[i], float(sims[i])) for i in top_idx if biased[i] > -100]
|
|
325
383
|
|
|
384
|
+
def build_context_items(
|
|
385
|
+
self,
|
|
386
|
+
prompt: str,
|
|
387
|
+
skill_names: list[str],
|
|
388
|
+
max_total_chars: int | None = None,
|
|
389
|
+
) -> list[dict[str, Any]]:
|
|
390
|
+
"""Return ordered context dicts: skill, line_start, line_end, text, score."""
|
|
391
|
+
cap = max_total_chars if max_total_chars is not None else ROUTE_MAX_CONTEXT_CHARS
|
|
392
|
+
if self.context_mode == "full_body":
|
|
393
|
+
out: list[dict[str, Any]] = []
|
|
394
|
+
for n in skill_names:
|
|
395
|
+
s = self._by_name.get(n)
|
|
396
|
+
if not s:
|
|
397
|
+
continue
|
|
398
|
+
out.append({
|
|
399
|
+
"skill": n,
|
|
400
|
+
"path": None,
|
|
401
|
+
"line_start": None,
|
|
402
|
+
"line_end": None,
|
|
403
|
+
"text": s.body,
|
|
404
|
+
"score": 1.0,
|
|
405
|
+
})
|
|
406
|
+
return out
|
|
407
|
+
if not skill_names or self._chunk_embeddings.shape[0] == 0:
|
|
408
|
+
return []
|
|
409
|
+
allowed = set(skill_names)
|
|
410
|
+
indices = [i for i, (sn, _) in enumerate(self._chunk_meta) if sn in allowed]
|
|
411
|
+
if not indices:
|
|
412
|
+
return []
|
|
413
|
+
qv = self.embed_model.encode(prompt, convert_to_numpy=True)
|
|
414
|
+
qv = qv / np.linalg.norm(qv)
|
|
415
|
+
sub = self._chunk_embeddings[indices]
|
|
416
|
+
scores = (sub @ qv).flatten()
|
|
417
|
+
order = np.argsort(-scores)
|
|
418
|
+
out = []
|
|
419
|
+
total = 0
|
|
420
|
+
overhead = CONTEXT_OVERHEAD_SKILL
|
|
421
|
+
for o in order:
|
|
422
|
+
idx = indices[int(o)]
|
|
423
|
+
sn, ch = self._chunk_meta[idx]
|
|
424
|
+
piece_len = len(ch.text) + overhead
|
|
425
|
+
if total + piece_len > cap:
|
|
426
|
+
continue
|
|
427
|
+
out.append({
|
|
428
|
+
"skill": sn,
|
|
429
|
+
"path": None,
|
|
430
|
+
"line_start": ch.line_start,
|
|
431
|
+
"line_end": ch.line_end,
|
|
432
|
+
"text": ch.text,
|
|
433
|
+
"score": float(scores[int(o)]),
|
|
434
|
+
})
|
|
435
|
+
total += piece_len
|
|
436
|
+
return out
|
|
437
|
+
|
|
438
|
+
def build_fusion_skill_pool(
    self,
    prompt: str,
    skill_names: list[str],
    pool_limit: int,
) -> tuple[list[dict[str, Any]], np.ndarray, np.ndarray]:
    """Candidate skill chunks (or one row per skill in full_body) with embeddings for MMR.

    Args:
        prompt: Text to embed; candidates are ranked by dot product with it.
        skill_names: Names of the picked skills that may contribute candidates.
        pool_limit: Maximum number of candidates to return. A non-positive
            limit yields an empty pool.

    Returns:
        ``(items, embeddings, relevance)`` with matching row order, best first.
        ``items`` are context dicts tagged ``"source": "skill"``. An empty pool
        is ``([], zeros((0, dim)), empty float32 array)``.
    """
    edim = int(self.embed_model.get_sentence_embedding_dimension())
    if not skill_names or pool_limit <= 0:
        # np.stack() rejects an empty row list, so an empty selection is
        # answered up front instead of reaching the stacking step.
        return [], np.zeros((0, edim)), np.array([], dtype=np.float32)

    qv = np.asarray(
        self.embed_model.encode(prompt, convert_to_numpy=True), dtype=np.float32
    ).reshape(-1)
    qv = qv / max(float(np.linalg.norm(qv)), 1e-12)

    # full_body mode and a missing chunk index both use one row per skill; the
    # helper holds that logic so it exists in exactly one place.
    if self.context_mode == "full_body" or self._chunk_embeddings.shape[0] == 0:
        return self._fusion_skill_pool_fallback_bodies(skill_names, qv, pool_limit)

    allowed = set(skill_names)
    indices = [i for i, (owner, _) in enumerate(self._chunk_meta) if owner in allowed]
    if not indices:
        return self._fusion_skill_pool_fallback_bodies(skill_names, qv, pool_limit)

    sub = self._chunk_embeddings[indices]
    scores = (sub @ qv).flatten()
    top = [int(p) for p in np.argsort(-scores)[:pool_limit]]

    items: list[dict[str, Any]] = []
    for pos in top:
        owner, chunk = self._chunk_meta[indices[pos]]
        items.append({
            "skill": owner,
            "path": None,
            "line_start": chunk.line_start,
            "line_end": chunk.line_end,
            "text": chunk.text,
            "score": float(scores[pos]),
            "source": "skill",
        })
    relevance = np.asarray([float(scores[pos]) for pos in top], dtype=np.float32)
    return items, sub[top], relevance
|
|
514
|
+
|
|
515
|
+
def _fusion_skill_pool_fallback_bodies(
    self,
    skill_names: list[str],
    qv: np.ndarray,
    pool_limit: int,
) -> tuple[list[dict[str, Any]], np.ndarray, np.ndarray]:
    """Build a fusion pool with one whole-body candidate per known skill.

    Each known skill is embedded from its title, name and the first
    ``FUSION_FULL_BODY_PREVIEW_CHARS`` characters of its body, then ranked by
    dot product with ``qv``. The returned item text is the full body, not the
    preview.

    Args:
        skill_names: Candidate skill names; unknown names are dropped.
        qv: Query embedding to score against (callers pass it normalised).
        pool_limit: Maximum number of candidates. A non-positive limit yields
            an empty pool.

    Returns:
        ``(items, embeddings, relevance)`` with matching row order, best first.
    """
    edim = int(self.embed_model.get_sentence_embedding_dimension())
    ordered = [n for n in skill_names if n in self._by_name]
    if not ordered or pool_limit <= 0:
        # np.stack() raises on an empty row list, and a negative limit would
        # slice from the wrong end, so both cases return the empty pool here.
        return [], np.zeros((0, edim)), np.array([], dtype=np.float32)

    texts = [
        f"{self._by_name[n].title} — {n}\n{(self._by_name[n].body or '')[:FUSION_FULL_BODY_PREVIEW_CHARS]}"
        for n in ordered
    ]
    em = np.asarray(
        self.embed_model.encode(texts, show_progress_bar=False, convert_to_numpy=True),
        dtype=np.float32,
    )
    em = em / np.maximum(np.linalg.norm(em, axis=1, keepdims=True), 1e-12)
    rel = (em @ qv).flatten()
    top = [int(i) for i in np.argsort(-rel)[:pool_limit]]

    items: list[dict[str, Any]] = [
        {
            "skill": ordered[i],
            "path": None,
            "line_start": None,
            "line_end": None,
            "text": self._by_name[ordered[i]].body,
            "score": float(rel[i]),
            "source": "skill",
        }
        for i in top
    ]
    relevance = np.asarray([float(rel[i]) for i in top], dtype=np.float32)
    return items, em[top], relevance
|
|
553
|
+
|
|
326
554
|
def pick_final_embedding_only(self, candidates):
|
|
327
555
|
"""Pick up to MAX_ACTIVE_SKILLS from the shortlist order (similarity + weights). No LLM call."""
|
|
328
556
|
if not candidates:
|
|
@@ -381,6 +609,23 @@ def jaccard_change(old, new):
|
|
|
381
609
|
return 1.0 - (inter / union)
|
|
382
610
|
|
|
383
611
|
|
|
612
|
+
def format_context_items_markdown(context_items: list[dict[str, Any]]) -> str:
    """Render context items (skill chunks and optional project files) as Markdown sections.

    Each item becomes a ``###`` heading followed by its text. Items with a
    truthy ``path`` are headed as files, all others as skills. The heading
    carries a line range when both ``line_start`` and ``line_end`` are set and
    ``(full document)`` otherwise. Sections are joined with a newline.
    """

    def render(entry: dict[str, Any]) -> str:
        first, last = entry.get("line_start"), entry.get("line_end")
        if first is None or last is None:
            span = " (full document)"
        else:
            span = f" (lines {first}-{last})"
        file_path = entry.get("path")
        if file_path:
            heading = f"### File: `{file_path}`"
        else:
            heading = f"### Skill: {entry['skill']}"
        return f"{heading}{span}\n\n{entry['text']}\n"

    return "\n".join(map(render, context_items))
|
|
627
|
+
|
|
628
|
+
|
|
384
629
|
async def run_route_turn(
|
|
385
630
|
con: sqlite3.Connection,
|
|
386
631
|
router: Router,
|
|
@@ -388,8 +633,11 @@ async def run_route_turn(
|
|
|
388
633
|
conversation: list,
|
|
389
634
|
user_id: str = "",
|
|
390
635
|
session_id: str | None = None,
|
|
636
|
+
*,
|
|
637
|
+
project_root: str | None = None,
|
|
638
|
+
include_project_rag: bool = False,
|
|
391
639
|
) -> dict[str, Any]:
|
|
392
|
-
"""Shared routing + session + telemetry for
|
|
640
|
+
"""Shared routing + session + telemetry for MCP route_skills and ``skillforge route``.
|
|
393
641
|
|
|
394
642
|
Updates sessions, skill usage stats, and writes a route row to events.
|
|
395
643
|
"""
|
|
@@ -410,6 +658,109 @@ async def run_route_turn(
|
|
|
410
658
|
change = jaccard_change(prev_active, set(picked_names))
|
|
411
659
|
rerouted = change >= REROUTE_THRESHOLD and bool(prev_active)
|
|
412
660
|
|
|
661
|
+
pr = (project_root or "").strip()
|
|
662
|
+
want_fusion = CONTEXT_FUSION and include_project_rag and bool(pr)
|
|
663
|
+
context_fusion: dict[str, Any] | None = None
|
|
664
|
+
context_items: list[dict[str, Any]] = []
|
|
665
|
+
|
|
666
|
+
proj_pool: list[dict[str, Any]] = []
|
|
667
|
+
proj_emb = np.zeros((0, int(router.embed_model.get_sentence_embedding_dimension())))
|
|
668
|
+
proj_rel = np.array([], dtype=np.float32)
|
|
669
|
+
|
|
670
|
+
if want_fusion:
|
|
671
|
+
try:
|
|
672
|
+
proj_pool, proj_emb, proj_rel = load_project_fusion_pool(
|
|
673
|
+
con, router.embed_model, prompt, FUSION_POOL_PROJECT
|
|
674
|
+
)
|
|
675
|
+
except Exception:
|
|
676
|
+
proj_pool = []
|
|
677
|
+
proj_emb = np.zeros((0, int(router.embed_model.get_sentence_embedding_dimension())))
|
|
678
|
+
proj_rel = np.array([], dtype=np.float32)
|
|
679
|
+
|
|
680
|
+
if want_fusion and proj_pool:
|
|
681
|
+
skill_pool, skill_emb, skill_rel = router.build_fusion_skill_pool(
|
|
682
|
+
prompt, picked_names, FUSION_POOL_SKILL
|
|
683
|
+
)
|
|
684
|
+
n_skill = len(skill_pool)
|
|
685
|
+
n_proj = len(proj_pool)
|
|
686
|
+
pool = skill_pool + proj_pool
|
|
687
|
+
if n_skill and n_proj:
|
|
688
|
+
em = np.vstack([skill_emb, proj_emb])
|
|
689
|
+
rel = np.concatenate([skill_rel, proj_rel])
|
|
690
|
+
elif n_skill:
|
|
691
|
+
em = skill_emb
|
|
692
|
+
rel = skill_rel
|
|
693
|
+
else:
|
|
694
|
+
em = proj_emb
|
|
695
|
+
rel = proj_rel
|
|
696
|
+
lens = np.array([len(c["text"]) for c in pool], dtype=np.int64)
|
|
697
|
+
ovh = np.array([
|
|
698
|
+
CONTEXT_OVERHEAD_SKILL if not c.get("path") else CONTEXT_OVERHEAD_FILE
|
|
699
|
+
for c in pool
|
|
700
|
+
], dtype=np.int64)
|
|
701
|
+
budget = _context_budget_unified()
|
|
702
|
+
order, mmr_trace = mmr_select(
|
|
703
|
+
em,
|
|
704
|
+
rel,
|
|
705
|
+
lens,
|
|
706
|
+
char_budget=budget,
|
|
707
|
+
overhead_per_chunk=ovh,
|
|
708
|
+
lambda_mult=CONTEXT_MMR_LAMBDA,
|
|
709
|
+
)
|
|
710
|
+
for rank, idx in enumerate(order, start=1):
|
|
711
|
+
item = dict(pool[idx])
|
|
712
|
+
item.pop("source", None)
|
|
713
|
+
tr = mmr_trace[rank - 1]
|
|
714
|
+
item["mmr_rank"] = rank
|
|
715
|
+
item["mmr_score"] = tr["mmr"]
|
|
716
|
+
item["retrieval_relevance"] = tr["relevance"]
|
|
717
|
+
item["max_sim_to_prior"] = tr["max_sim_to_selected"]
|
|
718
|
+
context_items.append(item)
|
|
719
|
+
context_fusion = {
|
|
720
|
+
"enabled": True,
|
|
721
|
+
"lambda": CONTEXT_MMR_LAMBDA,
|
|
722
|
+
"budget_chars": budget,
|
|
723
|
+
"pool_skill": n_skill,
|
|
724
|
+
"pool_project": n_proj,
|
|
725
|
+
"selected_count": len(context_items),
|
|
726
|
+
"mmr_trace": mmr_trace,
|
|
727
|
+
}
|
|
728
|
+
else:
|
|
729
|
+
context_items = router.build_context_items(prompt, picked_names)
|
|
730
|
+
if picked_names and not context_items:
|
|
731
|
+
context_items = [
|
|
732
|
+
{
|
|
733
|
+
"skill": n,
|
|
734
|
+
"path": None,
|
|
735
|
+
"line_start": None,
|
|
736
|
+
"line_end": None,
|
|
737
|
+
"text": router._by_name[n].body,
|
|
738
|
+
"score": 1.0,
|
|
739
|
+
}
|
|
740
|
+
for n in picked_names
|
|
741
|
+
if n in router._by_name
|
|
742
|
+
]
|
|
743
|
+
project_add: list[dict[str, Any]] = []
|
|
744
|
+
if include_project_rag and pr:
|
|
745
|
+
try:
|
|
746
|
+
project_add = retrieve_project_context_items(con, router.embed_model, prompt)
|
|
747
|
+
except Exception:
|
|
748
|
+
project_add = []
|
|
749
|
+
context_items = [*context_items, *project_add]
|
|
750
|
+
context_fusion = {"enabled": False}
|
|
751
|
+
|
|
752
|
+
project_rag_items_count = sum(1 for c in context_items if c.get("path"))
|
|
753
|
+
|
|
754
|
+
reasoning_out = reasoning
|
|
755
|
+
safe_prompt_snip = prompt[:300]
|
|
756
|
+
context_redaction_stats: dict[str, Any] = {"enabled": False, "secret_hits": 0, "path_hits": 0}
|
|
757
|
+
if redaction_enabled():
|
|
758
|
+
safe_prompt_snip, _ = redact_secret_patterns(prompt[:300])
|
|
759
|
+
sh, ph = sanitize_context_items(context_items)
|
|
760
|
+
context_redaction_stats = {"enabled": True, "secret_hits": sh, "path_hits": ph}
|
|
761
|
+
if reasoning_out:
|
|
762
|
+
reasoning_out, _ = redact_secret_patterns(reasoning_out)
|
|
763
|
+
|
|
413
764
|
con.execute(
|
|
414
765
|
"""INSERT INTO sessions (id, user_id, created_at, active_skills, turn_count) VALUES (?, ?, ?, ?, 1)
|
|
415
766
|
ON CONFLICT(id) DO UPDATE SET active_skills = ?, turn_count = turn_count + 1""",
|
|
@@ -423,225 +774,40 @@ async def run_route_turn(
|
|
|
423
774
|
"type": "route",
|
|
424
775
|
"session_id": sid,
|
|
425
776
|
"user_id": user_id,
|
|
426
|
-
"prompt":
|
|
777
|
+
"prompt": safe_prompt_snip,
|
|
427
778
|
"candidates": [{"name": s.name, "score": sc} for s, sc in candidates[:10]],
|
|
428
779
|
"picked": picked_names,
|
|
429
|
-
"reasoning":
|
|
780
|
+
"reasoning": reasoning_out,
|
|
430
781
|
"rerouted": rerouted,
|
|
431
782
|
"change_pct": round(change * 100, 1),
|
|
432
783
|
"route_ms": round(route_ms, 1),
|
|
433
784
|
"ts": time.time(),
|
|
785
|
+
"context_mode": router.context_mode,
|
|
786
|
+
"context_items_count": len(context_items),
|
|
787
|
+
"project_rag_items_count": project_rag_items_count,
|
|
788
|
+
"include_project_rag": bool(include_project_rag and pr),
|
|
789
|
+
"context_fusion": context_fusion,
|
|
790
|
+
"context_redaction": context_redaction_stats,
|
|
791
|
+
"chunk_sources_preview": [
|
|
792
|
+
{
|
|
793
|
+
"skill": c.get("skill"),
|
|
794
|
+
"path": c.get("path"),
|
|
795
|
+
"line_start": c.get("line_start"),
|
|
796
|
+
"line_end": c.get("line_end"),
|
|
797
|
+
"mmr_rank": c.get("mmr_rank"),
|
|
798
|
+
}
|
|
799
|
+
for c in context_items[:24]
|
|
800
|
+
],
|
|
434
801
|
}
|
|
435
802
|
log_event(con, sid, "route", event, user_id=user_id)
|
|
436
803
|
return {
|
|
437
804
|
"session_id": sid,
|
|
438
805
|
"picked_names": picked_names,
|
|
439
|
-
"reasoning":
|
|
806
|
+
"reasoning": reasoning_out,
|
|
440
807
|
"candidates": candidates,
|
|
441
808
|
"route_ms": route_ms,
|
|
442
809
|
"rerouted": rerouted,
|
|
443
810
|
"change": change,
|
|
444
811
|
"event": event,
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
# ---------- App ----------
|
|
449
|
-
app_state: dict[str, Any] = {}
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
@asynccontextmanager
|
|
453
|
-
async def lifespan(app: FastAPI):
|
|
454
|
-
print(f"[skillforge] Loading skills from {BUNDLED_SKILLS} + {USER_SKILLS}")
|
|
455
|
-
skills = load_all_skills()
|
|
456
|
-
print(f"[skillforge] Loaded {len(skills)} skills")
|
|
457
|
-
if not skills:
|
|
458
|
-
print("[skillforge] WARNING: no skills found")
|
|
459
|
-
embed_model = SentenceTransformer(EMBED_MODEL)
|
|
460
|
-
anthropic = AsyncAnthropic()
|
|
461
|
-
router_anthropic = None if SKILLFORGE_ROUTER_MODE == "embedding" else anthropic
|
|
462
|
-
if router_anthropic is None:
|
|
463
|
-
print("[skillforge] Router mode: embedding-only (Haiku step skipped; /chat still uses ANSWER model)")
|
|
464
|
-
print("[skillforge] Live usage (terminal): skillforge events --watch")
|
|
465
|
-
router = Router(skills, embed_model, router_anthropic)
|
|
466
|
-
con = init_db()
|
|
467
|
-
app_state.update(
|
|
468
|
-
skills={s.name: s for s in skills},
|
|
469
|
-
router=router,
|
|
470
|
-
anthropic=anthropic,
|
|
471
|
-
con=con,
|
|
472
|
-
)
|
|
473
|
-
yield
|
|
474
|
-
con.close()
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
app = FastAPI(lifespan=lifespan, title="skillforge")
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
class ChatRequest(BaseModel):
|
|
481
|
-
prompt: str
|
|
482
|
-
session_id: str | None = None
|
|
483
|
-
conversation: list[dict] = []
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
class FeedbackRequest(BaseModel):
|
|
487
|
-
session_id: str
|
|
488
|
-
skill_name: str
|
|
489
|
-
thumbs: int
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
class DisableRequest(BaseModel):
|
|
493
|
-
skill_name: str
|
|
494
|
-
disabled: bool
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
@app.post("/chat")
|
|
498
|
-
async def chat(req: ChatRequest, request: Request):
|
|
499
|
-
from app.auth import resolve_user
|
|
500
|
-
user_id = resolve_user(request)
|
|
501
|
-
router: Router = app_state["router"]
|
|
502
|
-
con = app_state["con"]
|
|
503
|
-
anthropic: AsyncAnthropic = app_state["anthropic"]
|
|
504
|
-
|
|
505
|
-
result = await run_route_turn(
|
|
506
|
-
con,
|
|
507
|
-
router,
|
|
508
|
-
req.prompt,
|
|
509
|
-
req.conversation,
|
|
510
|
-
user_id=user_id,
|
|
511
|
-
session_id=req.session_id,
|
|
512
|
-
)
|
|
513
|
-
session_id = result["session_id"]
|
|
514
|
-
picked_names = result["picked_names"]
|
|
515
|
-
|
|
516
|
-
skills_map = app_state["skills"]
|
|
517
|
-
skill_blocks = []
|
|
518
|
-
for n in picked_names:
|
|
519
|
-
s = skills_map.get(n)
|
|
520
|
-
if s:
|
|
521
|
-
skill_blocks.append(f'<skill name="{s.name}">\n{s.body}\n</skill>')
|
|
522
|
-
system_prompt = (
|
|
523
|
-
"You are a helpful assistant. The following skills have been dynamically loaded "
|
|
524
|
-
"for this turn based on the user's request. Use them when relevant; ignore them when not.\n\n"
|
|
525
|
-
+ "\n\n".join(skill_blocks)
|
|
526
|
-
) if skill_blocks else "You are a helpful assistant."
|
|
527
|
-
|
|
528
|
-
messages = req.conversation + [{"role": "user", "content": req.prompt}]
|
|
529
|
-
|
|
530
|
-
async def stream():
|
|
531
|
-
full_text = []
|
|
532
|
-
try:
|
|
533
|
-
async with anthropic.messages.stream(
|
|
534
|
-
model=ANSWER_MODEL,
|
|
535
|
-
max_tokens=4096,
|
|
536
|
-
system=system_prompt,
|
|
537
|
-
messages=messages,
|
|
538
|
-
) as s:
|
|
539
|
-
async for chunk in s.text_stream:
|
|
540
|
-
full_text.append(chunk)
|
|
541
|
-
yield f"data: {json.dumps({'delta': chunk})}\n\n"
|
|
542
|
-
except Exception as e:
|
|
543
|
-
yield f"data: {json.dumps({'error': str(e)})}\n\n"
|
|
544
|
-
return
|
|
545
|
-
response_text = "".join(full_text)
|
|
546
|
-
for n in picked_names:
|
|
547
|
-
s = skills_map.get(n)
|
|
548
|
-
if not s:
|
|
549
|
-
continue
|
|
550
|
-
keywords = [w for w in s.body.split()[:50] if len(w) > 6][:5]
|
|
551
|
-
hits = sum(1 for kw in keywords if kw.lower() in response_text.lower())
|
|
552
|
-
if hits >= 2 or s.name in response_text.lower():
|
|
553
|
-
update_skill_stat(con, n, "referenced", 1, user_id=user_id)
|
|
554
|
-
yield f"data: {json.dumps({'done': True, 'session_id': session_id, 'picked': picked_names})}\n\n"
|
|
555
|
-
|
|
556
|
-
return StreamingResponse(stream(), media_type="text/event-stream")
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
@app.post("/feedback")
|
|
560
|
-
def feedback(req: FeedbackRequest, request: Request):
|
|
561
|
-
from app.auth import resolve_user
|
|
562
|
-
user_id = resolve_user(request)
|
|
563
|
-
con = app_state["con"]
|
|
564
|
-
field = "thumbs_up" if req.thumbs > 0 else "thumbs_down"
|
|
565
|
-
update_skill_stat(con, req.skill_name, field, 1, user_id=user_id)
|
|
566
|
-
log_event(con, req.session_id, "feedback",
|
|
567
|
-
{"skill": req.skill_name, "thumbs": req.thumbs},
|
|
568
|
-
user_id=user_id)
|
|
569
|
-
return {"ok": True}
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
@app.post("/skills/disable")
|
|
573
|
-
def disable(req: DisableRequest, request: Request):
|
|
574
|
-
from app.auth import resolve_user
|
|
575
|
-
user_id = resolve_user(request)
|
|
576
|
-
con = app_state["con"]
|
|
577
|
-
set_skill_disabled(con, req.skill_name, req.disabled, user_id=user_id)
|
|
578
|
-
return {"ok": True}
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
@app.get("/skills")
|
|
582
|
-
def list_skills(request: Request):
|
|
583
|
-
from app.auth import resolve_user
|
|
584
|
-
user_id = resolve_user(request)
|
|
585
|
-
con = app_state["con"]
|
|
586
|
-
skills_map = app_state["skills"]
|
|
587
|
-
out = []
|
|
588
|
-
for name, s in skills_map.items():
|
|
589
|
-
cur = con.execute(
|
|
590
|
-
"SELECT weight, uses, referenced, thumbs_up, thumbs_down, disabled FROM skill_weights WHERE user_id = ? AND skill_name = ?",
|
|
591
|
-
(user_id, name),
|
|
592
|
-
)
|
|
593
|
-
row = cur.fetchone()
|
|
594
|
-
weight, uses, ref, up, down, disabled = row if row else (0.0, 0, 0, 0, 0, 0)
|
|
595
|
-
out.append({
|
|
596
|
-
"name": name,
|
|
597
|
-
"title": s.title,
|
|
598
|
-
"description": s.description[:200],
|
|
599
|
-
"source": s.source,
|
|
600
|
-
"weight": weight,
|
|
601
|
-
"uses": uses,
|
|
602
|
-
"referenced": ref,
|
|
603
|
-
"thumbs_up": up,
|
|
604
|
-
"thumbs_down": down,
|
|
605
|
-
"disabled": bool(disabled),
|
|
606
|
-
})
|
|
607
|
-
out.sort(key=lambda x: -x["uses"])
|
|
608
|
-
return out
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
@app.get("/events")
|
|
612
|
-
def recent_events(request: Request, limit: int = 50):
|
|
613
|
-
from app.auth import resolve_user, auth_enabled
|
|
614
|
-
user_id = resolve_user(request)
|
|
615
|
-
con = app_state["con"]
|
|
616
|
-
if auth_enabled():
|
|
617
|
-
cur = con.execute(
|
|
618
|
-
"SELECT ts, session_id, event_type, payload FROM events WHERE user_id = ? ORDER BY ts DESC LIMIT ?",
|
|
619
|
-
(user_id, limit),
|
|
620
|
-
)
|
|
621
|
-
else:
|
|
622
|
-
cur = con.execute(
|
|
623
|
-
"SELECT ts, session_id, event_type, payload FROM events ORDER BY ts DESC LIMIT ?",
|
|
624
|
-
(limit,),
|
|
625
|
-
)
|
|
626
|
-
return [
|
|
627
|
-
{"ts": ts, "session_id": sid, "type": et, "payload": json.loads(p)}
|
|
628
|
-
for ts, sid, et, p in cur.fetchall()
|
|
629
|
-
]
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
@app.get("/")
|
|
633
|
-
def root():
|
|
634
|
-
return {
|
|
635
|
-
"service": "skillforge",
|
|
636
|
-
"docs": "POST /chat, GET /events, GET /skills, GET /healthz",
|
|
637
|
-
"live_log": "skillforge events --watch",
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
@app.get("/healthz")
|
|
642
|
-
def health():
|
|
643
|
-
return {
|
|
644
|
-
"skills_loaded": len(app_state.get("skills", {})),
|
|
645
|
-
"ok": True,
|
|
646
|
-
"live_log": "skillforge events --watch",
|
|
812
|
+
"context_items": context_items,
|
|
647
813
|
}
|