@heytherevibin/skillforge 0.2.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,6 @@ skillforge — skill orchestrator co-tool for Claude (MCP-first).
4
4
  Primary surface: MCP stdio — route_skills and related tools for hosts
5
5
  (Claude Desktop, Cursor, Claude Code).
6
6
 
7
- Optional: headless HTTP API (POST /chat, /events, …) for integrations.
8
7
  Live usage: `skillforge events --watch` (terminal).
9
8
  """
10
9
  from __future__ import annotations
@@ -16,19 +15,24 @@ import sqlite3
16
15
  import sys
17
16
  import time
18
17
  import uuid
19
- from contextlib import asynccontextmanager
20
18
  from dataclasses import dataclass
21
19
  from pathlib import Path
22
20
  from typing import Any, Optional
23
21
 
24
22
  import numpy as np
25
23
  from anthropic import AsyncAnthropic
26
- from fastapi import FastAPI, Request
27
- from fastapi.responses import StreamingResponse
28
- from pydantic import BaseModel
29
24
  from sentence_transformers import SentenceTransformer
30
25
 
31
26
  from app.db_paths import global_db_path, resolve_orchestrator_db
27
+ from app.chunking import SkillChunk, chunk_max_chars, chunk_overlap_chars, chunk_skill_body
28
+ from app.context_fusion import mmr_select
29
+ from app.project_index import (
30
+ ensure_project_index_schema,
31
+ load_project_fusion_pool,
32
+ project_rag_max_chars,
33
+ retrieve_project_context_items,
34
+ )
35
+ from app.redaction import redaction_enabled, redact_secret_patterns, sanitize_context_items
32
36
 
33
37
  # ---------- Config (env-driven so the Node wrapper controls paths) ----------
34
38
  BUNDLED_SKILLS = Path(os.getenv("SKILLFORGE_BUNDLED_SKILLS", "./skills"))
@@ -40,12 +44,28 @@ DB_PATH = global_db_path()
40
44
 
41
45
  EMBED_MODEL = os.getenv("SKILLFORGE_EMBED_MODEL", "all-MiniLM-L6-v2")
42
46
  ROUTER_MODEL = os.getenv("SKILLFORGE_ROUTER_MODEL", "claude-haiku-4-5-20251001")
43
- ANSWER_MODEL = os.getenv("SKILLFORGE_ANSWER_MODEL", "claude-opus-4-7")
44
47
  TOP_K_CANDIDATES = int(os.getenv("SKILLFORGE_TOP_K", "15"))
45
48
  MAX_ACTIVE_SKILLS = int(os.getenv("SKILLFORGE_MAX_ACTIVE", "7"))
46
49
  REROUTE_THRESHOLD = float(os.getenv("SKILLFORGE_REROUTE_THRESHOLD", "0.4"))
47
50
  # "" | "full" | "embedding" — embedding skips Haiku and takes top skills from the shortlist only.
48
51
  SKILLFORGE_ROUTER_MODE = os.getenv("SKILLFORGE_ROUTER_MODE", "").strip().lower()
52
+ # chunks: RAG-style line-bounded chunks from picked skills. full_body: inject entire SKILL.md per pick (legacy).
53
+ SKILLFORGE_CONTEXT_MODE = os.getenv("SKILLFORGE_CONTEXT_MODE", "chunks").strip().lower()
54
+ ROUTE_MAX_CONTEXT_CHARS = int(os.getenv("SKILLFORGE_ROUTE_MAX_CHARS", "60000"))
55
+ CONTEXT_FUSION = os.getenv("SKILLFORGE_CONTEXT_FUSION", "1").strip().lower() not in ("0", "false", "no", "")
56
+ CONTEXT_MMR_LAMBDA = max(0.0, min(1.0, float(os.getenv("SKILLFORGE_CONTEXT_MMR_LAMBDA", "0.7"))))
57
+ FUSION_POOL_SKILL = max(8, int(os.getenv("SKILLFORGE_FUSION_POOL_SKILL", "96")))
58
+ FUSION_POOL_PROJECT = max(8, int(os.getenv("SKILLFORGE_FUSION_POOL_PROJECT", "96")))
59
+ FUSION_FULL_BODY_PREVIEW_CHARS = max(400, int(os.getenv("SKILLFORGE_FUSION_FULL_BODY_PREVIEW_CHARS", "4000")))
60
+ CONTEXT_OVERHEAD_SKILL = 48
61
+ CONTEXT_OVERHEAD_FILE = 56
62
+
63
+
64
+ def _context_budget_unified() -> int:
65
+ raw = os.getenv("SKILLFORGE_CONTEXT_BUDGET_CHARS", "").strip()
66
+ if raw:
67
+ return max(4000, int(raw))
68
+ return ROUTE_MAX_CONTEXT_CHARS + int(project_rag_max_chars())
49
69
 
50
70
 
51
71
  def build_router_and_skills(
@@ -235,6 +255,7 @@ def init_db(db_file: Path | None = None):
235
255
  con.execute(f"ALTER TABLE {table} ADD COLUMN user_id TEXT DEFAULT ''")
236
256
  except sqlite3.OperationalError:
237
257
  pass # already exists
258
+ ensure_project_index_schema(con)
238
259
  con.commit()
239
260
  return con
240
261
 
@@ -299,13 +320,50 @@ class Router:
299
320
  self.skills = skills
300
321
  self.embed_model = embed_model
301
322
  self.anthropic = anthropic
323
+ self.context_mode = SKILLFORGE_CONTEXT_MODE if SKILLFORGE_CONTEXT_MODE in (
324
+ "chunks",
325
+ "full_body",
326
+ ) else "chunks"
327
+ self._by_name: dict[str, Skill] = {s.name: s for s in skills}
302
328
  texts = [f"{s.title}: {s.description}" for s in skills]
303
- print(f"[skillforge] Embedding {len(skills)} skills...")
329
+ print(f"[skillforge] Embedding {len(skills)} skills (summary)...", file=sys.stderr)
304
330
  embeddings = embed_model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
305
331
  for s, e in zip(skills, embeddings):
306
332
  s.embedding = e / np.linalg.norm(e)
307
333
  self.matrix = np.stack([s.embedding for s in skills]) if skills else np.zeros((0, 0))
308
- print(f"[skillforge] Ready. {len(skills)} skills, matrix shape: {self.matrix.shape}")
334
+
335
+ # Chunk index for CONTEXT_MODE=chunks
336
+ self._chunk_meta: list[tuple[str, SkillChunk]] = []
337
+ edim = int(embed_model.get_sentence_embedding_dimension())
338
+ self._chunk_embeddings: np.ndarray = np.zeros((0, edim))
339
+ if self.context_mode == "chunks" and skills:
340
+ flat_texts: list[str] = []
341
+ self._chunk_meta = []
342
+ mc = chunk_max_chars()
343
+ oc = chunk_overlap_chars()
344
+ for s in skills:
345
+ for ch in chunk_skill_body(s.body, max_chars=mc, overlap=oc):
346
+ # Embed with in-chunk disambiguation
347
+ flat_texts.append(f"{s.title} — {s.name}\n{ch.text}")
348
+ self._chunk_meta.append((s.name, ch))
349
+ if flat_texts:
350
+ print(f"[skillforge] Embedding {len(flat_texts)} skill chunks...", file=sys.stderr)
351
+ ce = embed_model.encode(
352
+ flat_texts, show_progress_bar=False, convert_to_numpy=True
353
+ )
354
+ ce = ce / np.linalg.norm(ce, axis=1, keepdims=True)
355
+ self._chunk_embeddings = ce
356
+ print(
357
+ f"[skillforge] Ready. {len(skills)} skills; chunk matrix {self._chunk_embeddings.shape}; "
358
+ f"context_mode={self.context_mode}",
359
+ file=sys.stderr,
360
+ )
361
+ else:
362
+ print(
363
+ f"[skillforge] Ready. {len(skills)} skills, matrix shape: {self.matrix.shape}; "
364
+ f"context_mode={self.context_mode}",
365
+ file=sys.stderr,
366
+ )
309
367
 
310
368
  def shortlist(self, prompt, con, k=TOP_K_CANDIDATES, user_id=""):
311
369
  if len(self.skills) == 0:
@@ -323,6 +381,176 @@ class Router:
323
381
  top_idx = np.argsort(-biased)[:k]
324
382
  return [(self.skills[i], float(sims[i])) for i in top_idx if biased[i] > -100]
325
383
 
384
+ def build_context_items(
385
+ self,
386
+ prompt: str,
387
+ skill_names: list[str],
388
+ max_total_chars: int | None = None,
389
+ ) -> list[dict[str, Any]]:
390
+ """Return ordered context dicts: skill, line_start, line_end, text, score."""
391
+ cap = max_total_chars if max_total_chars is not None else ROUTE_MAX_CONTEXT_CHARS
392
+ if self.context_mode == "full_body":
393
+ out: list[dict[str, Any]] = []
394
+ for n in skill_names:
395
+ s = self._by_name.get(n)
396
+ if not s:
397
+ continue
398
+ out.append({
399
+ "skill": n,
400
+ "path": None,
401
+ "line_start": None,
402
+ "line_end": None,
403
+ "text": s.body,
404
+ "score": 1.0,
405
+ })
406
+ return out
407
+ if not skill_names or self._chunk_embeddings.shape[0] == 0:
408
+ return []
409
+ allowed = set(skill_names)
410
+ indices = [i for i, (sn, _) in enumerate(self._chunk_meta) if sn in allowed]
411
+ if not indices:
412
+ return []
413
+ qv = self.embed_model.encode(prompt, convert_to_numpy=True)
414
+ qv = qv / np.linalg.norm(qv)
415
+ sub = self._chunk_embeddings[indices]
416
+ scores = (sub @ qv).flatten()
417
+ order = np.argsort(-scores)
418
+ out = []
419
+ total = 0
420
+ overhead = CONTEXT_OVERHEAD_SKILL
421
+ for o in order:
422
+ idx = indices[int(o)]
423
+ sn, ch = self._chunk_meta[idx]
424
+ piece_len = len(ch.text) + overhead
425
+ if total + piece_len > cap:
426
+ continue
427
+ out.append({
428
+ "skill": sn,
429
+ "path": None,
430
+ "line_start": ch.line_start,
431
+ "line_end": ch.line_end,
432
+ "text": ch.text,
433
+ "score": float(scores[int(o)]),
434
+ })
435
+ total += piece_len
436
+ return out
437
+
438
def build_fusion_skill_pool(
    self,
    prompt: str,
    skill_names: list[str],
    pool_limit: int,
) -> tuple[list[dict[str, Any]], np.ndarray, np.ndarray]:
    """Build the candidate pool of skill context used for MMR selection.

    Args:
        prompt: Text embedded and normalized to score candidates.
        skill_names: Skills whose content may enter the pool.
        pool_limit: Maximum number of candidates returned; a value of zero
            or less yields an empty pool.

    Returns:
        A triple ``(items, embeddings, relevance)`` whose rows correspond
        one-to-one, ordered by descending relevance:

        * ``items`` - context dicts (skill, path, line_start, line_end, text,
          score, source="skill");
        * ``embeddings`` - one row per item;
        * ``relevance`` - float32 dot products with the prompt embedding.

        In ``full_body`` mode, or when no chunk matches the requested skills,
        candidates are whole skill bodies produced by
        ``_fusion_skill_pool_fallback_bodies``; otherwise they are chunks.
        An empty pool is ``([], zeros((0, dim)), empty float32 array)``.
    """
    edim = int(self.embed_model.get_sentence_embedding_dimension())
    if not skill_names or pool_limit <= 0:
        # Nothing to select. Returning here also avoids np.stack([]) below,
        # which raises ValueError on an empty list.
        return [], np.zeros((0, edim)), np.array([], dtype=np.float32)

    qv = self.embed_model.encode(prompt, convert_to_numpy=True)
    qv = np.asarray(qv, dtype=np.float32).reshape(-1)
    qv = qv / max(float(np.linalg.norm(qv)), 1e-12)

    # Whole-body candidates: legacy mode, or no chunk index available.
    if self.context_mode == "full_body" or self._chunk_embeddings.shape[0] == 0:
        return self._fusion_skill_pool_fallback_bodies(skill_names, qv, pool_limit)

    allowed = set(skill_names)
    indices = [i for i, (sn, _) in enumerate(self._chunk_meta) if sn in allowed]
    if not indices:
        return self._fusion_skill_pool_fallback_bodies(skill_names, qv, pool_limit)

    sub = self._chunk_embeddings[indices]
    scores = (sub @ qv).flatten()
    top = [int(o) for o in np.argsort(-scores)[:pool_limit]]

    items: list[dict[str, Any]] = []
    for pos in top:
        sn, ch = self._chunk_meta[indices[pos]]
        items.append({
            "skill": sn,
            "path": None,
            "line_start": ch.line_start,
            "line_end": ch.line_end,
            "text": ch.text,
            "score": float(scores[pos]),
            "source": "skill",
        })
    # Fancy indexing keeps rows aligned with `items` (same result as stacking row by row).
    return items, sub[top], scores[top].astype(np.float32)
514
+
515
+ def _fusion_skill_pool_fallback_bodies(
516
+ self,
517
+ skill_names: list[str],
518
+ qv: np.ndarray,
519
+ pool_limit: int,
520
+ ) -> tuple[list[dict[str, Any]], np.ndarray, np.ndarray]:
521
+ ordered = [n for n in skill_names if n in self._by_name]
522
+ edim = int(self.embed_model.get_sentence_embedding_dimension())
523
+ if not ordered:
524
+ return [], np.zeros((0, edim)), np.array([], dtype=np.float32)
525
+ texts = [
526
+ f"{self._by_name[n].title} — {n}\n{(self._by_name[n].body or '')[:FUSION_FULL_BODY_PREVIEW_CHARS]}"
527
+ for n in ordered
528
+ ]
529
+ em = self.embed_model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
530
+ em = np.asarray(em, dtype=np.float32)
531
+ em = em / np.maximum(np.linalg.norm(em, axis=1, keepdims=True), 1e-12)
532
+ rel = (em @ qv).flatten()
533
+ order = np.argsort(-rel)[: min(pool_limit, em.shape[0])]
534
+ items = []
535
+ em_rows = []
536
+ rel_out = []
537
+ for o in order:
538
+ i = int(o)
539
+ n = ordered[i]
540
+ s = self._by_name[n]
541
+ items.append({
542
+ "skill": n,
543
+ "path": None,
544
+ "line_start": None,
545
+ "line_end": None,
546
+ "text": s.body,
547
+ "score": float(rel[i]),
548
+ "source": "skill",
549
+ })
550
+ em_rows.append(em[i])
551
+ rel_out.append(float(rel[i]))
552
+ return items, np.stack(em_rows), np.asarray(rel_out, dtype=np.float32)
553
+
326
554
  def pick_final_embedding_only(self, candidates):
327
555
  """Pick up to MAX_ACTIVE_SKILLS from the shortlist order (similarity + weights). No LLM call."""
328
556
  if not candidates:
@@ -381,6 +609,23 @@ def jaccard_change(old, new):
381
609
  return 1.0 - (inter / union)
382
610
 
383
611
 
612
def format_context_items_markdown(context_items: list[dict[str, Any]]) -> str:
    """Render context items (skills and optional project files) as markdown for MCP / CLI.

    Each item becomes a ``###`` section. Items with a truthy ``path`` are
    headed ``File: `<path>```; all others are headed ``Skill: <skill>``. The
    heading ends with the line range when both ``line_start`` and
    ``line_end`` are present, otherwise with ``(full document)``. Sections
    are joined with a newline.
    """

    def _section(item: dict[str, Any]) -> str:
        first = item.get("line_start")
        last = item.get("line_end")
        if first is None or last is None:
            span = " (full document)"
        else:
            span = f" (lines {first}-{last})"
        file_path = item.get("path")
        # Only look up "skill" for non-file items, as file items may not need it.
        heading = f"### File: `{file_path}`" if file_path else f"### Skill: {item['skill']}"
        return f"{heading}{span}\n\n{item['text']}\n"

    return "\n".join(_section(entry) for entry in context_items)
627
+
628
+
384
629
  async def run_route_turn(
385
630
  con: sqlite3.Connection,
386
631
  router: Router,
@@ -388,8 +633,11 @@ async def run_route_turn(
388
633
  conversation: list,
389
634
  user_id: str = "",
390
635
  session_id: str | None = None,
636
+ *,
637
+ project_root: str | None = None,
638
+ include_project_rag: bool = False,
391
639
  ) -> dict[str, Any]:
392
- """Shared routing + session + telemetry for HTTP /chat and MCP route_skills.
640
+ """Shared routing + session + telemetry for MCP route_skills and ``skillforge route``.
393
641
 
394
642
  Updates sessions, skill usage stats, and writes a route row to events.
395
643
  """
@@ -410,6 +658,109 @@ async def run_route_turn(
410
658
  change = jaccard_change(prev_active, set(picked_names))
411
659
  rerouted = change >= REROUTE_THRESHOLD and bool(prev_active)
412
660
 
661
+ pr = (project_root or "").strip()
662
+ want_fusion = CONTEXT_FUSION and include_project_rag and bool(pr)
663
+ context_fusion: dict[str, Any] | None = None
664
+ context_items: list[dict[str, Any]] = []
665
+
666
+ proj_pool: list[dict[str, Any]] = []
667
+ proj_emb = np.zeros((0, int(router.embed_model.get_sentence_embedding_dimension())))
668
+ proj_rel = np.array([], dtype=np.float32)
669
+
670
+ if want_fusion:
671
+ try:
672
+ proj_pool, proj_emb, proj_rel = load_project_fusion_pool(
673
+ con, router.embed_model, prompt, FUSION_POOL_PROJECT
674
+ )
675
+ except Exception:
676
+ proj_pool = []
677
+ proj_emb = np.zeros((0, int(router.embed_model.get_sentence_embedding_dimension())))
678
+ proj_rel = np.array([], dtype=np.float32)
679
+
680
+ if want_fusion and proj_pool:
681
+ skill_pool, skill_emb, skill_rel = router.build_fusion_skill_pool(
682
+ prompt, picked_names, FUSION_POOL_SKILL
683
+ )
684
+ n_skill = len(skill_pool)
685
+ n_proj = len(proj_pool)
686
+ pool = skill_pool + proj_pool
687
+ if n_skill and n_proj:
688
+ em = np.vstack([skill_emb, proj_emb])
689
+ rel = np.concatenate([skill_rel, proj_rel])
690
+ elif n_skill:
691
+ em = skill_emb
692
+ rel = skill_rel
693
+ else:
694
+ em = proj_emb
695
+ rel = proj_rel
696
+ lens = np.array([len(c["text"]) for c in pool], dtype=np.int64)
697
+ ovh = np.array([
698
+ CONTEXT_OVERHEAD_SKILL if not c.get("path") else CONTEXT_OVERHEAD_FILE
699
+ for c in pool
700
+ ], dtype=np.int64)
701
+ budget = _context_budget_unified()
702
+ order, mmr_trace = mmr_select(
703
+ em,
704
+ rel,
705
+ lens,
706
+ char_budget=budget,
707
+ overhead_per_chunk=ovh,
708
+ lambda_mult=CONTEXT_MMR_LAMBDA,
709
+ )
710
+ for rank, idx in enumerate(order, start=1):
711
+ item = dict(pool[idx])
712
+ item.pop("source", None)
713
+ tr = mmr_trace[rank - 1]
714
+ item["mmr_rank"] = rank
715
+ item["mmr_score"] = tr["mmr"]
716
+ item["retrieval_relevance"] = tr["relevance"]
717
+ item["max_sim_to_prior"] = tr["max_sim_to_selected"]
718
+ context_items.append(item)
719
+ context_fusion = {
720
+ "enabled": True,
721
+ "lambda": CONTEXT_MMR_LAMBDA,
722
+ "budget_chars": budget,
723
+ "pool_skill": n_skill,
724
+ "pool_project": n_proj,
725
+ "selected_count": len(context_items),
726
+ "mmr_trace": mmr_trace,
727
+ }
728
+ else:
729
+ context_items = router.build_context_items(prompt, picked_names)
730
+ if picked_names and not context_items:
731
+ context_items = [
732
+ {
733
+ "skill": n,
734
+ "path": None,
735
+ "line_start": None,
736
+ "line_end": None,
737
+ "text": router._by_name[n].body,
738
+ "score": 1.0,
739
+ }
740
+ for n in picked_names
741
+ if n in router._by_name
742
+ ]
743
+ project_add: list[dict[str, Any]] = []
744
+ if include_project_rag and pr:
745
+ try:
746
+ project_add = retrieve_project_context_items(con, router.embed_model, prompt)
747
+ except Exception:
748
+ project_add = []
749
+ context_items = [*context_items, *project_add]
750
+ context_fusion = {"enabled": False}
751
+
752
+ project_rag_items_count = sum(1 for c in context_items if c.get("path"))
753
+
754
+ reasoning_out = reasoning
755
+ safe_prompt_snip = prompt[:300]
756
+ context_redaction_stats: dict[str, Any] = {"enabled": False, "secret_hits": 0, "path_hits": 0}
757
+ if redaction_enabled():
758
+ safe_prompt_snip, _ = redact_secret_patterns(prompt[:300])
759
+ sh, ph = sanitize_context_items(context_items)
760
+ context_redaction_stats = {"enabled": True, "secret_hits": sh, "path_hits": ph}
761
+ if reasoning_out:
762
+ reasoning_out, _ = redact_secret_patterns(reasoning_out)
763
+
413
764
  con.execute(
414
765
  """INSERT INTO sessions (id, user_id, created_at, active_skills, turn_count) VALUES (?, ?, ?, ?, 1)
415
766
  ON CONFLICT(id) DO UPDATE SET active_skills = ?, turn_count = turn_count + 1""",
@@ -423,225 +774,40 @@ async def run_route_turn(
423
774
  "type": "route",
424
775
  "session_id": sid,
425
776
  "user_id": user_id,
426
- "prompt": prompt[:300],
777
+ "prompt": safe_prompt_snip,
427
778
  "candidates": [{"name": s.name, "score": sc} for s, sc in candidates[:10]],
428
779
  "picked": picked_names,
429
- "reasoning": reasoning,
780
+ "reasoning": reasoning_out,
430
781
  "rerouted": rerouted,
431
782
  "change_pct": round(change * 100, 1),
432
783
  "route_ms": round(route_ms, 1),
433
784
  "ts": time.time(),
785
+ "context_mode": router.context_mode,
786
+ "context_items_count": len(context_items),
787
+ "project_rag_items_count": project_rag_items_count,
788
+ "include_project_rag": bool(include_project_rag and pr),
789
+ "context_fusion": context_fusion,
790
+ "context_redaction": context_redaction_stats,
791
+ "chunk_sources_preview": [
792
+ {
793
+ "skill": c.get("skill"),
794
+ "path": c.get("path"),
795
+ "line_start": c.get("line_start"),
796
+ "line_end": c.get("line_end"),
797
+ "mmr_rank": c.get("mmr_rank"),
798
+ }
799
+ for c in context_items[:24]
800
+ ],
434
801
  }
435
802
  log_event(con, sid, "route", event, user_id=user_id)
436
803
  return {
437
804
  "session_id": sid,
438
805
  "picked_names": picked_names,
439
- "reasoning": reasoning,
806
+ "reasoning": reasoning_out,
440
807
  "candidates": candidates,
441
808
  "route_ms": route_ms,
442
809
  "rerouted": rerouted,
443
810
  "change": change,
444
811
  "event": event,
445
- }
446
-
447
-
448
- # ---------- App ----------
449
- app_state: dict[str, Any] = {}
450
-
451
-
452
- @asynccontextmanager
453
- async def lifespan(app: FastAPI):
454
- print(f"[skillforge] Loading skills from {BUNDLED_SKILLS} + {USER_SKILLS}")
455
- skills = load_all_skills()
456
- print(f"[skillforge] Loaded {len(skills)} skills")
457
- if not skills:
458
- print("[skillforge] WARNING: no skills found")
459
- embed_model = SentenceTransformer(EMBED_MODEL)
460
- anthropic = AsyncAnthropic()
461
- router_anthropic = None if SKILLFORGE_ROUTER_MODE == "embedding" else anthropic
462
- if router_anthropic is None:
463
- print("[skillforge] Router mode: embedding-only (Haiku step skipped; /chat still uses ANSWER model)")
464
- print("[skillforge] Live usage (terminal): skillforge events --watch")
465
- router = Router(skills, embed_model, router_anthropic)
466
- con = init_db()
467
- app_state.update(
468
- skills={s.name: s for s in skills},
469
- router=router,
470
- anthropic=anthropic,
471
- con=con,
472
- )
473
- yield
474
- con.close()
475
-
476
-
477
- app = FastAPI(lifespan=lifespan, title="skillforge")
478
-
479
-
480
- class ChatRequest(BaseModel):
481
- prompt: str
482
- session_id: str | None = None
483
- conversation: list[dict] = []
484
-
485
-
486
- class FeedbackRequest(BaseModel):
487
- session_id: str
488
- skill_name: str
489
- thumbs: int
490
-
491
-
492
- class DisableRequest(BaseModel):
493
- skill_name: str
494
- disabled: bool
495
-
496
-
497
- @app.post("/chat")
498
- async def chat(req: ChatRequest, request: Request):
499
- from app.auth import resolve_user
500
- user_id = resolve_user(request)
501
- router: Router = app_state["router"]
502
- con = app_state["con"]
503
- anthropic: AsyncAnthropic = app_state["anthropic"]
504
-
505
- result = await run_route_turn(
506
- con,
507
- router,
508
- req.prompt,
509
- req.conversation,
510
- user_id=user_id,
511
- session_id=req.session_id,
512
- )
513
- session_id = result["session_id"]
514
- picked_names = result["picked_names"]
515
-
516
- skills_map = app_state["skills"]
517
- skill_blocks = []
518
- for n in picked_names:
519
- s = skills_map.get(n)
520
- if s:
521
- skill_blocks.append(f'<skill name="{s.name}">\n{s.body}\n</skill>')
522
- system_prompt = (
523
- "You are a helpful assistant. The following skills have been dynamically loaded "
524
- "for this turn based on the user's request. Use them when relevant; ignore them when not.\n\n"
525
- + "\n\n".join(skill_blocks)
526
- ) if skill_blocks else "You are a helpful assistant."
527
-
528
- messages = req.conversation + [{"role": "user", "content": req.prompt}]
529
-
530
- async def stream():
531
- full_text = []
532
- try:
533
- async with anthropic.messages.stream(
534
- model=ANSWER_MODEL,
535
- max_tokens=4096,
536
- system=system_prompt,
537
- messages=messages,
538
- ) as s:
539
- async for chunk in s.text_stream:
540
- full_text.append(chunk)
541
- yield f"data: {json.dumps({'delta': chunk})}\n\n"
542
- except Exception as e:
543
- yield f"data: {json.dumps({'error': str(e)})}\n\n"
544
- return
545
- response_text = "".join(full_text)
546
- for n in picked_names:
547
- s = skills_map.get(n)
548
- if not s:
549
- continue
550
- keywords = [w for w in s.body.split()[:50] if len(w) > 6][:5]
551
- hits = sum(1 for kw in keywords if kw.lower() in response_text.lower())
552
- if hits >= 2 or s.name in response_text.lower():
553
- update_skill_stat(con, n, "referenced", 1, user_id=user_id)
554
- yield f"data: {json.dumps({'done': True, 'session_id': session_id, 'picked': picked_names})}\n\n"
555
-
556
- return StreamingResponse(stream(), media_type="text/event-stream")
557
-
558
-
559
- @app.post("/feedback")
560
- def feedback(req: FeedbackRequest, request: Request):
561
- from app.auth import resolve_user
562
- user_id = resolve_user(request)
563
- con = app_state["con"]
564
- field = "thumbs_up" if req.thumbs > 0 else "thumbs_down"
565
- update_skill_stat(con, req.skill_name, field, 1, user_id=user_id)
566
- log_event(con, req.session_id, "feedback",
567
- {"skill": req.skill_name, "thumbs": req.thumbs},
568
- user_id=user_id)
569
- return {"ok": True}
570
-
571
-
572
- @app.post("/skills/disable")
573
- def disable(req: DisableRequest, request: Request):
574
- from app.auth import resolve_user
575
- user_id = resolve_user(request)
576
- con = app_state["con"]
577
- set_skill_disabled(con, req.skill_name, req.disabled, user_id=user_id)
578
- return {"ok": True}
579
-
580
-
581
- @app.get("/skills")
582
- def list_skills(request: Request):
583
- from app.auth import resolve_user
584
- user_id = resolve_user(request)
585
- con = app_state["con"]
586
- skills_map = app_state["skills"]
587
- out = []
588
- for name, s in skills_map.items():
589
- cur = con.execute(
590
- "SELECT weight, uses, referenced, thumbs_up, thumbs_down, disabled FROM skill_weights WHERE user_id = ? AND skill_name = ?",
591
- (user_id, name),
592
- )
593
- row = cur.fetchone()
594
- weight, uses, ref, up, down, disabled = row if row else (0.0, 0, 0, 0, 0, 0)
595
- out.append({
596
- "name": name,
597
- "title": s.title,
598
- "description": s.description[:200],
599
- "source": s.source,
600
- "weight": weight,
601
- "uses": uses,
602
- "referenced": ref,
603
- "thumbs_up": up,
604
- "thumbs_down": down,
605
- "disabled": bool(disabled),
606
- })
607
- out.sort(key=lambda x: -x["uses"])
608
- return out
609
-
610
-
611
- @app.get("/events")
612
- def recent_events(request: Request, limit: int = 50):
613
- from app.auth import resolve_user, auth_enabled
614
- user_id = resolve_user(request)
615
- con = app_state["con"]
616
- if auth_enabled():
617
- cur = con.execute(
618
- "SELECT ts, session_id, event_type, payload FROM events WHERE user_id = ? ORDER BY ts DESC LIMIT ?",
619
- (user_id, limit),
620
- )
621
- else:
622
- cur = con.execute(
623
- "SELECT ts, session_id, event_type, payload FROM events ORDER BY ts DESC LIMIT ?",
624
- (limit,),
625
- )
626
- return [
627
- {"ts": ts, "session_id": sid, "type": et, "payload": json.loads(p)}
628
- for ts, sid, et, p in cur.fetchall()
629
- ]
630
-
631
-
632
- @app.get("/")
633
- def root():
634
- return {
635
- "service": "skillforge",
636
- "docs": "POST /chat, GET /events, GET /skills, GET /healthz",
637
- "live_log": "skillforge events --watch",
638
- }
639
-
640
-
641
- @app.get("/healthz")
642
- def health():
643
- return {
644
- "skills_loaded": len(app_state.get("skills", {})),
645
- "ok": True,
646
- "live_log": "skillforge events --watch",
812
+ "context_items": context_items,
647
813
  }