npm - @heytherevibin/skillforge - Versions diffs - 0.2.1 → 0.8.0 - Mend

@heytherevibin/skillforge 0.2.1 → 0.8.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (29)

package/CHANGELOG.md +43 -0
package/README.md +89 -56
package/RELEASING.md +1 -1
package/SECURITY.md +2 -2
package/STRATEGY.md +1 -3
package/bin/cli.js +32 -138
package/package.json +2 -2
package/python/app/chunking.py +116 -0
package/python/app/context_fusion.py +77 -0
package/python/app/events_cli.py +1 -1
package/python/app/index_cli.py +89 -0
package/python/app/main.py +632 -229
package/python/app/mcp_contract.py +121 -0
package/python/app/mcp_server.py +304 -30
package/python/app/project_index.py +600 -0
package/python/app/redaction.py +128 -0
package/python/app/route_cli.py +42 -19
package/python/app/route_policies.py +133 -0
package/python/app/routing_signals.py +95 -0
package/python/requirements.txt +1 -4
package/python/tests/test_chunking.py +34 -0
package/python/tests/test_context_fusion.py +45 -0
package/python/tests/test_mcp_contract.py +137 -0
package/python/tests/test_project_index.py +76 -0
package/python/tests/test_redaction.py +51 -0
package/python/tests/test_route_policies.py +115 -0
package/python/tests/test_routing_signals.py +77 -0
package/python/app/auth.py +0 -63
package/python/app/cli.py +0 -78

package/bin/cli.js CHANGED Viewed

@@ -5,17 +5,15 @@
  * Usage:
  *   skillforge, skillforge --help   Show help (primary path: MCP, not a web app)
  *   skillforge mcp                   MCP stdio server (Claude / Cursor / …)
- *   skillforge start [--port=8000]   Optional headless HTTP API (no browser UI)
  *   skillforge events [--watch] [--limit=N]     Print SQLite routing events
  *   skillforge route [words…] [--prompt=…]     Same routing as MCP route_skills (terminal)
- *   skillforge chat                  Dev harness (needs `start` + ANTHROPIC_API_KEY)
+ *   skillforge index --project-root=…            Chunk/embed repo files for project RAG
  *   skillforge install               One-time Python venv + deps
- *   skillforge skills … / pack … / auth … / reset
+ *   skillforge skills … / pack … / reset
  */
 const path = require('path');
 const fs = require('fs');
-const crypto = require('crypto');
 const { spawn, spawnSync } = require('child_process');
 const os = require('os');
 const packs = require('../lib/packs');
@@ -26,8 +24,8 @@ const CONFIG_DIR = path.join(os.homedir(), '.skillforge');
 const VENV_DIR = path.join(CONFIG_DIR, 'venv');
 const DATA_DIR = path.join(CONFIG_DIR, 'data');
 const USER_SKILLS_DIR = path.join(CONFIG_DIR, 'skills');
-const PACKS_DIR = path.join(CONFIG_DIR, 'packs');
-const AUTH_FILE = path.join(CONFIG_DIR, 'auth.json');
+/** Bearer-token file for the removed HTTP API (<=0.6.x); deleted on first CLI use. */
+const LEGACY_AUTH_FILE = path.join(CONFIG_DIR, 'auth.json');
 const SETUP_MARKER = path.join(CONFIG_DIR, '.setup-complete');
 const args = process.argv.slice(2);
@@ -87,6 +85,18 @@ function ensureDirs() {
   }
 }
+/** v0.7.0 removed HTTP + `skillforge auth`; leftover tokens file is misleading — remove once. */
+function dropLegacyAuthJsonIfPresent() {
+  try {
+    if (fs.existsSync(LEGACY_AUTH_FILE)) {
+      fs.rmSync(LEGACY_AUTH_FILE);
+      info('Removed legacy ~/.skillforge/auth.json (HTTP API was removed in v0.7).');
+    }
+  } catch (e) {
+    err(`Could not remove legacy auth.json: ${e.message}`);
+  }
+}
 function runSetup() {
   info('First-time setup — this happens once and takes ~2 minutes');
   ensureDirs();
@@ -145,77 +155,7 @@ function setupIfNeeded() {
   }
 }
-// ---- API key check ----
-function checkApiKey() {
-  if (!process.env.ANTHROPIC_API_KEY) {
-    err('ANTHROPIC_API_KEY environment variable is not set.');
-    log(c.dim('  Get a key at https://console.anthropic.com/'));
-    log(c.dim('  Then set it:'));
-    log(c.dim('    export ANTHROPIC_API_KEY=sk-ant-...'));
-    process.exit(1);
-  }
-}
-// ---- auth management ----
-function loadAuth() {
-  if (!fs.existsSync(AUTH_FILE)) return {};
-  try { return JSON.parse(fs.readFileSync(AUTH_FILE, 'utf8')); } catch { return {}; }
-}
-function saveAuth(map) {
-  ensureDirs();
-  fs.writeFileSync(AUTH_FILE, JSON.stringify(map, null, 2), { mode: 0o600 });
-}
-function authToEnvVar(map) {
-  // map is { token: userId }. Convert and inject as JSON env var.
-  return JSON.stringify(map);
-}
-function authAdd(user) {
-  if (!user) { err('Usage: skillforge auth add <user-id>'); process.exit(1); }
-  const map = loadAuth();
-  // Generate a token
-  const token = 'sf_' + crypto.randomBytes(24).toString('base64url');
-  map[token] = user;
-  saveAuth(map);
-  ok(`Created token for user "${user}":`);
-  log('');
-  log('   ' + c.bold(token));
-  log('');
-  log(c.dim('Use this token in the Authorization header:'));
-  log(c.dim(`   Authorization: Bearer ${token}`));
-  log(c.dim('Restart the server for the token to take effect.'));
-}
-function authList() {
-  const map = loadAuth();
-  const tokens = Object.entries(map);
-  if (tokens.length === 0) {
-    info('No auth tokens. Server runs in single-user mode.');
-    log(c.dim('  Add one with: skillforge auth add <user-id>'));
-    return;
-  }
-  log(c.bold('Auth tokens:'));
-  for (const [token, user] of tokens) {
-    log(`  ${c.dim(token.slice(0, 16) + '...')} → ${user}`);
-  }
-}
-function authRemove(user) {
-  if (!user) { err('Usage: skillforge auth remove <user-id>'); process.exit(1); }
-  const map = loadAuth();
-  const before = Object.keys(map).length;
-  for (const [t, u] of Object.entries(map)) {
-    if (u === user) delete map[t];
-  }
-  const removed = before - Object.keys(map).length;
-  saveAuth(map);
-  if (removed > 0) ok(`Revoked ${removed} token(s) for "${user}"`);
-  else info(`No tokens for "${user}"`);
-}
-// ---- server lifecycle ----
 function buildEnv(extra = {}) {
-  const authMap = loadAuth();
   return {
     ...process.env,
     SKILLFORGE_BUNDLED_SKILLS: path.join(PKG_ROOT, 'skills'),
@@ -223,36 +163,10 @@ function buildEnv(extra = {}) {
     SKILLFORGE_DB_PATH: path.join(DATA_DIR, 'orchestrator.db'),
     PYTHONPATH: path.join(PKG_ROOT, 'python'),
     PYTHONUNBUFFERED: '1',
-    ...(Object.keys(authMap).length > 0 ? { SKILLFORGE_AUTH_TOKENS: authToEnvVar(authMap) } : {}),
     ...extra,
   };
 }
-function startServer({ port = 8000 } = {}) {
-  setupIfNeeded();
-  checkApiKey();
-  const env = buildEnv({ SKILLFORGE_PORT: String(port) });
-  const authEnabled = Object.keys(loadAuth()).length > 0;
-  info(`Starting HTTP API on http://localhost:${port}`);
-  log(c.dim('  Live log:     skillforge events --watch'));
-  log(c.dim(`  Skills dir: ${USER_SKILLS_DIR} (drop folders here to add)`));
-  log(c.dim(`  Data dir:   ${DATA_DIR}`));
-  log(c.dim(`  Auth:       ${authEnabled ? 'enabled (bearer token required)' : 'disabled (single-user)'}`));
-  log('');
-  const proc = spawn(
-    venvPython(),
-    ['-m', 'uvicorn', 'app.main:app', '--host', '0.0.0.0', '--port', String(port)],
-    { stdio: 'inherit', env }
-  );
-  proc.on('exit', (code) => process.exit(code || 0));
-  process.on('SIGINT', () => proc.kill('SIGINT'));
-  process.on('SIGTERM', () => proc.kill('SIGTERM'));
-}
 function printMcpConfig() {
   setupIfNeeded();
   const useLocal = args.includes('--local');
@@ -313,12 +227,14 @@ function runRouteCmd() {
   proc.on('exit', (code) => process.exit(code ?? 0));
 }
-function runChat() {
+function runIndexCmd() {
   setupIfNeeded();
-  checkApiKey();
-  const env = buildEnv();
-  const proc = spawn(venvPython(), ['-m', 'app.cli'], { stdio: 'inherit', env });
-  proc.on('exit', (code) => process.exit(code || 0));
+  const sub = args.slice(1);
+  const proc = spawn(venvPython(), ['-m', 'app.index_cli', ...sub], {
+    stdio: 'inherit',
+    env: buildEnv(),
+  });
+  proc.on('exit', (code) => process.exit(code ?? 0));
 }
 // ---- skill management ----
@@ -341,7 +257,7 @@ function skillsAdd(srcPath) {
   const dest = path.join(USER_SKILLS_DIR, name);
   fs.cpSync(src, dest, { recursive: true });
   ok(`Added skill "${name}" → ${dest}`);
-  log(c.dim('  Restart the server to pick up the new skill.'));
+  log(c.dim('  Restart skillforge mcp (or trigger catalog reload) to pick up the new skill.'));
 }
 function skillsList() {
@@ -373,7 +289,7 @@ function skillsRemove(name) {
   }
   const target = path.join(USER_SKILLS_DIR, name);
   if (!fs.existsSync(target)) {
-    err(`No user skill named "${name}". Bundled skills cannot be removed (use disable_skill via MCP or HTTP API).`);
+    err(`No user skill named "${name}". Bundled skills cannot be removed (use disable_skill via MCP).`);
     process.exit(1);
   }
   fs.rmSync(target, { recursive: true, force: true });
@@ -398,10 +314,9 @@ ${c.bold('Run modes:')}
   skillforge --help                This message (recommended first step)
   skillforge mcp                   MCP stdio — primary integration for Claude / Cursor
   skillforge mcp config [--local] [--with-anthropic]   Print JSON for MCP host (merge into mcp.json)
-  skillforge start [--port=8000]   Optional HTTP API (no web dashboard)
   skillforge events [--watch] [--limit=N] [--verbose] [--user=…]   Live routing log + usage (see --help)
-  skillforge route [words…] [--project-root=…] [--session-id=…]   Route a prompt (see skillforge route --help)
-  skillforge chat                  Dev harness (needs start + ANTHROPIC_API_KEY)
+  skillforge route [words…] [--project-root=…] [--include-project-rag]   Route a prompt (see skillforge route --help)
+  skillforge index --project-root=… [--reset] [--stats-only]   Index repo text for include_project_rag
 ${c.bold('Skills:')}
   skillforge skills list           List bundled and user skills
@@ -414,11 +329,6 @@ ${c.bold('Skill packs (install from git):')}
   skillforge pack update <name>    Update a pack
   skillforge pack remove <name>    Uninstall a pack
-${c.bold('Auth (multi-user mode):')}
-  skillforge auth add <user>       Create a bearer token for a user
-  skillforge auth list             List users with tokens
-  skillforge auth remove <user>    Revoke all tokens for a user
 ${c.bold('Maintenance:')}
   skillforge reset                 Wipe learned state and event log
   skillforge install               Re-run setup (auto-runs on first launch)
@@ -436,29 +346,25 @@ ${c.bold('MCP integration:')}
 // ---- main ----
 async function main() {
+  dropLegacyAuthJsonIfPresent();
   if (args.includes('--help') || args.includes('-h') || cmd === 'help') {
     showHelp();
     return;
   }
-  const portArg = args.find((a) => a.startsWith('--port='));
-  const port = portArg ? parseInt(portArg.split('=')[1], 10) : 8000;
   switch (cmd) {
     case undefined:
       showHelp();
       break;
-    case 'start':
-      startServer({ port });
-      break;
     case 'events':
       runEventsCmd();
       break;
     case 'route':
       runRouteCmd();
       break;
-    case 'chat':
-      runChat();
+    case 'index':
+      runIndexCmd();
       break;
     case 'mcp':
       if (args[1] === 'config') {
@@ -492,7 +398,7 @@ async function main() {
           const result = packs.installPack(args[2]);
           ok(`Installed pack "${result.name}" (${result.version}) with ${result.skills.length} skill(s):`);
           result.skills.forEach(s => log('  ' + c.dim('•'), s));
-          log(c.dim('  Restart the server to pick up new skills.'));
+          log(c.dim('  Restart skillforge mcp (or trigger catalog reload) to pick up new skills.'));
         } else if (sub === 'list') {
           const list = packs.listPacks();
           if (list.length === 0) {
@@ -522,18 +428,6 @@ async function main() {
       }
       break;
     }
-    case 'auth': {
-      const sub = args[1];
-      if (sub === 'add') authAdd(args[2]);
-      else if (sub === 'list') authList();
-      else if (sub === 'remove' || sub === 'rm') authRemove(args[2]);
-      else {
-        err(`Unknown auth subcommand: ${sub}`);
-        log(c.dim('  Try: add, list, remove'));
-        process.exit(1);
-      }
-      break;
-    }
     default:
       err(`Unknown command: ${cmd}`);
       showHelp();

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@heytherevibin/skillforge",
-  "version": "0.2.1",
-  "description": "Skill orchestration for Claude: hybrid embedding and router-based routing, MCP and HTTP servers, per-user learning, and a large bundled SKILL.md catalog.",
+  "version": "0.8.0",
+  "description": "Skill orchestration for Claude: hybrid embedding and router-based routing, MCP stdio server, per-user learning, and a large bundled SKILL.md catalog.",
   "keywords": [
     "claude",
     "skills",

package/python/app/chunking.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""Split SKILL.md bodies into line-bounded chunks for RAG-style retrieval."""
+from __future__ import annotations
+import os
+from dataclasses import dataclass
+def chunk_max_chars() -> int:
+    return max(400, int(os.getenv("SKILLFORGE_CHUNK_MAX_CHARS", "1200")))
+def chunk_overlap_chars() -> int:
+    return max(0, int(os.getenv("SKILLFORGE_CHUNK_OVERLAP", "200")))
+@dataclass
+class SkillChunk:
+    """One span of a skill body with 1-based inclusive line numbers (within the body text)."""
+    text: str
+    line_start: int
+    line_end: int
+def _split_long_segment(text: str, line_start: int, max_chars: int, overlap: int) -> list[SkillChunk]:
+    """Character windows with overlap; ``line_start`` is the body line of ``text[0]`` (1-based)."""
+    if not text:
+        return []
+    line_no = line_start
+    line_at_idx: list[int] = []
+    for ch in text:
+        line_at_idx.append(line_no)
+        if ch == "\n":
+            line_no += 1
+    n = len(text)
+    out: list[SkillChunk] = []
+    i = 0
+    while i < n:
+        end = min(i + max_chars, n)
+        piece = text[i:end].strip()
+        if piece:
+            ls = line_at_idx[i]
+            le = line_at_idx[end - 1]
+            out.append(SkillChunk(piece, ls, le))
+        if end >= n:
+            break
+        adv = max(1, end - i - overlap)
+        i += adv
+    if out:
+        return out
+    st = text.strip()
+    if not st:
+        return []
+    le_fallback = line_start + max(0, text.count("\n"))
+    return [SkillChunk(st, line_start, max(line_start, le_fallback))]
+def chunk_skill_body(body: str, *, max_chars: int | None = None, overlap: int | None = None) -> list[SkillChunk]:
+    """Chunk by markdown headings (lines starting with ``#``) then hard-split long sections.
+    Empty body yields no chunks (caller may treat as single empty).
+    """
+    mc = max_chars if max_chars is not None else chunk_max_chars()
+    ov = overlap if overlap is not None else chunk_overlap_chars()
+    b = body or ""
+    if not b.strip():
+        return []
+    lines = b.split("\n")
+    sections: list[tuple[str, int, int]] = []
+    cur: list[str] = []
+    cur_start = 1
+    for i, line in enumerate(lines):
+        ln = i + 1
+        if line.startswith("#") and cur:
+            sections.append(("\n".join(cur), cur_start, ln - 1))
+            cur = [line]
+            cur_start = ln
+        else:
+            cur.append(line)
+    if cur:
+        sections.append(("\n".join(cur), cur_start, len(lines)))
+    chunks: list[SkillChunk] = []
+    for text, ls, le in sections:
+        text = text.strip()
+        if not text:
+            continue
+        if len(text) <= mc:
+            chunks.append(SkillChunk(text, ls, le))
+        else:
+            chunks.extend(_split_long_segment(text, ls, mc, ov))
+    return chunks if chunks else [SkillChunk(b.strip(), 1, max(1, len(lines)))]
+def chunk_raw_document(
+    body: str,
+    *,
+    max_chars: int | None = None,
+    overlap: int | None = None,
+) -> list[SkillChunk]:
+    """Chunk arbitrary file text with line-bounded windows (no markdown section split).
+    Line numbers are 1-based within the normalized document (``\\r\\n`` → ``\\n``).
+    """
+    mc = max_chars if max_chars is not None else chunk_max_chars()
+    ov = overlap if overlap is not None else chunk_overlap_chars()
+    if not body:
+        return []
+    normalized = body.replace("\r\n", "\n")
+    if not normalized.strip():
+        return []
+    line_count = normalized.count("\n") + 1
+    if len(normalized) <= mc:
+        return [SkillChunk(normalized, 1, max(1, line_count))]
+    return _split_long_segment(normalized, 1, mc, ov)

package/python/app/context_fusion.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""MMR-based selection to fuse skill + project chunks under one character budget."""
+from __future__ import annotations
+from typing import Any
+import numpy as np
+def mmr_select(
+    embeddings: np.ndarray,
+    relevance: np.ndarray,
+    text_lengths: np.ndarray,
+    *,
+    char_budget: int,
+    overhead_per_chunk: int | np.ndarray,
+    lambda_mult: float,
+) -> tuple[list[int], list[dict[str, Any]]]:
+    """Greedy MMR over normalized row embeddings.
+    Each step maximizes ``lambda_mult * rel[i] - (1 - lambda_mult) * max_{j in selected} sim(i, j)``.
+    Returns selected **indices** in pick order and a trace row per pick (for telemetry).
+    """
+    n = int(embeddings.shape[0])
+    if n == 0 or char_budget <= 0:
+        return [], []
+    lam = float(lambda_mult)
+    lam = max(0.0, min(1.0, lam))
+    rel = np.asarray(relevance, dtype=np.float64).reshape(-1)
+    lens = np.asarray(text_lengths, dtype=np.int64).reshape(-1)
+    emb = np.asarray(embeddings, dtype=np.float32)
+    if isinstance(overhead_per_chunk, int):
+        ovh = np.full(n, int(overhead_per_chunk), dtype=np.int64)
+    else:
+        ovh = np.asarray(overhead_per_chunk, dtype=np.int64).reshape(-1)
+    if emb.shape[0] != n or rel.shape[0] != n or lens.shape[0] != n or ovh.shape[0] != n:
+        raise ValueError("embeddings, relevance, text_lengths, and overheads must align")
+    selected: list[int] = []
+    trace: list[dict[str, Any]] = []
+    used = 0
+    remaining = set(range(n))
+    while remaining:
+        best_i: int | None = None
+        best_mmr = -1e18
+        for i in remaining:
+            need = int(lens[i]) + int(ovh[i])
+            if need <= 0 or used + need > char_budget:
+                continue
+            if not selected:
+                div = 0.0
+            else:
+                sims = emb[i] @ emb[np.array(selected, dtype=np.int64)].T
+                div = float(np.max(sims))
+            mmr = lam * float(rel[i]) - (1.0 - lam) * div
+            if mmr > best_mmr:
+                best_mmr = mmr
+                best_i = i
+        if best_i is None:
+            break
+        if selected:
+            sims = emb[best_i] @ emb[np.array(selected, dtype=np.int64)].T
+            div_used = float(np.max(sims))
+        else:
+            div_used = 0.0
+        selected.append(best_i)
+        used += int(lens[best_i]) + int(ovh[best_i])
+        remaining.remove(best_i)
+        trace.append({
+            "pool_index": best_i,
+            "mmr": round(float(best_mmr), 6),
+            "relevance": round(float(rel[best_i]), 6),
+            "max_sim_to_selected": round(div_used, 6),
+        })
+    return selected, trace

package/python/app/events_cli.py CHANGED Viewed

@@ -123,7 +123,7 @@ def main() -> None:
     db_path = resolve_orchestrator_db(pr)
     if not db_path.exists():
-        print("No database yet — run skillforge mcp or skillforge start first (or route once with this project_root).")
+        print("No database yet — run skillforge mcp first (or route once with this project_root).")
         print(f"  Expected: {db_path}")
         return

package/python/app/index_cli.py ADDED Viewed

@@ -0,0 +1,89 @@
+"""CLI: index project files into ``<project>/.skillforge/orchestrator.db`` for project RAG."""
+from __future__ import annotations
+import argparse
+import asyncio
+import json
+import sys
+from pathlib import Path
+from app.db_paths import resolve_orchestrator_db
+from app.main import build_router_and_skills, init_db
+from app.project_index import index_project, project_index_stats
+def _parse_args(argv: list[str] | None) -> argparse.Namespace:
+    p = argparse.ArgumentParser(
+        description=(
+            "Chunk and embed text files under project_root into the per-repo orchestrator DB. "
+            "Use with MCP route_skills/include_project_rag or skillforge route --include-project-rag."
+        ),
+    )
+    p.add_argument(
+        "--project-root",
+        required=True,
+        help="Repository root directory to index (writes .skillforge/orchestrator.db).",
+    )
+    p.add_argument(
+        "--reset",
+        action="store_true",
+        help="Clear all project_chunks rows before re-indexing.",
+    )
+    p.add_argument(
+        "--stats-only",
+        action="store_true",
+        help="Print index metadata from DB and exit (no scan/embed).",
+    )
+    p.add_argument(
+        "--quiet",
+        action="store_true",
+        help="Skip progress messages on stderr from skill loading.",
+    )
+    return p.parse_args(argv)
+async def _run(args: argparse.Namespace) -> int:
+    root_s = args.project_root.strip()
+    if not root_s:
+        print("skillforge index: --project-root is required.", file=sys.stderr)
+        return 2
+    root = Path(root_s).expanduser().resolve()
+    db_path = resolve_orchestrator_db(str(root))
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    con = init_db(db_path)
+    try:
+        if args.stats_only:
+            print(json.dumps({"db": str(db_path), **project_index_stats(con)}, indent=2))
+            return 0
+        router, _ = await asyncio.to_thread(
+            build_router_and_skills,
+            log=not args.quiet,
+            log_prefix="[skillforge-index]",
+        )
+        stats = await asyncio.to_thread(
+            index_project,
+            con,
+            root,
+            router.embed_model,
+            reset=args.reset,
+        )
+        print(
+            json.dumps(
+                {"db": str(db_path), "index_state": project_index_stats(con), **stats},
+                indent=2,
+            )
+        )
+        return 0
+    finally:
+        con.close()
+def main(argv: list[str] | None = None) -> None:
+    args = _parse_args(argv)
+    raise SystemExit(asyncio.run(_run(args)))
+if __name__ == "__main__":
+    main()