npm - @toolbaux/guardian - Versions diffs - 0.1.23 → 0.2.0 - Mend

@toolbaux/guardian 0.1.23 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +6 -4
package/dist/cli.js +1 -1
package/dist/commands/context.js +87 -29
package/dist/commands/extract.js +4 -1
package/dist/commands/generate.js +83 -10
package/dist/commands/init.js +88 -56
package/dist/commands/intel.js +23 -0
package/dist/commands/mcp-serve.js +112 -0
package/dist/commands/search.js +43 -3
package/dist/config.js +1 -0
package/dist/db/embeddings.js +113 -0
package/dist/db/fts-builder.js +85 -0
package/dist/db/sqlite-specs-store.js +496 -3
package/package.json +2 -1

package/dist/commands/mcp-serve.js CHANGED Viewed

@@ -111,6 +111,83 @@ async function search(args) {
 async function model(args) {
     return runCli(["search", "--model", args.name, "--input", specsInputDir]);
 }
+/**
+ * guardian_grep — semantic grep via guardian search.
+ *
+ * Intended as a replacement for raw Grep tool calls. Invokes the guardian CLI
+ * ("search", JSON format, "auto" backend, reading from specsInputDir) and
+ * renders the hits in a grep-like layout: up to 25 symbols as
+ * `file:line: name`, followed by up to 15 file paths.
+ *
+ * @param {{ query: string, path?: string }} args
+ *   `query` is forwarded to the CLI unchanged. `path`, when given, keeps only
+ *   hits whose file path starts with that prefix (a leading "./" is ignored on
+ *   both sides). The filter is applied to whatever the CLI returned, before
+ *   the 25/15 caps.
+ * @returns {Promise<string>} The formatted listing, or the raw CLI output when
+ *   it is not JSON of the expected shape.
+ */
+async function grep(args) {
+    const raw = await runCli([
+        "search", "--query", args.query, "--format", "json", "--backend", "auto", "--input", specsInputDir,
+    ]);
+    try {
+        const data = JSON.parse(raw);
+        const normalise = (p) => String(p).replace(/^\.\//, "");
+        const prefix = args.path ? normalise(args.path) : "";
+        const inScope = (p) => prefix === "" || normalise(p).startsWith(prefix);
+        // The search command emits `files` either as plain path strings or as
+        // objects carrying `file_path`; accept both so paths never print as "undefined".
+        const filePathOf = (f) => (typeof f === "string" ? f : f.file_path);
+        const symbols = (data.symbols ?? []).filter((s) => inScope(s.file));
+        const files = (data.files ?? []).map(filePathOf).filter(inScope);
+        const lines = [`guardian_grep("${args.query}")`];
+        if (symbols.length) {
+            lines.push("\nSymbols (file:line: name):");
+            for (const s of symbols.slice(0, 25)) {
+                lines.push(`  ${s.file}:${s.line}: ${s.name}`);
+            }
+        }
+        if (files.length) {
+            lines.push("\nFiles:");
+            for (const filePath of files.slice(0, 15)) {
+                lines.push(`  ${filePath}`);
+            }
+        }
+        // Only the header line is present → nothing matched (or the path filter removed everything).
+        if (lines.length === 1)
+            lines.push("  (no matches — try a different query)");
+        return lines.join("\n");
+    }
+    catch {
+        return raw; // passthrough if search returns plain text or an unexpected shape
+    }
+}
+/**
+ * guardian_glob — semantic file discovery via guardian search.
+ *
+ * Intended as a replacement for raw Glob tool calls. Turns the glob pattern
+ * into search keywords, runs the guardian CLI ("search", JSON format, "auto"
+ * backend, reading from specsInputDir) and lists up to 20 matching files.
+ * When the pattern yields no keywords (e.g. a pure extension glob) no search
+ * is run and a hint pointing at guardian_search is returned instead.
+ *
+ * Keyword extraction:
+ *   - extension groups (".{ts,tsx}") and a trailing ".ext" are dropped,
+ *   - wildcards and glob punctuation ([]{}(),?!) become separators, so brace
+ *     alternations contribute each alternative as its own keyword,
+ *   - tokens of 2 characters or fewer and the generic names
+ *     src/lib/dist/app/index are discarded.
+ *
+ * @param {{ pattern: string }} args - Glob pattern, e.g. "src/auth/**\/*.ts".
+ * @returns {Promise<string>} The formatted listing, the hint text, or the raw
+ *   CLI output when it is not JSON of the expected shape.
+ */
+async function glob(args) {
+    const pattern = String(args.pattern ?? "");
+    // Extract keywords: "src/auth/**/*.ts" → "auth", "src/middleware/error*" → "middleware error",
+    // "src/{auth,users}/**" → "auth users", "**/*.{ts,tsx}" → "" (no keywords).
+    const keywords = pattern
+        .replace(/\.\{[^{}]*\}/g, " ") // ".{ts,tsx}" is an extension list, not a keyword
+        .replace(/\*\*?/g, " ")
+        .replace(/\.\w+\s*$/, "") // strip trailing extension
+        .replace(/[[\]{}(),?!]/g, " ")
+        .split(/[/\s]+/)
+        .map(s => s.replace(/^\.+|\.+$/g, "")) // ".test" → "test"
+        .filter(s => s.length > 2 && !/^(src|lib|dist|app|index)$/.test(s))
+        .join(" ")
+        .trim();
+    if (!keywords) {
+        return [
+            `guardian_glob("${pattern}"): pattern has no meaningful keywords.`,
+            `Use guardian_search with a descriptive query instead, e.g.:`,
+            `  guardian_search("TypeScript source files") — or describe what you're looking for.`,
+        ].join("\n");
+    }
+    const raw = await runCli([
+        "search", "--query", keywords, "--format", "json", "--backend", "auto", "--input", specsInputDir,
+    ]);
+    try {
+        const data = JSON.parse(raw);
+        // The search command emits `files` either as plain path strings or as
+        // objects carrying `file_path`; accept both so paths never print as "undefined".
+        const files = (data.files ?? []).map(f => (typeof f === "string" ? f : f.file_path));
+        const lines = [
+            `guardian_glob("${pattern}") — searched: "${keywords}"`,
+            `\nMatching files:`,
+            ...files.slice(0, 20).map(filePath => `  ${filePath}`),
+        ];
+        if (files.length === 0)
+            lines.push("  (no matches)");
+        return lines.join("\n");
+    }
+    catch {
+        return raw; // passthrough if search returns plain text or an unexpected shape
+    }
+}
 // ── MCP protocol ──
 const TOOLS = [
     {
@@ -167,6 +244,39 @@ const TOOLS = [
         description: "MCP usage stats for this session. Call at end to evaluate guardian's usefulness.",
         inputSchema: { type: "object", properties: {} },
     },
+    {
+        name: "guardian_grep",
+        description: [
+            "Semantic grep — find symbols and files matching a keyword or pattern.",
+            "Use INSTEAD of the Grep tool. Returns matching function/class names with file:line locations.",
+            "Backed by BM25 + call-graph authority so relevant source definitions surface first.",
+            "Example: guardian_grep('validate token') → auth.py:42: validate_token, middleware.py:18: check_jwt",
+        ].join(" "),
+        inputSchema: {
+            type: "object",
+            properties: {
+                query: { type: "string", description: "Keyword or phrase to search for (natural language OK)" },
+                path: { type: "string", description: "Optional: restrict to files under this path prefix" },
+            },
+            required: ["query"],
+        },
+    },
+    {
+        name: "guardian_glob",
+        description: [
+            "Semantic file discovery — find files matching a path pattern.",
+            "Use INSTEAD of the Glob tool. Extracts keywords from the pattern and searches the guardian index.",
+            "Example: guardian_glob('src/auth/**/*.ts') → searches for 'auth typescript' files.",
+            "For pure extension globs with no path context, use guardian_search with a descriptive query.",
+        ].join(" "),
+        inputSchema: {
+            type: "object",
+            properties: {
+                pattern: { type: "string", description: "Glob pattern (e.g. 'src/auth/**/*.ts', '**/middleware*')" },
+            },
+            required: ["pattern"],
+        },
+    },
 ];
 const TOOL_HANDLERS = {
     guardian_orient: orient,
@@ -175,6 +285,8 @@ const TOOL_HANDLERS = {
     guardian_search: search,
     guardian_model: model,
     guardian_metrics: async () => JSON.stringify(metrics.summary()),
+    guardian_grep: grep,
+    guardian_glob: glob,
 };
 function respond(id, result) {
     const msg = JSON.stringify({ jsonrpc: "2.0", id, result });

package/dist/commands/search.js CHANGED Viewed

@@ -17,6 +17,7 @@ export async function runSearch(options) {
             if (sqliteResult !== null) {
                 const base = JSON.parse(await querySearch(inputDir, options.query));
                 base.files = sqliteResult.files;
+                base.symbols = sqliteResult.symbols;
                 base.search_signal = sqliteResult.signal;
                 console.log(JSON.stringify(base));
                 return;
@@ -138,12 +139,36 @@ async function runSearchSqlite(specsInput, query, limit, backend = "sqlite") {
             console.log(`No FTS results for "${query}"`);
             return true;
         }
+        let queryVec;
+        try {
+            const { embedQuery } = await import("../db/embeddings.js");
+            const vec = await embedQuery(cleaned || query, process.env.OPENAI_API_KEY);
+            if (vec)
+                queryVec = vec;
+        }
+        catch { /* graceful degradation */ }
+        const symbols = store.searchSymbols(cleaned || query, Math.ceil(limit / 2), queryVec);
         const lines = [`## FTS5 search: "${query}"\n`];
+        // Build a map of file → matching symbols for quick lookup
+        const symbolsByFile = new Map();
+        for (const s of symbols) {
+            if (!symbolsByFile.has(s.file_path))
+                symbolsByFile.set(s.file_path, []);
+            symbolsByFile.get(s.file_path).push({ name: s.name, line: s.line });
+        }
         for (const r of results) {
             const rank = Math.abs(r.rank).toFixed(3);
             lines.push(`### \`${r.file_path}\`  (score: ${rank})`);
-            if (r.symbol_name)
-                lines.push(`  symbols: ${r.symbol_name}`);
+            // Matching symbols from this file (snippet equivalent)
+            const fileSyms = symbolsByFile.get(r.file_path) ?? [];
+            const inlineSyms = r.matching_symbols.filter(s => !fileSyms.some(f => f.name === s));
+            if (fileSyms.length) {
+                for (const s of fileSyms)
+                    lines.push(`  → \`${s.name}\` :${s.line}`);
+            }
+            if (inlineSyms.length) {
+                lines.push(`  symbols: ${inlineSyms.join(", ")}`);
+            }
             if (r.imports.length)
                 lines.push(`  imports: ${r.imports.join(", ")}`);
             if (r.used_by.length)
@@ -177,7 +202,22 @@ async function getSqliteFileList(specsInput, query, limit, backend = "auto") {
         if (results.length === 0)
             return null;
         const signal = store.querySignal(query);
-        return { files: results.map((r) => r.file_path), signal };
+        // Hybrid symbol search: BM25 + call-graph authority + optional vector similarity.
+        // embedQuery uses local model (no API key) or OpenAI if OPENAI_API_KEY is set.
+        let queryVec;
+        try {
+            const { embedQuery } = await import("../db/embeddings.js");
+            const vec = await embedQuery(cleaned || query, process.env.OPENAI_API_KEY);
+            if (vec)
+                queryVec = vec;
+        }
+        catch { /* graceful degradation — vector unavailable */ }
+        const symbols = store.searchSymbols(cleaned || query, Math.ceil(limit / 2), queryVec);
+        return {
+            files: results.map((r) => r.file_path),
+            symbols: symbols.map((s) => ({ file: s.file_path, name: s.name, line: s.line })),
+            signal,
+        };
     }
     finally {
         await store.close();

package/dist/config.js CHANGED Viewed

@@ -273,6 +273,7 @@ function normalizeConfig(input, configDir) {
 }
 function mergeConfig(base, override) {
     return {
+        project_id: override.project_id ?? base.project_id,
         project: {
             root: override.project?.root ?? base.project?.root ?? "",
             backendRoot: override.project?.backendRoot ?? base.project?.backendRoot ?? "",

package/dist/db/embeddings.js ADDED Viewed

@@ -0,0 +1,113 @@
+/**
+ * Embedding generation for function-level semantic search.
+ *
+ * Strategy (local-first, no API key required):
+ *   Default  — @xenova/transformers running Xenova/all-MiniLM-L6-v2 on-device.
+ *              Model downloads once (~23 MB) and is cached in ~/.cache/xenova.
+ *              dim=384, pure JS/ONNX, no external service needed.
+ *
+ *   Upgrade  — OpenAI text-embedding-3-small when OPENAI_API_KEY is set.
+ *              dim=256, higher quality, costs ~$0.02 per 1M tokens.
+ *
+ * Text per function (concise — name carries most semantic signal):
+ *   "{name} {filename}: {top calls} {short literals}"
+ */
+const LOCAL_MODEL = "Xenova/all-MiniLM-L6-v2"; // model id handed to the @xenova/transformers feature-extraction pipeline
+const LOCAL_DIM = 384; // only referenced in the summary log line of embedFunctions
+const OPENAI_MODEL = "text-embedding-3-small"; // model name sent with each OpenAI embeddings request
+const OPENAI_DIM = 256; // sent as the `dimensions` request parameter and echoed in the summary log
+const BATCH = 64; // functions per embedding batch in embedFunctions; safe for both local and OpenAI
+/**
+ * Render one function record as the short text that gets embedded.
+ *
+ * Layout: "{name} {basename of file}: {first 10 calls} {first 5 string literals}".
+ * The literal part is capped at 100 characters; the final string is trimmed
+ * and capped at 300 characters. Missing `calls` / `stringLiterals` count as empty.
+ */
+function fnToText(fn) {
+    const pathParts = fn.file.split("/");
+    const baseName = pathParts.pop() ?? fn.file;
+    const topCalls = (fn.calls ?? []).slice(0, 10);
+    const literalText = (fn.stringLiterals ?? []).slice(0, 5).join(" ").slice(0, 100);
+    const text = `${fn.name} ${baseName}: ${topCalls.join(" ")} ${literalText}`;
+    return text.trim().slice(0, 300);
+}
+// ── Local embedder (no API key) ───────────────────────────────────────────────
+/**
+ * Embed each text with the supplied pipeline, one call at a time and in order.
+ * Every call uses mean pooling with normalisation; the `data` of each result
+ * is copied into a new Float32Array.
+ */
+async function embedBatchLocal(texts, pipe) {
+    const vectors = [];
+    for (let idx = 0; idx < texts.length; idx += 1) {
+        const { data } = await pipe(texts[idx], { pooling: "mean", normalize: true });
+        vectors.push(new Float32Array(data));
+    }
+    return vectors;
+}
+// ── OpenAI embedder (OPENAI_API_KEY required) ─────────────────────────────────
+/**
+ * Embed a batch of texts with one OpenAI embeddings request.
+ * The `openai` package is imported lazily on each call; the request asks for
+ * OPENAI_MODEL at OPENAI_DIM dimensions in "float" encoding. Each returned
+ * embedding is copied into a Float32Array, in response order.
+ */
+async function embedBatchOpenAI(texts, apiKey) {
+    const { default: OpenAIClient } = await import("openai");
+    const client = new OpenAIClient({ apiKey });
+    const request = {
+        model: OPENAI_MODEL,
+        input: texts,
+        dimensions: OPENAI_DIM,
+        encoding_format: "float",
+    };
+    const { data } = await client.embeddings.create(request);
+    return data.map((item) => new Float32Array(item.embedding));
+}
+// ── Public API ────────────────────────────────────────────────────────────────
+/**
+ * Embed all functions and hand the resulting rows to store.rebuildEmbeddings.
+ * Uses the local model by default; OpenAI when an API key is passed.
+ *
+ * Functions are processed in batches of BATCH. A batch whose embedding call
+ * throws is logged with console.warn and skipped; the remaining batches still
+ * run. Progress is logged roughly every 500 functions, and a summary line is
+ * logged once the rows have been stored.
+ *
+ * @param store  Object exposing rebuildEmbeddings(rows).
+ * @param fns    Function records; `file`, `name` and `lines[0]` are copied into
+ *               each row, and fnToText() builds the text that is embedded.
+ * @param apiKey Optional OpenAI API key; any truthy value selects OpenAI.
+ * @returns {Promise<void>} Resolves immediately, without touching the store,
+ *               when `fns` is empty.
+ */
+export async function embedFunctions(store, fns, apiKey) {
+    if (fns.length === 0)
+        return;
+    const useOpenAI = !!apiKey;
+    let pipe;
+    if (!useOpenAI) {
+        // Lazy-load local model (downloads once, then cached)
+        const { pipeline } = await import("@xenova/transformers");
+        console.log(`[guardian embed] loading local model ${LOCAL_MODEL}…`);
+        pipe = await pipeline("feature-extraction", LOCAL_MODEL);
+    }
+    // The loop advances in BATCH-sized steps, so `i` is rarely a multiple of
+    // 500; track the next reporting threshold instead of testing `i % 500`.
+    const PROGRESS_EVERY = 500;
+    let nextReport = PROGRESS_EVERY;
+    const rows = [];
+    for (let i = 0; i < fns.length; i += BATCH) {
+        const batch = fns.slice(i, i + BATCH);
+        const texts = batch.map(fnToText);
+        let vecs;
+        try {
+            vecs = useOpenAI
+                ? await embedBatchOpenAI(texts, apiKey)
+                : await embedBatchLocal(texts, pipe);
+        }
+        catch (err) {
+            // A thrown value is not guaranteed to be an Error instance.
+            console.warn(`[guardian embed] batch ${i}–${i + batch.length - 1} failed: ${err?.message ?? err}`);
+            continue;
+        }
+        for (let j = 0; j < batch.length; j++) {
+            if (!vecs[j])
+                continue;
+            rows.push({
+                file_path: batch[j].file,
+                name: batch[j].name,
+                line: batch[j].lines[0],
+                vec: vecs[j],
+            });
+        }
+        const done = i + batch.length;
+        // Skip the report for the final batch — the summary line below covers it.
+        if (done >= nextReport && done < fns.length) {
+            console.log(`[guardian embed] ${done}/${fns.length} functions processed`);
+            nextReport = (Math.floor(done / PROGRESS_EVERY) + 1) * PROGRESS_EVERY;
+        }
+    }
+    store.rebuildEmbeddings(rows);
+    const source = useOpenAI ? `OpenAI ${OPENAI_MODEL} dim=${OPENAI_DIM}` : `local ${LOCAL_MODEL} dim=${LOCAL_DIM}`;
+    console.log(`[guardian embed] stored ${rows.length} embeddings (${source})`);
+}
+/**
+ * Embed a single query string for hybrid search.
+ *
+ * The query is cut to 300 characters. With an API key the OpenAI embedder is
+ * used; otherwise a local feature-extraction pipeline for LOCAL_MODEL is
+ * created for this call. Any failure — or an empty result — yields null, so
+ * callers can carry on without a query vector.
+ */
+export async function embedQuery(query, apiKey) {
+    try {
+        let vectors;
+        if (apiKey) {
+            vectors = await embedBatchOpenAI([query.slice(0, 300)], apiKey);
+        }
+        else {
+            const { pipeline: createPipeline } = await import("@xenova/transformers");
+            const extractor = await createPipeline("feature-extraction", LOCAL_MODEL);
+            vectors = await embedBatchLocal([query.slice(0, 300)], extractor);
+        }
+        const [first] = vectors;
+        return first ?? null;
+    }
+    catch {
+        return null;
+    }
+}

package/dist/db/fts-builder.js CHANGED Viewed

@@ -297,9 +297,94 @@ export function populateFTSIndex(store, intel, arch, funcIntel) {
     if (funcIntel)
         mergeFunctionIntelRows(rowMap, funcIntel);
     store.rebuildSearchIndex(Array.from(rowMap.values()));
+    // Per-function index — enables symbol-level search results with line numbers.
+    if (funcIntel?.functions?.length) {
+        store.rebuildFunctionIndex(funcIntel.functions);
+    }
     // Build dependency graph
     if (arch) {
         const edges = buildDepEdges(arch);
         store.rebuildDeps(edges);
     }
+    // ── Normalised fact tables ─────────────────────────────────────────────────
+    // Merge arch endpoints + intel api_registry into endpoints_raw.
+    // arch.endpoints is the richer source (has method + file); intel.api_registry adds
+    // request/response schemas and service_calls that arch may not have.
+    const endpointMap = new Map();
+    for (const ep of arch?.endpoints ?? []) {
+        const key = `${(ep.method ?? "").toUpperCase()}::${ep.path ?? ""}`;
+        if (!ep.path)
+            continue;
+        endpointMap.set(key, {
+            method: ep.method ?? "",
+            path: ep.path,
+            handler: ep.handler ?? "",
+            file_path: ep.file ?? ep.file_path ?? "",
+            module: ep.module ?? "",
+            service_calls: ep.service_calls ?? [],
+            request_schema: "",
+            response_schema: "",
+        });
+    }
+    for (const [route, entry] of Object.entries(intel?.api_registry ?? {})) {
+        // route is like "GET /users" or "/users"
+        const parts = route.trim().split(/\s+/);
+        const method = parts.length >= 2 ? parts[0].toUpperCase() : "";
+        const p = parts.length >= 2 ? parts[1] : parts[0];
+        const key = `${method}::${p}`;
+        const existing = endpointMap.get(key);
+        if (existing) {
+            if (entry.request_schema)
+                existing.request_schema = entry.request_schema;
+            if (entry.response_schema)
+                existing.response_schema = entry.response_schema;
+            if (entry.service_calls?.length)
+                existing.service_calls = entry.service_calls;
+        }
+        else {
+            endpointMap.set(key, {
+                method,
+                path: p,
+                handler: entry.handler ?? "",
+                file_path: entry.file ?? "",
+                module: entry.module ?? "",
+                service_calls: entry.service_calls ?? [],
+                request_schema: entry.request_schema ?? "",
+                response_schema: entry.response_schema ?? "",
+            });
+        }
+    }
+    store.rebuildEndpointsRaw(Array.from(endpointMap.values()));
+    // Merge arch data_models + intel model_registry into models_raw.
+    const modelMap = new Map();
+    for (const m of arch?.data_models ?? []) {
+        if (!m.name)
+            continue;
+        modelMap.set(m.name, {
+            name: m.name,
+            file_path: m.file ?? m.file_path ?? "",
+            module: m.module ?? "",
+            fields: m.fields ?? [],
+            relationships: m.relationships ?? [],
+        });
+    }
+    for (const [name, entry] of Object.entries(intel?.model_registry ?? {})) {
+        const existing = modelMap.get(name);
+        if (existing) {
+            if (entry.fields?.length)
+                existing.fields = entry.fields;
+            if (entry.relationships?.length)
+                existing.relationships = entry.relationships;
+        }
+        else {
+            modelMap.set(name, {
+                name,
+                file_path: entry.file ?? "",
+                module: entry.module ?? "",
+                fields: entry.fields ?? [],
+                relationships: entry.relationships ?? [],
+            });
+        }
+    }
+    store.rebuildModelsRaw(Array.from(modelMap.values()));
 }