npm - @toolbaux/guardian - Versions diffs - 0.1.22 → 0.1.23 - Mend

@toolbaux/guardian 0.1.22 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +1 -1
package/dist/adapters/runner.js +72 -3
package/dist/adapters/typescript-adapter.js +24 -10
package/dist/benchmarking/metrics/context-coverage.js +82 -0
package/dist/benchmarking/metrics/drift-score.js +104 -0
package/dist/benchmarking/metrics/search-recall.js +207 -0
package/dist/benchmarking/metrics/token-efficiency.js +79 -0
package/dist/benchmarking/report.js +131 -0
package/dist/benchmarking/runner.js +175 -0
package/dist/benchmarking/types.js +13 -0
package/dist/cli.js +53 -10
package/dist/commands/benchmark.js +62 -0
package/dist/commands/discrepancy.js +1 -1
package/dist/commands/doc-generate.js +1 -1
package/dist/commands/doc-html.js +1 -1
package/dist/commands/extract.js +1 -1
package/dist/commands/feature-context.js +1 -1
package/dist/commands/init.js +1 -0
package/dist/commands/intel.js +47 -1
package/dist/commands/mcp-serve.js +48 -321
package/dist/commands/search.js +602 -14
package/dist/db/file-specs-store.js +174 -0
package/dist/db/fts-builder.js +305 -0
package/dist/db/index.js +55 -0
package/dist/db/specs-store.js +13 -0
package/dist/db/sqlite-specs-store.js +441 -0
package/dist/extract/codebase-intel.js +31 -2
package/dist/extract/compress.js +70 -3
package/dist/extract/context-block.js +11 -2
package/dist/extract/function-intel.js +5 -2
package/dist/extract/index.js +1 -23
package/dist/extract/writer.js +6 -0
package/package.json +3 -1

package/dist/db/sqlite-specs-store.js ADDED Viewed

@@ -0,0 +1,441 @@
+/**
+ * SqliteSpecsStore — SQLite implementation of SpecsStore.
+ *
+ * Stores everything that was previously scattered across .specs/machine/*.json
+ * and .specs/human/*.md into a single guardian.db file.
+ *
+ * Schema:
+ *   specs        — blob storage for all machine intelligence files
+ *   docs         — human-readable doc sections (markdown)
+ *   metrics_log  — append-only event log (replaces mcp-metrics.jsonl)
+ *   search_fts   — FTS5 virtual table built from specs content (extra index)
+ *   file_deps    — file → import edges (dependency graph used to rerank search results)
+ *
+ * Tier gating is stored per-row; the caller passes a tier filter when reading.
+ * This is the foundation for the pro/enterprise access control layer.
+ */
+import Database from "better-sqlite3";
+import path from "node:path";
+/**
+ * Reduce a file path to its canonical repo-relative form.
+ *
+ * The FTS builder, the dep-graph builder and the search query are all expected
+ * to pass paths through this function so that rows stored in guardian.db agree.
+ *
+ * Rules, applied in order:
+ *   1. "<anything>/src/..." at the start collapses to "src/...".
+ *   2. A doubled first segment "X/X/..." drops one copy and returns at once.
+ *   3. A first segment followed by a well-known source directory
+ *      (lib, examples, pkg, packages, apps, internal, cmd, src) is dropped.
+ *
+ *   "flask-full/src/flask/sessions.py"      → "src/flask/sessions.py"
+ *   "django/django/contrib/auth.py"         → "django/contrib/auth.py"
+ *   "sqlalchemy/lib/sqlalchemy/sql/base.py" → "lib/sqlalchemy/sql/base.py"
+ *
+ * @param {string} p - Path using forward slashes.
+ * @returns {string} The normalised path.
+ */
+export function normPath(p) {
+    // Rule 1: leading "<repo>/src/" becomes "src/".
+    const rel = p.replace(/^[^/]+\/src\//, "src/");
+    // Rule 2: package namespace repeats the clone directory name.
+    const doubled = /^([^/]+)\/\1\//.exec(rel);
+    if (doubled !== null) {
+        return rel.slice(doubled[1].length + 1);
+    }
+    // Rule 3: leading repo segment in front of a known source directory.
+    const hasKnownSourceDir = /^[^/]+\/(?:lib|examples|pkg|packages|apps|internal|cmd|src)\//i.test(rel);
+    return hasKnownSourceDir ? rel.slice(rel.indexOf("/") + 1) : rel;
+}
+/**
+ * Break camelCase and snake_case identifiers into lower-case words so that
+ * the porter stemmer can match on the individual parts.
+ *
+ *   getUserById       → "get user by id"
+ *   auth_service      → "auth service"
+ *   parseHTTPResponse → "parse http response"
+ *
+ * @param {string} s - Text containing identifiers.
+ * @returns {string} Lower-cased text with identifier parts separated by spaces.
+ */
+function splitIdentifiers(s) {
+    // snake_case: underscores become word breaks.
+    const withoutUnderscores = s.replace(/_/g, " ");
+    // camelCase: break between a lower-case letter and the capital after it.
+    const camelSplit = withoutUnderscores.replace(/([a-z])([A-Z])/g, "$1 $2");
+    // Acronym followed by a capitalised word: "HTTPResponse" → "HTTP Response".
+    const acronymSplit = camelSplit.replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");
+    return acronymSplit.toLowerCase();
+}
+/** File name of the SQLite database; SqliteSpecsStore.init() joins it onto the store directory. */
+export const DB_FILENAME = "guardian.db";
+/**
+ * SQLite-backed specs store.
+ *
+ * Persists spec blobs, human docs, a metrics log, an FTS5 search index and the
+ * file dependency graph in a single database file inside `storeDir`.
+ * `init()` must be called before any other method: it opens the database and
+ * creates the schema. The CRUD methods are declared `async` although the
+ * underlying driver calls are synchronous (presumably to match the SpecsStore
+ * interface — confirm against specs-store.js).
+ */
+export class SqliteSpecsStore {
+    /**
+     * Generic English / commit-message words that carry no code-domain signal.
+     * Built once for the class instead of on every querySignal() call.
+     */
+    static #STOP_WORDS = new Set([
+        "the", "and", "for", "are", "but", "not", "you", "all", "can", "had", "her", "was",
+        "one", "our", "out", "day", "get", "has", "him", "his", "how", "its", "let", "may",
+        "new", "now", "old", "see", "two", "use", "way", "who", "did", "man", "say", "she",
+        "than", "then", "them", "these", "they", "this", "will", "with", "have", "from",
+        "that", "been", "each", "into", "like", "make", "more", "other", "over", "same",
+        "such", "take", "when", "your", "also", "back", "came", "come", "does", "even",
+        "find", "give", "good", "here", "just", "keep", "kind", "last", "left", "life",
+        "long", "much", "must", "name", "need", "next", "only", "open", "own", "part",
+        "plan", "play", "put", "read", "real", "said", "show", "side", "some", "tell",
+        "time", "very", "well", "went", "what", "work", "year", "change", "update",
+        "remove", "add", "fix", "file", "files",
+    ]);
+    /** File extensions counted as real source code (as opposed to config/build files). */
+    static #SOURCE_EXT_RE = /\.(py|ts|tsx|js|jsx|go|java|cs|rb|rs|cpp|c|php|swift|kt)$/;
+    storeDir;
+    db;
+    /**
+     * @param {string} storeDir - Directory that holds (or will hold) the database file.
+     */
+    constructor(storeDir) {
+        this.storeDir = storeDir;
+    }
+    /** Open `<storeDir>/guardian.db`, switch to WAL journaling and ensure the schema exists. */
+    async init() {
+        const dbPath = path.join(this.storeDir, DB_FILENAME);
+        this.db = new Database(dbPath);
+        this.db.pragma("journal_mode = WAL");
+        this.db.pragma("synchronous = NORMAL");
+        this._migrate();
+    }
+    /** Close the database if it was opened; a no-op before init(). */
+    async close() {
+        this.db?.close();
+    }
+    // ── Spec blobs ─────────────────────────────────────────────────────────────
+    /**
+     * @param {string} name - Spec name (primary key).
+     * @returns {Promise<object|null>} The full `specs` row, or null when absent.
+     */
+    async readSpec(name) {
+        const row = this.db
+            .prepare("SELECT * FROM specs WHERE name = ?")
+            .get(name);
+        return row ?? null;
+    }
+    /**
+     * Insert or overwrite a spec blob; `updated_at` is set to the current time (ms).
+     * @param {string} name
+     * @param {string} content
+     * @param {string} format
+     * @param {string} [tier="free"]
+     */
+    async writeSpec(name, content, format, tier = "free") {
+        this.db
+            .prepare(`
+        INSERT INTO specs (name, format, content, tier, updated_at)
+        VALUES (?, ?, ?, ?, ?)
+        ON CONFLICT(name) DO UPDATE SET
+          content    = excluded.content,
+          format     = excluded.format,
+          tier       = excluded.tier,
+          updated_at = excluded.updated_at
+      `)
+            .run(name, format, content, tier, Date.now());
+    }
+    /** @returns {Promise<string[]>} All spec names, sorted. */
+    async listSpecs() {
+        const rows = this.db
+            .prepare("SELECT name FROM specs ORDER BY name")
+            .all();
+        return rows.map(r => r.name);
+    }
+    /** @returns {Promise<boolean>} Whether a spec with this name exists. */
+    async hasSpec(name) {
+        const row = this.db
+            .prepare("SELECT 1 FROM specs WHERE name = ?")
+            .get(name);
+        return !!row;
+    }
+    // ── Human docs ─────────────────────────────────────────────────────────────
+    /** @returns {Promise<object|null>} The full `docs` row for `id`, or null when absent. */
+    async readDoc(id) {
+        const row = this.db
+            .prepare("SELECT * FROM docs WHERE id = ?")
+            .get(id);
+        return row ?? null;
+    }
+    /**
+     * Insert or overwrite a doc section; `updated_at` is set to the current time (ms).
+     * @param {{id: string, section: string, title: string, body: string, tier: string}} entry
+     */
+    async writeDoc(entry) {
+        this.db
+            .prepare(`
+        INSERT INTO docs (id, section, title, body, tier, updated_at)
+        VALUES (?, ?, ?, ?, ?, ?)
+        ON CONFLICT(id) DO UPDATE SET
+          section    = excluded.section,
+          title      = excluded.title,
+          body       = excluded.body,
+          tier       = excluded.tier,
+          updated_at = excluded.updated_at
+      `)
+            .run(entry.id, entry.section, entry.title, entry.body, entry.tier, Date.now());
+    }
+    /**
+     * @param {string} [section] - When given, only docs of that section are returned.
+     * @returns {Promise<object[]>} Doc rows ordered by id (or by section, id without a filter).
+     */
+    async listDocs(section) {
+        if (section) {
+            return this.db
+                .prepare("SELECT * FROM docs WHERE section = ? ORDER BY id")
+                .all(section);
+        }
+        return this.db
+            .prepare("SELECT * FROM docs ORDER BY section, id")
+            .all();
+    }
+    // ── Metrics log ────────────────────────────────────────────────────────────
+    /**
+     * Append one event to the metrics log; the payload is stored as JSON text.
+     * @param {string} event
+     * @param {*} payload - JSON-serialisable value; undefined is stored as `null`.
+     */
+    async appendMetric(event, payload) {
+        // JSON.stringify(undefined) is undefined, which cannot satisfy the NOT NULL column.
+        this.db
+            .prepare("INSERT INTO metrics_log (ts, event, payload) VALUES (?, ?, ?)")
+            .run(Date.now(), event, JSON.stringify(payload ?? null));
+    }
+    /**
+     * @param {number} [limit=1000]
+     * @returns {Promise<object[]>} Newest-first log rows; `payload` is still JSON text.
+     */
+    async readMetrics(limit = 1000) {
+        return this.db
+            .prepare("SELECT * FROM metrics_log ORDER BY id DESC LIMIT ?")
+            .all(limit);
+    }
+    // ── FTS search (extra index, no equivalent in FileSpecsStore) ─────────────
+    /**
+     * Rebuild the FTS5 search index from extracted codebase data.
+     *
+     * Each row is one file. Symbol names and endpoints are pre-expanded with
+     * splitIdentifiers() so "getUserById" becomes "get user by id" before
+     * the porter stemmer runs — this gives sub-token recall without trigrams.
+     *
+     * The delete and all inserts run in ONE transaction, so a failure part-way
+     * through leaves the previous index intact instead of an empty one.
+     * Missing symbol_name / endpoint / body / module values are indexed as "".
+     *
+     * Column BM25 weights used by the search methods (higher = more important):
+     *   file_path  symbol_name  endpoint  body  module
+     *   1.0        0.5          0.7       1.0   0.6
+     *
+     * @param {Array<{file_path: string, symbol_name?: string, endpoint?: string, body?: string, module?: string}>} rows
+     */
+    rebuildSearchIndex(rows) {
+        const clear = this.db.prepare("DELETE FROM search_fts");
+        const insert = this.db.prepare("INSERT INTO search_fts (file_path, symbol_name, endpoint, body, module) VALUES (?, ?, ?, ?, ?)");
+        const replaceAll = this.db.transaction((items) => {
+            clear.run();
+            for (const r of items) {
+                insert.run(r.file_path, splitIdentifiers(r.symbol_name ?? ""), splitIdentifiers(r.endpoint ?? ""), r.body ?? "", r.module ?? "");
+            }
+        });
+        replaceAll(rows);
+    }
+    /**
+     * BM25-ranked full-text search over the indexed content.
+     * Query tokens are OR-ed together as prefix matches.
+     *
+     * @param {string} query - Natural-language or identifier query.
+     * @param {number} [limit=20]
+     * @returns {Array<{file_path: string, symbol_name: string, rank: number}>}
+     *   Best match first (most negative rank); [] when the query has no usable
+     *   tokens or the FTS query fails.
+     */
+    searchFTS(query, limit = 20) {
+        const tokens = this._buildTokens(query);
+        if (tokens.length === 0)
+            return [];
+        const ftsQuery = tokens.join(" OR ");
+        try {
+            return this.db
+                .prepare(`
+          SELECT file_path, symbol_name,
+                 bm25(search_fts, 1.0, 0.5, 0.7, 1.0, 0.6) AS rank
+          FROM search_fts
+          WHERE search_fts MATCH ?
+          ORDER BY rank
+          LIMIT ?
+        `)
+                .all(ftsQuery, limit);
+        }
+        catch {
+            // Best-effort search: a failing FTS query yields no results rather than an error.
+            return [];
+        }
+    }
+    /**
+     * Score how well a query maps to indexed codebase content.
+     *
+     * Returns a 0–1 confidence score and a short reason string.
+     * Useful for:
+     *   - Filtering low-quality benchmark tasks
+     *   - Returning confidence alongside guardian_search results
+     *   - Advising agents when a query needs reformulation
+     *
+     * Three signals (each 0–1, combined with weights):
+     *   token_coverage  0.35 — fraction of domain (non-stop-word) tokens that hit a source file
+     *   joint_strength  0.45 — BM25 strength of the best file matching ALL domain tokens;
+     *                          when no file matches them all, the best ANY-token match
+     *                          is used instead, capped at 0.5
+     *   result_cluster  0.20 — do top results cluster in one directory (high) or scatter (low)?
+     *
+     * @param {string} query
+     * @returns {{score: number, confidence: "high"|"medium"|"low", reason: string}}
+     */
+    querySignal(query) {
+        const tokens = this._buildTokens(query);
+        if (tokens.length === 0) {
+            return { score: 0, confidence: "low", reason: "query produced no searchable tokens" };
+        }
+        const isSourceFile = (filePath) => SqliteSpecsStore.#SOURCE_EXT_RE.test(filePath);
+        // Domain-specific tokens: those NOT in the stop list (compare without the trailing "*").
+        const domainTokens = tokens.filter(t => !SqliteSpecsStore.#STOP_WORDS.has(t.replace(/\*$/, "")));
+        // ── Signal 1: domain token coverage ──────────────────────────────────
+        // A token counts only if it hits at least one actual source file (not config/build).
+        let domainHits = 0;
+        for (const tok of domainTokens) {
+            try {
+                const hits = this.db.prepare("SELECT file_path FROM search_fts WHERE search_fts MATCH ? LIMIT 5").all(tok);
+                if (hits.some(r => isSourceFile(r.file_path)))
+                    domainHits++;
+            }
+            catch { /* best-effort: a token the FTS parser rejects simply scores no hit */ }
+        }
+        const tokenCoverage = domainTokens.length > 0 ? domainHits / domainTokens.length : 0;
+        // ── Signal 2: joint match strength ───────────────────────────────────
+        // Use AND (not OR) to find files matching ALL domain tokens together.
+        // Joint co-occurrence in one file means the query is specific, not coincidental.
+        let jointStrength = 0;
+        if (domainTokens.length > 0) {
+            const topMatch = (matchQuery) => this.db.prepare(`
+          SELECT bm25(search_fts, 1.0, 0.5, 0.7, 1.0, 0.6) AS rank, file_path
+          FROM search_fts WHERE search_fts MATCH ? ORDER BY rank LIMIT 1
+        `).get(matchQuery);
+            let jointRow;
+            try {
+                jointRow = topMatch(domainTokens.join(" AND "));
+            }
+            catch {
+                jointRow = undefined;
+            }
+            if (jointRow) {
+                if (isSourceFile(jointRow.file_path)) {
+                    // bm25 is negative (more negative = better): rank ≤ -8 maps to 1, rank 0 maps to 0.
+                    jointStrength = Math.min(1, Math.max(0, -jointRow.rank / 8));
+                }
+            }
+            else if (domainTokens.length > 1) {
+                // No joint match. An unmatched query returns no row rather than throwing,
+                // so the fallback must key on the missing row: use the best OR match.
+                // (With a single token the OR query equals the AND query — nothing to retry.)
+                try {
+                    const looseRow = topMatch(domainTokens.join(" OR "));
+                    if (looseRow && isSourceFile(looseRow.file_path)) {
+                        // OR match is a weaker signal — scaled down by 50% and capped at 0.5.
+                        jointStrength = Math.min(0.5, Math.max(0, -looseRow.rank / 16));
+                    }
+                }
+                catch { /* best-effort: leave jointStrength at 0 */ }
+            }
+        }
+        // ── Signal 3: result clustering ───────────────────────────────────────
+        // 1 when all top source hits share a directory, 0 when every hit is in a different one.
+        let clustering = 0;
+        try {
+            const orQuery = domainTokens.length > 0 ? domainTokens.join(" OR ") : tokens.join(" OR ");
+            const rows = this.db.prepare(`
+        SELECT file_path FROM search_fts WHERE search_fts MATCH ? ORDER BY bm25(search_fts) LIMIT 5
+      `).all(orQuery);
+            const srcRows = rows.filter(r => isSourceFile(r.file_path));
+            if (srcRows.length > 1) {
+                const dirs = srcRows.map(r => r.file_path.split("/").slice(0, -1).join("/"));
+                const unique = new Set(dirs).size;
+                clustering = 1 - (unique - 1) / Math.max(srcRows.length - 1, 1);
+            }
+            else if (srcRows.length === 1) {
+                clustering = 1;
+            }
+        }
+        catch { /* best-effort: leave clustering at 0 */ }
+        const score = tokenCoverage * 0.35 + jointStrength * 0.45 + clustering * 0.2;
+        const confidence = score >= 0.55 ? "high" : score >= 0.25 ? "medium" : "low";
+        const noCodeTokens = domainTokens.length === 0;
+        const reason = noCodeTokens
+            ? "query contains only generic English words, no code-domain terms"
+            : tokenCoverage < 0.3
+                ? `only ${Math.round(tokenCoverage * 100)}% of domain tokens match indexed source files`
+                : jointStrength < 0.15
+                    ? "tokens don't co-occur in any single source file — query is too generic"
+                    : clustering < 0.3
+                        ? "matching files scatter across unrelated modules — query is ambiguous"
+                        : `${Math.round(tokenCoverage * 100)}% domain coverage, strong co-occurrence match`;
+        return { score: Math.round(score * 100) / 100, confidence, reason };
+    }
+    // ── Private ────────────────────────────────────────────────────────────────
+    /**
+     * Build the FTS5 prefix-token list ("word*") from a natural language query.
+     *
+     * Punctuation is stripped BEFORE the length filter: otherwise a fragment
+     * such as "--" would survive as a bare "*", which is not a valid FTS5
+     * expression and makes the whole search fail (and silently return nothing).
+     *
+     * @param {string} query
+     * @returns {string[]} Lower-case alphanumeric tokens of 2+ characters, each suffixed with "*".
+     */
+    _buildTokens(query) {
+        return splitIdentifiers(query)
+            .split(/\s+/)
+            .map(t => t.replace(/[^a-z0-9]/g, ""))
+            .filter(t => t.length > 1)
+            .map(t => `${t}*`);
+    }
+    /** Create tables and indexes if missing; (re)create the FTS5 table when it lacks the `module` column. */
+    _migrate() {
+        this.db.exec(`
+      CREATE TABLE IF NOT EXISTS specs (
+        name        TEXT PRIMARY KEY,
+        format      TEXT NOT NULL,
+        content     TEXT NOT NULL,
+        tier        TEXT NOT NULL DEFAULT 'free',
+        updated_at  INTEGER NOT NULL
+      );
+      CREATE TABLE IF NOT EXISTS docs (
+        id          TEXT PRIMARY KEY,
+        section     TEXT NOT NULL,
+        title       TEXT NOT NULL,
+        body        TEXT NOT NULL,
+        tier        TEXT NOT NULL DEFAULT 'free',
+        updated_at  INTEGER NOT NULL
+      );
+      CREATE INDEX IF NOT EXISTS docs_section ON docs(section);
+      CREATE TABLE IF NOT EXISTS metrics_log (
+        id          INTEGER PRIMARY KEY AUTOINCREMENT,
+        ts          INTEGER NOT NULL,
+        event       TEXT NOT NULL,
+        payload     TEXT NOT NULL
+      );
+      CREATE INDEX IF NOT EXISTS metrics_log_ts ON metrics_log(ts);
+      CREATE TABLE IF NOT EXISTS file_deps (
+        file     TEXT NOT NULL,
+        imports  TEXT NOT NULL,
+        PRIMARY KEY (file, imports)
+      );
+      CREATE INDEX IF NOT EXISTS file_deps_reverse ON file_deps(imports);
+    `);
+        // FTS5 table — recreate if module column is missing (no ALTER TABLE for virtual tables).
+        // search_fts is always rebuilt on extract, so drop+recreate is safe.
+        const existing = this.db
+            .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='search_fts'")
+            .get();
+        if (!existing?.sql?.includes("module")) {
+            this.db.exec(`
+        DROP TABLE IF EXISTS search_fts;
+        CREATE VIRTUAL TABLE search_fts USING fts5(
+          file_path,
+          symbol_name,
+          endpoint,
+          body,
+          module,
+          tokenize='porter unicode61'
+        );
+      `);
+        }
+    }
+    // ── Dependency graph ────────────────────────────────────────────────────────
+    /**
+     * Replace all import edges (run once per guardian extract --backend sqlite).
+     * Delete and inserts share one transaction; duplicate edges are ignored.
+     *
+     * @param {Iterable<{file: string, imports: string}>} edges
+     */
+    rebuildDeps(edges) {
+        const del = this.db.prepare("DELETE FROM file_deps");
+        const ins = this.db.prepare("INSERT OR IGNORE INTO file_deps (file, imports) VALUES (?, ?)");
+        this.db.transaction(() => {
+            del.run();
+            for (const e of edges)
+                ins.run(e.file, e.imports);
+        })();
+    }
+    /**
+     * BM25 search + dependency-graph quality reranking.
+     *
+     * Ranking model (inspired by HITS / PageRank applied to code):
+     *   - Source files are "authorities": many files import them (high used_by count)
+     *   - Test/example files are "hubs": they import source files but nothing imports them
+     *
+     * authority = used_by / (used_by + imports)          ∈ [0, 1]
+     * quality   = 0.7 + 0.3 * authority                  ∈ [0.7, 1.0]
+     * combined  = bm25_rank * quality   (bm25 is negative, so quality < 1 moves the
+     *                                    score toward 0, i.e. ranks the file worse)
+     *
+     * This demotes test/example files without hardcoding path patterns.
+     * Files with no dependency data keep their BM25 rank unchanged (quality 1.0)
+     * to avoid penalising isolated scripts or files not yet in the graph.
+     *
+     * NOTE: import/used-by lists come back from GROUP_CONCAT and are split on ",",
+     * so a file path that itself contains a comma would be split into pieces.
+     *
+     * @param {string} query
+     * @param {number} [limit=5]
+     * @returns {Array<{file_path: string, symbol_name: string, rank: number, imports: string[], used_by: string[]}>}
+     *   Best match first; [] when the query has no usable tokens or the FTS query fails.
+     */
+    searchWithGraph(query, limit = 5) {
+        const tokens = this._buildTokens(query);
+        if (tokens.length === 0)
+            return [];
+        const ftsQuery = tokens.join(" OR ");
+        // Fetch a wider candidate pool so reranking has enough material.
+        const candidateLimit = Math.max(limit * 4, 60);
+        let rows;
+        try {
+            rows = this.db.prepare(`
+        WITH candidates AS (
+          SELECT file_path, symbol_name,
+                 bm25(search_fts, 1.0, 0.5, 0.7, 1.0, 0.6) AS rank
+          FROM search_fts
+          WHERE search_fts MATCH ?
+          ORDER BY rank
+          LIMIT ?
+        )
+        SELECT
+          c.file_path,
+          c.symbol_name,
+          c.rank,
+          GROUP_CONCAT(DISTINCT d.imports) AS imports_,
+          GROUP_CONCAT(DISTINCT r.file)    AS used_by_
+        FROM candidates c
+        LEFT JOIN file_deps d ON d.file    = c.file_path
+        LEFT JOIN file_deps r ON r.imports = c.file_path
+        GROUP BY c.file_path, c.symbol_name, c.rank
+        ORDER BY c.rank
+      `).all(ftsQuery, candidateLimit);
+        }
+        catch {
+            // Best-effort search: a failing FTS query yields no results rather than an error.
+            return [];
+        }
+        // Apply quality reranking using dependency-graph authority score.
+        const reranked = rows.map(r => {
+            const imports = r.imports_ ? r.imports_.split(",").filter(Boolean) : [];
+            const used_by = r.used_by_ ? r.used_by_.split(",").filter(Boolean) : [];
+            const usedByN = used_by.length;
+            const importsN = imports.length;
+            let quality;
+            if (usedByN === 0 && importsN === 0) {
+                // No dependency data — preserve BM25 rank entirely.
+                quality = 1.0;
+            }
+            else {
+                // authority_ratio ∈ [0, 1]: 1.0 = pure authority (many things import this file)
+                //                            0.0 = pure hub (imports many, nothing imports it)
+                const authority = usedByN / (usedByN + importsN);
+                // Gentle nudge: [0.7, 1.0] — hubs are demoted by at most 30%.
+                // BM25 relevance still dominates; this is a tiebreaker, not a hard filter.
+                quality = 0.7 + 0.3 * authority;
+            }
+            // bm25 is negative (more negative = better). Multiplying by quality < 1
+            // moves the score toward 0 — making low-quality files rank worse.
+            const combined = r.rank * quality;
+            return { file_path: r.file_path, symbol_name: r.symbol_name, rank: combined, imports, used_by };
+        });
+        reranked.sort((a, b) => a.rank - b.rank);
+        return reranked.slice(0, limit);
+    }
+}

package/dist/extract/codebase-intel.js CHANGED Viewed

@@ -171,8 +171,10 @@ function buildEndpointPatternMap(architecture) {
     }
     return result;
 }
+// ── File-based IO (original implementation — unchanged) ────────────────────
 /**
- * Load snapshots and build CodebaseIntelligence, then write to disk.
+ * Load snapshots and write codebase-intelligence.json to disk.
+ * This is the original file-based implementation, kept intact.
  */
 export async function writeCodebaseIntelligence(specsDir, outputPath) {
     const machineDir = await resolveMachineInputDir(specsDir);
@@ -187,9 +189,36 @@ export async function writeCodebaseIntelligence(specsDir, outputPath) {
     await fs.writeFile(outputPath, JSON.stringify(intel, null, 2), "utf8");
 }
 /**
- * Load an existing codebase-intelligence.json from disk.
+ * Load an existing codebase-intelligence.json from a file path.
+ * Original file-based implementation, kept intact.
  */
 export async function loadCodebaseIntelligence(intelPath) {
     const raw = await fs.readFile(intelPath, "utf8");
     return JSON.parse(raw);
 }
+// ── Store-based IO (new — works with both FileSpecsStore and SqliteSpecsStore) ─
+/**
+ * Build CodebaseIntelligence from the snapshots held in a SpecsStore and
+ * write the result back to the same store as the "codebase-intelligence" spec
+ * (pretty-printed JSON).
+ *
+ * Reads the "architecture.snapshot" and "ux.snapshot" specs and parses their
+ * content as YAML.
+ *
+ * @param store - An opened store exposing readSpec() and writeSpec().
+ * @throws {Error} When either snapshot is missing from the store.
+ */
+export async function writeCodebaseIntelligenceViaStore(store) {
+    const architectureSpec = await store.readSpec("architecture.snapshot");
+    const uxSpec = await store.readSpec("ux.snapshot");
+    if (!(architectureSpec && uxSpec)) {
+        throw new Error("architecture.snapshot or ux.snapshot not found in store. Run `guardian extract` first.");
+    }
+    const intel = buildCodebaseIntelligence(yaml.load(architectureSpec.content), yaml.load(uxSpec.content));
+    const serialized = JSON.stringify(intel, null, 2);
+    await store.writeSpec("codebase-intelligence", serialized, "json");
+}
+/**
+ * Read the "codebase-intelligence" spec from a SpecsStore and parse it as JSON.
+ *
+ * @param store - An opened store exposing readSpec().
+ * @returns The parsed CodebaseIntelligence, or null when the spec has not been written yet.
+ */
+export async function loadCodebaseIntelligenceViaStore(store) {
+    const stored = await store.readSpec("codebase-intelligence");
+    return stored ? JSON.parse(stored.content) : null;
+}

package/dist/extract/compress.js CHANGED Viewed

@@ -319,6 +319,7 @@ function buildHeatmapFromGraph(level, nodes, edges, nodeLayers) {
         }
     }
     const cycleNodes = findCycleNodes(nodes, adjacency, reverse);
+    const pageRank = computePageRank(nodes, adjacency, reverse);
     const degreeValues = nodes.map((node) => (outbound.get(node) ?? 0) + (inbound.get(node) ?? 0));
     const maxDegree = Math.max(1, ...degreeValues);
     const maxCrossRatio = Math.max(1, ...nodes.map((node) => {
@@ -332,15 +333,22 @@ function buildHeatmapFromGraph(level, nodes, edges, nodeLayers) {
         const out = outbound.get(node) ?? 0;
         const crossRatio = out === 0 ? 0 : crossOut / out;
         const cycleFlag = cycleNodes.has(node) ? 1 : 0;
-        const score = 0.5 * (degree / maxDegree) +
-            0.3 * (crossRatio / maxCrossRatio) +
-            0.2 * cycleFlag;
+        const pr = pageRank.get(node) ?? 0;
+        // PageRank (40%) — importance by what depends on this node
+        // Degree   (30%) — raw connectivity (fallback signal)
+        // Cross-layer (20%) — architectural violation risk
+        // Cycle    (10%) — circular dependency penalty
+        const score = 0.4 * pr +
+            0.3 * (degree / maxDegree) +
+            0.2 * (crossRatio / maxCrossRatio) +
+            0.1 * cycleFlag;
         return {
             id: node,
             layer: nodeLayers.get(node) ?? "unknown",
             score: round(score, 4),
             components: {
                 degree,
+                pagerank: round(pr, 4),
                 cross_layer_ratio: round(crossRatio, 4),
                 cycle: cycleFlag
             }
@@ -368,6 +376,65 @@ function resolveDomainForModule(moduleId, domainMap) {
     }
     return null;
 }
+/**
+ * Iterative PageRank over a directed dependency graph.
+ *
+ * Edges follow dependency arrows (A imports B → edge A→B), and rank flows
+ * along them: B gains rank because A depends on it. Nodes that many other
+ * (important) nodes rely on therefore end up with the highest scores.
+ *
+ * Runs a fixed 30 iterations with damping factor 0.85. Nodes without
+ * outgoing edges spread their rank evenly over all nodes each round.
+ *
+ * @param nodes     - All node ids.
+ * @param adjacency - Forward edges: importer → list of imported nodes.
+ * @param reverse   - Backward edges: imported → list of importers.
+ * @returns Map of node → score scaled so the top-ranked node is 1;
+ *          an empty Map when there are no nodes.
+ */
+function computePageRank(nodes, adjacency, reverse) {
+    const N = nodes.length;
+    if (N === 0) {
+        return new Map();
+    }
+    const DAMPING = 0.85;
+    const ITERATIONS = 30;
+    const BASE = (1 - DAMPING) / N;
+    // Out-degree = number of nodes each node imports.
+    const outDeg = new Map(nodes.map((node) => [node, (adjacency.get(node) ?? []).length]));
+    // Start from a uniform distribution.
+    let scores = new Map(nodes.map((node) => [node, 1 / N]));
+    for (let round = 0; round < ITERATIONS; round += 1) {
+        // Rank currently held by sink nodes, to be shared by everyone.
+        let sinkMass = 0;
+        for (const node of nodes) {
+            const isSink = (outDeg.get(node) ?? 0) === 0;
+            if (isSink) {
+                sinkMass += (scores.get(node) ?? 0);
+            }
+        }
+        const sinkShare = DAMPING * sinkMass / N;
+        const updated = new Map();
+        for (const node of nodes) {
+            let incoming = 0;
+            for (const importer of (reverse.get(node) ?? [])) {
+                // Each importer splits its rank evenly across what it imports.
+                const share = outDeg.get(importer) ?? 1;
+                incoming += (scores.get(importer) ?? 0) / share;
+            }
+            updated.set(node, BASE + sinkShare + DAMPING * incoming);
+        }
+        scores = updated;
+    }
+    // Scale relative to the largest score (floored to avoid dividing by zero).
+    const peak = Math.max(1e-10, ...scores.values());
+    const normalized = new Map();
+    scores.forEach((value, node) => {
+        normalized.set(node, value / peak);
+    });
+    return normalized;
+}
 function findCycleNodes(nodes, adjacency, reverse) {
     const visited = new Set();
     const order = [];

package/dist/extract/context-block.js CHANGED Viewed

@@ -29,8 +29,17 @@ export function renderContextBlock(architecture, ux, options) {
         }
         lines.push("");
     }
-    // Cross-module dependencies
-    const crossEdges = architecture.dependencies.module_graph.filter(e => e.from !== e.to);
+    // Cross-module dependencies (deduplicated)
+    const seenEdges = new Set();
+    const crossEdges = architecture.dependencies.module_graph.filter(e => {
+        if (e.from === e.to)
+            return false;
+        const key = `${e.from}→${e.to}`;
+        if (seenEdges.has(key))
+            return false;
+        seenEdges.add(key);
+        return true;
+    });
     if (crossEdges.length > 0) {
         lines.push("### Module Dependencies");
         for (const edge of crossEdges.slice(0, 10)) {

package/dist/extract/function-intel.js CHANGED Viewed

@@ -160,8 +160,10 @@ async function listSourceFiles(dir, config, results = []) {
  * Scan one or more project roots, run adapters on every source file, and
  * return the aggregated FunctionIntelligence index.
  */
-export async function buildFunctionIntelligenceFromRoots(roots, config) {
+export async function buildFunctionIntelligenceFromRoots(roots, config, projectRoot) {
     const allFunctions = [];
+    // Relativize against project root if provided; otherwise fall back to the scan root
+    const baseDir = projectRoot ?? roots[0];
     for (const root of roots) {
         const files = await listSourceFiles(root, config);
         await Promise.all(files.map(async (filePath) => {
@@ -177,7 +179,8 @@ export async function buildFunctionIntelligenceFromRoots(roots, config) {
             }
             try {
                 const result = runAdapter(adapter, filePath, source);
-                allFunctions.push(...result.functions);
+                const relPath = path.relative(baseDir, filePath);
+                allFunctions.push(...result.functions.map(fn => ({ ...fn, file: relPath })));
             }
             catch {
                 // Skip files that fail to parse (malformed source, encoding issues)

package/dist/extract/index.js CHANGED Viewed

@@ -191,8 +191,7 @@ export async function extractProject(options) {
     // Generate Function Intelligence — call graph, literal index across all languages.
     // Runs as an additive second pass; never modifies the architecture snapshot.
     try {
-        const allRoots = (architecture.project.roots ?? [projectRoot]).map((r) => path.isAbsolute(r) ? r : path.join(projectRoot, r));
-        const funcIntel = await buildFunctionIntelligenceFromRoots(allRoots, config);
+        const funcIntel = await buildFunctionIntelligenceFromRoots([projectRoot], config, projectRoot);
         await writeFunctionIntelligence(layout.machineDir, funcIntel);
     }
     catch (err) {
@@ -421,27 +420,6 @@ function mergeFrontendAnalyses(results, _roots, _workspaceRoot) {
         tests: results.flatMap(r => r.tests)
     };
 }
-function findCommonRoot(paths) {
-    if (paths.length === 0) {
-        return process.cwd();
-    }
-    const splitPaths = paths.map((entry) => path.resolve(entry).split(path.sep));
-    const minLength = Math.min(...splitPaths.map((parts) => parts.length));
-    const shared = [];
-    for (let i = 0; i < minLength; i += 1) {
-        const segment = splitPaths[0][i];
-        if (splitPaths.every((parts) => parts[i] === segment)) {
-            shared.push(segment);
-        }
-        else {
-            break;
-        }
-    }
-    if (shared.length === 0) {
-        return path.parse(paths[0]).root;
-    }
-    return shared.join(path.sep);
-}
 async function loadPreviousSnapshots(machineDir, rootDir) {
     const result = {};
     const candidates = [