@toolbaux/guardian 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,441 @@
1
+ /**
2
+ * SqliteSpecsStore — SQLite implementation of SpecsStore.
3
+ *
4
+ * Stores everything that was previously scattered across .specs/machine/*.json
5
+ * and .specs/human/*.md into a single guardian.db file.
6
+ *
7
+ * Schema:
8
+ * specs — blob storage for all machine intelligence files
9
+ * docs — human-readable doc sections (markdown)
10
+ * metrics_log — append-only event log (replaces mcp-metrics.jsonl)
11
+ * search_fts — FTS5 virtual table built from specs content (extra index)
12
+ *
13
+ * Tier gating is stored per-row; the caller passes a tier filter when reading.
14
+ * This is the foundation for the pro/enterprise access control layer.
15
+ */
16
+ import Database from "better-sqlite3";
17
+ import path from "node:path";
18
/**
 * Reduce a file path to the canonical repo-relative form used for paths
 * stored in guardian.db.
 *
 * Three rewrites are attempted, in this order:
 *   1. "<anything>/src/..."        → "src/..."
 *   2. "X/X/..." (doubled segment) → "X/..."   (returns immediately)
 *   3. "<repo>/<known dir>/..."    → "<known dir>/..." where the known
 *      directory is lib, examples, pkg, packages, apps, internal, cmd or src
 *      (matched case-insensitively)
 *
 *   "flask-full/src/flask/sessions.py"      → "src/flask/sessions.py"
 *   "django/django/contrib/auth.py"         → "django/contrib/auth.py"
 *   "sqlalchemy/lib/sqlalchemy/sql/base.py" → "lib/sqlalchemy/sql/base.py"
 *
 * @param {string} p - Path using forward slashes.
 * @returns {string} The normalised path; unchanged when no rule applies.
 */
export function normPath(p) {
    // Rule 1: collapse a leading "<repo>/src/" down to "src/".
    const collapsed = p.replace(/^[^/]+\/src\//, "src/");

    // Rule 2: the package namespace repeats the clone directory name.
    const doubled = /^([^/]+)\/\1\//.exec(collapsed);
    if (doubled !== null) {
        return collapsed.slice(doubled[1].length + 1);
    }

    // Rule 3: drop the first segment when a well-known source directory follows it.
    const hasRepoPrefix = /^[^/]+\/(?:lib|examples|pkg|packages|apps|internal|cmd|src)\//i.test(collapsed);
    return hasRepoPrefix ? collapsed.slice(collapsed.indexOf("/") + 1) : collapsed;
}
40
/**
 * Break camelCase and snake_case identifiers into space-separated lowercase
 * words so a stemming tokenizer can match the individual parts.
 *
 *   getUserById → "get user by id"
 *   auth_service → "auth service"
 *   HTTPServer   → "http server"
 *
 * @param {string} s - Raw identifier or free text.
 * @returns {string} Lowercased text with word boundaries made explicit.
 */
function splitIdentifiers(s) {
    // snake_case: underscores become spaces.
    const withoutUnderscores = s.replace(/_/g, " ");
    // camelCase: insert a space at every lower→upper transition.
    const camelSplit = withoutUnderscores.replace(/([a-z])([A-Z])/g, "$1 $2");
    // Acronym followed by a capitalised word: "HTMLDoc" → "HTML Doc".
    const acronymSplit = camelSplit.replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");
    return acronymSplit.toLowerCase();
}
53
+ export const DB_FILENAME = "guardian.db";
54
/**
 * SQLite-backed specs store.
 *
 * Wraps a single better-sqlite3 connection to `<storeDir>/guardian.db` and
 * exposes spec blobs, human docs, a metrics log, an FTS5 search index and a
 * file-level import graph. `init()` must be called before any other method.
 */
export class SqliteSpecsStore {
    /** Generic English / commit-message words that carry no code-domain signal. */
    static #STOP_WORDS = new Set([
        "the", "and", "for", "are", "but", "not", "you", "all", "can", "had", "her", "was",
        "one", "our", "out", "day", "get", "has", "him", "his", "how", "its", "let", "may",
        "new", "now", "old", "see", "two", "use", "way", "who", "did", "man", "say", "she",
        "than", "then", "them", "these", "they", "this", "will", "with", "have", "from",
        "that", "been", "each", "into", "like", "make", "more", "other", "over", "same",
        "such", "take", "when", "your", "also", "back", "came", "come", "does", "even",
        "find", "give", "good", "here", "just", "keep", "kind", "last", "left", "life",
        "long", "much", "must", "name", "need", "next", "only", "open", "own", "part",
        "plan", "play", "put", "read", "real", "said", "show", "side", "some", "tell",
        "time", "very", "well", "went", "what", "work", "year", "change", "update",
        "remove", "add", "fix", "file", "files",
    ]);
    /** File extensions treated as "real source" (as opposed to config/build files). */
    static #SOURCE_EXT_RE = /\.(py|ts|tsx|js|jsx|go|java|cs|rb|rs|cpp|c|php|swift|kt)$/;

    storeDir;
    db;

    /**
     * @param {string} storeDir - Directory that holds (or will hold) guardian.db.
     */
    constructor(storeDir) {
        this.storeDir = storeDir;
    }

    /** Open the database, enable WAL journaling and create any missing tables. */
    async init() {
        const dbPath = path.join(this.storeDir, DB_FILENAME);
        this.db = new Database(dbPath);
        this.db.pragma("journal_mode = WAL");
        this.db.pragma("synchronous = NORMAL");
        this._migrate();
    }

    /** Close the connection if one was opened. */
    async close() {
        this.db?.close();
    }

    // ── Spec blobs ─────────────────────────────────────────────────────────────

    /**
     * @param {string} name - Spec name (primary key).
     * @returns {Promise<object|null>} The full `specs` row, or null when absent.
     */
    async readSpec(name) {
        const row = this.db
            .prepare("SELECT * FROM specs WHERE name = ?")
            .get(name);
        return row ?? null;
    }

    /**
     * Insert or overwrite a spec blob; `updated_at` is set to the current time.
     *
     * @param {string} name
     * @param {string} content
     * @param {string} format
     * @param {string} [tier="free"]
     */
    async writeSpec(name, content, format, tier = "free") {
        this.db
            .prepare(`
      INSERT INTO specs (name, format, content, tier, updated_at)
      VALUES (?, ?, ?, ?, ?)
      ON CONFLICT(name) DO UPDATE SET
        content = excluded.content,
        format = excluded.format,
        tier = excluded.tier,
        updated_at = excluded.updated_at
    `)
            .run(name, format, content, tier, Date.now());
    }

    /** @returns {Promise<string[]>} All spec names in ascending order. */
    async listSpecs() {
        const rows = this.db
            .prepare("SELECT name FROM specs ORDER BY name")
            .all();
        return rows.map(r => r.name);
    }

    /** @returns {Promise<boolean>} Whether a spec with this name exists. */
    async hasSpec(name) {
        const row = this.db
            .prepare("SELECT 1 FROM specs WHERE name = ?")
            .get(name);
        return !!row;
    }

    // ── Human docs ─────────────────────────────────────────────────────────────

    /**
     * @param {string} id - Doc id (primary key).
     * @returns {Promise<object|null>} The full `docs` row, or null when absent.
     */
    async readDoc(id) {
        const row = this.db
            .prepare("SELECT * FROM docs WHERE id = ?")
            .get(id);
        return row ?? null;
    }

    /**
     * Insert or overwrite a doc section; `updated_at` is set to the current time.
     *
     * @param {{id: string, section: string, title: string, body: string, tier?: string}} entry
     */
    async writeDoc(entry) {
        // An explicitly bound NULL bypasses the column DEFAULT and would violate
        // NOT NULL, so apply the same "free" default that writeSpec uses.
        const tier = entry.tier ?? "free";
        this.db
            .prepare(`
      INSERT INTO docs (id, section, title, body, tier, updated_at)
      VALUES (?, ?, ?, ?, ?, ?)
      ON CONFLICT(id) DO UPDATE SET
        section = excluded.section,
        title = excluded.title,
        body = excluded.body,
        tier = excluded.tier,
        updated_at = excluded.updated_at
    `)
            .run(entry.id, entry.section, entry.title, entry.body, tier, Date.now());
    }

    /**
     * @param {string} [section] - When given, only docs of that section are returned.
     * @returns {Promise<object[]>} Matching `docs` rows ordered by (section,) id.
     */
    async listDocs(section) {
        if (section) {
            return this.db
                .prepare("SELECT * FROM docs WHERE section = ? ORDER BY id")
                .all(section);
        }
        return this.db
            .prepare("SELECT * FROM docs ORDER BY section, id")
            .all();
    }

    // ── Metrics log ────────────────────────────────────────────────────────────

    /**
     * Append one event to the metrics log.
     *
     * @param {string} event
     * @param {*} payload - Stored as JSON text.
     */
    async appendMetric(event, payload) {
        // JSON.stringify(undefined) yields undefined, which cannot satisfy the
        // NOT NULL payload column; record a JSON null instead.
        const serialized = JSON.stringify(payload ?? null);
        this.db
            .prepare("INSERT INTO metrics_log (ts, event, payload) VALUES (?, ?, ?)")
            .run(Date.now(), event, serialized);
    }

    /**
     * @param {number} [limit=1000]
     * @returns {Promise<object[]>} Most recent log rows first; `payload` is still JSON text.
     */
    async readMetrics(limit = 1000) {
        return this.db
            .prepare("SELECT * FROM metrics_log ORDER BY id DESC LIMIT ?")
            .all(limit);
    }

    // ── FTS search (extra index, no equivalent in FileSpecsStore) ─────────────

    /**
     * Rebuild the FTS5 search index from extracted codebase data.
     *
     * Each row is one file. Symbol names and endpoints are pre-expanded with
     * splitIdentifiers() so "getUserById" becomes "get user by id" before
     * the porter stemmer runs.
     *
     * The delete and all inserts run in ONE transaction (mirroring rebuildDeps),
     * so a failure part-way through rolls back to the previous index instead of
     * leaving it empty.
     *
     * Column order, and the bm25() weights the search methods pass for them:
     *   file_path 1.0, symbol_name 0.5, endpoint 0.7, body 1.0, module 0.6
     * NOTE(review): in FTS5 a LARGER bm25 weight makes a column count MORE, so
     * these values make symbol_name the least influential column — confirm
     * that this is the intended ranking.
     *
     * @param {Array<{file_path: string, symbol_name?: string, endpoint?: string, body?: string, module?: string}>} rows
     */
    rebuildSearchIndex(rows) {
        const clear = this.db.prepare("DELETE FROM search_fts");
        const insert = this.db.prepare("INSERT INTO search_fts (file_path, symbol_name, endpoint, body, module) VALUES (?, ?, ?, ?, ?)");
        const replaceAll = this.db.transaction((items) => {
            clear.run();
            for (const r of items) {
                // Missing text fields are indexed as empty strings rather than crashing.
                insert.run(r.file_path, splitIdentifiers(r.symbol_name ?? ""), splitIdentifiers(r.endpoint ?? ""), r.body ?? "", r.module ?? "");
            }
        });
        replaceAll(rows);
    }

    /**
     * BM25-ranked full-text search over the indexed content.
     *
     * @param {string} query - Natural-language or identifier query.
     * @param {number} [limit=20]
     * @returns {Array<{file_path: string, symbol_name: string, rank: number}>}
     *   Best match first (bm25 ranks are negative; more negative = better).
     *   Returns [] when the query has no usable tokens or the search fails.
     */
    searchFTS(query, limit = 20) {
        const tokens = this._buildTokens(query);
        if (tokens.length === 0)
            return [];
        const ftsQuery = tokens.join(" OR ");
        try {
            return this.db
                .prepare(`
        SELECT file_path, symbol_name,
               bm25(search_fts, 1.0, 0.5, 0.7, 1.0, 0.6) AS rank
        FROM search_fts
        WHERE search_fts MATCH ?
        ORDER BY rank
        LIMIT ?
      `)
                .all(ftsQuery, limit);
        }
        catch {
            // Search is best-effort: a missing or unreadable index yields no results.
            return [];
        }
    }

    /**
     * Score how well a query maps to indexed codebase content.
     *
     * Returns a 0–1 score, a confidence bucket and a short reason string.
     *
     * Three signals (each 0–1) are combined:
     *   token coverage  0.35 — fraction of non-stop-word tokens that hit a source file
     *   joint strength  0.45 — bm25 strength of the best file matching ALL tokens;
     *                          when no file matches all of them, the best ANY-token
     *                          match is used at half weight (capped at 0.5)
     *   clustering      0.20 — do the top results share a directory (high) or scatter (low)?
     *
     * Confidence: score >= 0.55 → "high", >= 0.25 → "medium", otherwise "low".
     *
     * @param {string} query
     * @returns {{score: number, confidence: "high"|"medium"|"low", reason: string}}
     */
    querySignal(query) {
        const tokens = this._buildTokens(query);
        if (tokens.length === 0) {
            return { score: 0, confidence: "low", reason: "query produced no searchable tokens" };
        }
        const isSourceFile = (filePath) => SqliteSpecsStore.#SOURCE_EXT_RE.test(filePath);
        // Domain-specific tokens: those NOT in the stop list (compare without the prefix "*").
        const domainTokens = tokens.filter(t => !SqliteSpecsStore.#STOP_WORDS.has(t.replace(/\*$/, "")));

        // ── Signal 1: domain token coverage ──────────────────────────────────
        // A token counts only if it hits at least one actual source file.
        let domainHits = 0;
        if (domainTokens.length > 0) {
            let probe = null;
            try {
                // Prepared once and reused for every token.
                probe = this.db.prepare("SELECT file_path FROM search_fts WHERE search_fts MATCH ? LIMIT 5");
            }
            catch { /* index unavailable — coverage stays 0 */ }
            if (probe) {
                for (const tok of domainTokens) {
                    try {
                        if (probe.all(tok).some(r => isSourceFile(r.file_path)))
                            domainHits++;
                    }
                    catch { /* skip tokens FTS5 cannot evaluate */ }
                }
            }
        }
        const tokenCoverage = domainTokens.length > 0 ? domainHits / domainTokens.length : 0;

        // ── Signal 2: joint match strength ───────────────────────────────────
        // Best single row for an FTS expression, or null when nothing matches
        // or the query cannot be evaluated.
        const bestMatch = (ftsQuery) => {
            try {
                return this.db.prepare(`
            SELECT bm25(search_fts, 1.0, 0.5, 0.7, 1.0, 0.6) AS rank, file_path
            FROM search_fts WHERE search_fts MATCH ? ORDER BY rank LIMIT 1
          `).get(ftsQuery) ?? null;
            }
            catch {
                return null;
            }
        };
        let jointStrength = 0;
        let jointMatched = false;
        if (domainTokens.length > 0) {
            const joint = bestMatch(domainTokens.join(" AND "));
            if (joint) {
                jointMatched = true;
                if (isSourceFile(joint.file_path)) {
                    // bm25 is negative; a rank of -8 or lower saturates at 1.
                    jointStrength = Math.min(1, Math.max(0, -joint.rank / 8));
                }
            }
            else {
                // No file contains every token. An empty AND result is NOT an
                // exception, so the fallback must be triggered here explicitly.
                const loose = bestMatch(domainTokens.join(" OR "));
                if (loose && isSourceFile(loose.file_path)) {
                    // OR match is a weaker signal — half scale, capped at 0.5.
                    jointStrength = Math.min(0.5, Math.max(0, -loose.rank / 16));
                }
            }
        }

        // ── Signal 3: result clustering ───────────────────────────────────────
        let clustering = 0;
        try {
            const orQuery = domainTokens.length > 0 ? domainTokens.join(" OR ") : tokens.join(" OR ");
            const rows = this.db.prepare(`
        SELECT file_path FROM search_fts WHERE search_fts MATCH ? ORDER BY bm25(search_fts) LIMIT 5
      `).all(orQuery);
            const srcRows = rows.filter(r => isSourceFile(r.file_path));
            if (srcRows.length > 1) {
                const dirs = srcRows.map(r => r.file_path.split("/").slice(0, -1).join("/"));
                const unique = new Set(dirs).size;
                // 1 when every hit shares a directory, 0 when every hit is in a different one.
                clustering = 1 - (unique - 1) / Math.max(srcRows.length - 1, 1);
            }
            else if (srcRows.length === 1) {
                clustering = 1;
            }
        }
        catch { /* skip */ }

        const score = tokenCoverage * 0.35 + jointStrength * 0.45 + clustering * 0.2;
        const confidence = score >= 0.55 ? "high" : score >= 0.25 ? "medium" : "low";
        const noCodeTokens = domainTokens.length === 0;
        const reason = noCodeTokens
            ? "query contains only generic English words, no code-domain terms"
            : tokenCoverage < 0.3
                ? `only ${Math.round(tokenCoverage * 100)}% of domain tokens match indexed source files`
                : !jointMatched || jointStrength < 0.15
                    ? "tokens don't co-occur in any single source file — query is too generic"
                    : clustering < 0.3
                        ? "matching files scatter across unrelated modules — query is ambiguous"
                        : `${Math.round(tokenCoverage * 100)}% domain coverage, strong co-occurrence match`;
        return { score: Math.round(score * 100) / 100, confidence, reason };
    }

    // ── Private ────────────────────────────────────────────────────────────────

    /**
     * Build the FTS5 prefix-token list for a natural-language query.
     *
     * Identifiers are split with splitIdentifiers(), then the text is cut on
     * every run of characters outside [a-z0-9] — the same places the index
     * tokenizer breaks ASCII text — so "auth-service" yields "auth*" and
     * "service*" rather than an unmatchable "authservice*". Fragments shorter
     * than two characters are dropped, which also guarantees that a bare "*"
     * (invalid FTS5 syntax) is never produced.
     *
     * @param {string} query
     * @returns {string[]} Tokens such as ["get*", "user*"]; [] for empty input.
     */
    _buildTokens(query) {
        return splitIdentifiers(String(query ?? ""))
            .split(/[^a-z0-9]+/)
            .filter(t => t.length > 1)
            .map(t => `${t}*`);
    }

    /** Create tables and indexes if missing; recreate search_fts when its schema is outdated. */
    _migrate() {
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS specs (
        name       TEXT PRIMARY KEY,
        format     TEXT NOT NULL,
        content    TEXT NOT NULL,
        tier       TEXT NOT NULL DEFAULT 'free',
        updated_at INTEGER NOT NULL
      );

      CREATE TABLE IF NOT EXISTS docs (
        id         TEXT PRIMARY KEY,
        section    TEXT NOT NULL,
        title      TEXT NOT NULL,
        body       TEXT NOT NULL,
        tier       TEXT NOT NULL DEFAULT 'free',
        updated_at INTEGER NOT NULL
      );

      CREATE INDEX IF NOT EXISTS docs_section ON docs(section);

      CREATE TABLE IF NOT EXISTS metrics_log (
        id      INTEGER PRIMARY KEY AUTOINCREMENT,
        ts      INTEGER NOT NULL,
        event   TEXT NOT NULL,
        payload TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS metrics_log_ts ON metrics_log(ts);

      CREATE TABLE IF NOT EXISTS file_deps (
        file    TEXT NOT NULL,
        imports TEXT NOT NULL,
        PRIMARY KEY (file, imports)
      );

      CREATE INDEX IF NOT EXISTS file_deps_reverse ON file_deps(imports);
    `);
        // FTS5 table — recreate if module column is missing (no ALTER TABLE for virtual tables).
        // search_fts is always rebuilt on extract, so drop+recreate is safe.
        const existing = this.db
            .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='search_fts'")
            .get();
        if (!existing?.sql?.includes("module")) {
            this.db.exec(`
        DROP TABLE IF EXISTS search_fts;
        CREATE VIRTUAL TABLE search_fts USING fts5(
          file_path,
          symbol_name,
          endpoint,
          body,
          module,
          tokenize='porter unicode61'
        );
      `);
        }
    }

    // ── Dependency graph ────────────────────────────────────────────────────────

    /**
     * Replace all import edges atomically (delete + inserts in one transaction).
     *
     * @param {Iterable<{file: string, imports: string}>} edges - Duplicate edges are ignored.
     */
    rebuildDeps(edges) {
        const del = this.db.prepare("DELETE FROM file_deps");
        const ins = this.db.prepare("INSERT OR IGNORE INTO file_deps (file, imports) VALUES (?, ?)");
        this.db.transaction(() => {
            del.run();
            for (const e of edges)
                ins.run(e.file, e.imports);
        })();
    }

    /**
     * BM25 search + dependency-graph quality reranking.
     *
     * Ranking model:
     *   - authority = used_by / (used_by + imports), in [0, 1]
     *       1.0 = nothing but incoming edges (many files import this one)
     *       0.0 = nothing but outgoing edges (imports others, nobody imports it)
     *   - quality   = 0.7 + 0.3 * authority, i.e. a nudge in [0.7, 1.0]
     *   - combined  = bm25_rank * quality
     *
     * bm25 ranks are negative (more negative = better), so multiplying by a
     * quality below 1 moves the score toward 0 and demotes the file by at most
     * 30%. Files with no edges at all keep quality 1.0, i.e. their BM25 rank is
     * left untouched.
     *
     * Limitation: edges come back through GROUP_CONCAT with its default ","
     * separator, so a path that itself contains a comma is split into pieces.
     *
     * @param {string} query
     * @param {number} [limit=5]
     * @returns {Array<{file_path: string, symbol_name: string, rank: number, imports: string[], used_by: string[]}>}
     *   Best match first; [] when the query has no usable tokens or the search fails.
     */
    searchWithGraph(query, limit = 5) {
        const tokens = this._buildTokens(query);
        if (tokens.length === 0)
            return [];
        const ftsQuery = tokens.join(" OR ");
        // Fetch a wider candidate pool so reranking has enough material.
        const candidateLimit = Math.max(limit * 4, 60);
        let rows;
        try {
            rows = this.db.prepare(`
        WITH candidates AS (
          SELECT file_path, symbol_name,
                 bm25(search_fts, 1.0, 0.5, 0.7, 1.0, 0.6) AS rank
          FROM search_fts
          WHERE search_fts MATCH ?
          ORDER BY rank
          LIMIT ?
        )
        SELECT
          c.file_path,
          c.symbol_name,
          c.rank,
          GROUP_CONCAT(DISTINCT d.imports) AS imports_,
          GROUP_CONCAT(DISTINCT r.file)    AS used_by_
        FROM candidates c
        LEFT JOIN file_deps d ON d.file    = c.file_path
        LEFT JOIN file_deps r ON r.imports = c.file_path
        GROUP BY c.file_path, c.symbol_name, c.rank
        ORDER BY c.rank
      `).all(ftsQuery, candidateLimit);
        }
        catch {
            // Best-effort, same as searchFTS.
            return [];
        }
        // Apply quality reranking using the dependency-graph authority score.
        const reranked = rows.map(r => {
            const imports = r.imports_ ? r.imports_.split(",").filter(Boolean) : [];
            const used_by = r.used_by_ ? r.used_by_.split(",").filter(Boolean) : [];
            const usedByN = used_by.length;
            const importsN = imports.length;
            let quality;
            if (usedByN === 0 && importsN === 0) {
                // No dependency data — preserve BM25 rank entirely.
                quality = 1.0;
            }
            else {
                const authority = usedByN / (usedByN + importsN);
                // BM25 relevance still dominates; this is a tiebreaker, not a hard filter.
                quality = 0.7 + 0.3 * authority;
            }
            const combined = r.rank * quality;
            return { file_path: r.file_path, symbol_name: r.symbol_name, rank: combined, imports, used_by };
        });
        reranked.sort((a, b) => a.rank - b.rank);
        return reranked.slice(0, limit);
    }
}
@@ -171,8 +171,10 @@ function buildEndpointPatternMap(architecture) {
171
171
  }
172
172
  return result;
173
173
  }
174
+ // ── File-based IO (original implementation — unchanged) ────────────────────
174
175
  /**
175
- * Load snapshots and build CodebaseIntelligence, then write to disk.
176
+ * Load snapshots and write codebase-intelligence.json to disk.
177
+ * This is the original file-based implementation, kept intact.
176
178
  */
177
179
  export async function writeCodebaseIntelligence(specsDir, outputPath) {
178
180
  const machineDir = await resolveMachineInputDir(specsDir);
@@ -187,9 +189,36 @@ export async function writeCodebaseIntelligence(specsDir, outputPath) {
187
189
  await fs.writeFile(outputPath, JSON.stringify(intel, null, 2), "utf8");
188
190
  }
189
191
  /**
190
- * Load an existing codebase-intelligence.json from disk.
192
+ * Load an existing codebase-intelligence.json from a file path.
193
+ * Original file-based implementation, kept intact.
191
194
  */
192
195
  export async function loadCodebaseIntelligence(intelPath) {
193
196
  const raw = await fs.readFile(intelPath, "utf8");
194
197
  return JSON.parse(raw);
195
198
  }
199
+ // ── Store-based IO (new — works with both FileSpecsStore and SqliteSpecsStore) ─
200
/**
 * Build CodebaseIntelligence from the snapshots held in a SpecsStore and
 * write the result back to the same store.
 *
 * Reads the "architecture.snapshot" and "ux.snapshot" specs, parses both as
 * YAML, and stores the built intelligence as pretty-printed JSON under the
 * name "codebase-intelligence" with format "json".
 *
 * @param {{readSpec: Function, writeSpec: Function}} store - An open store.
 * @returns {Promise<void>}
 * @throws {Error} When either snapshot is missing from the store.
 */
export async function writeCodebaseIntelligenceViaStore(store) {
    const architectureSpec = await store.readSpec("architecture.snapshot");
    const uxSpec = await store.readSpec("ux.snapshot");

    const bothPresent = Boolean(architectureSpec) && Boolean(uxSpec);
    if (!bothPresent) {
        throw new Error("architecture.snapshot or ux.snapshot not found in store. Run `guardian extract` first.");
    }

    // Both snapshots are stored as YAML text.
    const [architecture, ux] = [architectureSpec, uxSpec].map((spec) => yaml.load(spec.content));
    const serialized = JSON.stringify(buildCodebaseIntelligence(architecture, ux), null, 2);
    await store.writeSpec("codebase-intelligence", serialized, "json");
}
215
/**
 * Fetch the "codebase-intelligence" spec from a SpecsStore and decode it.
 *
 * @param {{readSpec: Function}} store - An open store.
 * @returns {Promise<object|null>} The parsed JSON content, or null when the
 *   spec has not been written yet.
 */
export async function loadCodebaseIntelligenceViaStore(store) {
    const stored = await store.readSpec("codebase-intelligence");
    return stored ? JSON.parse(stored.content) : null;
}
+ }
@@ -319,6 +319,7 @@ function buildHeatmapFromGraph(level, nodes, edges, nodeLayers) {
319
319
  }
320
320
  }
321
321
  const cycleNodes = findCycleNodes(nodes, adjacency, reverse);
322
+ const pageRank = computePageRank(nodes, adjacency, reverse);
322
323
  const degreeValues = nodes.map((node) => (outbound.get(node) ?? 0) + (inbound.get(node) ?? 0));
323
324
  const maxDegree = Math.max(1, ...degreeValues);
324
325
  const maxCrossRatio = Math.max(1, ...nodes.map((node) => {
@@ -332,15 +333,22 @@ function buildHeatmapFromGraph(level, nodes, edges, nodeLayers) {
332
333
  const out = outbound.get(node) ?? 0;
333
334
  const crossRatio = out === 0 ? 0 : crossOut / out;
334
335
  const cycleFlag = cycleNodes.has(node) ? 1 : 0;
335
- const score = 0.5 * (degree / maxDegree) +
336
- 0.3 * (crossRatio / maxCrossRatio) +
337
- 0.2 * cycleFlag;
336
+ const pr = pageRank.get(node) ?? 0;
337
+ // PageRank (40%) importance by what depends on this node
338
+ // Degree (30%) — raw connectivity (fallback signal)
339
+ // Cross-layer (20%) — architectural violation risk
340
+ // Cycle (10%) — circular dependency penalty
341
+ const score = 0.4 * pr +
342
+ 0.3 * (degree / maxDegree) +
343
+ 0.2 * (crossRatio / maxCrossRatio) +
344
+ 0.1 * cycleFlag;
338
345
  return {
339
346
  id: node,
340
347
  layer: nodeLayers.get(node) ?? "unknown",
341
348
  score: round(score, 4),
342
349
  components: {
343
350
  degree,
351
+ pagerank: round(pr, 4),
344
352
  cross_layer_ratio: round(crossRatio, 4),
345
353
  cycle: cycleFlag
346
354
  }
@@ -368,6 +376,65 @@ function resolveDomainForModule(moduleId, domainMap) {
368
376
  }
369
377
  return null;
370
378
  }
379
/**
 * Iterative PageRank over a directed dependency graph.
 *
 * Edge direction follows dependency arrows (A imports B → edge A→B), so a
 * node accumulates rank from the nodes that import it: files that many other
 * files rely on end up with high scores.
 *
 * Uses damping 0.85 and a fixed 30 iterations (no convergence test). Nodes
 * without outgoing edges ("dangling") spread their rank uniformly over all
 * nodes each iteration so that total rank is conserved.
 *
 * @param {string[]} nodes - All node ids.
 * @param {Map<string, string[]|Set<string>>} adjacency - Forward edges
 *   (importer → imported). Only the number of targets per node is used.
 * @param {Map<string, Iterable<string>>} reverse - Backward edges
 *   (imported → importers).
 * @returns {Map<string, number>} node → score scaled so the top node is 1;
 *   an empty map for an empty graph.
 */
function computePageRank(nodes, adjacency, // forward edges (importer → imported)
reverse // backward edges (imported → importers)
) {
    const N = nodes.length;
    if (N === 0)
        return new Map();
    const DAMPING = 0.85;
    const ITERATIONS = 30;
    const BASE = (1 - DAMPING) / N;

    // Accept either arrays (.length) or Sets (.size) as adjacency values.
    const countOf = (targets) => (targets == null ? 0 : (targets.length ?? targets.size ?? 0));

    // Initialize uniform rank
    let rank = new Map();
    for (const node of nodes)
        rank.set(node, 1 / N);

    // Precompute out-degrees (how many nodes each node imports)
    const outDeg = new Map();
    for (const node of nodes)
        outDeg.set(node, countOf(adjacency.get(node)));

    for (let iter = 0; iter < ITERATIONS; iter++) {
        // Dangling mass: total rank of sink nodes, shared equally by every node.
        let danglingMass = 0;
        for (const node of nodes) {
            if ((outDeg.get(node) ?? 0) === 0) {
                danglingMass += (rank.get(node) ?? 0);
            }
        }
        const danglingContrib = DAMPING * danglingMass / N;

        const next = new Map();
        for (const node of nodes) {
            let incoming = 0;
            for (const importer of (reverse.get(node) ?? [])) {
                const d = outDeg.get(importer);
                // Skip importers that are unknown or have no recorded outgoing
                // edges (adjacency and reverse disagree). Dividing by a zero
                // out-degree would inject Infinity/NaN; such a node's rank is
                // already redistributed through the dangling mass.
                if (!d)
                    continue;
                incoming += (rank.get(importer) ?? 0) / d;
            }
            next.set(node, BASE + danglingContrib + DAMPING * incoming);
        }
        rank = next;
    }

    // Normalize to [0, 1] relative to max. A plain loop is used instead of
    // Math.max(...values) so very large graphs cannot exceed the engine's
    // argument-count limit.
    let max = 1e-10;
    for (const r of rank.values()) {
        if (r > max)
            max = r;
    }
    const normalized = new Map();
    for (const [node, r] of rank.entries())
        normalized.set(node, r / max);
    return normalized;
}
371
438
  function findCycleNodes(nodes, adjacency, reverse) {
372
439
  const visited = new Set();
373
440
  const order = [];
@@ -29,8 +29,17 @@ export function renderContextBlock(architecture, ux, options) {
29
29
  }
30
30
  lines.push("");
31
31
  }
32
- // Cross-module dependencies
33
- const crossEdges = architecture.dependencies.module_graph.filter(e => e.from !== e.to);
32
+ // Cross-module dependencies (deduplicated)
33
+ const seenEdges = new Set();
34
+ const crossEdges = architecture.dependencies.module_graph.filter(e => {
35
+ if (e.from === e.to)
36
+ return false;
37
+ const key = `${e.from}→${e.to}`;
38
+ if (seenEdges.has(key))
39
+ return false;
40
+ seenEdges.add(key);
41
+ return true;
42
+ });
34
43
  if (crossEdges.length > 0) {
35
44
  lines.push("### Module Dependencies");
36
45
  for (const edge of crossEdges.slice(0, 10)) {
@@ -160,8 +160,10 @@ async function listSourceFiles(dir, config, results = []) {
160
160
  * Scan one or more project roots, run adapters on every source file, and
161
161
  * return the aggregated FunctionIntelligence index.
162
162
  */
163
- export async function buildFunctionIntelligenceFromRoots(roots, config) {
163
+ export async function buildFunctionIntelligenceFromRoots(roots, config, projectRoot) {
164
164
  const allFunctions = [];
165
+ // Relativize against project root if provided; otherwise fall back to the scan root
166
+ const baseDir = projectRoot ?? roots[0];
165
167
  for (const root of roots) {
166
168
  const files = await listSourceFiles(root, config);
167
169
  await Promise.all(files.map(async (filePath) => {
@@ -177,7 +179,8 @@ export async function buildFunctionIntelligenceFromRoots(roots, config) {
177
179
  }
178
180
  try {
179
181
  const result = runAdapter(adapter, filePath, source);
180
- allFunctions.push(...result.functions);
182
+ const relPath = path.relative(baseDir, filePath);
183
+ allFunctions.push(...result.functions.map(fn => ({ ...fn, file: relPath })));
181
184
  }
182
185
  catch {
183
186
  // Skip files that fail to parse (malformed source, encoding issues)
@@ -191,8 +191,7 @@ export async function extractProject(options) {
191
191
  // Generate Function Intelligence — call graph, literal index across all languages.
192
192
  // Runs as an additive second pass; never modifies the architecture snapshot.
193
193
  try {
194
- const allRoots = (architecture.project.roots ?? [projectRoot]).map((r) => path.isAbsolute(r) ? r : path.join(projectRoot, r));
195
- const funcIntel = await buildFunctionIntelligenceFromRoots(allRoots, config);
194
+ const funcIntel = await buildFunctionIntelligenceFromRoots([projectRoot], config, projectRoot);
196
195
  await writeFunctionIntelligence(layout.machineDir, funcIntel);
197
196
  }
198
197
  catch (err) {
@@ -421,27 +420,6 @@ function mergeFrontendAnalyses(results, _roots, _workspaceRoot) {
421
420
  tests: results.flatMap(r => r.tests)
422
421
  };
423
422
  }
424
- function findCommonRoot(paths) {
425
- if (paths.length === 0) {
426
- return process.cwd();
427
- }
428
- const splitPaths = paths.map((entry) => path.resolve(entry).split(path.sep));
429
- const minLength = Math.min(...splitPaths.map((parts) => parts.length));
430
- const shared = [];
431
- for (let i = 0; i < minLength; i += 1) {
432
- const segment = splitPaths[0][i];
433
- if (splitPaths.every((parts) => parts[i] === segment)) {
434
- shared.push(segment);
435
- }
436
- else {
437
- break;
438
- }
439
- }
440
- if (shared.length === 0) {
441
- return path.parse(paths[0]).root;
442
- }
443
- return shared.join(path.sep);
444
- }
445
423
  async function loadPreviousSnapshots(machineDir, rootDir) {
446
424
  const result = {};
447
425
  const candidates = [