@toolbaux/guardian 0.1.22 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +6 -4
  2. package/dist/adapters/runner.js +72 -3
  3. package/dist/adapters/typescript-adapter.js +24 -10
  4. package/dist/benchmarking/metrics/context-coverage.js +82 -0
  5. package/dist/benchmarking/metrics/drift-score.js +104 -0
  6. package/dist/benchmarking/metrics/search-recall.js +207 -0
  7. package/dist/benchmarking/metrics/token-efficiency.js +79 -0
  8. package/dist/benchmarking/report.js +131 -0
  9. package/dist/benchmarking/runner.js +175 -0
  10. package/dist/benchmarking/types.js +13 -0
  11. package/dist/cli.js +53 -10
  12. package/dist/commands/benchmark.js +62 -0
  13. package/dist/commands/context.js +87 -29
  14. package/dist/commands/discrepancy.js +1 -1
  15. package/dist/commands/doc-generate.js +1 -1
  16. package/dist/commands/doc-html.js +1 -1
  17. package/dist/commands/extract.js +4 -1
  18. package/dist/commands/feature-context.js +1 -1
  19. package/dist/commands/generate.js +83 -10
  20. package/dist/commands/init.js +89 -56
  21. package/dist/commands/intel.js +70 -1
  22. package/dist/commands/mcp-serve.js +155 -316
  23. package/dist/commands/search.js +642 -14
  24. package/dist/config.js +1 -0
  25. package/dist/db/embeddings.js +113 -0
  26. package/dist/db/file-specs-store.js +174 -0
  27. package/dist/db/fts-builder.js +390 -0
  28. package/dist/db/index.js +55 -0
  29. package/dist/db/specs-store.js +13 -0
  30. package/dist/db/sqlite-specs-store.js +934 -0
  31. package/dist/extract/codebase-intel.js +31 -2
  32. package/dist/extract/compress.js +70 -3
  33. package/dist/extract/context-block.js +11 -2
  34. package/dist/extract/function-intel.js +5 -2
  35. package/dist/extract/index.js +1 -23
  36. package/dist/extract/writer.js +6 -0
  37. package/package.json +4 -1
@@ -7,6 +7,56 @@ import { resolveMachineInputDir } from "../output-layout.js";
7
7
  import { DEFAULT_SPECS_DIR } from "../config.js";
8
8
  export async function runSearch(options) {
9
9
  const inputDir = await resolveMachineInputDir(options.input || DEFAULT_SPECS_DIR);
10
+ // ── SQLite/FTS5 backend: BM25-ranked search via guardian.db ──────────────
11
+ // SQLite is primary for ALL formats when guardian.db exists.
12
+ // File-based search is only a fallback for backward compatibility.
13
+ if ((options.backend === "sqlite" || options.backend === "auto") && options.query) {
14
+ if (options.format === "json") {
15
+ // For JSON output (used by MCP): merge BM25-ranked files into querySearch output
16
+ const sqliteResult = await getSqliteFileList(options.input || DEFAULT_SPECS_DIR, options.query, options.topN ?? 20, options.backend);
17
+ if (sqliteResult !== null) {
18
+ const base = JSON.parse(await querySearch(inputDir, options.query));
19
+ base.files = sqliteResult.files;
20
+ base.symbols = sqliteResult.symbols;
21
+ base.search_signal = sqliteResult.signal;
22
+ console.log(JSON.stringify(base));
23
+ return;
24
+ }
25
+ // No guardian.db — fall through to file-based querySearch below
26
+ }
27
+ else {
28
+ const handled = await runSearchSqlite(options.input || DEFAULT_SPECS_DIR, options.query, options.topN ?? 20, options.backend);
29
+ if (handled)
30
+ return; // false = no guardian.db, fall through to file search
31
+ }
32
+ }
33
+ // ── Mode dispatch: intel-based lookups ──
34
+ if (options.orient) {
35
+ console.log(await queryOrient(inputDir));
36
+ return;
37
+ }
38
+ if (options.file) {
39
+ console.log(await queryFile(inputDir, options.file));
40
+ return;
41
+ }
42
+ if (options.model) {
43
+ console.log(await queryModel(inputDir, options.model));
44
+ return;
45
+ }
46
+ if (options.impact) {
47
+ console.log(await queryImpact(inputDir, options.impact));
48
+ return;
49
+ }
50
+ // ── Semantic search ──
51
+ if (!options.query) {
52
+ console.error("Error: --query is required for semantic search (or use --orient / --file / --model / --impact)");
53
+ process.exit(1);
54
+ }
55
+ if (options.format === "json") {
56
+ // Fallback: file-based categorical search (no guardian.db available)
57
+ console.log(await querySearch(inputDir, options.query));
58
+ return;
59
+ }
10
60
  const { architecture, ux } = await loadSnapshots(inputDir);
11
61
  const heatmap = await loadHeatmap(inputDir);
12
62
  const funcIntel = await loadFunctionIntelligence(inputDir);
@@ -22,7 +72,9 @@ export async function runSearch(options) {
22
72
  projectRoot,
23
73
  topN: options.topN ?? 10,
24
74
  });
25
- const content = renderSearchMarkdown(options.query, matches);
75
+ const content = options.verbose
76
+ ? renderSearchMarkdownVerbose(options.query, matches)
77
+ : renderSearchMarkdown(options.query, matches);
26
78
  if (options.output) {
27
79
  const outputPath = path.resolve(options.output);
28
80
  await fs.mkdir(path.dirname(outputPath), { recursive: true });
@@ -32,6 +84,146 @@ export async function runSearch(options) {
32
84
  }
33
85
  console.log(content);
34
86
  }
87
+ // ── SQLite / FTS5 search path ────────────────────────────────────────────────
88
/**
 * Clean up a free-text query before it is handed to the FTS5 matcher.
 *
 * Strips commit-message noise (PR/issue references, "Fixed #123 --" style
 * prefixes, conventional-commit type prefixes, double dashes) and splits
 * camelCase / snake_case identifiers into separate words. Letter case is
 * left untouched.
 *
 * @param {string} q - Raw query text.
 * @returns {string} The cleaned query, whitespace-collapsed and trimmed.
 */
function preprocessSearchQuery(q) {
    return q
        // Remove PR/issue references: (#1234) or #1234
        .replace(/\(#\d+\)/g, "")
        .replace(/#\d+\s*/g, "")
        // The two prefix patterns below are anchored with ^. Removing a leading
        // reference such as "(#12) " leaves whitespace at the start, which would
        // stop them from matching, so drop it first.
        .trimStart()
        // Remove commit prefixes: "Fixed #37016 --", "Refs #28455 --"
        .replace(/^(?:Fixed|Refs|Closes|Resolved)\s*(?:#\d+\s*)?--?\s*/i, "")
        // Remove conventional commit types: "feat(deps)!:", "chore:", "docs:", etc.
        .replace(/^(?:feat|fix|chore|docs|test|refactor|style|perf|ci|build)(?:\([^)]+\))?!?:\s*/i, "")
        // Remove double dashes
        .replace(/\s*--\s*/g, " ")
        // Split camelCase: getUserById → get User By Id
        .replace(/([a-z])([A-Z])/g, "$1 $2")
        // Split an acronym from the word that follows: HTTPServer → HTTP Server
        .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
        // Split snake_case: get_user_by_id → get user by id
        .replace(/_/g, " ")
        // Normalize whitespace
        .replace(/\s+/g, " ")
        .trim();
}
113
/**
 * Format graph-search hits plus symbol hits as the markdown report printed by
 * runSearchSqlite.
 *
 * @param {string} query - Query text shown in the report heading.
 * @param {Array} results - File hits; each is read for file_path, rank,
 *   matching_symbols, imports and used_by.
 * @param {Array} symbols - Symbol hits; each is read for file_path, name and line.
 * @returns {string} The report text.
 */
function renderSqliteResults(query, results, symbols) {
    // Group located symbols by file for quick lookup while rendering.
    const symbolsByFile = new Map();
    for (const s of symbols) {
        if (!symbolsByFile.has(s.file_path))
            symbolsByFile.set(s.file_path, []);
        symbolsByFile.get(s.file_path).push({ name: s.name, line: s.line });
    }
    const lines = [`## FTS5 search: "${query}"\n`];
    for (const r of results) {
        const rank = Math.abs(r.rank).toFixed(3);
        lines.push(`### \`${r.file_path}\` (score: ${rank})`);
        const fileSyms = symbolsByFile.get(r.file_path) ?? [];
        // Symbols already listed with a line number are not repeated in the
        // plain "symbols:" list; a Set keeps that check linear.
        const locatedNames = new Set(fileSyms.map((s) => s.name));
        const inlineSyms = r.matching_symbols.filter((s) => !locatedNames.has(s));
        for (const s of fileSyms)
            lines.push(` → \`${s.name}\` :${s.line}`);
        if (inlineSyms.length)
            lines.push(` symbols: ${inlineSyms.join(", ")}`);
        if (r.imports.length)
            lines.push(` imports: ${r.imports.join(", ")}`);
        if (r.used_by.length)
            lines.push(` used by: ${r.used_by.join(", ")}`);
        lines.push("");
    }
    return lines.join("\n");
}
/**
 * Run a search against the SQLite store and print a markdown report.
 *
 * @param {string} specsInput - Specs directory; resolved and passed to getOutputLayout.
 * @param {string} query - Raw user query.
 * @param {number} limit - Maximum number of file hits; ceil(limit / 2) symbol hits are requested.
 * @param {string} [backend="sqlite"] - Backend option forwarded to openSpecsStore.
 * @returns {Promise<boolean>} true when something was printed; false when backend
 *   is "auto" and either the opened store is not a SqliteSpecsStore or nothing
 *   matched, so the caller should fall through to file-based search.
 *
 * When backend is not "auto" and the opened store is not a SqliteSpecsStore, an
 * error is printed and the process exits with code 1.
 */
async function runSearchSqlite(specsInput, query, limit, backend = "sqlite") {
    const { openSpecsStore } = await import("../db/index.js");
    const { SqliteSpecsStore } = await import("../db/sqlite-specs-store.js");
    const { getOutputLayout } = await import("../output-layout.js");
    const layout = getOutputLayout(path.resolve(specsInput));
    const store = await openSpecsStore(layout, { backend });
    try {
        if (store instanceof SqliteSpecsStore) {
            const cleaned = preprocessSearchQuery(query);
            let results = store.searchWithGraph(cleaned, limit);
            // If the preprocessed query returns nothing, try the raw query as a fallback
            if (results.length === 0 && cleaned !== query) {
                results = store.searchWithGraph(query, limit);
            }
            if (results.length === 0) {
                if (backend === "auto")
                    return false; // fall through to file-based search
                console.log(`No FTS results for "${query}"`);
                return true;
            }
            let queryVec;
            try {
                const { embedQuery } = await import("../db/embeddings.js");
                const vec = await embedQuery(cleaned || query, process.env.OPENAI_API_KEY);
                if (vec)
                    queryVec = vec;
            }
            catch { /* graceful degradation: continue without a query vector */ }
            const symbols = store.searchSymbols(cleaned || query, Math.ceil(limit / 2), queryVec);
            console.log(renderSqliteResults(query, results, symbols));
            return true;
        }
        if (backend === "auto")
            return false; // fall through to file search
    }
    finally {
        await store.close();
    }
    // Reached only when an explicit backend was requested but no SQLite store was
    // opened. process.exit() does not run pending finally blocks, so the exit is
    // placed after the store has been closed above.
    console.error("guardian.db not found — run `guardian extract --backend sqlite` first.");
    process.exit(1);
}
185
/**
 * Gather ranked files, symbol hits and the query signal from the SQLite store
 * for JSON output.
 *
 * @param {string} specsInput - Specs directory; resolved and passed to getOutputLayout.
 * @param {string} query - Raw user query.
 * @param {number} limit - Maximum number of file hits; ceil(limit / 2) symbol hits are requested.
 * @param {string} [backend="auto"] - Backend option forwarded to openSpecsStore.
 * @returns {Promise<{files: string[], symbols: Array, signal: *} | null>} null when
 *   the opened store is not a SqliteSpecsStore or when nothing matched, so the
 *   caller can fall back to querySearch().
 */
async function getSqliteFileList(specsInput, query, limit, backend = "auto") {
    const { openSpecsStore } = await import("../db/index.js");
    const { SqliteSpecsStore } = await import("../db/sqlite-specs-store.js");
    const { getOutputLayout } = await import("../output-layout.js");
    const store = await openSpecsStore(getOutputLayout(path.resolve(specsInput)), { backend });
    try {
        // No guardian.db: the caller uses the file-based fallback.
        if (!(store instanceof SqliteSpecsStore))
            return null;
        const cleaned = preprocessSearchQuery(query);
        const firstPass = store.searchWithGraph(cleaned, limit);
        // Retry with the raw query only when preprocessing changed it and found nothing.
        const hits = firstPass.length === 0 && cleaned !== query
            ? store.searchWithGraph(query, limit)
            : firstPass;
        if (hits.length === 0)
            return null;
        const signal = store.querySignal(query);
        const symbolQuery = cleaned || query;
        // The query embedding is optional: any failure leaves it undefined.
        let queryVec;
        try {
            const { embedQuery } = await import("../db/embeddings.js");
            const embedded = await embedQuery(symbolQuery, process.env.OPENAI_API_KEY);
            if (embedded)
                queryVec = embedded;
        }
        catch { /* graceful degradation — vector unavailable */ }
        const symbolHits = store.searchSymbols(symbolQuery, Math.ceil(limit / 2), queryVec);
        const files = hits.map((hit) => hit.file_path);
        const symbols = symbolHits.map((sym) => ({ file: sym.file_path, name: sym.name, line: sym.line }));
        return { files, symbols, signal };
    }
    finally {
        await store.close();
    }
}
226
+ // ── File-based snapshots loader (original, unchanged) ────────────────────────
35
227
  async function loadSnapshots(inputDir) {
36
228
  const architecturePath = path.join(inputDir, "architecture.snapshot.yaml");
37
229
  const uxPath = path.join(inputDir, "ux.snapshot.yaml");
@@ -55,24 +247,21 @@ async function loadSnapshots(inputDir) {
55
247
  };
56
248
  }
57
249
  function normalizeTypes(types) {
250
+ const ALL_TYPES = ["models", "endpoints", "components", "modules", "tasks", "files"];
58
251
  if (!types || types.length === 0) {
59
- return new Set(["models", "endpoints", "components", "modules", "tasks"]);
252
+ return new Set(ALL_TYPES);
60
253
  }
61
254
  const normalized = new Set();
62
255
  for (const entry of types) {
63
256
  for (const part of entry.split(",").map((value) => value.trim().toLowerCase())) {
64
- if (part === "models" ||
65
- part === "endpoints" ||
66
- part === "components" ||
67
- part === "modules" ||
68
- part === "tasks") {
257
+ if (ALL_TYPES.includes(part) || part === "functions") {
69
258
  normalized.add(part);
70
259
  }
71
260
  }
72
261
  }
73
262
  return normalized.size > 0
74
263
  ? normalized
75
- : new Set(["models", "endpoints", "components", "modules", "tasks", "functions"]);
264
+ : new Set([...ALL_TYPES, "functions"]);
76
265
  }
77
266
  function tokenize(value) {
78
267
  return value
@@ -124,6 +313,31 @@ function searchSnapshots(params) {
124
313
  entry.id,
125
314
  entry.score
126
315
  ]));
316
+ // PageRank scores per file — prefer file-level heatmap, fall back to module-level
317
+ // (maps absolute or relative file path → pagerank score in [0,1])
318
+ const filePrFromFileLevel = new Map((heatmap?.levels.find((level) => level.level === "file")?.entries ?? []).map((entry) => [
319
+ entry.id,
320
+ entry.components.pagerank ?? 0
321
+ ]));
322
+ // Build file→module map so we can use module-level PR when file-level is unavailable
323
+ const fileToModuleId = new Map();
324
+ for (const mod of architecture.modules) {
325
+ for (const f of mod.files) {
326
+ fileToModuleId.set(f, mod.id);
327
+ fileToModuleId.set(path.join(projectRoot, f), mod.id);
328
+ }
329
+ }
330
+ const modulePrMap = new Map((heatmap?.levels.find((level) => level.level === "module")?.entries ?? []).map((entry) => [
331
+ entry.id,
332
+ entry.components.pagerank ?? 0
333
+ ]));
334
+ const getFilePr = (filePath) => {
335
+ const direct = filePrFromFileLevel.get(filePath);
336
+ if (direct !== undefined)
337
+ return direct;
338
+ const modId = fileToModuleId.get(filePath) ?? fileToModuleId.get(path.relative(projectRoot, filePath));
339
+ return modulePrMap.get(modId ?? "") ?? 0;
340
+ };
127
341
  if (types.has("models")) {
128
342
  for (const model of architecture.data_models) {
129
343
  const score = scoreItem(queryTokens, {
@@ -244,6 +458,57 @@ function searchSnapshots(params) {
244
458
  });
245
459
  }
246
460
  }
461
+ if (types.has("files")) {
462
+ const allFiles = new Map(); // keyed by normalized project-relative path
463
+ // Helper: normalize a path to project-relative form
464
+ const normalizePath = (rawPath, moduleId) => {
465
+ if (rawPath.startsWith("frontend/") || rawPath.startsWith("backend/"))
466
+ return rawPath;
467
+ // UX snapshot stores paths relative to frontend root (e.g. "app/parent/login.tsx")
468
+ if (moduleId.startsWith("frontend/"))
469
+ return `frontend/${rawPath}`;
470
+ return rawPath;
471
+ };
472
+ for (const mod of architecture.modules) {
473
+ for (const f of mod.files) {
474
+ const norm = normalizePath(f, mod.id);
475
+ const pr = getFilePr(f) || getFilePr(norm);
476
+ allFiles.set(norm, { filePath: norm, module: mod.id, pagerank: pr });
477
+ }
478
+ }
479
+ // Also collect ux component files (may not be in arch modules)
480
+ for (const comp of ux.components) {
481
+ if (!comp.file)
482
+ continue;
483
+ const norm = normalizePath(comp.file, "frontend/app");
484
+ if (!allFiles.has(norm)) {
485
+ allFiles.set(norm, { filePath: norm, module: "frontend/app", pagerank: getFilePr(norm) });
486
+ }
487
+ }
488
+ for (const { filePath, module: modId, pagerank } of allFiles.values()) {
489
+ const filename = path.basename(filePath);
490
+ const stem = filename.replace(/\.[^.]+$/, ""); // without extension
491
+ // Score: query overlap against path segments + filename stem
492
+ const pathSegments = filePath.split("/");
493
+ const queryScore = scoreItem(queryTokens, {
494
+ name: stem,
495
+ file: filePath,
496
+ text: pathSegments
497
+ });
498
+ if (queryScore <= 0)
499
+ continue;
500
+ // Blend query relevance + PageRank (architecturally important files surface higher)
501
+ const score = 0.7 * queryScore + 0.3 * pagerank;
502
+ matches.push({
503
+ type: "files",
504
+ name: filePath,
505
+ score,
506
+ markdown: [
507
+ `${filePath} [${modId}]${pagerank > 0.5 ? " · high-pagerank" : ""}`
508
+ ]
509
+ });
510
+ }
511
+ }
247
512
  if (types.has("functions") && funcIntel) {
248
513
  const queryTokens = tokenize(query);
249
514
  const fnMatches = [];
@@ -282,13 +547,16 @@ function searchSnapshots(params) {
282
547
  };
283
548
  // 1. Name match — function / theorem name contains a query token
284
549
  for (const fn of funcIntel.functions) {
285
- const score = scoreItem(queryTokens, {
550
+ const queryScore = scoreItem(queryTokens, {
286
551
  name: fn.name,
287
552
  file: fn.file,
288
553
  text: [...fn.stringLiterals, ...fn.regexPatterns, ...fn.calls, fn.language],
289
554
  });
290
- if (score <= 0)
555
+ if (queryScore <= 0)
291
556
  continue;
557
+ // Blend: 70% query relevance + 30% file PageRank (importance of the file in the graph)
558
+ const pr = getFilePr(fn.file);
559
+ const score = 0.7 * queryScore + 0.3 * pr;
292
560
  const relFile = relativize(fn.file);
293
561
  const lineRange = `${fn.lines[0]}–${fn.lines[1]}`;
294
562
  const detail = buildDetail(fn, relFile);
@@ -316,17 +584,19 @@ function searchSnapshots(params) {
316
584
  const fn = funcIntel.functions.find((f) => f.file === hit.file && f.name === hit.function);
317
585
  if (!fn)
318
586
  continue;
319
- const score = scoreItem(queryTokens, {
587
+ const queryScore = scoreItem(queryTokens, {
320
588
  name: fn.name,
321
589
  file: fn.file,
322
590
  text: [...fn.stringLiterals, ...fn.regexPatterns, ...fn.calls, fn.language],
323
591
  });
592
+ const pr = getFilePr(fn.file);
593
+ const score = Math.max(0.7 * queryScore + 0.3 * pr, 0.2);
324
594
  const relFile = relativize(fn.file);
325
595
  const detail = buildDetail(fn, relFile);
326
596
  fnMatches.push({
327
597
  type: "functions",
328
598
  name: `${fn.name} (${fn.language})`,
329
- score: Math.max(score, 0.2), // floor at 0.2 so literal hits still surface but rank below name matches
599
+ score,
330
600
  markdown: [
331
601
  `**${fn.name}** · ${relFile}:${fn.lines[0]}–${fn.lines[1]} · ${fn.language}`,
332
602
  `Matched literal/pattern containing "${tok}"`,
@@ -379,7 +649,92 @@ function formatProps(props) {
379
649
  .map((prop) => `${prop.name}${prop.optional ? "?" : ""}: ${prop.type}`)
380
650
  .join(", ");
381
651
  }
652
/**
 * Compact file-first renderer — the default for agent navigation.
 *
 * Deduplicates matches by file path and emits one line per file:
 *   backend/service-auth/main.py [create_child, PersonaCreateRequest, ...]
 *
 * Files are ordered by their best match score (descending), capped at 15
 * files, with at most 6 symbols listed per file. Matches of type "modules",
 * and matches whose first markdown line yields no file path, are skipped.
 *
 * @param {string} query - Query text shown in the heading.
 * @param {Array<{type: string, score: number, markdown: string[]}>} matches
 * @returns {string} Markdown text.
 */
function renderSearchMarkdown(query, matches) {
    if (matches.length === 0) {
        return `# Search: "${query}"\n\n*No matches found.*`;
    }
    const MAX_FILES = 15;
    const MAX_SYMBOLS_PER_FILE = 6;
    // Pull the file path out of a match's first markdown line.
    const extractFile = (md, matchType) => {
        // Modules are collections — their path isn't a usable file path; skip them.
        if (matchType === "modules")
            return null;
        const first = md[0] ?? "";
        // Endpoint format: "POST /path → handler (file.py)"
        let m = first.match(/\(([^)]+)\)\s*$/);
        if (m)
            return m[1].trim();
        // Files type: bare path at start, no bold markdown:
        // "path/to/file [module]" or "path/to/file [module] · high-pagerank"
        m = first.match(/^([^\s[*]+)\s+\[/);
        if (m)
            return m[1].trim();
        // Model/component/task/function: "**Name** · file.py ...".
        // Function lines carry a line range ("file.py:10–20 · python"); the
        // optional ":…" suffix is consumed here so the path is captured rather
        // than the trailing language segment.
        m = first.match(/·\s+([^\s·:]+)(?::\S*)?\s*(?:·|$)/);
        return m ? m[1].trim() : null;
    };
    // Pull a short symbol label out of a match's first markdown line.
    const extractSymbol = (md, matchType) => {
        const first = md[0] ?? "";
        if (matchType === "endpoints") {
            // "POST /path → handler (file)" → "handler"
            const m = first.match(/→\s+(\S+)\s+\(/);
            return m ? m[1] : null;
        }
        if (matchType === "models" || matchType === "tasks" || matchType === "functions" || matchType === "components") {
            // "**Name** · file" → "Name"
            const m = first.match(/\*\*([^*]+)\*\*/);
            return m ? m[1] : null;
        }
        return null;
    };
    // file path → { best score, distinct symbols in first-seen order }
    const fileMap = new Map();
    for (const match of matches) {
        const file = extractFile(match.markdown, match.type);
        if (!file)
            continue;
        const symbol = extractSymbol(match.markdown, match.type);
        const existing = fileMap.get(file);
        if (!existing) {
            fileMap.set(file, { score: match.score, symbols: symbol ? [symbol] : [] });
            continue;
        }
        if (match.score > existing.score)
            existing.score = match.score;
        if (symbol && !existing.symbols.includes(symbol))
            existing.symbols.push(symbol);
    }
    const ranked = Array.from(fileMap.entries())
        .sort(([, a], [, b]) => b.score - a.score)
        .slice(0, MAX_FILES);
    const lines = [`# Search: "${query}" — ${ranked.length} relevant files\n`];
    for (const [file, { symbols }] of ranked) {
        const sym = symbols.slice(0, MAX_SYMBOLS_PER_FILE).join(", ");
        lines.push(sym ? `${file} [${sym}]` : file);
    }
    return lines.join("\n").trimEnd();
}
734
+ /**
735
+ * Verbose grouped renderer — kept for human inspection (`--verbose`).
736
+ */
737
+ function renderSearchMarkdownVerbose(query, matches) {
383
738
  const grouped = new Map();
384
739
  for (const match of matches) {
385
740
  const entry = grouped.get(match.type) ?? [];
@@ -392,6 +747,7 @@ function renderSearchMarkdown(query, matches) {
392
747
  ["components", "Components"],
393
748
  ["modules", "Modules"],
394
749
  ["tasks", "Tasks"],
750
+ ["files", "Files"],
395
751
  ["functions", "Functions"],
396
752
  ];
397
753
  const lines = [];
@@ -403,9 +759,8 @@ function renderSearchMarkdown(query, matches) {
403
759
  }
404
760
  for (const [type, label] of labels) {
405
761
  const entries = grouped.get(type) ?? [];
406
- if (entries.length === 0) {
762
+ if (entries.length === 0)
407
763
  continue;
408
- }
409
764
  lines.push(`## ${label} (${entries.length})`);
410
765
  lines.push("");
411
766
  for (const entry of entries.slice(0, 8)) {
@@ -415,3 +770,276 @@ function renderSearchMarkdown(query, matches) {
415
770
  }
416
771
  return lines.join("\n").trimEnd();
417
772
  }
773
+ // ─────────────────────────────────────────────────────────────────────────────
774
+ // Intel-based query functions
775
+ // Read from pre-built intelligence files (written by VSCode plugin / guardian extract).
776
+ // These are the authoritative implementations — MCP tools call the CLI which calls these.
777
+ // ─────────────────────────────────────────────────────────────────────────────
778
/**
 * Read and parse `codebase-intelligence.json` from the given directory.
 *
 * @param {string} inputDir - Directory expected to contain the file.
 * @returns {Promise<object>} The parsed JSON, or an empty skeleton (empty
 *   registries, lists and meta) when the file cannot be read or parsed.
 */
async function loadCodebaseIntel(inputDir) {
    const source = path.join(inputDir, "codebase-intelligence.json");
    let intel;
    try {
        intel = JSON.parse(await fs.readFile(source, "utf8"));
    }
    catch {
        // Best effort: any read or parse failure yields the empty skeleton.
        intel = {
            api_registry: {},
            model_registry: {},
            service_map: [],
            frontend_pages: [],
            enum_registry: {},
            background_tasks: [],
            meta: {},
        };
    }
    return intel;
}
788
/**
 * Read and parse `function-intelligence.json` from the given directory.
 *
 * @param {string} inputDir - Directory expected to contain the file.
 * @returns {Promise<object|null>} The parsed JSON, or null when the file
 *   cannot be read or parsed.
 */
async function loadFuncIntelRaw(inputDir) {
    const source = path.join(inputDir, "function-intelligence.json");
    let parsed = null;
    try {
        parsed = JSON.parse(await fs.readFile(source, "utf8"));
    }
    catch {
        // Best effort: a missing or malformed file is reported as null.
        parsed = null;
    }
    return parsed;
}
798
+ // ── Scoring (same algorithm as MCP, kept in sync) ──
799
// Call names that are dropped from service-call lists (exact, case-sensitive match).
const SKIP_SERVICES = new Set([
    "str",
    "dict",
    "int",
    "len",
    "float",
    "max",
    "join",
    "getattr",
    "lower",
    "open",
    "params.append",
    "updates.append",
]);
/**
 * Decide whether a service-call name is too generic to be useful for scoring.
 *
 * @param {string} s - Call name.
 * @returns {boolean} true when the name is in SKIP_SERVICES (exact match) or
 *   starts, case-insensitively, with one of the generic receiver prefixes.
 */
function isGenericCall(s) {
    if (SKIP_SERVICES.has(s)) {
        return true;
    }
    const lowered = s.toLowerCase();
    for (const prefix of ["service.", "self.", "db.", "session.", "response.", "request.", "app.", "router.", "logger.", "config.", "os.", "json.", "re.", "datetime.", "uuid."]) {
        if (lowered.startsWith(prefix)) {
            return true;
        }
    }
    return false;
}
806
/**
 * Score a query against a list of weighted text fields and return the best
 * single-field score.
 *
 * Per field, the first rule that applies decides the factor:
 *   1.0  — the field equals the query (case-insensitive)
 *   0.8  — the field contains the whole query
 *   0.6  — the query has several tokens and the field contains all of them
 *   0.45 — at least two tokens of length >= 3 occur in the field
 *   0.3  — exactly one such token occurs
 * The field's score is weight × factor; fields with an empty value are skipped.
 *
 * @param {string} query - Query text; tokens are split on whitespace.
 * @param {Array<{value: string, weight: number}>} fields
 * @returns {number} The highest field score, or 0 when nothing matched.
 */
function scoreQueryIntel(query, fields) {
    const needle = query.toLowerCase();
    const tokens = needle.split(/\s+/).filter(Boolean);
    const factorFor = (haystack) => {
        if (haystack === needle)
            return 1.0;
        if (haystack.includes(needle))
            return 0.8;
        if (tokens.length > 1 && tokens.every((t) => haystack.includes(t)))
            return 0.6;
        let hits = 0;
        for (const t of tokens) {
            if (t.length >= 3 && haystack.includes(t))
                hits += 1;
        }
        if (hits === 0)
            return 0;
        return hits >= 2 ? 0.45 : 0.3;
    };
    let best = 0;
    for (const field of fields) {
        if (!field.value)
            continue;
        const factor = factorFor(field.value.toLowerCase());
        if (factor > 0)
            best = Math.max(best, field.weight * factor);
    }
    return best;
}
832
/**
 * Normalize a file path for comparison: drop one leading "./" and collapse
 * every run of consecutive slashes into a single "/".
 *
 * @param {string} p - Path text.
 * @returns {string} The normalized path.
 */
function normalizeFilePath(p) {
    // Match runs of two or more slashes: replacing only "//" pairs would turn
    // "a///b" into "a//b" instead of "a/b".
    return p.replace(/^\.\//, "").replace(/\/{2,}/g, "/");
}
835
/**
 * Find the service-map entry a file belongs to.
 *
 * First looks for a module whose normalized `path` equals the file or is a
 * directory prefix of it; if none is found, falls back to the first module
 * whose normalized `id` occurs anywhere in the file path.
 *
 * @param {object} data - Intelligence data; `service_map` is read.
 * @param {string} file - File path.
 * @returns {object|undefined} The matching module entry, if any.
 */
function findModuleForFile(data, file) {
    const wanted = normalizeFilePath(file);
    const modules = data.service_map;
    const ownsByPath = (mod) => {
        const modPath = normalizeFilePath(mod.path || "");
        return modPath !== "" && (wanted === modPath || wanted.startsWith(modPath + "/"));
    };
    const mentionsId = (mod) => {
        const modId = normalizeFilePath(mod.id || "");
        return modId !== "" && wanted.includes(modId);
    };
    const byPath = modules?.find(ownsByPath);
    if (byPath) {
        return byPath;
    }
    return modules?.find(mentionsId);
}
845
/**
 * List the API-registry entries whose `file` refers to the given file.
 *
 * An entry matches when, after normalization, either path contains the other
 * or the entry's file ends with the target's basename. Entries without a file
 * never match.
 *
 * @param {object} data - Intelligence data; `api_registry` is read.
 * @param {string} file - File path.
 * @returns {Array<object>} Matching endpoint entries, in registry order.
 */
function findEndpointsInFile(data, file) {
    const wanted = normalizeFilePath(file);
    const base = path.basename(wanted);
    const hits = [];
    for (const endpoint of Object.values(data.api_registry || {})) {
        const endpointFile = normalizeFilePath(endpoint.file || "");
        if (endpointFile === "") {
            continue;
        }
        if (wanted.includes(endpointFile) || endpointFile.includes(wanted) || endpointFile.endsWith(base)) {
            hits.push(endpoint);
        }
    }
    return hits;
}
853
/**
 * List the model-registry entries whose `file` refers to the given file.
 *
 * A model matches when, after normalization, either path contains the other
 * or the model's file ends with the target's basename. Models without a file
 * never match.
 *
 * @param {object} data - Intelligence data; `model_registry` is read.
 * @param {string} file - File path.
 * @returns {Array<object>} Matching model entries, in registry order.
 */
function findModelsInFile(data, file) {
    const wanted = normalizeFilePath(file);
    const base = path.basename(wanted);
    const refersToTarget = (modelFile) => modelFile !== "" &&
        (wanted.includes(modelFile) || modelFile.includes(wanted) || modelFile.endsWith(base));
    const registry = data.model_registry || {};
    return Object.values(registry).filter((model) => refersToTarget(normalizeFilePath(model.file || "")));
}
861
+ // ── orient: architecture-context.md as compact JSON ──
862
/**
 * Produce a compact JSON orientation summary for a specs directory.
 *
 * Preferred source is the `<!-- guardian:context … -->` block inside
 * `architecture-context.md`; its non-empty lines are sorted into description,
 * backend map, modules, dependencies, coupling and model→endpoint entries.
 * When that file is missing, unreadable, or has no such block, a smaller
 * summary is built from the codebase intelligence data instead.
 *
 * @param {string} inputDir - Specs directory.
 * @returns {Promise<string>} JSON text.
 */
export async function queryOrient(inputDir) {
    const contextPath = path.join(inputDir, "architecture-context.md");
    try {
        const raw = await fs.readFile(contextPath, "utf8");
        const block = raw.match(/<!-- guardian:context[^>]*-->([\s\S]*?)<!-- \/guardian:context -->/);
        if (block) {
            const withoutBullet = (line) => line.replace(/^- /, "");
            const lines = [];
            for (const rawLine of block[1].split("\n")) {
                const trimmed = rawLine.trim();
                if (trimmed)
                    lines.push(trimmed);
            }
            // The description is looked up in the whole file, not just the block.
            const desc = raw.match(/Description: (.+)/)?.[1]?.slice(0, 120) ?? "";
            const map = lines.find((line) => line.startsWith("**Backend:**")) ?? "";
            const modules = lines.flatMap((line) => {
                if (!/^- \*\*[^*]+\*\*\s*\([^)]+\)/.test(line))
                    return [];
                const parts = line.match(/\*\*([^*]+)\*\*\s*\(([^)]+)\)/);
                return parts ? [`${parts[1]} (${parts[2]})`] : [];
            });
            const deps = lines.filter((line) => line.includes("→")).map(withoutBullet);
            const coupling = lines.filter((line) => /score \d/.test(line)).map(withoutBullet).slice(0, 5);
            const modelEp = lines.filter((line) => line.includes("endpoints) ->")).map(withoutBullet);
            return JSON.stringify({ desc, map, modules, deps, coupling, modelEp });
        }
    }
    catch {
        // Any failure while reading the context file falls through to the fallback below.
    }
    const intel = await loadCodebaseIntel(inputDir);
    const counts = intel.meta?.counts || {};
    const pages = (intel.frontend_pages || []).map((page) => page.path);
    return JSON.stringify({ p: intel.meta?.project, ep: counts.endpoints, models: counts.models, pg: counts.pages, pages });
}
887
+ // ── file: per-file or per-endpoint context ──
888
/**
 * Describe a single endpoint or a single file as compact JSON.
 *
 * When `target` looks like "<METHOD> <path>" (GET/POST/PUT/DELETE/PATCH, any
 * case) the matching API-registry entry is described, or `{ err: "not found" }`
 * is returned. Otherwise `target` is treated as a file path and its module,
 * endpoints, models, outgoing service calls and up to 8 calling endpoints are
 * reported.
 *
 * @param {string} inputDir - Specs directory.
 * @param {string} target - Endpoint spec or file path.
 * @returns {Promise<string>} JSON text.
 */
export async function queryFile(inputDir, target) {
    const d = await loadCodebaseIntel(inputDir);
    const label = (ep) => `${ep.method} ${ep.path}`;
    const meaningfulCalls = (ep) => (ep.service_calls || []).filter((s) => !SKIP_SERVICES.has(s));
    const epMatch = target.match(/^(GET|POST|PUT|DELETE|PATCH)\s+(.+)$/i);
    if (epMatch) {
        const method = epMatch[1].toUpperCase();
        const route = epMatch[2];
        // Try the registry key first, then fall back to scanning entries.
        const ep = d.api_registry?.[`${method} ${route}`]
            || Object.values(d.api_registry || {}).find((e) => e.method === method && e.path === route);
        if (!ep) {
            return JSON.stringify({ err: "not found" });
        }
        return JSON.stringify({
            ep: label(ep),
            h: ep.handler,
            f: ep.file,
            m: ep.module,
            req: ep.request_schema,
            res: ep.response_schema,
            calls: meaningfulCalls(ep),
            ai: ep.ai_operations?.length || 0,
        });
    }
    const file = normalizeFilePath(target);
    const mod = findModuleForFile(d, file);
    const eps = findEndpointsInFile(d, file);
    const models = findModelsInFile(d, file);
    // Endpoints count as callers when any of their service calls mentions the
    // file's extension-less basename (case-insensitive).
    const stem = path.basename(file, path.extname(file)).toLowerCase();
    const calledBy = Object.values(d.api_registry || {})
        .filter((ep) => ep.service_calls?.some((s) => s.toLowerCase().includes(stem)))
        .map(label);
    const distinctCalls = new Set();
    for (const ep of eps) {
        for (const call of meaningfulCalls(ep)) {
            distinctCalls.add(call);
        }
    }
    return JSON.stringify({
        f: file,
        mod: mod ? [mod.id, mod.layer] : null,
        ep: eps.map(label),
        models: models.map((m) => [m.name, m.fields?.length || 0]),
        calls: [...distinctCalls],
        calledBy: calledBy.slice(0, 8),
    });
}
913
+ // ── model: model details + usage ──
914
/**
 * Describe one model from the model registry, plus the endpoints that use it
 * as a request or response schema, as compact JSON.
 *
 * @param {string} inputDir - Specs directory.
 * @param {string} name - Model name; must be an own key of the model registry.
 * @returns {Promise<string>} JSON text; `{ err: "not found", name }` when the
 *   registry has no such model.
 */
export async function queryModel(inputDir, name) {
    const d = await loadCodebaseIntel(inputDir);
    const registry = d.model_registry;
    // Only own keys count: a plain property lookup would also resolve inherited
    // members such as "constructor" or "toString" and report them as models.
    const m = registry && Object.prototype.hasOwnProperty.call(registry, name)
        ? registry[name]
        : undefined;
    if (!m)
        return JSON.stringify({ err: "not found", name });
    const usedBy = Object.values(d.api_registry || {})
        .filter((ep) => ep.request_schema === name || ep.response_schema === name)
        .map((ep) => `${ep.method} ${ep.path}`);
    return JSON.stringify({ name: m.name, fw: m.framework, f: m.file, fields: m.fields, rels: m.relationships, usedBy });
}
924
+ // ── impact: what breaks if you change this file ──
925
/**
 * Estimate what is affected by changing a file, as compact JSON.
 *
 * Collects the endpoints and models found in the file, the endpoints whose
 * request/response schema is one of those models, the modules that import the
 * file's module, and the frontend pages whose API calls contain the static
 * prefix (the part before the first "{") of one of the file's endpoint paths.
 * `risk` is "HIGH" when the four counts sum to more than 5, "MED" when more
 * than 2, otherwise "LOW".
 *
 * @param {string} inputDir - Specs directory.
 * @param {string} target - File path.
 * @returns {Promise<string>} JSON text.
 */
export async function queryImpact(inputDir, target) {
    const d = await loadCodebaseIntel(inputDir);
    const file = normalizeFilePath(target);
    const eps = findEndpointsInFile(d, file);
    const models = findModelsInFile(d, file);
    const modelNames = new Set(models.map((m) => m.name));
    const affectedEps = Object.values(d.api_registry || {}).filter((ep) => (ep.request_schema && modelNames.has(ep.request_schema)) ||
        (ep.response_schema && modelNames.has(ep.response_schema)));
    const mod = findModuleForFile(d, file);
    const depMods = mod ? (d.service_map || []).filter((m) => m.imports?.includes(mod.id)) : [];
    // Keep only usable static prefixes. A missing path would otherwise be
    // searched for as the literal text "undefined", and an empty prefix (path
    // starting with "{") is contained in every call.
    const staticPrefixes = eps
        .map((ep) => (typeof ep.path === "string" ? ep.path.split("{")[0] : ""))
        .filter((prefix) => prefix !== "");
    const affectedPages = (d.frontend_pages || []).filter((p) => p.api_calls?.some((call) => staticPrefixes.some((prefix) => call.includes(prefix))));
    const total = eps.length + affectedEps.length + depMods.length + affectedPages.length;
    return JSON.stringify({
        f: file,
        risk: total > 5 ? "HIGH" : total > 2 ? "MED" : "LOW",
        ep: eps.map((e) => `${e.method} ${e.path}`),
        models: models.map((m) => m.name),
        affectedEp: affectedEps.map((e) => `${e.method} ${e.path}`),
        depMods: depMods.map((m) => m.id),
        pages: affectedPages.map((p) => p.path),
    });
}
939
+ // ── querySearch --format json: categorical search from codebase-intelligence.json ──
940
/**
 * Categorical search over the codebase intelligence data (and, when available,
 * the raw function intelligence).
 *
 * Every category is scored with `scoreQueryIntel`, filtered to positive scores
 * and returned best-first:
 *  - `ep`      endpoints (max 8), as "METHOD path [module]"
 *  - `mod`     models (max 8), as "Name:<fieldCount>f"
 *  - `m`       modules (max 5), as "id:<file_count>files [layer]"
 *  - `exports` exported symbols (max 10), as "symbol [moduleId]"
 *  - `files`   module files (max 8), excluding asset extensions and migrations
 *  - `enums`   enums (max 5), `tasks` background tasks (max 5), `pages` frontend pages (max 5)
 *  - `fns`     functions (max 10), as "name [file:line]"; the key is omitted when empty
 *
 * Function matching is a case-insensitive substring check with fixed scores:
 * exact name 1.0, name 0.7, call 0.5, string/regex literal 0.3, file 0.2, and
 * 0.25 for hits that only come from `literal_index`.
 *
 * @param {string} inputDir - Directory handed to `loadCodebaseIntel` / `loadFuncIntelRaw`.
 * @param {string} query - Search text.
 * @returns {Promise<string>} JSON string with the keys listed above.
 */
export async function querySearch(inputDir, query) {
    const d = await loadCodebaseIntel(inputDir);
    const q = query;
    // Function-intel fields are lowercased before comparison, so the query has
    // to be lowercased too; otherwise any uppercase letter prevents a match.
    const qLower = String(query ?? "").toLowerCase();
    // Scores each candidate, keeps positive scores and returns the `limit` best,
    // highest first. Array sort is stable, so ties keep their registry order.
    const topMatches = (candidates, toFields, limit) => {
        const scored = [];
        for (const item of candidates) {
            const score = scoreQueryIntel(q, toFields(item));
            if (score > 0) {
                scored.push({ item, score });
            }
        }
        scored.sort((a, b) => b.score - a.score);
        return scored.slice(0, limit).map(({ item }) => item);
    };
    const serviceMap = d.service_map || [];

    const eps = topMatches(Object.values(d.api_registry || {}), (ep) => [
        { value: ep.path, weight: 1.0 },
        { value: ep.handler, weight: 0.9 },
        ...(ep.service_calls || []).filter((s) => !isGenericCall(s)).map((s) => ({ value: s, weight: 0.5 })),
    ], 8).map((ep) => `${ep.method} ${ep.path} [${ep.module}]`);

    const models = topMatches(Object.values(d.model_registry || {}), (m) => [
        { value: m.name, weight: 1.0 },
        ...(m.fields || []).map((f) => ({ value: f, weight: 0.6 })),
    ], 8).map((m) => `${m.name}:${m.fields?.length ?? 0}f`); // 0 instead of "undefinedf" when fields is missing

    const mods = topMatches(serviceMap, (m) => [
        { value: m.id, weight: 1.0 },
        ...(m.imports || []).map((i) => ({ value: i, weight: 0.5 })),
    ], 5).map((m) => `${m.id}:${m.file_count}files [${m.layer}]`);

    const exportCandidates = serviceMap.flatMap((m) => (m.exports || []).map((sym) => ({ sym, id: m.id })));
    const exportHits = topMatches(exportCandidates, ({ sym }) => [{ value: sym, weight: 1.0 }], 10)
        .map(({ sym, id }) => `${sym} [${id}]`);

    const ASSET_EXTS = new Set([".svg", ".png", ".jpg", ".jpeg", ".gif", ".webp", ".ico", ".css", ".scss", ".less", ".lock", ".map"]);
    const isMigration = (f) => /alembic\/versions|migrations\/\d/.test(f);
    const fileCandidates = serviceMap
        .flatMap((m) => m.files || [])
        .filter((f) => !ASSET_EXTS.has(path.extname(f).toLowerCase()) && !isMigration(f));
    const files = topMatches(fileCandidates, (f) => [
        { value: path.basename(f), weight: 1.0 },
        { value: f, weight: 0.5 },
    ], 8);

    const enums = topMatches(Object.values(d.enum_registry || {}), (e) => [
        { value: e.name, weight: 1.0 },
        ...(e.values || []).map((v) => ({ value: v, weight: 0.6 })),
    ], 5).map((e) => `${e.name} [${e.file}]`);

    const tasks = topMatches(d.background_tasks || [], (t) => [
        { value: t.name, weight: 1.0 },
        { value: t.kind, weight: 0.6 },
    ], 5).map((t) => `${t.name} [${t.kind}] ${t.file}`);

    const pages = topMatches(d.frontend_pages || [], (p) => [
        { value: p.path, weight: 1.0 },
        { value: p.component, weight: 0.9 },
        { value: p.file ?? "", weight: 0.8 },
        ...(p.api_calls || []).map((c) => ({ value: c, weight: 0.5 })),
        ...(p.components || []).map((c) => ({ value: c, weight: 0.4 })),
    ], 5).map((p) => p.file ? `${p.path} [${p.file}]` : `${p.path} → ${p.component}`);

    const fnHits = [];
    const fi = await loadFuncIntelRaw(inputDir);
    // An empty query is skipped here: `includes("")` is true for every string,
    // which would report every function as a match.
    if (fi && qLower.length > 0) {
        const functions = fi.functions ?? [];
        const scored = [];
        const seen = new Set();
        // First function per "file:name", so literal-index hits resolve with a
        // map lookup instead of a linear scan per hit.
        const fnByUid = new Map();
        for (const fn of functions) {
            const uid = `${fn.file}:${fn.name}`;
            if (!fnByUid.has(uid)) {
                fnByUid.set(uid, fn);
            }
            const nameNorm = (fn.name ?? "").toLowerCase();
            const fileNorm = (fn.file ?? "").toLowerCase();
            const callsNorm = (fn.calls ?? []).map((c) => c.toLowerCase());
            const litsNorm = [...(fn.stringLiterals ?? []), ...(fn.regexPatterns ?? [])].map((l) => l.toLowerCase());
            let score = 0;
            if (nameNorm === qLower) {
                score = 1.0;
            }
            else if (nameNorm.includes(qLower)) {
                score = 0.7;
            }
            else if (callsNorm.some((c) => c.includes(qLower))) {
                score = 0.5;
            }
            else if (litsNorm.some((l) => l.includes(qLower))) {
                score = 0.3;
            }
            else if (fileNorm.includes(qLower)) {
                score = 0.2;
            }
            if (score > 0) {
                scored.push({ fn, score });
                seen.add(uid);
            }
        }
        for (const [key, hits] of Object.entries(fi.literal_index ?? {})) {
            // Both sides lowercased so the match does not depend on how the
            // index keys were cased when they were written.
            if (!key.toLowerCase().includes(qLower)) {
                continue;
            }
            for (const h of hits ?? []) {
                const uid = `${h.file}:${h.function}`;
                if (seen.has(uid)) {
                    continue;
                }
                seen.add(uid);
                // Fall back to a minimal record when the index names a function
                // that is not present in `functions`.
                const fn = fnByUid.get(uid) ?? { name: h.function, file: h.file, lines: [h.line, h.line] };
                scored.push({ fn, score: 0.25 });
            }
        }
        scored.sort((a, b) => b.score - a.score);
        for (const { fn } of scored.slice(0, 10)) {
            fnHits.push(`${fn.name} [${fn.file}:${fn.lines?.[0]}]`);
        }
    }
    return JSON.stringify({
        ep: eps, mod: models, m: mods,
        exports: exportHits,
        files,
        enums, tasks, pages,
        ...(fnHits.length > 0 ? { fns: fnHits } : {}),
    });
}