@toolbaux/guardian 0.1.17 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,95 @@ function findChildren(node, type) {
10
10
  }
11
11
  return results;
12
12
  }
13
+ // ── Function-level intelligence helpers ──────────────────────────────────
14
+ /** Names of Python `re.*` functions whose first argument is a regex pattern string; consulted by collectPythonBodyIntel. */
15
+ const RE_FUNCS = new Set(["compile", "sub", "subn", "match", "search", "fullmatch", "findall", "finditer", "split"]);
16
/**
 * Collect string literals, regex patterns, and call targets from a Python
 * function body subtree.
 *
 * Every named descendant of `body` is visited in pre-order, so content of
 * nested functions is included as well. A string is reported as a regex
 * pattern only when it is the FIRST argument of a `re.<func>(...)` call whose
 * function name is listed in RE_FUNCS; other string arguments of such calls
 * (for example the replacement passed to `re.sub`) are plain string literals.
 *
 * @param {object} body - Tree-sitter node to scan.
 * @param {(node: object) => string} getText - Returns the source text of a node.
 * @returns {{stringLiterals: string[], regexPatterns: string[], calls: string[]}}
 *   De-duplicated values in first-seen order.
 */
function collectPythonBodyIntel(body, getText) {
  const strings = new Set();
  const regexes = new Set();
  const calls = new Set();

  // Strip leading r/b/f/u prefix and the surrounding quote characters.
  const unquote = (raw) =>
    raw
      .replace(/^[rRbBuUfF]*/u, "")
      .replace(/^'''|^"""|^'|^"/u, "")
      .replace(/'''$|"""$|'$|"$/u, "");

  // True when `node` is the first real (non-comment) argument of `argList`.
  // Falls back to comparing source offsets because node wrappers are not
  // guaranteed to be identical objects across accesses.
  const isFirstArgument = (argList, node) => {
    const first = argList.namedChildren.find((c) => c.type !== "comment");
    if (!first) {
      return false;
    }
    if (first === node) {
      return true;
    }
    return (
      typeof node.startIndex === "number" &&
      first.startIndex === node.startIndex &&
      first.endIndex === node.endIndex
    );
  };

  // True for calls spelled re.sub, re.compile, re.match, …
  const isRegexCall = (callNode) => {
    const funcNode = callNode.childForFieldName("function");
    if (!funcNode) {
      return false;
    }
    const fnText = getText(funcNode);
    const simple = fnText.split(".").pop() ?? "";
    return fnText.startsWith("re.") && RE_FUNCS.has(simple);
  };

  // Explicit stack instead of recursion so very deep trees cannot exhaust the
  // call stack; children are pushed in reverse to keep pre-order.
  const pending = [body];
  while (pending.length > 0) {
    const n = pending.pop();
    if (n.type === "string") {
      // Python string: "text", 'text', """text""", r"pattern"
      const literal = unquote(getText(n));
      if (literal.length > 0 && literal.length < 300) {
        strings.add(literal);
        const argList = n.parent;
        const callNode = argList?.type === "argument_list" ? argList.parent : null;
        if (callNode?.type === "call" && isFirstArgument(argList, n) && isRegexCall(callNode)) {
          regexes.add(literal);
        }
      }
    } else if (n.type === "call") {
      const funcNode = n.childForFieldName("function");
      if (funcNode) {
        calls.add(getText(funcNode).trim());
      }
    }
    const kids = n.namedChildren;
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i]);
    }
  }

  return {
    stringLiterals: [...strings],
    regexPatterns: [...regexes],
    calls: [...calls],
  };
}
66
/**
 * Build one FunctionRecord per Python `function_definition` found under `node`.
 *
 * The whole subtree is scanned in pre-order, so top-level functions, class
 * methods and nested functions each produce their own record. Definitions
 * lacking a name or body node are ignored.
 *
 * @param {string} file - File path stored on each record and used in its id.
 * @param {string} source - Full source text the node offsets refer to.
 * @param {object} node - Tree-sitter node to start from.
 * @returns {object[]} Records in source order.
 */
function extractPyFunctions(file, source, node) {
  const textOf = (n) => source.substring(n.startIndex, n.endIndex);
  const found = [];
  const pending = [node];

  while (pending.length > 0) {
    const current = pending.pop();

    if (current.type === "function_definition") {
      const nameNode = current.childForFieldName("name");
      const bodyNode = current.childForFieldName("body");
      if (nameNode && bodyNode) {
        const name = textOf(nameNode);
        // Text from the start of the definition up to its name; an "async"
        // keyword in that span marks the function as async.
        const header = source.substring(current.startIndex, nameNode.startIndex);
        const isAsync = /\basync\b/.test(header);
        const { calls, stringLiterals, regexPatterns } = collectPythonBodyIntel(bodyNode, textOf);
        const firstLine = current.startPosition.row + 1;
        found.push({
          id: `${file}#${name}:${firstLine}`,
          name,
          file,
          lines: [firstLine, current.endPosition.row + 1],
          calls,
          stringLiterals,
          regexPatterns,
          isAsync,
          language: "python",
        });
      }
    }

    // Push children right-to-left so they are popped left-to-right.
    const kids = current.namedChildren;
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i]);
    }
  }

  return found;
}
13
102
  export const PythonAdapter = {
14
103
  name: "python",
15
104
  language: Python,
@@ -178,6 +267,7 @@ export const PythonAdapter = {
178
267
  relationships
179
268
  });
180
269
  }
181
- return { endpoints, models, components, tests };
270
+ const functions = extractPyFunctions(file, source, root);
271
+ return { endpoints, models, components, tests, functions };
182
272
  }
183
273
  };
@@ -1,10 +1,37 @@
1
1
  import Parser from "tree-sitter";
2
2
  export function runAdapter(adapter, file, source) {
3
+ // Text-based adapters (e.g. Lean4) set language to null and rely entirely on
4
+ // their extract() implementation — no tree-sitter parse step needed.
5
+ if (!adapter.language) {
6
+ if (adapter.extract) {
7
+ const result = adapter.extract(file, source, null);
8
+ return {
9
+ endpoints: result.endpoints,
10
+ models: result.models,
11
+ components: result.components,
12
+ tests: result.tests,
13
+ functions: result.functions ?? [],
14
+ };
15
+ }
16
+ return { endpoints: [], models: [], components: [], tests: [], functions: [] };
17
+ }
18
+ // tree-sitter native binding throws "Invalid argument" for very large files.
19
+ // Skip files over 1 MB to avoid silent crashes; they are rare in practice.
20
+ if (source.length > 1_000_000) {
21
+ return { endpoints: [], models: [], components: [], tests: [], functions: [] };
22
+ }
3
23
  const parser = new Parser();
4
24
  parser.setLanguage(adapter.language);
5
25
  const tree = parser.parse(source);
6
26
  if (adapter.extract) {
7
- return adapter.extract(file, source, tree.rootNode);
27
+ const result = adapter.extract(file, source, tree.rootNode);
28
+ return {
29
+ endpoints: result.endpoints,
30
+ models: result.models,
31
+ components: result.components,
32
+ tests: result.tests,
33
+ functions: result.functions ?? [],
34
+ };
8
35
  }
9
36
  const endpoints = [];
10
37
  const models = [];
@@ -65,5 +92,5 @@ export function runAdapter(adapter, file, source) {
65
92
  }
66
93
  }
67
94
  }
68
- return { endpoints, models, components, tests };
95
+ return { endpoints, models, components, tests, functions: [] };
69
96
  }
@@ -11,6 +11,115 @@ function findChildren(node, type) {
11
11
  }
12
12
  return results;
13
13
  }
14
+ // ── Function-level intelligence helpers ──────────────────────────────────
15
/**
 * Yield `node` and every named descendant depth-first, in pre-order
 * (a node before its children, children left to right).
 *
 * Uses an explicit stack rather than recursive `yield*` delegation so that
 * very deep trees cannot exhaust the call stack.
 *
 * @param {object} node - Root node; each node must expose `namedChildren`.
 * @returns {Generator<object>} The visited nodes.
 */
function* walkAll(node) {
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    yield current;
    // Push right-to-left so the leftmost child is visited next.
    const kids = current.namedChildren;
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i]);
    }
  }
}
21
/**
 * Collect string literals, regex patterns, and call targets from a subtree.
 *
 * Every node produced by walkAll(body) is inspected, which includes the
 * contents of functions nested inside `body`.
 *
 * - `string` / `template_string`: the source text without its delimiters is
 *   recorded (escape sequences and `${...}` parts are kept as written) when it
 *   is 1–299 characters long.
 * - `regex`: the pattern between the slashes is recorded.
 * - `call_expression`: the first line of the callee text is recorded.
 *
 * @param {object} body - Tree-sitter node to scan.
 * @param {(node: object) => string} getText - Returns the source text of a node.
 * @returns {{stringLiterals: string[], regexPatterns: string[], calls: string[]}}
 *   De-duplicated values in first-seen order.
 */
function collectBodyIntel(body, getText) {
  const strings = new Set();
  const regexes = new Set();
  const calls = new Set();

  const addString = (value) => {
    if (value.length > 0 && value.length < 300) {
      strings.add(value);
    }
  };

  for (const n of walkAll(body)) {
    switch (n.type) {
      case "string":
      case "template_string":
        // Drop the opening and closing quote/backtick. Taking the whole span
        // keeps literals containing escape sequences intact instead of
        // stopping at the first fragment.
        addString(getText(n).slice(1, -1));
        break;
      case "regex": {
        // /pattern/flags → extract pattern (d and v are valid flags too)
        const m = getText(n).match(/^\/(.+)\/[dgimsuvy]*$/s);
        if (m) {
          regexes.add(m[1]);
        }
        break;
      }
      case "call_expression": {
        const fn = n.childForFieldName("function");
        if (fn) {
          calls.add(getText(fn).split("\n")[0].trim());
        }
        break;
      }
      default:
        break;
    }
  }

  return {
    stringLiterals: [...strings],
    regexPatterns: [...regexes],
    calls: [...calls],
  };
}
65
/**
 * Extract FunctionRecord entries from a TypeScript/TSX AST subtree.
 *
 * Recorded shapes:
 * - function declarations (including generator declarations),
 * - method definitions,
 * - variable declarators whose value is an arrow function or a function /
 *   generator expression (`const foo = () => {}`, `const foo = function () {}`).
 *
 * Function expressions are matched under both node type names `function` and
 * `function_expression`, since the name differs between grammar versions.
 * The whole subtree is scanned in pre-order, so nested functions get their own
 * records. Candidates without a name or a body are skipped.
 *
 * @param {string} file - File path stored on each record and used in its id.
 * @param {string} source - Full source text the node offsets refer to.
 * @param {object} node - Tree-sitter node to start from.
 * @returns {object[]} Records in source order.
 */
function extractTsFunctions(file, source, node) {
  const DECLARATION_TYPES = new Set([
    "function_declaration",
    "generator_function_declaration",
    "method_definition",
  ]);
  const FUNCTION_VALUE_TYPES = new Set([
    "arrow_function",
    "function",
    "function_expression",
    "generator_function",
  ]);
  const getText = (n) => source.substring(n.startIndex, n.endIndex);
  const hasAsyncKeyword = (n) => n.children.some((c) => c.type === "async");

  // Returns { nameNode, bodyNode, isAsync } for function-like nodes, else null.
  const describe = (n) => {
    if (DECLARATION_TYPES.has(n.type)) {
      return {
        nameNode: n.childForFieldName("name"),
        bodyNode: n.childForFieldName("body"),
        isAsync: hasAsyncKeyword(n),
      };
    }
    if (n.type === "variable_declarator") {
      const valN = n.childForFieldName("value");
      if (valN && FUNCTION_VALUE_TYPES.has(valN.type)) {
        return {
          nameNode: n.childForFieldName("name"),
          // Fall back to the function node itself when no body field exists.
          bodyNode: valN.childForFieldName("body") ?? valN,
          isAsync: hasAsyncKeyword(valN),
        };
      }
    }
    return null;
  };

  const records = [];
  const pending = [node];
  while (pending.length > 0) {
    const n = pending.pop();
    const info = describe(n);
    const funcName = info?.nameNode ? getText(info.nameNode) : null;
    if (funcName && info.bodyNode) {
      const intel = collectBodyIntel(info.bodyNode, getText);
      records.push({
        id: `${file}#${funcName}:${n.startPosition.row + 1}`,
        name: funcName,
        file,
        lines: [n.startPosition.row + 1, n.endPosition.row + 1],
        calls: intel.calls,
        stringLiterals: intel.stringLiterals,
        regexPatterns: intel.regexPatterns,
        isAsync: info.isAsync,
        language: "typescript",
      });
    }
    // Visit all children (nested functions become their own records); push
    // right-to-left so they are processed left-to-right.
    const kids = n.namedChildren;
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i]);
    }
  }
  return records;
}
14
123
  export const TypeScriptAdapter = {
15
124
  name: "typescript",
16
125
  language: TypeScript.tsx, // We use the TSX grammar to capture both TS and TSX seamlessly
@@ -174,6 +283,7 @@ export const TypeScriptAdapter = {
174
283
  }
175
284
  }
176
285
  }
177
- return { endpoints, models, components, tests };
286
+ const functions = extractTsFunctions(file, source, root);
287
+ return { endpoints, models, components, tests, functions };
178
288
  }
179
289
  };
package/dist/cli.js CHANGED
@@ -350,7 +350,21 @@ program
350
350
  quiet: options.quiet,
351
351
  });
352
352
  });
353
- program.parseAsync().catch((error) => {
353
+ program
354
+ .parseAsync()
355
+ .then(() => {
356
+ // Force exit after one-shot commands complete.
357
+ // Tree-sitter native bindings keep a libuv ref alive, preventing natural
358
+ // process exit. mcp-serve is excluded: it sets up readline and returns
359
+ // immediately (before any messages are processed), so calling process.exit()
360
+ // here would kill it before it processes any input. mcp-serve manages its
361
+ // own lifecycle via process.exit(0) inside rl.on("close").
362
+ const subCommand = process.argv[2];
363
+ if (subCommand !== "mcp-serve") {
364
+ process.exit(process.exitCode ?? 0);
365
+ }
366
+ })
367
+ .catch((error) => {
354
368
  console.error(error);
355
- process.exitCode = 1;
369
+ process.exit(1);
356
370
  });
@@ -98,6 +98,26 @@ async function loadIntel() {
98
98
  }
99
99
  return intel;
100
100
  }
101
// ── Function intelligence loader ──
/** Most recently parsed function-intelligence data, or null if nothing has loaded yet. */
let funcIntel = null;
/** Path of the function-intelligence JSON file; empty until it is configured. */
let funcIntelPath = "";
/** Date.now() value captured at the last successful load. */
let funcIntelLoadTime = 0;
/**
 * Load the function-intelligence JSON file, re-reading it at most once every
 * five seconds after a successful load.
 *
 * A missing file is expected (it may not have been generated yet) and is
 * ignored silently. Any other failure — unreadable file, malformed JSON — is
 * reported on stderr and the previously loaded value, if any, is kept.
 *
 * @returns {Promise<object|null>} The parsed data, or null when no path is
 *   configured or nothing has been loaded successfully so far.
 */
async function loadFuncIntel() {
  if (!funcIntelPath) {
    return null;
  }
  const now = Date.now();
  if (funcIntel && now - funcIntelLoadTime < 5000) {
    return funcIntel;
  }
  try {
    const raw = await fs.readFile(funcIntelPath, "utf8");
    funcIntel = JSON.parse(raw);
    funcIntelLoadTime = now;
  } catch (error) {
    // File may not exist yet — not an error. Anything else is worth surfacing,
    // but must not take the caller down; stderr keeps stdout clean.
    if (error?.code !== "ENOENT") {
      process.stderr.write(`Guardian: could not load ${funcIntelPath}: ${error?.message ?? error}\n`);
    }
  }
  return funcIntel;
}
101
121
  // ── Helpers ──
102
122
  const SKIP_SERVICES = new Set(["str", "dict", "int", "len", "float", "max", "join", "getattr", "lower", "open", "params.append", "updates.append"]);
103
123
  function compact(obj) {
@@ -291,11 +311,38 @@ async function search(args) {
291
311
  // Frontend pages: match path or component
292
312
  const pages = (d.frontend_pages || []).filter((p) => p.path?.toLowerCase().includes(q) || p.component?.toLowerCase().includes(q) ||
293
313
  p.api_calls?.some((c) => c.toLowerCase().includes(q))).slice(0, 5).map((p) => `${p.path} → ${p.component}`);
314
+ // Functions: search literal_index (tactic:simp, sorry, string patterns) + function names
315
+ const fnHits = [];
316
+ const fi = await loadFuncIntel();
317
+ if (fi) {
318
+ // Literal index: exact key match + partial key match
319
+ const litIndex = fi.literal_index ?? {};
320
+ for (const [key, hits] of Object.entries(litIndex)) {
321
+ if (key.includes(q)) {
322
+ for (const h of hits.slice(0, 3)) {
323
+ fnHits.push(`${h.function} [${h.file}:${h.line}] (lit:${key})`);
324
+ }
325
+ }
326
+ if (fnHits.length >= 10)
327
+ break;
328
+ }
329
+ // Function names: match by name
330
+ if (fnHits.length < 10) {
331
+ for (const fn of (fi.functions ?? [])) {
332
+ if (fn.name?.toLowerCase().includes(q)) {
333
+ fnHits.push(`${fn.name} [${fn.file}:${fn.lines?.[0]}]`);
334
+ if (fnHits.length >= 10)
335
+ break;
336
+ }
337
+ }
338
+ }
339
+ }
294
340
  return compact({
295
341
  ep: eps, mod: models, m: mods,
296
342
  exports: exports.slice(0, 10),
297
343
  files: files.slice(0, 8),
298
344
  enums, tasks, pages,
345
+ ...(fnHits.length > 0 ? { fns: fnHits } : {}),
299
346
  });
300
347
  }
301
348
  async function model(args) {
@@ -458,8 +505,10 @@ export async function runMcpServe(options) {
458
505
  const specsDir = path.resolve(options.specs);
459
506
  const quiet = options.quiet ?? false;
460
507
  intelPath = path.join(specsDir, "machine", "codebase-intelligence.json");
508
+ funcIntelPath = path.join(specsDir, "machine", "function-intelligence.json");
461
509
  // Pre-load intelligence
462
510
  await loadIntel();
511
+ await loadFuncIntel();
463
512
  // Log to stderr (stdout is for MCP protocol)
464
513
  if (!quiet) {
465
514
  process.stderr.write(`Guardian MCP server started. Intelligence: ${intelPath}\n`);
@@ -2,19 +2,22 @@ import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
3
  import yaml from "js-yaml";
4
4
  import { loadHeatmap } from "../extract/compress.js";
5
+ import { loadFunctionIntelligence, } from "../extract/function-intel.js";
5
6
  import { resolveMachineInputDir } from "../output-layout.js";
6
7
  import { DEFAULT_SPECS_DIR } from "../config.js";
7
8
  export async function runSearch(options) {
8
9
  const inputDir = await resolveMachineInputDir(options.input || DEFAULT_SPECS_DIR);
9
10
  const { architecture, ux } = await loadSnapshots(inputDir);
10
11
  const heatmap = await loadHeatmap(inputDir);
12
+ const funcIntel = await loadFunctionIntelligence(inputDir);
11
13
  const types = normalizeTypes(options.types);
12
14
  const matches = searchSnapshots({
13
15
  architecture,
14
16
  ux,
15
17
  query: options.query,
16
18
  types,
17
- heatmap
19
+ heatmap,
20
+ funcIntel,
18
21
  });
19
22
  const content = renderSearchMarkdown(options.query, matches);
20
23
  if (options.output) {
@@ -66,7 +69,7 @@ function normalizeTypes(types) {
66
69
  }
67
70
  return normalized.size > 0
68
71
  ? normalized
69
- : new Set(["models", "endpoints", "components", "modules", "tasks"]);
72
+ : new Set(["models", "endpoints", "components", "modules", "tasks", "functions"]);
70
73
  }
71
74
  function tokenize(value) {
72
75
  return value
@@ -110,7 +113,7 @@ function scoreItem(queryTokens, item) {
110
113
  return Math.min(1, total / queryTokens.length);
111
114
  }
112
115
  function searchSnapshots(params) {
113
- const { architecture, ux, query, types, heatmap } = params;
116
+ const { architecture, ux, query, types, heatmap, funcIntel } = params;
114
117
  const queryTokens = tokenize(query);
115
118
  const matches = [];
116
119
  const pageUsage = buildComponentPageUsage(ux);
@@ -238,6 +241,65 @@ function searchSnapshots(params) {
238
241
  });
239
242
  }
240
243
  }
244
+ if (types.has("functions") && funcIntel) {
245
+ const queryTokens = tokenize(query);
246
+ // 1. Name match — function / theorem name contains a query token
247
+ for (const fn of funcIntel.functions) {
248
+ const score = scoreItem(queryTokens, {
249
+ name: fn.name,
250
+ file: fn.file,
251
+ text: [...fn.stringLiterals, ...fn.regexPatterns, ...fn.calls, fn.language],
252
+ });
253
+ if (score <= 0)
254
+ continue;
255
+ const lineRange = `${fn.lines[0]}–${fn.lines[1]}`;
256
+ const detail = [];
257
+ if (fn.stringLiterals.length > 0) {
258
+ detail.push(`Literals: ${formatList(fn.stringLiterals.slice(0, 3).map((l) => `"${l.slice(0, 60)}"`), 3)}`);
259
+ }
260
+ if (fn.regexPatterns.length > 0) {
261
+ detail.push(`Patterns: ${formatList(fn.regexPatterns.slice(0, 3).map((p) => `/${p.slice(0, 60)}/`), 3)}`);
262
+ }
263
+ if (fn.calls.length > 0) {
264
+ detail.push(`Calls: ${formatList(fn.calls, 5)}`);
265
+ }
266
+ matches.push({
267
+ type: "functions",
268
+ name: `${fn.name} (${fn.language})`,
269
+ score,
270
+ markdown: [
271
+ `**${fn.name}** · ${fn.file}:${lineRange} · ${fn.language}${fn.isAsync ? " · async" : ""}`,
272
+ ...detail,
273
+ ],
274
+ });
275
+ }
276
+ // 2. Literal index match — query token appears in a function's string/regex literals
277
+ // (additive: surfaces functions whose body contains the queried literal even if
278
+ // the function name itself doesn't match)
279
+ for (const tok of queryTokens) {
280
+ const hits = funcIntel.literal_index[tok.toLowerCase()];
281
+ if (!hits)
282
+ continue;
283
+ for (const hit of hits) {
284
+ // Skip if we already emitted this function via name match above
285
+ if (matches.some((m) => m.type === "functions" && m.name.startsWith(hit.function + " ("))) {
286
+ continue;
287
+ }
288
+ const fn = funcIntel.functions.find((f) => f.file === hit.file && f.name === hit.function);
289
+ if (!fn)
290
+ continue;
291
+ matches.push({
292
+ type: "functions",
293
+ name: `${fn.name} (${fn.language})`,
294
+ score: 0.6,
295
+ markdown: [
296
+ `**${fn.name}** · ${fn.file}:${fn.lines[0]}–${fn.lines[1]} · ${fn.language}`,
297
+ `Matched literal/pattern containing "${tok}"`,
298
+ ],
299
+ });
300
+ }
301
+ }
302
+ }
241
303
  return matches.sort((a, b) => b.score - a.score || a.name.localeCompare(b.name));
242
304
  }
243
305
  function buildComponentPageUsage(ux) {
@@ -290,7 +352,8 @@ function renderSearchMarkdown(query, matches) {
290
352
  ["endpoints", "Endpoints"],
291
353
  ["components", "Components"],
292
354
  ["modules", "Modules"],
293
- ["tasks", "Tasks"]
355
+ ["tasks", "Tasks"],
356
+ ["functions", "Functions"],
294
357
  ];
295
358
  const lines = [];
296
359
  lines.push(`# Search: "${query}" - ${matches.length} matches`);