@toolbaux/guardian 0.1.17 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/csharp-adapter.js +76 -1
- package/dist/adapters/go-adapter.js +69 -1
- package/dist/adapters/index.js +3 -2
- package/dist/adapters/java-adapter.js +73 -1
- package/dist/adapters/lean4-adapter.js +358 -0
- package/dist/adapters/python-adapter.js +91 -1
- package/dist/adapters/runner.js +29 -2
- package/dist/adapters/typescript-adapter.js +111 -1
- package/dist/cli.js +16 -2
- package/dist/commands/search.js +67 -4
- package/dist/extract/function-intel.js +209 -0
- package/dist/extract/index.js +12 -0
- package/package.json +1 -1
|
@@ -3,6 +3,80 @@ import Parser from "tree-sitter";
|
|
|
3
3
|
/** Return `node.text`, or an empty string when no node was supplied. */
function text(node) {
    if (!node) {
        return "";
    }
    return node.text;
}
|
|
6
|
+
// ── Function-level intelligence ───────────────────────────────────────────
|
|
7
|
+
const CS_FUNC_QUERY = `
|
|
8
|
+
(method_declaration name: (identifier) @name) @fn
|
|
9
|
+
(constructor_declaration name: (identifier) @name) @fn
|
|
10
|
+
`;
|
|
11
|
+
/**
 * Visit `body` and every named descendant in pre-order (parent first,
 * children left to right) using an explicit stack instead of recursion.
 *
 * @param body    root node; must expose `namedChildCount` and `namedChild(i)`
 * @param visitor callback invoked once per visited node
 */
function walkBody(body, visitor) {
    const pending = [body];
    while (pending.length !== 0) {
        const current = pending.pop();
        visitor(current);
        // Push children last-to-first so the first child is popped next.
        const total = current.namedChildCount;
        for (let back = 1; back <= total; back++) {
            const child = current.namedChild(total - back);
            if (child) {
                pending.push(child);
            }
        }
    }
}
|
|
23
|
+
/**
 * Collect string literals and invocation targets from a C# body subtree.
 *
 * - `string_literal` / `verbatim_string_literal`: the leading `"` or `@"` and
 *   the trailing `"` are stripped.
 * - `interpolated_string_expression`: the leading `$"`, `$@"` or `@$"` and the
 *   trailing `"` are stripped (the verbatim-interpolated prefixes were
 *   previously left inside the stored literal).
 * - `invocation_expression`: the first line of the `function` field's text,
 *   trimmed, is recorded as a call target.
 *
 * Only literals with length 1..299 are kept. `regexPatterns` is always empty
 * for C#.
 *
 * @param body root node of the subtree to scan
 * @returns {{ stringLiterals: string[], regexPatterns: string[], calls: string[] }}
 */
function collectCSharpBodyIntel(body) {
    const strings = new Set();
    const calls = new Set();
    // Shared length filter for every kind of literal.
    const keep = (value) => {
        if (value.length > 0 && value.length < 300) {
            strings.add(value);
        }
    };
    walkBody(body, (n) => {
        if (n.type === "string_literal" || n.type === "verbatim_string_literal") {
            keep(n.text.replace(/^@?"/, "").replace(/"$/, ""));
        }
        else if (n.type === "interpolated_string_expression") {
            // `$@"` and `@$"` are both valid verbatim-interpolated prefixes.
            keep(n.text.replace(/^(?:\$@|@\$|\$)"/, "").replace(/"$/, ""));
        }
        else if (n.type === "invocation_expression") {
            const fn = n.childForFieldName("function");
            if (fn) {
                calls.add(fn.text.split("\n")[0].trim());
            }
        }
    });
    return { stringLiterals: [...strings], regexPatterns: [], calls: [...calls] };
}
|
|
45
|
+
/**
 * Build one function record per C# method or constructor matched by
 * CS_FUNC_QUERY.
 *
 * @param language tree-sitter language passed to `Parser.Query`
 * @param file     file path, used in the record id and `file` field
 * @param root     node the query is executed against
 * @returns array of records: id, name, file, 1-based [start, end] lines,
 *          calls, stringLiterals, regexPatterns, isAsync, language "csharp"
 */
function extractCSharpFunctions(language, file, root) {
    const captured = (match, label) => match.captures.find((c) => c.name === label)?.node;
    const records = [];
    const query = new Parser.Query(language, CS_FUNC_QUERY);
    for (const match of query.matches(root)) {
        const fnNode = captured(match, "fn");
        const nameNode = captured(match, "name");
        if (!(fnNode && nameNode)) {
            continue;
        }
        const funcName = text(nameNode);
        // Only direct children are inspected for an `async` modifier.
        const isAsync = fnNode.children.some((child) => child.type === "modifier" && child.text === "async");
        const bodyNode = fnNode.childForFieldName("body");
        let intel = { stringLiterals: [], regexPatterns: [], calls: [] };
        if (bodyNode) {
            intel = collectCSharpBodyIntel(bodyNode);
        }
        const firstLine = fnNode.startPosition.row + 1;
        const lastLine = fnNode.endPosition.row + 1;
        records.push({
            id: `${file}#${funcName}:${firstLine}`,
            name: funcName,
            file,
            lines: [firstLine, lastLine],
            calls: intel.calls,
            stringLiterals: intel.stringLiterals,
            regexPatterns: intel.regexPatterns,
            isAsync,
            language: "csharp",
        });
    }
    return records;
}
|
|
6
80
|
export const CSharpAdapter = {
|
|
7
81
|
name: "C# ASP.NET Core Adapter",
|
|
8
82
|
language: CSharp,
|
|
@@ -144,6 +218,7 @@ export const CSharpAdapter = {
|
|
|
144
218
|
}
|
|
145
219
|
}
|
|
146
220
|
}
|
|
147
|
-
|
|
221
|
+
const functions = extractCSharpFunctions(this.language, file, root);
|
|
222
|
+
return { endpoints, models, components, tests, functions };
|
|
148
223
|
}
|
|
149
224
|
};
|
|
@@ -5,6 +5,73 @@ const Go = require("tree-sitter-go");
|
|
|
5
5
|
/** Text of a syntax node; a missing (falsy) node yields "". */
function text(node) {
    if (node) {
        return node.text;
    }
    return "";
}
|
|
8
|
+
// ── Function-level intelligence ───────────────────────────────────────────
|
|
9
|
+
// Tree-sitter query — runs in C, fast regardless of file size.
|
|
10
|
+
const GO_FUNC_QUERY = `
|
|
11
|
+
(function_declaration name: (identifier) @name) @fn
|
|
12
|
+
(method_declaration name: (field_identifier) @name) @fn
|
|
13
|
+
`;
|
|
14
|
+
/**
 * Iteratively walk the subtree rooted at `body` (explicit stack, no
 * recursion), calling `visitor` on each node in pre-order with children
 * visited left to right.
 */
function walkBody(body, visitor) {
    const pending = [body];
    while (pending.length !== 0) {
        const current = pending.pop();
        visitor(current);
        // Reverse push order keeps the leftmost child on top of the stack.
        const total = current.namedChildCount;
        for (let back = 1; back <= total; back++) {
            const child = current.namedChild(total - back);
            if (child) {
                pending.push(child);
            }
        }
    }
}
|
|
27
|
+
/**
 * Scan a Go function body for string literals, call targets and goroutine
 * launches.
 *
 * - `interpreted_string_literal` / `raw_string_literal`: the first and last
 *   character (the delimiters) are dropped; values of length 1..299 are kept.
 * - `call_expression`: the first line of the `function` field's text, trimmed.
 * - `go_statement`: sets `isAsync` to true.
 *
 * @returns {{ stringLiterals: string[], regexPatterns: string[], calls: string[], isAsync: boolean }}
 */
function collectGoBodyIntel(body) {
    const literalSet = new Set();
    const callSet = new Set();
    let spawnsGoroutine = false;
    walkBody(body, (node) => {
        switch (node.type) {
            case "interpreted_string_literal":
            case "raw_string_literal": {
                const inner = node.text.slice(1, -1);
                if (inner.length > 0 && inner.length < 300) {
                    literalSet.add(inner);
                }
                break;
            }
            case "call_expression": {
                const callee = node.childForFieldName("function");
                if (callee) {
                    callSet.add(callee.text.split("\n")[0].trim());
                }
                break;
            }
            case "go_statement":
                spawnsGoroutine = true;
                break;
            default:
                break;
        }
    });
    return {
        stringLiterals: Array.from(literalSet),
        regexPatterns: [],
        calls: Array.from(callSet),
        isAsync: spawnsGoroutine,
    };
}
|
|
48
|
+
/**
 * Build one function record per Go function or method declaration matched by
 * GO_FUNC_QUERY.
 *
 * @param language tree-sitter language passed to `Parser.Query`
 * @param file     file path, used in the record id and `file` field
 * @param root     node the query is executed against
 * @returns array of records: id, name, file, 1-based [start, end] lines,
 *          calls, stringLiterals, regexPatterns, isAsync, language "go"
 */
function extractGoFunctions(language, file, root) {
    const captured = (match, label) => match.captures.find((c) => c.name === label)?.node;
    const records = [];
    const query = new Parser.Query(language, GO_FUNC_QUERY);
    for (const match of query.matches(root)) {
        const fnNode = captured(match, "fn");
        const nameNode = captured(match, "name");
        if (!(fnNode && nameNode)) {
            continue;
        }
        const funcName = nameNode.text;
        const bodyNode = fnNode.childForFieldName("body");
        // Declarations without a body get empty intel.
        let intel = { stringLiterals: [], regexPatterns: [], calls: [], isAsync: false };
        if (bodyNode) {
            intel = collectGoBodyIntel(bodyNode);
        }
        const firstLine = fnNode.startPosition.row + 1;
        const lastLine = fnNode.endPosition.row + 1;
        records.push({
            id: `${file}#${funcName}:${firstLine}`,
            name: funcName,
            file,
            lines: [firstLine, lastLine],
            calls: intel.calls,
            stringLiterals: intel.stringLiterals,
            regexPatterns: intel.regexPatterns,
            isAsync: intel.isAsync,
            language: "go",
        });
    }
    return records;
}
|
|
8
75
|
export const GoAdapter = {
|
|
9
76
|
name: "Go Gin Adapter",
|
|
10
77
|
language: Go,
|
|
@@ -91,6 +158,7 @@ export const GoAdapter = {
|
|
|
91
158
|
});
|
|
92
159
|
}
|
|
93
160
|
}
|
|
94
|
-
|
|
161
|
+
const functions = extractGoFunctions(this.language, file, root);
|
|
162
|
+
return { endpoints, models, components, tests, functions };
|
|
95
163
|
}
|
|
96
164
|
};
|
package/dist/adapters/index.js
CHANGED
|
@@ -3,9 +3,10 @@ import { TypeScriptAdapter } from "./typescript-adapter.js";
|
|
|
3
3
|
import { JavaAdapter } from "./java-adapter.js";
|
|
4
4
|
import { GoAdapter } from "./go-adapter.js";
|
|
5
5
|
import { CSharpAdapter } from "./csharp-adapter.js";
|
|
6
|
+
import { Lean4Adapter } from "./lean4-adapter.js";
|
|
6
7
|
import { runAdapter } from "./runner.js";
|
|
7
|
-
export { PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, runAdapter };
|
|
8
|
-
export const ADAPTERS = [PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter];
|
|
8
|
+
export { PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, Lean4Adapter, runAdapter };
|
|
9
|
+
export const ADAPTERS = [PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, Lean4Adapter];
|
|
9
10
|
export function getAdapterForFile(file) {
|
|
10
11
|
for (const adapter of ADAPTERS) {
|
|
11
12
|
if (adapter.fileExtensions.some(ext => file.endsWith(ext))) {
|
|
@@ -3,6 +3,77 @@ import Parser from "tree-sitter";
|
|
|
3
3
|
/** Safe accessor for a node's text: "" for a falsy node, `node.text` otherwise. */
function text(node) {
    if (!node) {
        return "";
    }
    return node.text;
}
|
|
6
|
+
// ── Function-level intelligence ───────────────────────────────────────────
|
|
7
|
+
const JAVA_FUNC_QUERY = `
|
|
8
|
+
(method_declaration name: (identifier) @name) @fn
|
|
9
|
+
(constructor_declaration name: (identifier) @name) @fn
|
|
10
|
+
`;
|
|
11
|
+
/**
 * Pre-order traversal of `body` and its named descendants using an explicit
 * stack. `visitor` receives each node once; siblings are visited left to right.
 */
function walkBody(body, visitor) {
    const pending = [body];
    while (pending.length !== 0) {
        const current = pending.pop();
        visitor(current);
        // Children go on the stack right-to-left so they come off left-to-right.
        const total = current.namedChildCount;
        for (let back = 1; back <= total; back++) {
            const child = current.namedChild(total - back);
            if (child) {
                pending.push(child);
            }
        }
    }
}
|
|
23
|
+
/**
 * Scan a Java method/constructor body for string literals and method calls.
 *
 * - `string_literal`: one leading and one trailing `"` are stripped.
 * - `text_block`: the `"""` delimiters are stripped and the content trimmed.
 * - `method_invocation`: recorded as `object.name` when an `object` field is
 *   present, otherwise just `name`; only the first line is kept, trimmed.
 *
 * Only literals with length 1..299 are kept. `regexPatterns` is always empty
 * for Java.
 *
 * @returns {{ stringLiterals: string[], regexPatterns: string[], calls: string[] }}
 */
function collectJavaBodyIntel(body) {
    const literalSet = new Set();
    const callSet = new Set();
    const remember = (value) => {
        if (value.length > 0 && value.length < 300) {
            literalSet.add(value);
        }
    };
    walkBody(body, (node) => {
        switch (node.type) {
            case "string_literal":
                remember(node.text.replace(/^"/, "").replace(/"$/, ""));
                break;
            case "text_block":
                remember(node.text.replace(/^"""/, "").replace(/"""$/, "").trim());
                break;
            case "method_invocation": {
                const nameNode = node.childForFieldName("name");
                const objNode = node.childForFieldName("object");
                if (!nameNode) {
                    break;
                }
                const parts = objNode ? [text(objNode), text(nameNode)] : [text(nameNode)];
                callSet.add(parts.join(".").split("\n")[0].trim());
                break;
            }
            default:
                break;
        }
    });
    return {
        stringLiterals: Array.from(literalSet),
        regexPatterns: [],
        calls: Array.from(callSet),
    };
}
|
|
48
|
+
/**
 * Build one function record per Java method or constructor matched by
 * JAVA_FUNC_QUERY.
 *
 * `isAsync` is true when the text of the declaration's `type` field contains
 * one of "CompletableFuture", "Mono", "Flux" or "Future" (substring match).
 *
 * @param language tree-sitter language passed to `Parser.Query`
 * @param file     file path, used in the record id and `file` field
 * @param root     node the query is executed against
 * @returns array of records: id, name, file, 1-based [start, end] lines,
 *          calls, stringLiterals, regexPatterns, isAsync, language "java"
 */
function extractJavaFunctions(language, file, root) {
    const captured = (match, label) => match.captures.find((c) => c.name === label)?.node;
    const records = [];
    const query = new Parser.Query(language, JAVA_FUNC_QUERY);
    for (const match of query.matches(root)) {
        const fnNode = captured(match, "fn");
        const nameNode = captured(match, "name");
        if (!(fnNode && nameNode)) {
            continue;
        }
        const funcName = text(nameNode);
        const bodyNode = fnNode.childForFieldName("body");
        let intel = { stringLiterals: [], regexPatterns: [], calls: [] };
        if (bodyNode) {
            intel = collectJavaBodyIntel(bodyNode);
        }
        // text() turns a missing `type` field into "", which never matches.
        const returnType = text(fnNode.childForFieldName("type"));
        const isAsync = /CompletableFuture|Mono|Flux|Future/.test(returnType);
        const firstLine = fnNode.startPosition.row + 1;
        const lastLine = fnNode.endPosition.row + 1;
        records.push({
            id: `${file}#${funcName}:${firstLine}`,
            name: funcName,
            file,
            lines: [firstLine, lastLine],
            calls: intel.calls,
            stringLiterals: intel.stringLiterals,
            regexPatterns: intel.regexPatterns,
            isAsync,
            language: "java",
        });
    }
    return records;
}
|
|
6
77
|
export const JavaAdapter = {
|
|
7
78
|
name: "Java Spring Boot Adapter",
|
|
8
79
|
language: Java,
|
|
@@ -117,6 +188,7 @@ export const JavaAdapter = {
|
|
|
117
188
|
});
|
|
118
189
|
}
|
|
119
190
|
}
|
|
120
|
-
|
|
191
|
+
const functions = extractJavaFunctions(this.language, file, root);
|
|
192
|
+
return { endpoints, models, components, tests, functions };
|
|
121
193
|
}
|
|
122
194
|
};
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lean4 / Mathlib adapter for guardian.
|
|
3
|
+
*
|
|
4
|
+
* Uses deterministic regex-based extraction (no tree-sitter-lean4 dependency).
|
|
5
|
+
* Captures: theorems, lemmas, defs, structures, classes, instances, `sorry`
|
|
6
|
+
* locations, tactic usage, and Mathlib import dependencies.
|
|
7
|
+
*
|
|
8
|
+
* Implements SpecGuardAdapter with `language: null` — runner.ts calls
|
|
9
|
+
* extract() directly without a tree-sitter parse step.
|
|
10
|
+
*/
|
|
11
|
+
// ── Constants ─────────────────────────────────────────────────────────────
|
|
12
|
+
/**
 * Well-known Lean4 tactic names. Checked as whole words in the proof body.
 * Matching is case-sensitive, so every entry must be spelled exactly as it is
 * written in Lean source (tactic names are lowercase).
 * Kept in alphabetical order for maintainability.
 */
const KNOWN_TACTICS = [
    "abel",
    "aesop",
    "all_goals",
    "any_goals",
    "apply",
    "apply?",
    "assumption",
    "by_cases",
    "by_contra",
    "calc",
    "cases",
    "change",
    "clear",
    "congr",
    "constructor",
    "contrapose",
    "conv",
    "decide",
    "dsimp",
    "exact",
    "exact?",
    "ext",
    "field_simp",
    "fin_cases",
    "first",
    "funext",
    "gcongr",
    "group",
    "have",
    "induction",
    "interval_cases",
    "intro",
    "intros",
    "linarith",
    "linear_combination",
    // NOTE(review): could not confirm "module_cast" as a tactic name — possibly meant "mod_cast"; verify.
    "module_cast",
    "native_decide",
    "nlinarith",
    "norm_cast",
    "norm_num",
    "norm_num?",
    "nth_rw",
    "obtain",
    "omega",
    "polyrith",
    "positivity",
    "push_cast",
    "push_neg",
    "rcases",
    "refine",
    "rename",
    "repeat",
    "revert",
    "rfl",
    "ring",
    "rw",
    "rw?",
    "set",
    "show",
    "simp",
    "simp?",
    "skip",
    "split",
    "suffices",
    "swap",
    "symm",
    "tauto",
    "trans",
    "trivial",
    "try",
    "unfold",
    "use",
];
|
|
90
|
+
// ── Regex patterns ────────────────────────────────────────────────────────
|
|
91
|
+
/**
 * Matches theorem/lemma/def/abbrev/example declarations (including
 * noncomputable variants), optionally preceded by `@[...]` attributes and a
 * `private`/`protected` modifier.
 * Group 1: keyword (e.g. "theorem", "noncomputable def")
 * Group 2: declaration name (undefined when absent, e.g. `example : ...`)
 *
 * The `\b` after the keyword keeps identifiers that merely start with a
 * keyword (`default`, `definition`, ...) from being reported as declarations.
 * The regex is global: reset `lastIndex` before each scan.
 */
const THEOREM_RE = /^(?:[ \t]*(?:@\[[^\]]*\][ \t]*\n?[ \t]*)*)(?:private[ \t]+|protected[ \t]+)?(?:(noncomputable[ \t]+def|noncomputable[ \t]+abbrev|theorem|lemma|def|abbrev|example))\b(?:[ \t]+([^\s(:{\[]+))?/gm;
|
|
97
|
+
/**
 * Matches structure/class/inductive/instance/mutual declarations, optionally
 * preceded by `@[...]` attributes and a `private`/`protected` modifier.
 * Group 1: keyword, Group 2: name (optional for anonymous instances)
 *
 * The `\b` after the keyword keeps words that merely start with a keyword
 * (e.g. the `classical` tactic) from matching.
 * The regex is global: reset `lastIndex` before each scan.
 */
const STRUCT_RE = /^(?:[ \t]*(?:@\[[^\]]*\][ \t]*\n?[ \t]*)*)(?:private[ \t]+|protected[ \t]+)?(structure|class|inductive|instance|mutual)\b(?:[ \t]+([^\s(:{\[]+))?/gm;
|
|
102
|
+
/** Matches import statements. Group 1: module path */
|
|
103
|
+
const IMPORT_RE = /^import[ \t]+([\w.]+)/gm;
|
|
104
|
+
/** Matches namespace declarations. Group 1: namespace name */
|
|
105
|
+
const NS_OPEN_RE = /^namespace[ \t]+([\w.]+)/gm;
|
|
106
|
+
/** Matches end-of-namespace. Group 1: namespace name */
|
|
107
|
+
const NS_END_RE = /^end[ \t]+([\w.]+)/gm;
|
|
108
|
+
/** `sorry` as a standalone term or tactic */
|
|
109
|
+
const SORRY_RE = /\bsorry\b/g;
|
|
110
|
+
/** `:=` with optional trailing whitespace — used in proof body and statement extraction */
|
|
111
|
+
const ASSIGN_RE = /:=\s*/g;
|
|
112
|
+
// ── Helpers ───────────────────────────────────────────────────────────────
|
|
113
|
+
/**
 * Build the ascending list of line-start offsets for `source`.
 * Entry i is the character offset at which line i+1 begins, so entry 0 is
 * always 0 and each "\n" at offset k contributes an entry k+1.
 */
function buildLineIndex(source) {
    const starts = [0]; // line 1 always begins at offset 0
    let newlineAt = source.indexOf("\n");
    while (newlineAt !== -1) {
        starts.push(newlineAt + 1);
        newlineAt = source.indexOf("\n", newlineAt + 1);
    }
    return starts;
}
|
|
125
|
+
/**
 * 1-based line number containing character `offset`, found by binary search
 * over the ascending line-start offsets in `lineIndex`. Offsets before the
 * first entry (or an empty index) map to line 1.
 */
function lineOfFast(lineIndex, offset) {
    // Count how many line starts are <= offset (half-open search window).
    let below = 0;
    let above = lineIndex.length;
    while (below < above) {
        const probe = (below + above) >>> 1;
        if (lineIndex[probe] <= offset) {
            below = probe + 1;
        }
        else {
            above = probe;
        }
    }
    return below === 0 ? 1 : below;
}
|
|
137
|
+
/**
 * List every module path captured by IMPORT_RE in `source`, in order of
 * appearance.
 */
function extractImports(source) {
    // matchAll starts from the regex's current lastIndex, so rewind first.
    IMPORT_RE.lastIndex = 0;
    return Array.from(source.matchAll(IMPORT_RE), (hit) => hit[1]);
}
|
|
147
|
+
/**
 * Scan `source` for every NS_OPEN_RE and NS_END_RE match and return them as
 * one list of `{ idx, name, kind }` events ("open" or "end") ordered by
 * ascending match offset. Intended as the `events` input of
 * activeNamespaceAtFast().
 */
function buildNsEvents(source) {
    const scan = (pattern, kind) => {
        const hits = [];
        pattern.lastIndex = 0;
        for (let hit = pattern.exec(source); hit !== null; hit = pattern.exec(source)) {
            hits.push({ idx: hit.index, name: hit[1], kind });
        }
        return hits;
    };
    const timeline = [...scan(NS_OPEN_RE, "open"), ...scan(NS_END_RE, "end")];
    timeline.sort((left, right) => left.idx - right.idx);
    return timeline;
}
|
|
165
|
+
/**
 * Return the dotted namespace path that is open at `offset`.
 *
 * Consumes every not-yet-processed event whose `idx` is below `offset`:
 * "open" events push their name onto `stack`; any other event removes the
 * last occurrence of its name from `stack`, if present. `cursor.i` and
 * `stack` are mutated, so callers must pass the same pair on each call and
 * query offsets in ascending order.
 *
 * @param events sorted `{ idx, name, kind }` events
 * @param offset character offset being queried
 * @param cursor `{ i }` position of the next unprocessed event
 * @param stack  currently open namespace names, outermost first
 * @returns `stack` joined with "."
 */
function activeNamespaceAtFast(events, offset, cursor, stack) {
    for (; cursor.i < events.length; cursor.i += 1) {
        const event = events[cursor.i];
        if (!(event.idx < offset)) {
            break;
        }
        if (event.kind === "open") {
            stack.push(event.name);
            continue;
        }
        const openAt = stack.lastIndexOf(event.name);
        if (openAt >= 0) {
            stack.splice(openAt, 1);
        }
    }
    return stack.join(".");
}
|
|
186
|
+
/**
 * Extract the proof/definition body that follows a declaration's `:=`.
 *
 * Searches `source` for the first `:=` at or after `startOffset`. If an
 * unindented top-level keyword (`theorem`, `def`, `namespace`, `end`, `#...`,
 * ...) starts a line between `startOffset` and that `:=`, the `:=` belongs to
 * a later declaration, so "" is returned. Without this check a declaration
 * that has no `:=` of its own would be given the next declaration's body.
 *
 * @param source      full file text
 * @param startOffset offset just past the declaration header
 * @returns raw body text, capped at 4000 chars and cut at the next top-level
 *          keyword line; "" when no body is found
 */
function extractProofBody(source, startOffset) {
    // Newline immediately followed by an unindented top-level keyword.
    const topLevelRe = /\n(?=(?:theorem|lemma|def|abbrev|noncomputable|structure|class|inductive|instance|example|namespace|end|#|import)\b)/;
    // Search for := starting at startOffset without slicing the full source
    ASSIGN_RE.lastIndex = startOffset;
    const assignMatch = ASSIGN_RE.exec(source);
    // ASSIGN_RE is a shared global regex; leave it rewound for other users.
    ASSIGN_RE.lastIndex = 0;
    if (!assignMatch) {
        return "";
    }
    // A top-level keyword before the `:=` means the `:=` is not ours.
    if (topLevelRe.test(source.slice(startOffset, assignMatch.index))) {
        return "";
    }
    const bodyStart = assignMatch.index + assignMatch[0].length;
    const bodyText = source.slice(bodyStart, bodyStart + 4000);
    // Stop at the next top-level declaration (unindented keyword)
    const stopIdx = bodyText.search(topLevelRe);
    return stopIdx >= 0 ? bodyText.slice(0, stopIdx) : bodyText;
}
|
|
204
|
+
/**
 * Build one global regex that matches any of `tactics` in a single pass.
 *
 * - Every tactic is regex-escaped.
 * - A leading `\b` is required for all tactics. A trailing `\b` is added only
 *   to tactics that end in a word character: after a `?` a trailing `\b`
 *   would demand a following word character, so `apply?`, `exact?`, `simp?`,
 *   ... could never match at the end of a line.
 * - Alternatives are ordered longest-first so `simp?` is tried before `simp`.
 */
function buildTacticRegex(tactics) {
    const alternatives = [...tactics]
        .sort((a, b) => b.length - a.length)
        .map((tactic) => {
            const escaped = tactic.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            return /\w$/.test(tactic) ? `${escaped}\\b` : escaped;
        });
    return new RegExp(`\\b(${alternatives.join("|")})`, "g");
}
/** Combined matcher for KNOWN_TACTICS. Global: reset `lastIndex` before use. */
const TACTIC_COMBINED_RE = buildTacticRegex(KNOWN_TACTICS);
/**
 * Extract which known tactics appear in a proof body — single-pass scan.
 *
 * @param body proof/definition text to scan
 * @returns the distinct tactic names found, sorted with the default sort
 */
function extractTactics(body) {
    TACTIC_COMBINED_RE.lastIndex = 0;
    const found = new Set();
    let m;
    while ((m = TACTIC_COMBINED_RE.exec(body)) !== null) {
        found.add(m[1]);
    }
    TACTIC_COMBINED_RE.lastIndex = 0;
    return [...found].sort();
}
|
|
222
|
+
/**
 * Report whether `body` contains a match for SORRY_RE.
 * SORRY_RE is a shared global regex, so `lastIndex` is rewound both before
 * and after the probe.
 */
function containsSorry(body) {
    SORRY_RE.lastIndex = 0;
    const found = SORRY_RE.exec(body) !== null;
    SORRY_RE.lastIndex = 0;
    return found;
}
|
|
229
|
+
/** Estimated last line: `startLine` plus the number of "\n" characters in `body`. */
function estimateEndLine(startLine, body) {
    let newlines = 0;
    for (let at = body.indexOf("\n"); at !== -1; at = body.indexOf("\n", at + 1)) {
        newlines += 1;
    }
    return startLine + newlines;
}
|
|
233
|
+
// ── Adapter ───────────────────────────────────────────────────────────────
|
|
234
|
+
export const Lean4Adapter = {
    name: "lean4",
    /**
     * No tree-sitter grammar: `language` is null and all extraction is done
     * with regexes over the source text, so `extract()` does not use a tree.
     */
    language: null,
    fileExtensions: [".lean"],
    queries: {},
    /**
     * Extract Lean4 declarations from `source`.
     *
     * Pass 1 (THEOREM_RE) emits one function record per theorem/lemma/def/
     * abbrev/example, including its statement, proof-body tactics and whether
     * the body contains `sorry`. Pass 2 (STRUCT_RE) emits a model plus a
     * function record for every named structure/class/inductive/instance.
     *
     * @param file   file path, used in record ids and `file` fields
     * @param source full file text
     * @param _root  unused
     * @returns `{ endpoints, models, components, tests, functions }`;
     *          endpoints, components and tests are always empty
     */
    extract(file, source, _root) {
        const endpoints = [];
        const models = [];
        const components = [];
        const tests = [];
        const functions = [];
        const mathlibDeps = extractImports(source).filter((path) => path.startsWith("Mathlib"));
        // Built once per file and shared by both passes.
        const lineIndex = buildLineIndex(source);
        const nsEvents = buildNsEvents(source);
        // Keyword (whitespace collapsed to "_") -> record kind, per pass.
        const declKinds = {
            theorem: "theorem",
            lemma: "lemma",
            def: "def",
            noncomputable_def: "noncomputable_def",
            abbrev: "abbrev",
            noncomputable_abbrev: "abbrev",
            example: "example",
            inductive: "inductive",
        };
        const structKinds = {
            structure: "structure",
            class: "class",
            instance: "instance",
            inductive: "inductive",
            mutual: "def",
        };
        // ── Pass 1: theorems / lemmas / defs ─────────────────────────────────
        const declCursor = { i: 0 };
        const declStack = [];
        THEOREM_RE.lastIndex = 0;
        for (let hit = THEOREM_RE.exec(source); hit !== null; hit = THEOREM_RE.exec(source)) {
            const keyword = hit[1]?.trim().replace(/\s+/g, "_") ?? "def";
            const name = hit[2] ?? "(anonymous)";
            const headerStart = hit.index;
            const headerEnd = headerStart + hit[0].length;
            const startLine = lineOfFast(lineIndex, headerStart);
            const namespace = activeNamespaceAtFast(nsEvents, headerStart, declCursor, declStack);
            // Statement: text between the end of the header match and the next `:=`.
            ASSIGN_RE.lastIndex = headerEnd;
            const assign = ASSIGN_RE.exec(source);
            let statement = "";
            if (assign) {
                statement = source.slice(headerEnd, assign.index);
            }
            statement = statement.trim().slice(0, 500);
            const body = extractProofBody(source, headerEnd);
            const hasSorry = containsSorry(body);
            const tactics = extractTactics(body);
            // Domain concepts go into stringLiterals so the generic literal index
            // can surface them, e.g. `guardian search --query sorry` / `--query simp`.
            const literals = tactics.map((t) => `tactic:${t}`);
            if (hasSorry) {
                literals.unshift("sorry");
            }
            functions.push({
                id: `${file}#${name}:${startLine}`,
                name,
                file,
                lines: [startLine, estimateEndLine(startLine, body)],
                calls: [],
                stringLiterals: literals,
                regexPatterns: [],
                isAsync: false,
                language: "lean4",
                kind: declKinds[keyword] ?? "def",
                namespace,
                statement,
                hasSorry,
                tactics,
                mathlibDeps,
            });
        }
        // ── Pass 2: structures / classes / instances ─────────────────────────
        // Separate cursor and stack: this pass revisits offsets from the start.
        const structCursor = { i: 0 };
        const structStack = [];
        STRUCT_RE.lastIndex = 0;
        for (let hit = STRUCT_RE.exec(source); hit !== null; hit = STRUCT_RE.exec(source)) {
            const structKind = hit[1];
            const name = hit[2];
            if (!name) {
                continue; // unnamed match (e.g. anonymous instance) is skipped
            }
            models.push({
                name,
                file,
                framework: structKind,
                fields: [],
                relationships: [],
            });
            // Also emitted as a function record so it shows up in function search.
            const startLine = lineOfFast(lineIndex, hit.index);
            functions.push({
                id: `${file}#${name}:${startLine}`,
                name,
                file,
                lines: [startLine, startLine],
                calls: [],
                stringLiterals: [],
                regexPatterns: [],
                isAsync: false,
                language: "lean4",
                kind: structKinds[structKind] ?? "structure",
                namespace: activeNamespaceAtFast(nsEvents, hit.index, structCursor, structStack),
                statement: "",
                hasSorry: false,
                tactics: [],
                mathlibDeps,
            });
        }
        return { endpoints, models, components, tests, functions };
    },
};
|
|
@@ -10,6 +10,95 @@ function findChildren(node, type) {
|
|
|
10
10
|
}
|
|
11
11
|
return results;
|
|
12
12
|
}
|
|
13
|
+
// ── Function-level intelligence helpers ──────────────────────────────────
|
|
14
|
+
/** re.* functions whose first positional argument is a regex pattern string */
const RE_FUNCS = new Set(["compile", "sub", "subn", "match", "search", "fullmatch", "findall", "finditer", "split"]);
/**
 * Collect string literals, regex patterns (strings passed as the pattern of a
 * re.* call), and call targets from a Python function body subtree.
 *
 * A string is recorded as a regex only when it is the FIRST argument of a
 * call whose function text starts with "re." and ends in one of RE_FUNCS.
 * Other string arguments, such as the replacement in `re.sub(pattern, repl, s)`,
 * are ordinary string literals. The walk uses an explicit stack (pre-order,
 * children left to right), so deeply nested expressions cannot overflow the
 * call stack.
 *
 * @param body    root node of the subtree to scan
 * @param getText function returning the source text of a node
 * @returns {{ stringLiterals: string[], regexPatterns: string[], calls: string[] }}
 */
function collectPythonBodyIntel(body, getText) {
    const strings = new Set();
    const regexes = new Set();
    const calls = new Set();
    /** True when `stringNode` is the first argument of a re.<RE_FUNCS> call. */
    const isRegexPatternArg = (stringNode) => {
        const args = stringNode.parent;
        if (args?.type !== "argument_list") {
            return false;
        }
        const callNode = args.parent;
        if (callNode?.type !== "call") {
            return false;
        }
        // Compare by position: node wrappers are not assumed to be identity-stable.
        const first = args.namedChildren.find((c) => c.type !== "comment");
        if (!first || first.startIndex !== stringNode.startIndex || first.endIndex !== stringNode.endIndex) {
            return false;
        }
        const funcNode = callNode.childForFieldName("function");
        if (!funcNode) {
            return false;
        }
        // re.sub, re.compile, re.match, …
        const fnText = getText(funcNode);
        const simple = fnText.split(".").pop() ?? "";
        return fnText.startsWith("re.") && RE_FUNCS.has(simple);
    };
    const stack = [body];
    while (stack.length > 0) {
        const n = stack.pop();
        if (n.type === "string") {
            // Python string: "text", 'text', """text""", r"pattern"
            const raw = getText(n);
            // Strip leading r/b/f/u prefix and quote chars
            const stripped = raw.replace(/^[rRbBuUfF]*/u, "").replace(/^'''|^"""|^'|^"/u, "").replace(/'''$|"""$|'$|"$/u, "");
            if (stripped.length > 0 && stripped.length < 300) {
                strings.add(stripped);
                if (isRegexPatternArg(n)) {
                    regexes.add(stripped);
                }
            }
        }
        else if (n.type === "call") {
            const funcNode = n.childForFieldName("function");
            if (funcNode) {
                calls.add(getText(funcNode).trim());
            }
        }
        // Push right-to-left so children are visited left-to-right.
        const children = n.namedChildren;
        for (let i = children.length - 1; i >= 0; i--) {
            stack.push(children[i]);
        }
    }
    return {
        stringLiterals: [...strings],
        regexPatterns: [...regexes],
        calls: [...calls],
    };
}
|
|
66
|
+
/**
 * Extract a function record for every `function_definition` node at or below
 * `node` that has both a `name` and a `body` field. Nested definitions are
 * visited too; records come out in pre-order (source order).
 *
 * `isAsync` is true when the source text between the start of the definition
 * node and the start of its name contains the word `async`.
 *
 * @param file   file path, used in the record id and `file` field
 * @param source full file text; node text is sliced from it
 * @param node   root node to search
 * @returns array of records: id, name, file, 1-based [start, end] lines,
 *          calls, stringLiterals, regexPatterns, isAsync, language "python"
 */
function extractPyFunctions(file, source, node) {
    const getText = (n) => source.substring(n.startIndex, n.endIndex);
    const toRecord = (fnNode, nameN, bodyN) => {
        const funcName = getText(nameN);
        const header = source.substring(fnNode.startIndex, nameN.startIndex);
        const isAsync = /\basync\b/.test(header);
        const intel = collectPythonBodyIntel(bodyN, getText);
        const firstLine = fnNode.startPosition.row + 1;
        const lastLine = fnNode.endPosition.row + 1;
        return {
            id: `${file}#${funcName}:${firstLine}`,
            name: funcName,
            file,
            lines: [firstLine, lastLine],
            calls: intel.calls,
            stringLiterals: intel.stringLiterals,
            regexPatterns: intel.regexPatterns,
            isAsync,
            language: "python",
        };
    };
    const records = [];
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.type === "function_definition") {
            const nameN = current.childForFieldName("name");
            const bodyN = current.childForFieldName("body");
            if (nameN && bodyN) {
                records.push(toRecord(current, nameN, bodyN));
            }
        }
        // Push right-to-left so children are processed left-to-right.
        const kids = current.namedChildren;
        for (let k = kids.length - 1; k >= 0; k--) {
            pending.push(kids[k]);
        }
    }
    return records;
}
|
|
13
102
|
export const PythonAdapter = {
|
|
14
103
|
name: "python",
|
|
15
104
|
language: Python,
|
|
@@ -178,6 +267,7 @@ export const PythonAdapter = {
|
|
|
178
267
|
relationships
|
|
179
268
|
});
|
|
180
269
|
}
|
|
181
|
-
|
|
270
|
+
const functions = extractPyFunctions(file, source, root);
|
|
271
|
+
return { endpoints, models, components, tests, functions };
|
|
182
272
|
}
|
|
183
273
|
};
|
package/dist/adapters/runner.js
CHANGED
|
@@ -1,10 +1,37 @@
|
|
|
1
1
|
import Parser from "tree-sitter";
|
|
2
2
|
export function runAdapter(adapter, file, source) {
|
|
3
|
+
// Text-based adapters (e.g. Lean4) set language to null and rely entirely on
|
|
4
|
+
// their extract() implementation — no tree-sitter parse step needed.
|
|
5
|
+
if (!adapter.language) {
|
|
6
|
+
if (adapter.extract) {
|
|
7
|
+
const result = adapter.extract(file, source, null);
|
|
8
|
+
return {
|
|
9
|
+
endpoints: result.endpoints,
|
|
10
|
+
models: result.models,
|
|
11
|
+
components: result.components,
|
|
12
|
+
tests: result.tests,
|
|
13
|
+
functions: result.functions ?? [],
|
|
14
|
+
};
|
|
15
|
+
}
|
|
16
|
+
return { endpoints: [], models: [], components: [], tests: [], functions: [] };
|
|
17
|
+
}
|
|
18
|
+
// tree-sitter native binding throws "Invalid argument" for very large files.
|
|
19
|
+
// Skip files over 1 MB to avoid silent crashes; they are rare in practice.
|
|
20
|
+
if (source.length > 1_000_000) {
|
|
21
|
+
return { endpoints: [], models: [], components: [], tests: [], functions: [] };
|
|
22
|
+
}
|
|
3
23
|
const parser = new Parser();
|
|
4
24
|
parser.setLanguage(adapter.language);
|
|
5
25
|
const tree = parser.parse(source);
|
|
6
26
|
if (adapter.extract) {
|
|
7
|
-
|
|
27
|
+
const result = adapter.extract(file, source, tree.rootNode);
|
|
28
|
+
return {
|
|
29
|
+
endpoints: result.endpoints,
|
|
30
|
+
models: result.models,
|
|
31
|
+
components: result.components,
|
|
32
|
+
tests: result.tests,
|
|
33
|
+
functions: result.functions ?? [],
|
|
34
|
+
};
|
|
8
35
|
}
|
|
9
36
|
const endpoints = [];
|
|
10
37
|
const models = [];
|
|
@@ -65,5 +92,5 @@ export function runAdapter(adapter, file, source) {
|
|
|
65
92
|
}
|
|
66
93
|
}
|
|
67
94
|
}
|
|
68
|
-
return { endpoints, models, components, tests };
|
|
95
|
+
return { endpoints, models, components, tests, functions: [] };
|
|
69
96
|
}
|
|
@@ -11,6 +11,115 @@ function findChildren(node, type) {
|
|
|
11
11
|
}
|
|
12
12
|
return results;
|
|
13
13
|
}
|
|
14
|
+
// ── Function-level intelligence helpers ──────────────────────────────────
|
|
15
|
+
/**
 * Yield `node` followed by every named descendant, depth-first in pre-order
 * (parents before children, siblings in their original order).
 *
 * The traversal keeps an explicit stack instead of delegating recursively with
 * `yield*`, so a very deep syntax tree cannot exhaust the call stack and every
 * step costs O(1) rather than O(depth).
 *
 * @param {{ namedChildren: object[] }} node Root of the subtree to walk.
 * @yields {object} Each visited node.
 */
function* walkAll(node) {
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        yield current;
        const children = current.namedChildren;
        // Push in reverse so the first child is the next one popped.
        for (let i = children.length - 1; i >= 0; i--) {
            pending.push(children[i]);
        }
    }
}
|
|
21
|
+
/**
 * Collect string literals, regex patterns, and call targets from a subtree.
 *
 * Every named descendant of `body` is inspected (via `walkAll`), so literals
 * and calls that sit inside nested functions or callbacks are attributed to
 * this body as well.
 *
 * - `string` nodes contribute their content without the quotes. A literal that
 *   contains escapes is split by the parser into several `string_fragment` /
 *   `escape_sequence` children; all of them are joined (escapes kept in their
 *   raw source form) so the literal is not cut off at the first escape.
 * - `template_string` nodes contribute the raw text between the backticks.
 * - `regex` nodes contribute the pattern between the slashes, flags removed.
 * - `call_expression` nodes contribute the first line of the callee text.
 *
 * String and template values are kept only when 1–299 characters long.
 *
 * @param {object} body Root node of the subtree to scan.
 * @param {(node: object) => string} getText Returns the source text of a node.
 * @returns {{ stringLiterals: string[], regexPatterns: string[], calls: string[] }}
 *   De-duplicated values in first-seen order.
 */
function collectBodyIntel(body, getText) {
    const strings = new Set();
    const regexes = new Set();
    const calls = new Set();
    const addLiteral = (value) => {
        if (value.length > 0 && value.length < 300)
            strings.add(value);
    };
    for (const n of walkAll(body)) {
        switch (n.type) {
            case "string": {
                const parts = n.namedChildren.filter((c) => c.type === "string_fragment" || c.type === "escape_sequence");
                // Literals without any plain-text fragment are skipped.
                if (parts.some((c) => c.type === "string_fragment")) {
                    addLiteral(parts.map((c) => getText(c)).join(""));
                }
                break;
            }
            case "template_string":
                // Strip the surrounding backticks; substitutions stay as raw text.
                addLiteral(getText(n).slice(1, -1));
                break;
            case "regex": {
                // /pattern/flags → pattern. The class covers every ECMAScript
                // flag, including `d` (indices) and `v` (unicodeSets).
                const m = getText(n).match(/^\/(.+)\/[dgimsuvy]*$/s);
                if (m)
                    regexes.add(m[1]);
                break;
            }
            case "call_expression": {
                const fn = n.childForFieldName("function");
                if (fn) {
                    // Multi-line callees (long chains) are reduced to their first line.
                    calls.add(getText(fn).split("\n")[0].trim());
                }
                break;
            }
            default:
                break;
        }
    }
    return {
        stringLiterals: [...strings],
        regexPatterns: [...regexes],
        calls: [...calls],
    };
}
|
|
65
|
+
/**
 * Extract FunctionRecord entries from a TypeScript/TSX AST subtree.
 *
 * A record is produced for:
 *   - `function_declaration` nodes,
 *   - `method_definition` nodes,
 *   - `variable_declarator` nodes whose value is an arrow function or a
 *     function expression (`const foo = () => {}`, `const foo = function () {}`).
 *     The function-expression node is accepted under both `function` and
 *     `function_expression`, since the node name differs between grammar versions.
 *
 * A node is recorded only when both a name and a body were found. All named
 * children are visited regardless, so nested functions get their own records.
 * Records are emitted in depth-first pre-order; the walk is iterative so deep
 * trees cannot overflow the call stack.
 *
 * @param {string} file Path stored on each record and used in its id.
 * @param {string} source Full source text the node offsets refer to.
 * @param {object} node Root AST node to scan.
 * @returns {object[]} Records with `id`, `name`, `file`, `lines` (1-based
 *   [start, end]), `calls`, `stringLiterals`, `regexPatterns`, `isAsync`, `language`.
 */
function extractTsFunctions(file, source, node) {
    const records = [];
    const getText = (n) => source.substring(n.startIndex, n.endIndex);
    const hasAsyncKeyword = (n) => n.children.some((c) => c.type === "async");
    const FUNCTION_VALUE_TYPES = new Set(["arrow_function", "function", "function_expression"]);

    /** Return `{ funcName, bodyNode, isAsync }` for `n`, with nulls when `n` is not function-like. */
    function describe(n) {
        if (n.type === "function_declaration" || n.type === "method_definition") {
            const nameN = n.childForFieldName("name");
            return {
                funcName: nameN ? getText(nameN) : null,
                bodyNode: n.childForFieldName("body"),
                isAsync: hasAsyncKeyword(n),
            };
        }
        if (n.type === "variable_declarator") {
            const valN = n.childForFieldName("value");
            if (valN && FUNCTION_VALUE_TYPES.has(valN.type)) {
                const nameN = n.childForFieldName("name");
                return {
                    funcName: nameN ? getText(nameN) : null,
                    // Fall back to the function node itself when no `body` field is found.
                    bodyNode: valN.childForFieldName("body") ?? valN,
                    isAsync: hasAsyncKeyword(valN),
                };
            }
        }
        return { funcName: null, bodyNode: null, isAsync: false };
    }

    const pending = [node];
    while (pending.length > 0) {
        const n = pending.pop();
        const { funcName, bodyNode, isAsync } = describe(n);
        if (funcName && bodyNode) {
            const intel = collectBodyIntel(bodyNode, getText);
            const startLine = n.startPosition.row + 1;
            records.push({
                id: `${file}#${funcName}:${startLine}`,
                name: funcName,
                file,
                lines: [startLine, n.endPosition.row + 1],
                calls: intel.calls,
                stringLiterals: intel.stringLiterals,
                regexPatterns: intel.regexPatterns,
                isAsync,
                language: "typescript",
            });
        }
        // Reverse push keeps pre-order: the first child is processed next.
        const children = n.namedChildren;
        for (let i = children.length - 1; i >= 0; i--) {
            pending.push(children[i]);
        }
    }
    return records;
}
|
|
14
123
|
export const TypeScriptAdapter = {
|
|
15
124
|
name: "typescript",
|
|
16
125
|
language: TypeScript.tsx, // We use the TSX grammar to capture both TS and TSX seamlessly
|
|
@@ -174,6 +283,7 @@ export const TypeScriptAdapter = {
|
|
|
174
283
|
}
|
|
175
284
|
}
|
|
176
285
|
}
|
|
177
|
-
|
|
286
|
+
const functions = extractTsFunctions(file, source, root);
|
|
287
|
+
return { endpoints, models, components, tests, functions };
|
|
178
288
|
}
|
|
179
289
|
};
|
package/dist/cli.js
CHANGED
|
@@ -350,7 +350,21 @@ program
|
|
|
350
350
|
quiet: options.quiet,
|
|
351
351
|
});
|
|
352
352
|
});
|
|
353
|
-
program
|
|
353
|
+
program
|
|
354
|
+
.parseAsync()
|
|
355
|
+
.then(() => {
|
|
356
|
+
// Force exit after one-shot commands complete.
|
|
357
|
+
// Tree-sitter native bindings keep a libuv ref alive, preventing natural
|
|
358
|
+
// process exit. mcp-serve is excluded: it sets up readline and returns
|
|
359
|
+
// immediately (before any messages are processed), so calling process.exit()
|
|
360
|
+
// here would kill it before it processes any input. mcp-serve manages its
|
|
361
|
+
// own lifecycle via process.exit(0) inside rl.on("close").
|
|
362
|
+
const subCommand = process.argv[2];
|
|
363
|
+
if (subCommand !== "mcp-serve") {
|
|
364
|
+
process.exit(process.exitCode ?? 0);
|
|
365
|
+
}
|
|
366
|
+
})
|
|
367
|
+
.catch((error) => {
|
|
354
368
|
console.error(error);
|
|
355
|
-
process.
|
|
369
|
+
process.exit(1);
|
|
356
370
|
});
|
package/dist/commands/search.js
CHANGED
|
@@ -2,19 +2,22 @@ import fs from "node:fs/promises";
|
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import yaml from "js-yaml";
|
|
4
4
|
import { loadHeatmap } from "../extract/compress.js";
|
|
5
|
+
import { loadFunctionIntelligence, } from "../extract/function-intel.js";
|
|
5
6
|
import { resolveMachineInputDir } from "../output-layout.js";
|
|
6
7
|
import { DEFAULT_SPECS_DIR } from "../config.js";
|
|
7
8
|
export async function runSearch(options) {
|
|
8
9
|
const inputDir = await resolveMachineInputDir(options.input || DEFAULT_SPECS_DIR);
|
|
9
10
|
const { architecture, ux } = await loadSnapshots(inputDir);
|
|
10
11
|
const heatmap = await loadHeatmap(inputDir);
|
|
12
|
+
const funcIntel = await loadFunctionIntelligence(inputDir);
|
|
11
13
|
const types = normalizeTypes(options.types);
|
|
12
14
|
const matches = searchSnapshots({
|
|
13
15
|
architecture,
|
|
14
16
|
ux,
|
|
15
17
|
query: options.query,
|
|
16
18
|
types,
|
|
17
|
-
heatmap
|
|
19
|
+
heatmap,
|
|
20
|
+
funcIntel,
|
|
18
21
|
});
|
|
19
22
|
const content = renderSearchMarkdown(options.query, matches);
|
|
20
23
|
if (options.output) {
|
|
@@ -66,7 +69,7 @@ function normalizeTypes(types) {
|
|
|
66
69
|
}
|
|
67
70
|
return normalized.size > 0
|
|
68
71
|
? normalized
|
|
69
|
-
: new Set(["models", "endpoints", "components", "modules", "tasks"]);
|
|
72
|
+
: new Set(["models", "endpoints", "components", "modules", "tasks", "functions"]);
|
|
70
73
|
}
|
|
71
74
|
function tokenize(value) {
|
|
72
75
|
return value
|
|
@@ -110,7 +113,7 @@ function scoreItem(queryTokens, item) {
|
|
|
110
113
|
return Math.min(1, total / queryTokens.length);
|
|
111
114
|
}
|
|
112
115
|
function searchSnapshots(params) {
|
|
113
|
-
const { architecture, ux, query, types, heatmap } = params;
|
|
116
|
+
const { architecture, ux, query, types, heatmap, funcIntel } = params;
|
|
114
117
|
const queryTokens = tokenize(query);
|
|
115
118
|
const matches = [];
|
|
116
119
|
const pageUsage = buildComponentPageUsage(ux);
|
|
@@ -238,6 +241,65 @@ function searchSnapshots(params) {
|
|
|
238
241
|
});
|
|
239
242
|
}
|
|
240
243
|
}
|
|
244
|
+
if (types.has("functions") && funcIntel) {
|
|
245
|
+
const queryTokens = tokenize(query);
|
|
246
|
+
// 1. Name match — function / theorem name contains a query token
|
|
247
|
+
for (const fn of funcIntel.functions) {
|
|
248
|
+
const score = scoreItem(queryTokens, {
|
|
249
|
+
name: fn.name,
|
|
250
|
+
file: fn.file,
|
|
251
|
+
text: [...fn.stringLiterals, ...fn.regexPatterns, ...fn.calls, fn.language],
|
|
252
|
+
});
|
|
253
|
+
if (score <= 0)
|
|
254
|
+
continue;
|
|
255
|
+
const lineRange = `${fn.lines[0]}–${fn.lines[1]}`;
|
|
256
|
+
const detail = [];
|
|
257
|
+
if (fn.stringLiterals.length > 0) {
|
|
258
|
+
detail.push(`Literals: ${formatList(fn.stringLiterals.slice(0, 3).map((l) => `"${l.slice(0, 60)}"`), 3)}`);
|
|
259
|
+
}
|
|
260
|
+
if (fn.regexPatterns.length > 0) {
|
|
261
|
+
detail.push(`Patterns: ${formatList(fn.regexPatterns.slice(0, 3).map((p) => `/${p.slice(0, 60)}/`), 3)}`);
|
|
262
|
+
}
|
|
263
|
+
if (fn.calls.length > 0) {
|
|
264
|
+
detail.push(`Calls: ${formatList(fn.calls, 5)}`);
|
|
265
|
+
}
|
|
266
|
+
matches.push({
|
|
267
|
+
type: "functions",
|
|
268
|
+
name: `${fn.name} (${fn.language})`,
|
|
269
|
+
score,
|
|
270
|
+
markdown: [
|
|
271
|
+
`**${fn.name}** · ${fn.file}:${lineRange} · ${fn.language}${fn.isAsync ? " · async" : ""}`,
|
|
272
|
+
...detail,
|
|
273
|
+
],
|
|
274
|
+
});
|
|
275
|
+
}
|
|
276
|
+
// 2. Literal index match — query token appears in a function's string/regex literals
|
|
277
|
+
// (additive: surfaces functions whose body contains the queried literal even if
|
|
278
|
+
// the function name itself doesn't match)
|
|
279
|
+
for (const tok of queryTokens) {
|
|
280
|
+
const hits = funcIntel.literal_index[tok.toLowerCase()];
|
|
281
|
+
if (!hits)
|
|
282
|
+
continue;
|
|
283
|
+
for (const hit of hits) {
|
|
284
|
+
// Skip if we already emitted this function via name match above
|
|
285
|
+
if (matches.some((m) => m.type === "functions" && m.name.startsWith(hit.function + " ("))) {
|
|
286
|
+
continue;
|
|
287
|
+
}
|
|
288
|
+
const fn = funcIntel.functions.find((f) => f.file === hit.file && f.name === hit.function);
|
|
289
|
+
if (!fn)
|
|
290
|
+
continue;
|
|
291
|
+
matches.push({
|
|
292
|
+
type: "functions",
|
|
293
|
+
name: `${fn.name} (${fn.language})`,
|
|
294
|
+
score: 0.6,
|
|
295
|
+
markdown: [
|
|
296
|
+
`**${fn.name}** · ${fn.file}:${fn.lines[0]}–${fn.lines[1]} · ${fn.language}`,
|
|
297
|
+
`Matched literal/pattern containing "${tok}"`,
|
|
298
|
+
],
|
|
299
|
+
});
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
}
|
|
241
303
|
return matches.sort((a, b) => b.score - a.score || a.name.localeCompare(b.name));
|
|
242
304
|
}
|
|
243
305
|
function buildComponentPageUsage(ux) {
|
|
@@ -290,7 +352,8 @@ function renderSearchMarkdown(query, matches) {
|
|
|
290
352
|
["endpoints", "Endpoints"],
|
|
291
353
|
["components", "Components"],
|
|
292
354
|
["modules", "Modules"],
|
|
293
|
-
["tasks", "Tasks"]
|
|
355
|
+
["tasks", "Tasks"],
|
|
356
|
+
["functions", "Functions"],
|
|
294
357
|
];
|
|
295
358
|
const lines = [];
|
|
296
359
|
lines.push(`# Search: "${query}" - ${matches.length} matches`);
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Function-level intelligence extraction and persistence.
|
|
3
|
+
*
|
|
4
|
+
* Produces `function-intelligence.json` in the guardian machine output dir.
|
|
5
|
+
* Contains:
|
|
6
|
+
* - Full FunctionRecord list (all languages)
|
|
7
|
+
* - Call graph: name → { calls, called_by }
|
|
8
|
+
* - Literal index: token → [{ file, function, line }] ← drives `guardian search --types functions`
|
|
9
|
+
*
|
|
10
|
+
* This is a second-pass scan that runs after the main extraction.
|
|
11
|
+
* It re-uses the adapter pipeline on the same files; results are not fed
|
|
12
|
+
* back into the architecture snapshot (additive, non-breaking).
|
|
13
|
+
*
|
|
14
|
+
* Language-specific domain concepts (e.g. Lean4 `sorry`, Python re.* patterns)
|
|
15
|
+
* are surfaced entirely by each adapter — this module has zero language knowledge.
|
|
16
|
+
* Adapters encode domain specifics into `stringLiterals`, making them searchable
|
|
17
|
+
* through the generic literal_index.
|
|
18
|
+
*/
|
|
19
|
+
import fs from "node:fs/promises";
|
|
20
|
+
import path from "node:path";
|
|
21
|
+
import { getAdapterForFile, runAdapter } from "../adapters/index.js";
|
|
22
|
+
// ── Token helpers ─────────────────────────────────────────────────────────
|
|
23
|
+
/**
 * Lowercase `text`, break it on every run of characters outside `[a-z0-9_]`
 * (underscores are kept inside tokens), and return the pieces that are at
 * least 3 characters long.
 */
function tokenize(text) {
    const pieces = text.toLowerCase().split(/[^a-z0-9_]+/);
    const tokens = [];
    for (const piece of pieces) {
        if (piece.length >= 3) {
            tokens.push(piece);
        }
    }
    return tokens;
}
|
|
30
|
+
// ── Core build ────────────────────────────────────────────────────────────
|
|
31
|
+
/**
 * Given a flat list of FunctionRecord entries (from all files / all adapters),
 * build the call graph and the literal index.
 *
 * Call graph: keyed by function name. `calls` is the union of the call targets
 * of every record carrying that name, so same-named functions in different
 * files are merged rather than the last one overwriting the others.
 * `called_by` lists the names of the functions that call the key.
 *
 * Literal index: keyed by a lowercased, trimmed token. Each string literal and
 * regex pattern is indexed both whole (first 100 characters, when at least 3
 * long) and by the tokens `tokenize` produces. A function appears at most once
 * per token, identified by file + name.
 *
 * Both dictionaries are null-prototype objects so tokens such as "constructor"
 * or "toString" cannot collide with inherited Object members.
 *
 * @param {object[]} allFunctions Records with `name`, `file`, `lines`, `calls`,
 *   `stringLiterals` and `regexPatterns`.
 * @returns {{ version: string, generated_at: string, total_functions: number,
 *   functions: object[], call_graph: object, literal_index: object }}
 */
export function buildFunctionIntelligence(allFunctions) {
    // ── Call graph ──
    const callGraph = Object.create(null);
    // Membership sets mirror the arrays so de-duplication stays O(1) per edge.
    const callsSeen = new Map();
    const calledBySeen = new Map();
    const entryFor = (name) => {
        if (callGraph[name] === undefined) {
            callGraph[name] = { calls: [], called_by: [] };
            callsSeen.set(name, new Set());
            calledBySeen.set(name, new Set());
        }
        return callGraph[name];
    };
    // Pass 1: declared functions first, so their keys precede callee-only keys.
    for (const fn of allFunctions) {
        const entry = entryFor(fn.name);
        const seen = callsSeen.get(fn.name);
        for (const callee of fn.calls) {
            if (!seen.has(callee)) {
                seen.add(callee);
                entry.calls.push(callee);
            }
        }
    }
    // Pass 2: invert the edges — for each callee, record who calls it.
    for (const fn of allFunctions) {
        for (const callee of fn.calls) {
            const entry = entryFor(callee);
            const seen = calledBySeen.get(callee);
            if (!seen.has(fn.name)) {
                seen.add(fn.name);
                entry.called_by.push(fn.name);
            }
        }
    }
    // ── Literal index ──
    const literalIndex = Object.create(null);
    const hitsSeen = new Map();
    function addHit(token, fn) {
        const key = token.toLowerCase().trim();
        if (!key)
            return;
        let seen = hitsSeen.get(key);
        if (seen === undefined) {
            seen = new Set();
            hitsSeen.set(key, seen);
            literalIndex[key] = [];
        }
        // One hit per function (file + name) per token — no duplicates.
        const identity = `${fn.file}\u0000${fn.name}`;
        if (seen.has(identity))
            return;
        seen.add(identity);
        literalIndex[key].push({ file: fn.file, function: fn.name, line: fn.lines[0] });
    }
    function indexTexts(texts, fn) {
        for (const text of texts) {
            const full = text.slice(0, 100);
            if (full.length >= 3)
                addHit(full, fn);
            for (const tok of tokenize(text))
                addHit(tok, fn);
        }
    }
    for (const fn of allFunctions) {
        indexTexts(fn.stringLiterals, fn);
        indexTexts(fn.regexPatterns, fn);
    }
    return {
        version: "0.1",
        generated_at: new Date().toISOString(),
        total_functions: allFunctions.length,
        functions: allFunctions,
        call_graph: callGraph,
        literal_index: literalIndex,
    };
}
|
|
96
|
+
// ── File scanning ─────────────────────────────────────────────────────────
|
|
97
|
+
/**
 * Directory names that the source-file scan skips by default, grouped by the
 * ecosystem that produces them. Matching is by exact directory name.
 */
const DEFAULT_IGNORE_DIRS = new Set([
    ".git", // version control
    ".lake", // Lean4 / Lake package manager (contains Mathlib — thousands of .lean files)
    "node_modules", "dist", ".next", ".nuxt", "coverage", // JS/TS
    ".venv", "venv", "__pycache__", ".mypy_cache", ".pytest_cache", // Python
    "target", ".gradle", "generated", "generated-sources", // Java/Kotlin (Maven + Gradle output, generated sources)
    "vendor", // Go
    "obj", "bin", // .NET
    "build", ".specs", ".cache", // generic
]);
|
|
129
|
+
/**
 * Recursively collect the files under `dir` for which `getAdapterForFile`
 * returns an adapter.
 *
 * Directories are skipped when their name is in `DEFAULT_IGNORE_DIRS` or
 * `config.ignore.directories`, or when their full path contains one of the
 * `config.ignore.paths` substrings. Empty or non-string path patterns are
 * discarded: an empty string is a substring of every path and would otherwise
 * hide the whole tree. Unreadable directories are skipped (best effort).
 *
 * Entries are visited in name order so the result does not depend on the order
 * in which the file system happens to return them.
 *
 * @param {string} dir Directory to scan.
 * @param {{ ignore?: { directories?: string[], paths?: string[] } }} [config]
 *   Optional ignore configuration; may be omitted.
 * @param {string[]} [results] Accumulator that matches are appended to.
 * @returns {Promise<string[]>} `results`, with the matching file paths appended.
 */
async function listSourceFiles(dir, config, results = []) {
    // Built once per scan rather than once per visited directory.
    const ignoreDirs = new Set([
        ...DEFAULT_IGNORE_DIRS,
        ...(config?.ignore?.directories ?? []),
    ]);
    const ignorePaths = (config?.ignore?.paths ?? []).filter((p) => typeof p === "string" && p !== "");
    const byName = (a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0);

    async function walk(current) {
        let entries;
        try {
            // encoding: "utf8" ensures entry.name is always a string, not a Buffer.
            entries = await fs.readdir(current, { withFileTypes: true, encoding: "utf8" });
        }
        catch {
            // Unreadable directory: skip it and keep scanning the rest.
            return;
        }
        entries.sort(byName);
        for (const entry of entries) {
            const fullPath = path.join(current, entry.name);
            if (entry.isDirectory()) {
                if (ignoreDirs.has(entry.name))
                    continue;
                if (ignorePaths.some((p) => fullPath.includes(p)))
                    continue;
                await walk(fullPath);
            }
            else if (entry.isFile() && getAdapterForFile(entry.name)) {
                results.push(fullPath);
            }
        }
    }

    await walk(dir);
    return results;
}
|
|
159
|
+
/**
 * Scan one or more project roots, run the matching adapter on every source
 * file, and return the aggregated FunctionIntelligence index.
 *
 * Files are read in fixed-size batches instead of all at once. Read errors are
 * deliberately swallowed here, so exhausting the process's file-descriptor
 * limit on a large repository would otherwise drop files without any signal.
 * Results are appended in the order `listSourceFiles` returned the files, which
 * keeps the output independent of I/O completion order.
 *
 * Files that cannot be read, or whose adapter throws (malformed source,
 * encoding issues), are skipped.
 *
 * @param {string[]} roots Directories to scan.
 * @param {object} config Configuration forwarded to `listSourceFiles`.
 * @returns {Promise<object>} The value of `buildFunctionIntelligence` for all
 *   collected records.
 */
export async function buildFunctionIntelligenceFromRoots(roots, config) {
    // Upper bound on files read concurrently.
    const BATCH_SIZE = 64;
    const allFunctions = [];

    /** Return the function records of one file, or [] when it must be skipped. */
    const extractFile = async (filePath) => {
        const adapter = getAdapterForFile(path.basename(filePath));
        if (!adapter)
            return [];
        try {
            const source = await fs.readFile(filePath, "utf8");
            return runAdapter(adapter, filePath, source).functions ?? [];
        }
        catch {
            // Best effort: unreadable or unparsable files contribute nothing.
            return [];
        }
    };

    for (const root of roots) {
        const files = await listSourceFiles(root, config);
        for (let start = 0; start < files.length; start += BATCH_SIZE) {
            const batch = files.slice(start, start + BATCH_SIZE);
            const perFile = await Promise.all(batch.map(extractFile));
            // Append one by one: spreading a huge array into push() can exceed
            // the engine's argument limit.
            for (const fileFunctions of perFile) {
                for (const fn of fileFunctions) {
                    allFunctions.push(fn);
                }
            }
        }
    }
    return buildFunctionIntelligence(allFunctions);
}
|
|
189
|
+
// ── Write ─────────────────────────────────────────────────────────────────
|
|
190
|
+
/**
 * Serialise `intel` as pretty-printed JSON (2-space indent) into
 * `function-intelligence.json` inside `outputDir`, creating the directory
 * first when necessary, and log the written path.
 *
 * @param {string} outputDir Directory that receives the file.
 * @param {object} intel Value to serialise.
 * @returns {Promise<string>} Path of the written file.
 */
export async function writeFunctionIntelligence(outputDir, intel) {
    await fs.mkdir(outputDir, { recursive: true });
    const target = path.join(outputDir, "function-intelligence.json");
    const payload = JSON.stringify(intel, null, 2);
    await fs.writeFile(target, payload, "utf8");
    console.log(`Wrote ${target}`);
    return target;
}
|
|
198
|
+
// ── Load ──────────────────────────────────────────────────────────────────
|
|
199
|
+
/**
 * Load `function-intelligence.json` from `machineDir`.
 *
 * Returns null when the file is absent. It also returns null, after a warning
 * on stderr, when the file cannot be read or parsed, or when its top-level
 * value is not an object, so a corrupt index is not mistaken for "never
 * generated".
 *
 * JSON.parse yields ordinary objects, so after the round-trip `call_graph` and
 * `literal_index` would inherit from Object.prototype and a lookup for a token
 * such as "constructor" would return a built-in function instead of undefined.
 * Both dictionaries are therefore copied onto null-prototype objects. A missing
 * or non-array `functions` is replaced by an empty array.
 *
 * @param {string} machineDir Directory containing the JSON file.
 * @returns {Promise<object|null>} The loaded index, or null.
 */
export async function loadFunctionIntelligence(machineDir) {
    const filePath = path.join(machineDir, "function-intelligence.json");
    let parsed;
    try {
        parsed = JSON.parse(await fs.readFile(filePath, "utf8"));
    }
    catch (err) {
        // Absence is the normal "not generated yet" case; anything else is worth a note.
        if (err?.code !== "ENOENT") {
            console.warn(`Ignoring unreadable ${filePath}: ${err?.message ?? err}`);
        }
        return null;
    }
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        console.warn(`Ignoring ${filePath}: expected a JSON object`);
        return null;
    }
    const isRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
    const toDictionary = (value) => Object.assign(Object.create(null), isRecord(value) ? value : {});
    return {
        ...parsed,
        functions: Array.isArray(parsed.functions) ? parsed.functions : [],
        call_graph: toDictionary(parsed.call_graph),
        literal_index: toDictionary(parsed.literal_index),
    };
}
|
package/dist/extract/index.js
CHANGED
|
@@ -13,6 +13,7 @@ import { validateArchitectureSnapshot, validateUxSnapshot } from "../schema/inde
|
|
|
13
13
|
import { getOutputLayout } from "../output-layout.js";
|
|
14
14
|
import { logResolvedProjectPaths, resolveProjectPaths } from "../project-discovery.js";
|
|
15
15
|
import { analyzeDepth } from "./analyzers/depth.js";
|
|
16
|
+
import { buildFunctionIntelligenceFromRoots, writeFunctionIntelligence, } from "./function-intel.js";
|
|
16
17
|
export async function buildSnapshots(options) {
|
|
17
18
|
const startedAt = Date.now();
|
|
18
19
|
const includeFileGraph = options.includeFileGraph ?? false;
|
|
@@ -187,6 +188,17 @@ export async function extractProject(options) {
|
|
|
187
188
|
await fs.writeFile(siPath, JSON.stringify(siReports, null, 2), "utf8");
|
|
188
189
|
console.log(`Wrote ${siPath}`);
|
|
189
190
|
}
|
|
191
|
+
// Generate Function Intelligence — call graph, literal index across all languages.
|
|
192
|
+
// Runs as an additive second pass; never modifies the architecture snapshot.
|
|
193
|
+
try {
|
|
194
|
+
const allRoots = (architecture.project.roots ?? [projectRoot]).map((r) => path.isAbsolute(r) ? r : path.join(projectRoot, r));
|
|
195
|
+
const funcIntel = await buildFunctionIntelligenceFromRoots(allRoots, config);
|
|
196
|
+
await writeFunctionIntelligence(layout.machineDir, funcIntel);
|
|
197
|
+
}
|
|
198
|
+
catch (err) {
|
|
199
|
+
// Non-fatal — function intel is additive; don't block the main extract
|
|
200
|
+
console.warn(`Function intelligence skipped: ${err.message}`);
|
|
201
|
+
}
|
|
190
202
|
return result;
|
|
191
203
|
}
|
|
192
204
|
async function generateStructuralIntelligenceReports(architecture) {
|
package/package.json
CHANGED