@toolbaux/guardian 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -6
- package/dist/adapters/csharp-adapter.js +76 -1
- package/dist/adapters/go-adapter.js +69 -1
- package/dist/adapters/index.js +3 -2
- package/dist/adapters/java-adapter.js +73 -1
- package/dist/adapters/lean4-adapter.js +358 -0
- package/dist/adapters/python-adapter.js +91 -1
- package/dist/adapters/runner.js +29 -2
- package/dist/adapters/typescript-adapter.js +111 -1
- package/dist/cli.js +16 -2
- package/dist/commands/init.js +32 -30
- package/dist/commands/search.js +67 -4
- package/dist/extract/function-intel.js +209 -0
- package/dist/extract/index.js +12 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/@toolbaux/guardian)
|
|
4
4
|
[](./LICENSE)
|
|
5
5
|
|
|
6
|
+
> **Beta Release** — Guardian is under active development. Core features (extract, context, drift, MCP server) are stable and used daily across multiple projects, but you may encounter minor issues with edge cases in framework detection or config handling. Bug reports and feedback welcome via [GitHub Issues](https://github.com/idocoding/guardian/issues).
|
|
7
|
+
|
|
6
8
|
Architectural intelligence for codebases. One command turns your repo into compact, machine-readable context that AI coding tools can reason about without hallucinating.
|
|
7
9
|
|
|
8
10
|
```bash
|
|
@@ -53,7 +55,7 @@ After `guardian init`, your project gets:
|
|
|
53
55
|
- `.specs/` directory with architecture snapshots
|
|
54
56
|
- `CLAUDE.md` with auto-injected context (refreshed on every save and commit)
|
|
55
57
|
- Pre-commit hook that keeps context fresh automatically
|
|
56
|
-
- `guardian.config.json`
|
|
58
|
+
- `guardian.config.json` for project settings (roots auto-detected at runtime)
|
|
57
59
|
|
|
58
60
|
## Claude Code / Cursor Integration
|
|
59
61
|
|
|
@@ -96,9 +98,8 @@ Guardian includes an MCP server that Claude Code and Cursor connect to automatic
|
|
|
96
98
|
|
|
97
99
|
All responses are compact JSON — no pretty-printing, no verbose keys. Repeated calls are cached (30s TTL). Usage metrics tracked per session.
|
|
98
100
|
|
|
99
|
-
**
|
|
101
|
+
**Setup:** `guardian init` and the VSCode extension auto-create `.mcp.json` at your project root. If you need to create it manually:
|
|
100
102
|
|
|
101
|
-
Create `.mcp.json` at your project root:
|
|
102
103
|
```json
|
|
103
104
|
{
|
|
104
105
|
"mcpServers": {
|
|
@@ -110,6 +111,8 @@ Create `.mcp.json` at your project root:
|
|
|
110
111
|
}
|
|
111
112
|
```
|
|
112
113
|
|
|
114
|
+
> **Note:** After `.mcp.json` is created or modified, you must **restart your Claude Code / Cursor session** (or reload the VSCode window) for the MCP server to connect. MCP config is only read at session start.
|
|
115
|
+
|
|
113
116
|
## VSCode Extension
|
|
114
117
|
|
|
115
118
|
Install from [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=toolbaux.toolbaux-guardian):
|
|
@@ -276,14 +279,14 @@ guardian feature-context --spec feature-specs/billing.yaml
|
|
|
276
279
|
<details>
|
|
277
280
|
<summary><strong>Configuration</strong></summary>
|
|
278
281
|
|
|
279
|
-
`guardian.config.json` at project root (auto-created by `guardian init`):
|
|
282
|
+
`guardian.config.json` at project root (auto-created by `guardian init`). Backend and frontend roots are auto-detected at runtime — only set them if auto-detection picks the wrong directory:
|
|
280
283
|
|
|
281
284
|
```json
|
|
282
285
|
{
|
|
283
286
|
"project": {
|
|
287
|
+
"description": "Short product description for generated docs",
|
|
284
288
|
"backendRoot": "./backend",
|
|
285
|
-
"frontendRoot": "./frontend"
|
|
286
|
-
"description": "Short product description for generated docs"
|
|
289
|
+
"frontendRoot": "./frontend"
|
|
287
290
|
},
|
|
288
291
|
"frontend": {
|
|
289
292
|
"routeDirs": ["app"],
|
|
@@ -3,6 +3,80 @@ import Parser from "tree-sitter";
|
|
|
3
3
|
function text(node) {
|
|
4
4
|
return node ? node.text : "";
|
|
5
5
|
}
|
|
6
|
+
// ── Function-level intelligence ───────────────────────────────────────────
|
|
7
|
+
/**
 * Tree-sitter query for C# function-like declarations.
 * Each pattern captures the whole declaration as `@fn` and its identifier as `@name`.
 */
const CS_FUNC_QUERY = [
    "",
    "(method_declaration name: (identifier) @name) @fn",
    "(constructor_declaration name: (identifier) @name) @fn",
    "",
].join("\n");
|
|
11
|
+
/**
 * Visit `body` and every named descendant in pre-order (a node first, then
 * its named children from left to right) using an explicit stack.
 *
 * @param {object} body - Root node; must expose `namedChildCount` and `namedChild(i)`.
 * @param {(node: object) => void} visitor - Called once per visited node.
 */
function walkBody(body, visitor) {
    const pending = [body];
    while (pending.length !== 0) {
        const current = pending.pop();
        visitor(current);
        // Push right-to-left so the leftmost child is popped (visited) first.
        let index = current.namedChildCount;
        while (index-- > 0) {
            const child = current.namedChild(index);
            if (child) {
                pending.push(child);
            }
        }
    }
}
|
|
23
|
+
/**
 * Collect string literals and call targets from a C# method body.
 *
 * String nodes have their opening prefix and closing quote stripped; only
 * non-empty values shorter than 300 characters are kept. Interpolated strings
 * may be prefixed with `$"`, `$@"` or `@$"` — all three are stripped (the
 * previous code only handled `$"`, leaving `$@"`/`@$"` in the stored value).
 * For every `invocation_expression`, the first line of its `function` field
 * is recorded as the call target.
 *
 * @param {object} body - Tree-sitter node for the method body.
 * @returns {{stringLiterals: string[], regexPatterns: string[], calls: string[]}}
 *   De-duplicated results; `regexPatterns` is always empty here.
 */
function collectCSharpBodyIntel(body) {
    const strings = new Set();
    const calls = new Set();
    // Shared size filter for every kind of string node.
    const keepLiteral = (raw) => {
        if (raw.length > 0 && raw.length < 300) {
            strings.add(raw);
        }
    };
    walkBody(body, (n) => {
        if (n.type === "string_literal" || n.type === "verbatim_string_literal") {
            keepLiteral(n.text.replace(/^@?"/, "").replace(/"$/, ""));
        }
        else if (n.type === "interpolated_string_expression") {
            keepLiteral(n.text.replace(/^(?:\$@|@\$|\$)"/, "").replace(/"$/, ""));
        }
        else if (n.type === "invocation_expression") {
            const fn = n.childForFieldName("function");
            if (fn) {
                // Multi-line callee expressions are reduced to their first line.
                calls.add(fn.text.split("\n")[0].trim());
            }
        }
    });
    return { stringLiterals: [...strings], regexPatterns: [], calls: [...calls] };
}
|
|
45
|
+
/**
 * Build one function record per C# method or constructor matched by
 * CS_FUNC_QUERY under `root`.
 *
 * A declaration is marked async when one of its direct children is a
 * `modifier` node whose text is `async`. Declarations without a `body` field
 * get empty call/literal lists.
 *
 * @param {object} language - Tree-sitter language used to compile the query.
 * @param {string} file - File path stored in each record and used in its id.
 * @param {object} root - Root node the query is run against.
 * @returns {object[]} Records with 1-based `[startLine, endLine]` in `lines`.
 */
function extractCSharpFunctions(language, file, root) {
    const funcQuery = new Parser.Query(language, CS_FUNC_QUERY);
    const results = [];
    for (const { captures } of funcQuery.matches(root)) {
        const nodeFor = (label) => captures.find((cap) => cap.name === label)?.node;
        const declNode = nodeFor("fn");
        const identNode = nodeFor("name");
        if (!declNode || !identNode) {
            continue;
        }
        const funcName = text(identNode);
        // Only direct children are inspected — no subtree walk is needed for modifiers.
        const isAsync = declNode.children.some((kid) => kid.type === "modifier" && kid.text === "async");
        const bodyNode = declNode.childForFieldName("body");
        const { calls, stringLiterals, regexPatterns } = bodyNode
            ? collectCSharpBodyIntel(bodyNode)
            : { stringLiterals: [], regexPatterns: [], calls: [] };
        const firstLine = declNode.startPosition.row + 1;
        const lastLine = declNode.endPosition.row + 1;
        results.push({
            id: `${file}#${funcName}:${firstLine}`,
            name: funcName,
            file,
            lines: [firstLine, lastLine],
            calls,
            stringLiterals,
            regexPatterns,
            isAsync,
            language: "csharp",
        });
    }
    return results;
}
|
|
6
80
|
export const CSharpAdapter = {
|
|
7
81
|
name: "C# ASP.NET Core Adapter",
|
|
8
82
|
language: CSharp,
|
|
@@ -144,6 +218,7 @@ export const CSharpAdapter = {
|
|
|
144
218
|
}
|
|
145
219
|
}
|
|
146
220
|
}
|
|
147
|
-
|
|
221
|
+
const functions = extractCSharpFunctions(this.language, file, root);
|
|
222
|
+
return { endpoints, models, components, tests, functions };
|
|
148
223
|
}
|
|
149
224
|
};
|
|
@@ -5,6 +5,73 @@ const Go = require("tree-sitter-go");
|
|
|
5
5
|
function text(node) {
|
|
6
6
|
return node ? node.text : "";
|
|
7
7
|
}
|
|
8
|
+
// ── Function-level intelligence ───────────────────────────────────────────
|
|
9
|
+
/**
 * Tree-sitter query for Go functions and methods.
 * Each pattern captures the whole declaration as `@fn` and its identifier as `@name`.
 */
const GO_FUNC_QUERY = [
    "",
    "(function_declaration name: (identifier) @name) @fn",
    "(method_declaration name: (field_identifier) @name) @fn",
    "",
].join("\n");
|
|
14
|
+
/**
 * Iteratively traverse a node's subtree (explicit stack, no recursion).
 * The visitor sees `body` first, then each named descendant in pre-order.
 *
 * @param {object} body - Root node; must expose `namedChildCount` and `namedChild(i)`.
 * @param {(node: object) => void} visitor - Called once per visited node.
 */
function walkBody(body, visitor) {
    const todo = [body];
    while (todo.length > 0) {
        const node = todo.pop();
        visitor(node);
        // Reverse push order keeps left-to-right visiting order on pop.
        for (let slot = node.namedChildCount; slot > 0; slot--) {
            const kid = node.namedChild(slot - 1);
            if (kid) {
                todo.push(kid);
            }
        }
    }
}
|
|
27
|
+
/**
 * Collect string literals, call targets and goroutine usage from a Go
 * function body.
 *
 * Interpreted and raw string literals lose their first and last character
 * (the delimiters); only non-empty values shorter than 300 characters are
 * kept. `isAsync` becomes true when the body contains a `go_statement`.
 *
 * @param {object} body - Tree-sitter node for the function body.
 * @returns {{stringLiterals: string[], regexPatterns: string[], calls: string[], isAsync: boolean}}
 *   De-duplicated results; `regexPatterns` is always empty here.
 */
function collectGoBodyIntel(body) {
    const literals = new Set();
    const callees = new Set();
    let startsGoroutine = false;
    walkBody(body, (node) => {
        switch (node.type) {
            case "interpreted_string_literal":
            case "raw_string_literal": {
                const inner = node.text.slice(1, -1);
                if (inner.length > 0 && inner.length < 300) {
                    literals.add(inner);
                }
                break;
            }
            case "call_expression": {
                const callee = node.childForFieldName("function");
                if (callee) {
                    // Multi-line callee expressions are reduced to their first line.
                    callees.add(callee.text.split("\n")[0].trim());
                }
                break;
            }
            case "go_statement":
                startsGoroutine = true;
                break;
            default:
                break;
        }
    });
    return {
        stringLiterals: Array.from(literals),
        regexPatterns: [],
        calls: Array.from(callees),
        isAsync: startsGoroutine,
    };
}
|
|
48
|
+
/**
 * Build one function record per Go function or method matched by
 * GO_FUNC_QUERY under `root`.
 *
 * Declarations without a `body` field get empty call/literal lists and
 * `isAsync: false`.
 *
 * @param {object} language - Tree-sitter language used to compile the query.
 * @param {string} file - File path stored in each record and used in its id.
 * @param {object} root - Root node the query is run against.
 * @returns {object[]} Records with 1-based `[startLine, endLine]` in `lines`.
 */
function extractGoFunctions(language, file, root) {
    const funcQuery = new Parser.Query(language, GO_FUNC_QUERY);
    const results = [];
    for (const { captures } of funcQuery.matches(root)) {
        const nodeFor = (label) => captures.find((cap) => cap.name === label)?.node;
        const declNode = nodeFor("fn");
        const identNode = nodeFor("name");
        if (!declNode || !identNode) {
            continue;
        }
        const funcName = identNode.text;
        const bodyNode = declNode.childForFieldName("body");
        const { calls, stringLiterals, regexPatterns, isAsync } = bodyNode
            ? collectGoBodyIntel(bodyNode)
            : { stringLiterals: [], regexPatterns: [], calls: [], isAsync: false };
        const firstLine = declNode.startPosition.row + 1;
        const lastLine = declNode.endPosition.row + 1;
        results.push({
            id: `${file}#${funcName}:${firstLine}`,
            name: funcName,
            file,
            lines: [firstLine, lastLine],
            calls,
            stringLiterals,
            regexPatterns,
            isAsync,
            language: "go",
        });
    }
    return results;
}
|
|
8
75
|
export const GoAdapter = {
|
|
9
76
|
name: "Go Gin Adapter",
|
|
10
77
|
language: Go,
|
|
@@ -91,6 +158,7 @@ export const GoAdapter = {
|
|
|
91
158
|
});
|
|
92
159
|
}
|
|
93
160
|
}
|
|
94
|
-
|
|
161
|
+
const functions = extractGoFunctions(this.language, file, root);
|
|
162
|
+
return { endpoints, models, components, tests, functions };
|
|
95
163
|
}
|
|
96
164
|
};
|
package/dist/adapters/index.js
CHANGED
|
@@ -3,9 +3,10 @@ import { TypeScriptAdapter } from "./typescript-adapter.js";
|
|
|
3
3
|
import { JavaAdapter } from "./java-adapter.js";
|
|
4
4
|
import { GoAdapter } from "./go-adapter.js";
|
|
5
5
|
import { CSharpAdapter } from "./csharp-adapter.js";
|
|
6
|
+
import { Lean4Adapter } from "./lean4-adapter.js";
|
|
6
7
|
import { runAdapter } from "./runner.js";
|
|
7
|
-
export { PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, runAdapter };
|
|
8
|
-
export const ADAPTERS = [PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter];
|
|
8
|
+
export { PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, Lean4Adapter, runAdapter };
|
|
9
|
+
export const ADAPTERS = [PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, Lean4Adapter];
|
|
9
10
|
export function getAdapterForFile(file) {
|
|
10
11
|
for (const adapter of ADAPTERS) {
|
|
11
12
|
if (adapter.fileExtensions.some(ext => file.endsWith(ext))) {
|
|
@@ -3,6 +3,77 @@ import Parser from "tree-sitter";
|
|
|
3
3
|
function text(node) {
|
|
4
4
|
return node ? node.text : "";
|
|
5
5
|
}
|
|
6
|
+
// ── Function-level intelligence ───────────────────────────────────────────
|
|
7
|
+
/**
 * Tree-sitter query for Java methods and constructors.
 * Each pattern captures the whole declaration as `@fn` and its identifier as `@name`.
 */
const JAVA_FUNC_QUERY = [
    "",
    "(method_declaration name: (identifier) @name) @fn",
    "(constructor_declaration name: (identifier) @name) @fn",
    "",
].join("\n");
|
|
11
|
+
/**
 * Stack-based pre-order traversal over `body` and all of its named
 * descendants; `visitor` is invoked once for each node.
 *
 * @param {object} body - Root node; must expose `namedChildCount` and `namedChild(i)`.
 * @param {(node: object) => void} visitor - Called once per visited node.
 */
function walkBody(body, visitor) {
    const worklist = [body];
    while (worklist.length > 0) {
        const top = worklist.pop();
        visitor(top);
        // Children are pushed last-to-first so the first child is visited next.
        let remaining = top.namedChildCount;
        while (remaining > 0) {
            remaining -= 1;
            const kid = top.namedChild(remaining);
            if (kid) {
                worklist.push(kid);
            }
        }
    }
}
|
|
23
|
+
/**
 * Collect string literals and method-call targets from a Java method body.
 *
 * `string_literal` nodes lose their surrounding quotes; `text_block` nodes
 * lose their triple quotes and are trimmed. Only non-empty values shorter
 * than 300 characters are kept. Calls are recorded as `object.name` when the
 * invocation has an `object` field, otherwise as the bare method name.
 *
 * @param {object} body - Tree-sitter node for the method body.
 * @returns {{stringLiterals: string[], regexPatterns: string[], calls: string[]}}
 *   De-duplicated results; `regexPatterns` is always empty here.
 */
function collectJavaBodyIntel(body) {
    const literals = new Set();
    const callees = new Set();
    const keepLiteral = (value) => {
        if (value.length > 0 && value.length < 300) {
            literals.add(value);
        }
    };
    walkBody(body, (node) => {
        switch (node.type) {
            case "string_literal":
                keepLiteral(node.text.replace(/^"/, "").replace(/"$/, ""));
                break;
            case "text_block":
                keepLiteral(node.text.replace(/^"""/, "").replace(/"""$/, "").trim());
                break;
            case "method_invocation": {
                const method = node.childForFieldName("name");
                const receiver = node.childForFieldName("object");
                if (!method) {
                    break;
                }
                const qualified = receiver ? `${text(receiver)}.${text(method)}` : text(method);
                // Multi-line receiver chains are reduced to their first line.
                callees.add(qualified.split("\n")[0].trim());
                break;
            }
            default:
                break;
        }
    });
    return { stringLiterals: Array.from(literals), regexPatterns: [], calls: Array.from(callees) };
}
|
|
48
|
+
/**
 * Build one function record per Java method or constructor matched by
 * JAVA_FUNC_QUERY under `root`.
 *
 * `isAsync` is true when the text of the declaration's `type` field contains
 * CompletableFuture, Mono, Flux or Future. Declarations without a `body`
 * field get empty call/literal lists.
 *
 * @param {object} language - Tree-sitter language used to compile the query.
 * @param {string} file - File path stored in each record and used in its id.
 * @param {object} root - Root node the query is run against.
 * @returns {object[]} Records with 1-based `[startLine, endLine]` in `lines`.
 */
function extractJavaFunctions(language, file, root) {
    const funcQuery = new Parser.Query(language, JAVA_FUNC_QUERY);
    const results = [];
    for (const { captures } of funcQuery.matches(root)) {
        const nodeFor = (label) => captures.find((cap) => cap.name === label)?.node;
        const declNode = nodeFor("fn");
        const identNode = nodeFor("name");
        if (!declNode || !identNode) {
            continue;
        }
        const funcName = text(identNode);
        const bodyNode = declNode.childForFieldName("body");
        const { calls, stringLiterals, regexPatterns } = bodyNode
            ? collectJavaBodyIntel(bodyNode)
            : { stringLiterals: [], regexPatterns: [], calls: [] };
        // Constructors have no `type` field; text() then yields "" and the test is false.
        const returnType = text(declNode.childForFieldName("type"));
        const isAsync = /CompletableFuture|Mono|Flux|Future/.test(returnType);
        const firstLine = declNode.startPosition.row + 1;
        const lastLine = declNode.endPosition.row + 1;
        results.push({
            id: `${file}#${funcName}:${firstLine}`,
            name: funcName,
            file,
            lines: [firstLine, lastLine],
            calls,
            stringLiterals,
            regexPatterns,
            isAsync,
            language: "java",
        });
    }
    return results;
}
|
|
6
77
|
export const JavaAdapter = {
|
|
7
78
|
name: "Java Spring Boot Adapter",
|
|
8
79
|
language: Java,
|
|
@@ -117,6 +188,7 @@ export const JavaAdapter = {
|
|
|
117
188
|
});
|
|
118
189
|
}
|
|
119
190
|
}
|
|
120
|
-
|
|
191
|
+
const functions = extractJavaFunctions(this.language, file, root);
|
|
192
|
+
return { endpoints, models, components, tests, functions };
|
|
121
193
|
}
|
|
122
194
|
};
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lean4 / Mathlib adapter for guardian.
|
|
3
|
+
*
|
|
4
|
+
* Uses deterministic regex-based extraction (no tree-sitter-lean4 dependency).
|
|
5
|
+
* Captures: theorems, lemmas, defs, structures, classes, instances, `sorry`
|
|
6
|
+
* locations, tactic usage, and Mathlib import dependencies.
|
|
7
|
+
*
|
|
8
|
+
* Implements SpecGuardAdapter with `language: null` — runner.ts calls
|
|
9
|
+
* extract() directly without a tree-sitter parse step.
|
|
10
|
+
*/
|
|
11
|
+
// ── Constants ─────────────────────────────────────────────────────────────
|
|
12
|
+
/**
 * Well-known Lean4 tactic names. Checked as whole words in the proof body.
 * Matching is case-sensitive, so every entry must use the tactic's exact
 * spelling (`abel`, not `Abel`).
 * Kept in alphabetical order for maintainability.
 */
const KNOWN_TACTICS = [
    "abel",
    "aesop",
    "all_goals",
    "any_goals",
    "apply",
    "apply?",
    "assumption",
    "by_cases",
    "by_contra",
    "calc",
    "cases",
    "change",
    "clear",
    "congr",
    "constructor",
    "contrapose",
    "conv",
    "decide",
    "dsimp",
    "exact",
    "exact?",
    "ext",
    "field_simp",
    "fin_cases",
    "first",
    "funext",
    "gcongr",
    "group",
    "have",
    "induction",
    "interval_cases",
    "intro",
    "intros",
    "linarith",
    "linear_combination",
    // NOTE(review): Mathlib's cast tactics are spelled `*_mod_cast`; confirm `module_cast` is intended.
    "module_cast",
    "native_decide",
    "nlinarith",
    "norm_cast",
    "norm_num",
    "norm_num?",
    "nth_rw",
    "obtain",
    "omega",
    "polyrith",
    "positivity",
    "push_cast",
    "push_neg",
    "rcases",
    "refine",
    "rename",
    "repeat",
    "revert",
    "rfl",
    "ring",
    "rw",
    "rw?",
    "set",
    "show",
    "simp",
    "simp?",
    "skip",
    "split",
    "suffices",
    "swap",
    "symm",
    "tauto",
    "trans",
    "trivial",
    "try",
    "unfold",
    "use",
];
|
|
90
|
+
// ── Regex patterns ────────────────────────────────────────────────────────
|
|
91
|
+
/**
 * Matches theorem/lemma/def/abbrev/example declarations (including
 * noncomputable variants), optionally preceded by `@[...]` attributes and a
 * `private`/`protected` modifier.
 * Group 1: keyword (e.g. "theorem", "noncomputable def")
 * Group 2: declaration name (undefined when absent, e.g. `example : ...`)
 *
 * The `\b` after the keyword stops identifiers that merely start with a
 * keyword (e.g. a `default := ...` field line) from being read as `def`.
 */
const THEOREM_RE = /^(?:[ \t]*(?:@\[[^\]]*\][ \t]*\n?[ \t]*)*)(?:private[ \t]+|protected[ \t]+)?(?:(noncomputable[ \t]+def|noncomputable[ \t]+abbrev|theorem|lemma|def|abbrev|example))\b(?:[ \t]+([^\s(:{\[]+))?/gm;
/**
 * Matches structure/class/inductive/instance/mutual declarations.
 * Group 1: keyword, Group 2: name (optional for anonymous instances)
 *
 * The `\b` after the keyword keeps words such as `classical` from matching `class`.
 */
const STRUCT_RE = /^(?:[ \t]*(?:@\[[^\]]*\][ \t]*\n?[ \t]*)*)(?:private[ \t]+|protected[ \t]+)?(structure|class|inductive|instance|mutual)\b(?:[ \t]+([^\s(:{\[]+))?/gm;
/** Matches import statements. Group 1: module path */
const IMPORT_RE = /^import[ \t]+([\w.]+)/gm;
/** Matches namespace declarations. Group 1: namespace name */
const NS_OPEN_RE = /^namespace[ \t]+([\w.]+)/gm;
/** Matches `end <name>` lines. Group 1: the name being closed */
const NS_END_RE = /^end[ \t]+([\w.]+)/gm;
/** `sorry` as a standalone term or tactic */
const SORRY_RE = /\bsorry\b/g;
/** `:=` with optional trailing whitespace — used in proof body and statement extraction */
const ASSIGN_RE = /:=\s*/g;
|
|
112
|
+
// ── Helpers ───────────────────────────────────────────────────────────────
|
|
113
|
+
/**
 * Compute the character offset at which every line of `source` begins.
 * Element 0 is always 0 (line 1); each later element is the offset just past
 * a "\n". The ascending result is what lineOfFast() binary-searches.
 *
 * @param {string} source - Full file text.
 * @returns {number[]} Ascending line-start offsets.
 */
function buildLineIndex(source) {
    const lineStarts = [0];
    let newlineAt = source.indexOf("\n");
    while (newlineAt !== -1) {
        lineStarts.push(newlineAt + 1);
        newlineAt = source.indexOf("\n", newlineAt + 1);
    }
    return lineStarts;
}
|
|
125
|
+
/**
 * Map a character offset to its 1-based line number by binary-searching the
 * precomputed line-start offsets for the last start that is <= `offset`.
 *
 * @param {number[]} lineIndex - Ascending line-start offsets.
 * @param {number} offset - Character offset into the source.
 * @returns {number} 1-based line number.
 */
function lineOfFast(lineIndex, offset) {
    let low = 0;
    let high = lineIndex.length - 1;
    while (low < high) {
        // Round the midpoint up so that `low = probe` always makes progress.
        const probe = low + ((high - low + 1) >> 1);
        const startsAtOrBefore = lineIndex[probe] <= offset;
        if (startsAtOrBefore) {
            low = probe;
        }
        else {
            high = probe - 1;
        }
    }
    return low + 1;
}
|
|
137
|
+
/**
 * List the module path of every `import` line in `source`, in file order.
 *
 * @param {string} source - Full file text.
 * @returns {string[]} Imported module paths (capture group 1 of IMPORT_RE).
 */
function extractImports(source) {
    // IMPORT_RE is global and therefore stateful; always scan from the start.
    IMPORT_RE.lastIndex = 0;
    const modules = [];
    for (let hit = IMPORT_RE.exec(source); hit !== null; hit = IMPORT_RE.exec(source)) {
        modules.push(hit[1]);
    }
    return modules;
}
|
|
147
|
+
/**
 * Scan the source once for `namespace X` and `end X` lines and return them
 * as a single list ordered by position. The result is the `events` input of
 * activeNamespaceAtFast().
 *
 * @param {string} source - Full file text.
 * @returns {{idx: number, name: string, kind: "open" | "end"}[]} Events sorted by ascending `idx`.
 */
function buildNsEvents(source) {
    const timeline = [];
    const scan = (pattern, kind) => {
        // Both patterns are global and therefore stateful; always scan from the start.
        pattern.lastIndex = 0;
        for (let hit = pattern.exec(source); hit !== null; hit = pattern.exec(source)) {
            timeline.push({ idx: hit.index, name: hit[1], kind });
        }
    };
    scan(NS_OPEN_RE, "open");
    scan(NS_END_RE, "end");
    timeline.sort((left, right) => left.idx - right.idx);
    return timeline;
}
|
|
165
|
+
/**
 * Return the dotted namespace path that is open at `offset`.
 *
 * Replays every not-yet-consumed event located before `offset`: an "open"
 * pushes its name on `stack`; an "end" removes the most recent entry with the
 * same name, or is ignored when no such entry exists. `cursor` and `stack`
 * are mutated, so callers must query ascending offsets and pass the same
 * pair each time; each event is then processed only once.
 *
 * @param {{idx: number, name: string, kind: string}[]} events - Events sorted by `idx`.
 * @param {number} offset - Character offset of the declaration being resolved.
 * @param {{i: number}} cursor - Index of the next unprocessed event (mutated).
 * @param {string[]} stack - Currently open namespaces, outermost first (mutated).
 * @returns {string} Open namespaces joined with ".", or "" when none are open.
 */
function activeNamespaceAtFast(events, offset, cursor, stack) {
    for (; cursor.i < events.length; cursor.i += 1) {
        const event = events[cursor.i];
        if (!(event.idx < offset)) {
            break;
        }
        if (event.kind === "open") {
            stack.push(event.name);
            continue;
        }
        const openAt = stack.lastIndexOf(event.name);
        if (openAt >= 0) {
            stack.splice(openAt, 1);
        }
    }
    return stack.join(".");
}
|
|
186
|
+
/**
 * Extract the proof/definition body that follows the first `:=` at or after
 * `startOffset`.
 *
 * The body starts after `:=` and any whitespace following it, is capped at
 * 4000 characters, and is cut at the first newline that is immediately
 * followed by an unindented declaration start. The stop list covers the same
 * declaration openers the declaration regexes accept — including `@[...]`
 * attributes and the `private`/`protected`/`mutual` prefixes — so a body no
 * longer swallows a following attributed or private declaration.
 *
 * The search uses `indexOf`, so the function touches no shared regex state.
 *
 * @param {string} source - Full file text.
 * @param {number} startOffset - Offset at which to start looking for `:=`.
 * @returns {string} Raw body text, or "" when no `:=` is found.
 */
function extractProofBody(source, startOffset) {
    const assignAt = source.indexOf(":=", startOffset);
    if (assignAt === -1) {
        return "";
    }
    // Skip whitespace after `:=` (same effect as matching /:=\s*/).
    let bodyStart = assignAt + 2;
    while (bodyStart < source.length && /\s/.test(source[bodyStart])) {
        bodyStart += 1;
    }
    const bodyText = source.slice(bodyStart, bodyStart + 4000);
    // Stop at the next top-level declaration (unindented keyword or attribute).
    const stopRe = /\n(?=@\[|(?:private|protected|theorem|lemma|def|abbrev|noncomputable|mutual|structure|class|inductive|instance|example|namespace|end|#|import)\b)/;
    const stopIdx = bodyText.search(stopRe);
    return stopIdx >= 0 ? bodyText.slice(0, stopIdx) : bodyText;
}
|
|
204
|
+
/**
 * Single global regex that matches any known tactic in one pass.
 *
 * - Alternatives are ordered longest-first so `apply?` is tried before
 *   `apply`; otherwise the shorter name always wins and the `?` variants
 *   can never be reported.
 * - Regex metacharacters in tactic names (currently only `?`) are escaped.
 * - The trailing `(?!\w)` replaces `\b`: a `\b` after `?` would require a
 *   word character to follow, which is never the case at the end of a tactic.
 *
 * The regex is global (stateful); callers must reset `lastIndex` before use.
 */
const TACTIC_COMBINED_RE = new RegExp(
    "\\b(" +
        [...KNOWN_TACTICS]
            .sort((a, b) => b.length - a.length)
            .map((t) => t.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
            .join("|") +
        ")(?!\\w)",
    "g",
);
|
|
211
|
+
/**
 * Return the distinct known tactics that occur in a proof body, sorted with
 * the default string ordering.
 *
 * @param {string} body - Proof/definition body text.
 * @returns {string[]} Sorted, de-duplicated tactic names.
 */
function extractTactics(body) {
    const seen = new Set();
    // The shared regex is global (stateful): reset before and after the scan.
    TACTIC_COMBINED_RE.lastIndex = 0;
    for (let hit = TACTIC_COMBINED_RE.exec(body); hit !== null; hit = TACTIC_COMBINED_RE.exec(body)) {
        seen.add(hit[1]);
    }
    TACTIC_COMBINED_RE.lastIndex = 0;
    return Array.from(seen).sort();
}
|
|
222
|
+
/**
 * Report whether `body` contains the standalone word `sorry`.
 * SORRY_RE is global (stateful), so `lastIndex` is reset before and after the probe.
 *
 * @param {string} body - Proof/definition body text.
 * @returns {boolean} True when `sorry` occurs as a whole word.
 */
function containsSorry(body) {
    SORRY_RE.lastIndex = 0;
    const found = SORRY_RE.exec(body) !== null;
    SORRY_RE.lastIndex = 0;
    return found;
}
|
|
229
|
+
/**
 * Estimate a declaration's last line as its start line plus the number of
 * newline characters in its body.
 *
 * @param {number} startLine - 1-based line on which the declaration starts.
 * @param {string} body - Body text.
 * @returns {number} Estimated 1-based end line.
 */
function estimateEndLine(startLine, body) {
    let newlineCount = 0;
    for (let at = body.indexOf("\n"); at !== -1; at = body.indexOf("\n", at + 1)) {
        newlineCount += 1;
    }
    return startLine + newlineCount;
}
|
|
233
|
+
// ── Adapter ───────────────────────────────────────────────────────────────
|
|
234
|
+
/**
 * Regex-based adapter for Lean 4 sources (`.lean`).
 *
 * `extract()` emits one function record per theorem/lemma/def/abbrev/example
 * and per named structure/class/inductive/instance; the latter are also
 * emitted as models. `endpoints`, `components` and `tests` are always empty.
 */
export const Lean4Adapter = {
    name: "lean4",
    /**
     * No tree-sitter grammar — runner.ts calls extract() directly when
     * `language` is falsy. All extraction is done via regex on the source text.
     */
    language: null,
    fileExtensions: [".lean"],
    queries: {},
    /**
     * Extract declarations from one Lean source file.
     *
     * @param {string} file - File path stored in each record and used in its id.
     * @param {string} source - Full file text.
     * @param {*} _root - Unused; present for interface compatibility.
     * @returns {{endpoints: object[], models: object[], components: object[], tests: object[], functions: object[]}}
     */
    extract(file, source, _root) {
        const endpoints = [];
        const models = [];
        const components = [];
        const tests = [];
        const functions = [];
        const imports = extractImports(source);
        const mathlibDeps = imports.filter((i) => i.startsWith("Mathlib"));
        // ── Precompute indices — O(n) each, amortises all per-declaration lookups ──
        const lineIndex = buildLineIndex(source);
        const nsEvents = buildNsEvents(source);
        // Loop-invariant keyword → kind tables (previously rebuilt on every iteration).
        const declKinds = {
            theorem: "theorem",
            lemma: "lemma",
            def: "def",
            noncomputable_def: "noncomputable_def",
            abbrev: "abbrev",
            noncomputable_abbrev: "abbrev",
            example: "example",
            inductive: "inductive",
        };
        const structKinds = {
            structure: "structure",
            class: "class",
            instance: "instance",
            inductive: "inductive",
            mutual: "def",
        };
        // ── Theorems / Lemmas / Defs ──────────────────────────────────────────
        const nsCursor = { i: 0 };
        const nsStack = [];
        THEOREM_RE.lastIndex = 0;
        let m;
        while ((m = THEOREM_RE.exec(source)) !== null) {
            const rawKind = m[1]?.trim().replace(/\s+/g, "_") ?? "def";
            const name = m[2] ?? "(anonymous)";
            const offset = m.index;
            const startLine = lineOfFast(lineIndex, offset);
            const namespace = activeNamespaceAtFast(nsEvents, offset, nsCursor, nsStack);
            // Extract statement: text between end of match and :=
            const matchEnd = offset + m[0].length;
            ASSIGN_RE.lastIndex = matchEnd;
            const stmtMatch = ASSIGN_RE.exec(source);
            const statement = (stmtMatch
                ? source.slice(matchEnd, stmtMatch.index)
                : "").trim().slice(0, 500);
            const body = extractProofBody(source, matchEnd);
            const hasSorry = containsSorry(body);
            const tactics = extractTactics(body);
            // End line comes from the real offset of the body's last non-blank
            // character. Counting newlines inside the body alone missed
            // multi-line statements and the newline that often follows `:=`.
            let endLine = startLine;
            if (stmtMatch) {
                const bodyStart = stmtMatch.index + stmtMatch[0].length;
                const usedLength = body.trimEnd().length;
                const endOffset = usedLength > 0 ? bodyStart + usedLength - 1 : stmtMatch.index;
                endLine = lineOfFast(lineIndex, endOffset);
            }
            const kind = declKinds[rawKind] ?? "def";
            const record = {
                id: `${file}#${name}:${startLine}`,
                name,
                file,
                lines: [startLine, endLine],
                calls: [],
                // Push domain concepts into stringLiterals so the generic literal_index
                // can surface them — no language knowledge needed outside the adapter.
                stringLiterals: [
                    ...(hasSorry ? ["sorry"] : []), // `guardian search --query sorry`
                    ...tactics.map((t) => `tactic:${t}`), // `guardian search --query simp`
                ],
                regexPatterns: [],
                isAsync: false,
                language: "lean4",
                kind,
                namespace,
                statement,
                hasSorry,
                tactics,
                mathlibDeps,
            };
            functions.push(record);
        }
        // ── Structures / Classes / Instances ─────────────────────────────────
        // Fresh cursor for STRUCT_RE pass — offsets may interleave with THEOREM_RE
        const nsCursor2 = { i: 0 };
        const nsStack2 = [];
        STRUCT_RE.lastIndex = 0;
        while ((m = STRUCT_RE.exec(source)) !== null) {
            const structKind = m[1];
            const name = m[2];
            if (!name) {
                continue; // anonymous instance — skip for models
            }
            models.push({
                name,
                file,
                framework: structKind,
                fields: [],
                relationships: [],
            });
            // Also emit a FunctionRecord so it appears in function search
            const startLine = lineOfFast(lineIndex, m.index);
            const record = {
                id: `${file}#${name}:${startLine}`,
                name,
                file,
                lines: [startLine, startLine],
                calls: [],
                stringLiterals: [],
                regexPatterns: [],
                isAsync: false,
                language: "lean4",
                kind: structKinds[structKind] ?? "structure",
                namespace: activeNamespaceAtFast(nsEvents, m.index, nsCursor2, nsStack2),
                statement: "",
                hasSorry: false,
                tactics: [],
                mathlibDeps,
            };
            functions.push(record);
        }
        return { endpoints, models, components, tests, functions };
    },
};
|