@toolbaux/guardian 0.1.16 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3,6 +3,8 @@
3
3
  [![npm version](https://img.shields.io/npm/v/@toolbaux/guardian.svg)](https://www.npmjs.com/package/@toolbaux/guardian)
4
4
  [![license](https://img.shields.io/npm/l/@toolbaux/guardian.svg)](./LICENSE)
5
5
 
6
+ > **Beta Release** — Guardian is under active development. Core features (extract, context, drift, MCP server) are stable and used daily across multiple projects, but you may encounter minor issues with edge cases in framework detection or config handling. Bug reports and feedback welcome via [GitHub Issues](https://github.com/idocoding/guardian/issues).
7
+
6
8
  Architectural intelligence for codebases. One command turns your repo into compact, machine-readable context that AI coding tools can reason about without hallucinating.
7
9
 
8
10
  ```bash
@@ -53,7 +55,7 @@ After `guardian init`, your project gets:
53
55
  - `.specs/` directory with architecture snapshots
54
56
  - `CLAUDE.md` with auto-injected context (refreshed on every save and commit)
55
57
  - Pre-commit hook that keeps context fresh automatically
56
- - `guardian.config.json` with auto-detected backend/frontend roots
58
+ - `guardian.config.json` for project settings (roots auto-detected at runtime)
57
59
 
58
60
  ## Claude Code / Cursor Integration
59
61
 
@@ -96,9 +98,8 @@ Guardian includes an MCP server that Claude Code and Cursor connect to automatic
96
98
 
97
99
  All responses are compact JSON — no pretty-printing, no verbose keys. Repeated calls are cached (30s TTL), and usage metrics are tracked per session.
98
100
 
99
- **Manual setup** (if the extension doesn't auto-configure):
101
+ **Setup:** `guardian init` and the VSCode extension auto-create `.mcp.json` at your project root. If you need to create it manually:
100
102
 
101
- Create `.mcp.json` at your project root:
102
103
  ```json
103
104
  {
104
105
  "mcpServers": {
@@ -110,6 +111,8 @@ Create `.mcp.json` at your project root:
110
111
  }
111
112
  ```
112
113
 
114
+ > **Note:** After `.mcp.json` is created or modified, you must **restart your Claude Code / Cursor session** (or reload the VSCode window) for the MCP server to connect. MCP config is only read at session start.
115
+
113
116
  ## VSCode Extension
114
117
 
115
118
  Install from [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=toolbaux.toolbaux-guardian):
@@ -276,14 +279,14 @@ guardian feature-context --spec feature-specs/billing.yaml
276
279
  <details>
277
280
  <summary><strong>Configuration</strong></summary>
278
281
 
279
- `guardian.config.json` at project root (auto-created by `guardian init`):
282
+ `guardian.config.json` at project root (auto-created by `guardian init`). Backend and frontend roots are auto-detected at runtime — only set them if auto-detection picks the wrong directory:
280
283
 
281
284
  ```json
282
285
  {
283
286
  "project": {
287
+ "description": "Short product description for generated docs",
284
288
  "backendRoot": "./backend",
285
- "frontendRoot": "./frontend",
286
- "description": "Short product description for generated docs"
289
+ "frontendRoot": "./frontend"
287
290
  },
288
291
  "frontend": {
289
292
  "routeDirs": ["app"],
@@ -3,6 +3,80 @@ import Parser from "tree-sitter";
3
3
  function text(node) {
4
4
  return node ? node.text : "";
5
5
  }
6
// ── Function-level intelligence ───────────────────────────────────────────
// Tree-sitter query: each pattern captures the whole declaration as @fn and
// its identifier as @name.
const CS_FUNC_QUERY = [
    "",
    "(method_declaration name: (identifier) @name) @fn",
    "(constructor_declaration name: (identifier) @name) @fn",
    "",
].join("\n");
11
/**
 * Visit `body` and every named descendant in pre-order (parent first, then
 * children left to right) using an explicit stack instead of recursion.
 *
 * @param {object} body - Root node; must expose `namedChildCount` and `namedChild(i)`.
 * @param {(node: object) => void} visitor - Called once per visited node.
 */
function walkBody(body, visitor) {
    const pending = [body];
    do {
        const node = pending.pop();
        visitor(node);
        // Push right-to-left so the leftmost child is the next one popped.
        let idx = node.namedChildCount;
        while (idx-- > 0) {
            const child = node.namedChild(idx);
            if (child) {
                pending.push(child);
            }
        }
    } while (pending.length > 0);
}
23
/**
 * Collect string literals and call targets found anywhere inside a C# method
 * or constructor body.
 *
 * - Plain and verbatim string literals have their surrounding quotes (and the
 *   `@` prefix) removed.
 * - Interpolated strings have their prefix removed; this covers `$"`, and the
 *   verbatim-interpolated forms `$@"` / `@$"`, which previously leaked their
 *   prefix into the recorded literal.
 * - Only literals of 1..299 characters are kept.
 * - For invocations, the first line of the callee expression text is recorded.
 *
 * @param {object} body - Tree-sitter node for the body.
 * @returns {{stringLiterals: string[], regexPatterns: string[], calls: string[]}}
 *   De-duplicated results in first-seen order; `regexPatterns` is always empty.
 */
function collectCSharpBodyIntel(body) {
    const strings = new Set();
    const calls = new Set();
    const keepLiteral = (raw) => {
        if (raw.length > 0 && raw.length < 300) {
            strings.add(raw);
        }
    };
    walkBody(body, (n) => {
        if (n.type === "string_literal" || n.type === "verbatim_string_literal") {
            keepLiteral(n.text.replace(/^@?"/, "").replace(/"$/, ""));
        }
        else if (n.type === "interpolated_string_expression") {
            // Accept "$", "$@" and "@$" prefixes before the opening quote.
            keepLiteral(n.text.replace(/^(?:\$@|@\$|\$)"/, "").replace(/"$/, ""));
        }
        else if (n.type === "invocation_expression") {
            const fn = n.childForFieldName("function");
            if (fn) {
                calls.add(fn.text.split("\n")[0].trim());
            }
        }
    });
    return { stringLiterals: [...strings], regexPatterns: [], calls: [...calls] };
}
45
/**
 * Build one function record per C# method/constructor matched by
 * CS_FUNC_QUERY under `root`.
 *
 * @param {object} language - Tree-sitter language passed to `Parser.Query`.
 * @param {string} file - File path used in record ids and the `file` field.
 * @param {object} root - Root syntax node to run the query against.
 * @returns {object[]} Records with id, name, file, 1-based [start, end] lines,
 *   calls, stringLiterals, regexPatterns, isAsync and language "csharp".
 */
function extractCSharpFunctions(language, file, root) {
    const pick = (match, label) => match.captures.find((cap) => cap.name === label)?.node;
    const collected = [];
    const funcQuery = new Parser.Query(language, CS_FUNC_QUERY);
    for (const match of funcQuery.matches(root)) {
        const declNode = pick(match, "fn");
        const identNode = pick(match, "name");
        if (declNode && identNode) {
            const funcName = text(identNode);
            // Only the declaration's direct children are inspected for `async`.
            const isAsync = declNode.children.some((part) => part.type === "modifier" && part.text === "async");
            const bodyNode = declNode.childForFieldName("body");
            let intel;
            if (bodyNode) {
                intel = collectCSharpBodyIntel(bodyNode);
            }
            else {
                intel = { stringLiterals: [], regexPatterns: [], calls: [] };
            }
            const firstLine = declNode.startPosition.row + 1;
            const lastLine = declNode.endPosition.row + 1;
            collected.push({
                id: `${file}#${funcName}:${firstLine}`,
                name: funcName,
                file,
                lines: [firstLine, lastLine],
                calls: intel.calls,
                stringLiterals: intel.stringLiterals,
                regexPatterns: intel.regexPatterns,
                isAsync,
                language: "csharp",
            });
        }
    }
    return collected;
}
6
80
  export const CSharpAdapter = {
7
81
  name: "C# ASP.NET Core Adapter",
8
82
  language: CSharp,
@@ -144,6 +218,7 @@ export const CSharpAdapter = {
144
218
  }
145
219
  }
146
220
  }
147
- return { endpoints, models, components, tests };
221
+ const functions = extractCSharpFunctions(this.language, file, root);
222
+ return { endpoints, models, components, tests, functions };
148
223
  }
149
224
  };
@@ -5,6 +5,73 @@ const Go = require("tree-sitter-go");
5
5
  function text(node) {
6
6
  return node ? node.text : "";
7
7
  }
8
// ── Function-level intelligence ───────────────────────────────────────────
// Tree-sitter query: each pattern captures the whole declaration as @fn and
// its name as @name.
const GO_FUNC_QUERY = [
    "",
    "(function_declaration name: (identifier) @name) @fn",
    "(method_declaration name: (field_identifier) @name) @fn",
    "",
].join("\n");
14
/**
 * Pre-order traversal of `body` and all of its named descendants, driven by a
 * worklist rather than recursion.
 *
 * @param {object} body - Root node; must expose `namedChildCount` and `namedChild(i)`.
 * @param {(node: object) => void} visitor - Invoked once for every visited node.
 */
function walkBody(body, visitor) {
    for (const worklist = [body]; worklist.length !== 0;) {
        const node = worklist.pop();
        visitor(node);
        const count = node.namedChildCount;
        // Last child is pushed first so that the first child is popped first.
        for (let back = 1; back <= count; back += 1) {
            const child = node.namedChild(count - back);
            if (child) {
                worklist.push(child);
            }
        }
    }
}
27
/**
 * Gather string literals, call targets and goroutine usage from a Go function
 * body.
 *
 * String literals lose their first and last character (the delimiters) and are
 * kept only when 1..299 characters long. Calls record the first line of the
 * callee expression. `isAsync` becomes true when any `go` statement is found.
 *
 * @param {object} body - Tree-sitter node for the function body.
 * @returns {{stringLiterals: string[], regexPatterns: string[], calls: string[], isAsync: boolean}}
 */
function collectGoBodyIntel(body) {
    const literalSet = new Set();
    const callSet = new Set();
    let spawnsGoroutine = false;
    walkBody(body, (node) => {
        switch (node.type) {
            case "interpreted_string_literal":
            case "raw_string_literal": {
                const inner = node.text.slice(1, -1);
                if (inner.length > 0 && inner.length < 300) {
                    literalSet.add(inner);
                }
                break;
            }
            case "call_expression": {
                const callee = node.childForFieldName("function");
                if (callee) {
                    callSet.add(callee.text.split("\n")[0].trim());
                }
                break;
            }
            case "go_statement":
                spawnsGoroutine = true;
                break;
            default:
                break;
        }
    });
    return {
        stringLiterals: Array.from(literalSet),
        regexPatterns: [],
        calls: Array.from(callSet),
        isAsync: spawnsGoroutine,
    };
}
48
/**
 * Build one function record per Go function/method matched by GO_FUNC_QUERY
 * under `root`.
 *
 * @param {object} language - Tree-sitter language passed to `Parser.Query`.
 * @param {string} file - File path used in record ids and the `file` field.
 * @param {object} root - Root syntax node to run the query against.
 * @returns {object[]} Records with id, name, file, 1-based [start, end] lines,
 *   calls, stringLiterals, regexPatterns, isAsync and language "go".
 */
function extractGoFunctions(language, file, root) {
    const pick = (match, label) => match.captures.find((cap) => cap.name === label)?.node;
    const collected = [];
    const funcQuery = new Parser.Query(language, GO_FUNC_QUERY);
    for (const match of funcQuery.matches(root)) {
        const declNode = pick(match, "fn");
        const identNode = pick(match, "name");
        if (declNode && identNode) {
            const funcName = identNode.text;
            const bodyNode = declNode.childForFieldName("body");
            let intel;
            if (bodyNode) {
                intel = collectGoBodyIntel(bodyNode);
            }
            else {
                // Declarations without a body contribute empty intel.
                intel = { stringLiterals: [], regexPatterns: [], calls: [], isAsync: false };
            }
            const firstLine = declNode.startPosition.row + 1;
            const lastLine = declNode.endPosition.row + 1;
            collected.push({
                id: `${file}#${funcName}:${firstLine}`,
                name: funcName,
                file,
                lines: [firstLine, lastLine],
                calls: intel.calls,
                stringLiterals: intel.stringLiterals,
                regexPatterns: intel.regexPatterns,
                isAsync: intel.isAsync,
                language: "go",
            });
        }
    }
    return collected;
}
8
75
  export const GoAdapter = {
9
76
  name: "Go Gin Adapter",
10
77
  language: Go,
@@ -91,6 +158,7 @@ export const GoAdapter = {
91
158
  });
92
159
  }
93
160
  }
94
- return { endpoints, models, components, tests };
161
+ const functions = extractGoFunctions(this.language, file, root);
162
+ return { endpoints, models, components, tests, functions };
95
163
  }
96
164
  };
@@ -3,9 +3,10 @@ import { TypeScriptAdapter } from "./typescript-adapter.js";
3
3
  import { JavaAdapter } from "./java-adapter.js";
4
4
  import { GoAdapter } from "./go-adapter.js";
5
5
  import { CSharpAdapter } from "./csharp-adapter.js";
6
+ import { Lean4Adapter } from "./lean4-adapter.js";
6
7
  import { runAdapter } from "./runner.js";
7
- export { PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, runAdapter };
8
- export const ADAPTERS = [PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter];
8
+ export { PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, Lean4Adapter, runAdapter };
9
+ export const ADAPTERS = [PythonAdapter, TypeScriptAdapter, JavaAdapter, GoAdapter, CSharpAdapter, Lean4Adapter];
9
10
  export function getAdapterForFile(file) {
10
11
  for (const adapter of ADAPTERS) {
11
12
  if (adapter.fileExtensions.some(ext => file.endsWith(ext))) {
@@ -3,6 +3,77 @@ import Parser from "tree-sitter";
3
3
  function text(node) {
4
4
  return node ? node.text : "";
5
5
  }
6
// ── Function-level intelligence ───────────────────────────────────────────
// Tree-sitter query: each pattern captures the whole declaration as @fn and
// its identifier as @name.
const JAVA_FUNC_QUERY = [
    "",
    "(method_declaration name: (identifier) @name) @fn",
    "(constructor_declaration name: (identifier) @name) @fn",
    "",
].join("\n");
11
/**
 * Walk `body` and all named descendants depth-first, parent before children
 * and children in source order, without recursion.
 *
 * @param {object} body - Root node; must expose `namedChildCount` and `namedChild(i)`.
 * @param {(node: object) => void} visitor - Called for each visited node.
 */
function walkBody(body, visitor) {
    const stack = [body];
    while (stack.length !== 0) {
        const current = stack.pop();
        visitor(current);
        const kids = [];
        for (let i = 0; i < current.namedChildCount; i += 1) {
            const kid = current.namedChild(i);
            if (kid) {
                kids.push(kid);
            }
        }
        // Move children onto the stack in reverse so the first child pops first.
        while (kids.length !== 0) {
            stack.push(kids.pop());
        }
    }
}
23
/**
 * Gather string literals and method-call targets from a Java method or
 * constructor body.
 *
 * String literals have one leading and one trailing `"` removed; text blocks
 * have their `"""` delimiters removed and are trimmed. Only values of 1..299
 * characters are kept. Calls are recorded as `object.name` when the invocation
 * has an object, otherwise just `name`, truncated to their first line.
 *
 * @param {object} body - Tree-sitter node for the body.
 * @returns {{stringLiterals: string[], regexPatterns: string[], calls: string[]}}
 */
function collectJavaBodyIntel(body) {
    const literalSet = new Set();
    const callSet = new Set();
    const keepLiteral = (value) => {
        if (value.length > 0 && value.length < 300) {
            literalSet.add(value);
        }
    };
    walkBody(body, (node) => {
        switch (node.type) {
            case "string_literal":
                keepLiteral(node.text.replace(/^"/, "").replace(/"$/, ""));
                break;
            case "text_block":
                keepLiteral(node.text.replace(/^"""/, "").replace(/"""$/, "").trim());
                break;
            case "method_invocation": {
                const nameNode = node.childForFieldName("name");
                const objNode = node.childForFieldName("object");
                if (nameNode) {
                    let target = text(nameNode);
                    if (objNode) {
                        target = `${text(objNode)}.${text(nameNode)}`;
                    }
                    callSet.add(target.split("\n")[0].trim());
                }
                break;
            }
            default:
                break;
        }
    });
    return {
        stringLiterals: Array.from(literalSet),
        regexPatterns: [],
        calls: Array.from(callSet),
    };
}
48
/**
 * Build one function record per Java method/constructor matched by
 * JAVA_FUNC_QUERY under `root`.
 *
 * A method is flagged `isAsync` when its declared return type names a
 * future-like or reactive type: any identifier containing `Future`
 * (e.g. `CompletableFuture`, `ListenableFuture`), or exactly `Mono` / `Flux`.
 * `Mono` and `Flux` are matched as whole identifiers so unrelated types such
 * as `Monolith` or `FluxCapacitor` are no longer misreported as async.
 *
 * @param {object} language - Tree-sitter language passed to `Parser.Query`.
 * @param {string} file - File path used in record ids and the `file` field.
 * @param {object} root - Root syntax node to run the query against.
 * @returns {object[]} Records with id, name, file, 1-based [start, end] lines,
 *   calls, stringLiterals, regexPatterns, isAsync and language "java".
 */
function extractJavaFunctions(language, file, root) {
    // Non-global regex: safe to reuse across iterations (no lastIndex state).
    const asyncReturnTypeRe = /\b(?:\w*Future\w*|Mono|Flux)\b/;
    const records = [];
    const query = new Parser.Query(language, JAVA_FUNC_QUERY);
    for (const match of query.matches(root)) {
        const fnNode = match.captures.find((c) => c.name === "fn")?.node;
        const nameNode = match.captures.find((c) => c.name === "name")?.node;
        if (!fnNode || !nameNode) {
            continue;
        }
        const funcName = text(nameNode);
        const bodyNode = fnNode.childForFieldName("body");
        const intel = bodyNode
            ? collectJavaBodyIntel(bodyNode)
            : { stringLiterals: [], regexPatterns: [], calls: [] };
        // Constructors have no "type" field; text() then yields "" and isAsync is false.
        const typeNode = fnNode.childForFieldName("type");
        const isAsync = asyncReturnTypeRe.test(text(typeNode));
        const startLine = fnNode.startPosition.row + 1;
        records.push({
            id: `${file}#${funcName}:${startLine}`,
            name: funcName,
            file,
            lines: [startLine, fnNode.endPosition.row + 1],
            calls: intel.calls,
            stringLiterals: intel.stringLiterals,
            regexPatterns: intel.regexPatterns,
            isAsync,
            language: "java",
        });
    }
    return records;
}
6
77
  export const JavaAdapter = {
7
78
  name: "Java Spring Boot Adapter",
8
79
  language: Java,
@@ -117,6 +188,7 @@ export const JavaAdapter = {
117
188
  });
118
189
  }
119
190
  }
120
- return { endpoints, models, components, tests };
191
+ const functions = extractJavaFunctions(this.language, file, root);
192
+ return { endpoints, models, components, tests, functions };
121
193
  }
122
194
  };
@@ -0,0 +1,358 @@
1
+ /**
2
+ * Lean4 / Mathlib adapter for guardian.
3
+ *
4
+ * Uses deterministic regex-based extraction (no tree-sitter-lean4 dependency).
5
+ * Captures: theorems, lemmas, defs, structures, classes, instances, `sorry`
6
+ * locations, tactic usage, and Mathlib import dependencies.
7
+ *
8
+ * Implements SpecGuardAdapter with `language: null` — runner.ts calls
9
+ * extract() directly without a tree-sitter parse step.
10
+ */
11
+ // ── Constants ─────────────────────────────────────────────────────────────
12
+ /**
13
+ * Well-known Lean4 tactic names. Checked as whole words in the proof body.
14
+ * Kept in alphabetical order for maintainability.
15
+ */
16
+ const KNOWN_TACTICS = [
17
+ "Abel",
18
+ "aesop",
19
+ "all_goals",
20
+ "any_goals",
21
+ "apply",
22
+ "apply?",
23
+ "assumption",
24
+ "by_cases",
25
+ "by_contra",
26
+ "calc",
27
+ "cases",
28
+ "change",
29
+ "clear",
30
+ "congr",
31
+ "constructor",
32
+ "contrapose",
33
+ "conv",
34
+ "decide",
35
+ "dsimp",
36
+ "exact",
37
+ "exact?",
38
+ "ext",
39
+ "field_simp",
40
+ "fin_cases",
41
+ "first",
42
+ "funext",
43
+ "gcongr",
44
+ "group",
45
+ "have",
46
+ "induction",
47
+ "interval_cases",
48
+ "intro",
49
+ "intros",
50
+ "linarith",
51
+ "linear_combination",
52
+ "module_cast",
53
+ "native_decide",
54
+ "nlinarith",
55
+ "norm_cast",
56
+ "norm_num",
57
+ "norm_num?",
58
+ "nth_rw",
59
+ "obtain",
60
+ "omega",
61
+ "polyrith",
62
+ "positivity",
63
+ "push_cast",
64
+ "push_neg",
65
+ "rcases",
66
+ "refine",
67
+ "rename",
68
+ "repeat",
69
+ "revert",
70
+ "rfl",
71
+ "ring",
72
+ "rw",
73
+ "rw?",
74
+ "set",
75
+ "show",
76
+ "simp",
77
+ "simp?",
78
+ "skip",
79
+ "split",
80
+ "suffices",
81
+ "swap",
82
+ "symm",
83
+ "tauto",
84
+ "trans",
85
+ "trivial",
86
+ "try",
87
+ "unfold",
88
+ "use",
89
+ ];
90
+ // ── Regex patterns ────────────────────────────────────────────────────────
91
/**
 * Matches theorem/lemma/def/abbrev/example declarations (including
 * noncomputable variants), optionally preceded by `@[...]` attributes and a
 * `private`/`protected` modifier.
 * Group 1: keyword (e.g. "theorem", "noncomputable def")
 * Group 2: declaration name (absent for anonymous declarations such as `example`)
 *
 * The `\b` after the keyword is required: without it, any line that merely
 * starts with a keyword prefix (`default := 0`, `defeq ...`) is reported as an
 * anonymous declaration.
 */
const THEOREM_RE = /^(?:[ \t]*(?:@\[[^\]]*\][ \t]*\n?[ \t]*)*)(?:private[ \t]+|protected[ \t]+)?(?:(noncomputable[ \t]+def|noncomputable[ \t]+abbrev|theorem|lemma|def|abbrev|example))\b(?:[ \t]+([^\s(:{\[]+))?/gm;
/**
 * Matches structure/class/inductive/instance/mutual declarations.
 * Group 1: keyword, Group 2: name (optional for anonymous instances)
 * The keyword must end at a word boundary (so `classical` is not a `class`).
 */
const STRUCT_RE = /^(?:[ \t]*(?:@\[[^\]]*\][ \t]*\n?[ \t]*)*)(?:private[ \t]+|protected[ \t]+)?(structure|class|inductive|instance|mutual)\b(?:[ \t]+([^\s(:{\[]+))?/gm;
/** Matches import statements at the start of a line. Group 1: module path */
const IMPORT_RE = /^import[ \t]+([\w.]+)/gm;
/** Matches namespace declarations at the start of a line. Group 1: namespace name */
const NS_OPEN_RE = /^namespace[ \t]+([\w.]+)/gm;
/** Matches `end <name>` at the start of a line. Group 1: the name being closed */
const NS_END_RE = /^end[ \t]+([\w.]+)/gm;
/** `sorry` as a standalone word */
const SORRY_RE = /\bsorry\b/g;
/** `:=` with optional trailing whitespace — used in proof body and statement extraction */
const ASSIGN_RE = /:=\s*/g;
// All of the above are global (stateful) regexes: reset `lastIndex` before each scan.
112
+ // ── Helpers ───────────────────────────────────────────────────────────────
113
/**
 * Precompute where every line of `source` begins.
 * Entry k is the character offset at which line k+1 starts; entry 0 is always 0
 * and one entry is appended for each "\n" in the text.
 *
 * @param {string} source - Full file text.
 * @returns {number[]} Ascending line-start offsets.
 */
function buildLineIndex(source) {
    const lineStarts = [0];
    let newlineAt = source.indexOf("\n");
    while (newlineAt !== -1) {
        lineStarts.push(newlineAt + 1);
        newlineAt = source.indexOf("\n", newlineAt + 1);
    }
    return lineStarts;
}
125
/**
 * Binary-search the line-start table for the line containing `offset`.
 *
 * @param {number[]} lineIndex - Ascending line-start offsets (see buildLineIndex).
 * @param {number} offset - Character offset into the source.
 * @returns {number} 1-based line number; 1 when `offset` precedes every later line start.
 */
function lineOfFast(lineIndex, offset) {
    // Find the first slot in [1, length) whose line start lies beyond `offset`;
    // that slot's position equals the 1-based number of the containing line.
    let first = 1;
    let last = lineIndex.length;
    while (first < last) {
        const probe = first + ((last - first) >> 1);
        if (lineIndex[probe] <= offset) {
            first = probe + 1;
        }
        else {
            last = probe;
        }
    }
    return first;
}
137
/**
 * List the module path of every `import` line in `source`, in file order.
 *
 * @param {string} source - Full file text.
 * @returns {string[]} Captured module paths (group 1 of IMPORT_RE).
 */
function extractImports(source) {
    const modules = [];
    // IMPORT_RE is global, so rewind it before scanning.
    IMPORT_RE.lastIndex = 0;
    for (let hit = IMPORT_RE.exec(source); hit !== null; hit = IMPORT_RE.exec(source)) {
        modules.push(hit[1]);
    }
    return modules;
}
147
/**
 * Scan `source` for `namespace X` and `end X` lines and return them as one
 * offset-ordered timeline, ready to be consumed by activeNamespaceAtFast().
 *
 * @param {string} source - Full file text.
 * @returns {{idx: number, name: string, kind: "open" | "end"}[]} Events sorted
 *   by ascending character offset.
 */
function buildNsEvents(source) {
    const scan = (pattern, kind) => {
        const found = [];
        // The patterns are global regexes; rewind before each scan.
        pattern.lastIndex = 0;
        for (let hit = pattern.exec(source); hit !== null; hit = pattern.exec(source)) {
            found.push({ idx: hit.index, name: hit[1], kind });
        }
        return found;
    };
    const timeline = [...scan(NS_OPEN_RE, "open"), ...scan(NS_END_RE, "end")];
    timeline.sort((left, right) => left.idx - right.idx);
    return timeline;
}
165
/**
 * Report the dotted namespace path that is open at `offset`.
 *
 * Replays every not-yet-consumed event located strictly before `offset` onto
 * `stack`, then joins the stack with ".". `cursor` and `stack` are mutated and
 * must be reused across calls made with non-decreasing offsets, so each event
 * is processed at most once per cursor.
 *
 * @param {{idx: number, name: string, kind: string}[]} events - Offset-sorted events.
 * @param {number} offset - Character offset of the declaration being resolved.
 * @param {{i: number}} cursor - Index of the next unprocessed event (mutated).
 * @param {string[]} stack - Currently open namespace names (mutated).
 * @returns {string} Open namespaces joined by ".", or "" when none are open.
 */
function activeNamespaceAtFast(events, offset, cursor, stack) {
    for (; cursor.i < events.length; cursor.i += 1) {
        const event = events[cursor.i];
        if (!(event.idx < offset)) {
            break;
        }
        if (event.kind === "open") {
            stack.push(event.name);
            continue;
        }
        // An `end X` closes the innermost open X; unmatched ends are ignored.
        const openAt = stack.lastIndexOf(event.name);
        if (openAt >= 0) {
            stack.splice(openAt, 1);
        }
    }
    return stack.join(".");
}
186
/**
 * Extract the proof/definition body that follows a declaration's `:=`.
 *
 * The search starts at `startOffset` (normally just past the declaration name).
 * The body runs from the first character after `:=` (and any whitespace) up to
 * the next line that begins a new top-level command, capped at 4000 characters
 * to limit memory use.
 *
 * Returns "" when there is no `:=`, or when a new top-level command begins
 * before the first `:=` — in that case the `:=` belongs to a later declaration
 * (e.g. the current one is a pattern-matching `def` without `:=`).
 *
 * A line is treated as a new top-level command when it starts at column 0 with
 * a declaration keyword, a `private`/`protected` modifier, an `@[...]`
 * attribute, a `/--` doc comment, a `#` command, or a scoping command
 * (`namespace`, `section`, `end`, `open`, `variable`, `universe`, `import`).
 *
 * @param {string} source - Full file text.
 * @param {number} startOffset - Offset at which to start looking for `:=`.
 * @returns {string} Raw body text, possibly empty.
 */
function extractProofBody(source, startOffset) {
    const maxBodyChars = 4000;
    const boundaryRe = /\n(?=@\[|\/--|(?:private|protected|theorem|lemma|def|abbrev|noncomputable|structure|class|inductive|instance|example|mutual|namespace|section|end|open|variable|universe|#|import)\b)/;
    // Local global regex: lets us start at startOffset without slicing, and
    // keeps this function independent of shared `lastIndex` state.
    const assignRe = /:=\s*/g;
    assignRe.lastIndex = startOffset;
    const assignMatch = assignRe.exec(source);
    if (!assignMatch) {
        return "";
    }
    // If another command starts between the name and this `:=`, the `:=` is not ours.
    if (boundaryRe.test(source.slice(startOffset, assignMatch.index))) {
        return "";
    }
    const bodyStart = assignMatch.index + assignMatch[0].length;
    const bodyText = source.slice(bodyStart, bodyStart + maxBodyChars);
    const stopIdx = bodyText.search(boundaryRe);
    return stopIdx >= 0 ? bodyText.slice(0, stopIdx) : bodyText;
}
204
/**
 * Single combined global regex that matches any known tactic in one pass.
 *
 * - Every tactic name is regex-escaped (so `?` in `apply?`, `exact?`, … is literal).
 * - Longer names are listed first so that `apply?` wins over its prefix `apply`.
 * - The trailing guard is `(?!\w)` rather than `\b`: a `\b` after `?` would
 *   require a word character to follow, so `apply?` at the end of a line or
 *   before whitespace could never match.
 */
const TACTIC_COMBINED_RE = (() => {
    const escapeRegExp = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const alternatives = [...KNOWN_TACTICS]
        .sort((a, b) => b.length - a.length)
        .map(escapeRegExp)
        .join("|");
    return new RegExp(`\\b(${alternatives})(?!\\w)`, "g");
})();
/**
 * Extract which known tactics appear in a proof body — single-pass scan.
 *
 * @param {string} body - Proof/definition body text.
 * @returns {string[]} Distinct tactic names found, sorted with the default string order.
 */
function extractTactics(body) {
    // The regex is global and shared: rewind before and after the scan.
    TACTIC_COMBINED_RE.lastIndex = 0;
    const found = new Set();
    let m;
    while ((m = TACTIC_COMBINED_RE.exec(body)) !== null) {
        found.add(m[1]);
    }
    TACTIC_COMBINED_RE.lastIndex = 0;
    return [...found].sort();
}
222
/**
 * Tell whether `body` contains the standalone word `sorry`.
 * SORRY_RE is a shared global regex, so its `lastIndex` is rewound both before
 * and after the probe.
 *
 * @param {string} body - Proof/definition body text.
 * @returns {boolean} True when `sorry` occurs as a whole word.
 */
function containsSorry(body) {
    SORRY_RE.lastIndex = 0;
    const firstHit = SORRY_RE.exec(body);
    SORRY_RE.lastIndex = 0;
    return firstHit !== null;
}
229
/**
 * Estimate a declaration's last line as its start line plus the number of
 * newline characters in its body.
 *
 * @param {number} startLine - 1-based line where the declaration starts.
 * @param {string} body - Extracted body text.
 * @returns {number} Estimated 1-based end line.
 */
function estimateEndLine(startLine, body) {
    let newlineCount = 0;
    for (let at = body.indexOf("\n"); at !== -1; at = body.indexOf("\n", at + 1)) {
        newlineCount += 1;
    }
    return startLine + newlineCount;
}
233
+ // ── Adapter ───────────────────────────────────────────────────────────────
234
/**
 * Regex-based adapter for Lean4 sources (`.lean`).
 *
 * `language` is null: there is no tree-sitter grammar, so `extract()` works on
 * the raw source text only and ignores its third argument.
 */
export const Lean4Adapter = {
    name: "lean4",
    language: null,
    fileExtensions: [".lean"],
    queries: {},
    /**
     * Extract declarations from one Lean4 file.
     *
     * Two passes are made over `source`:
     *  1. THEOREM_RE — every theorem/lemma/def/abbrev/example becomes a function
     *     record carrying its statement, tactics and `sorry` status.
     *  2. STRUCT_RE — every *named* structure/class/inductive/instance becomes a
     *     model plus a single-line function record.
     * Records from pass 1 precede records from pass 2 in `functions`.
     *
     * @param {string} file - File path used in record ids and `file` fields.
     * @param {string} source - Full file text.
     * @param {*} _root - Unused (no syntax tree is available for Lean4).
     * @returns {{endpoints: Array, models: Array, components: Array, tests: Array, functions: Array}}
     */
    extract(file, source, _root) {
        // Normalised keyword → record kind, for each pass.
        const declKinds = {
            theorem: "theorem",
            lemma: "lemma",
            def: "def",
            noncomputable_def: "noncomputable_def",
            abbrev: "abbrev",
            noncomputable_abbrev: "abbrev",
            example: "example",
            inductive: "inductive",
        };
        const structKinds = {
            structure: "structure",
            class: "class",
            instance: "instance",
            inductive: "inductive",
            mutual: "def",
        };
        const models = [];
        const functions = [];
        // The same array instance is attached to every record of this file.
        const mathlibDeps = extractImports(source).filter((mod) => mod.startsWith("Mathlib"));
        // ── Precomputed lookups shared by both passes ─────────────────────────
        const lineIndex = buildLineIndex(source);
        const nsEvents = buildNsEvents(source);
        // ── Pass 1: theorems / lemmas / defs ──────────────────────────────────
        const declCursor = { i: 0 };
        const declStack = [];
        THEOREM_RE.lastIndex = 0;
        for (let hit = THEOREM_RE.exec(source); hit !== null; hit = THEOREM_RE.exec(source)) {
            const [matched, keyword, declName] = hit;
            const rawKind = keyword?.trim().replace(/\s+/g, "_") ?? "def";
            const name = declName ?? "(anonymous)";
            const offset = hit.index;
            const startLine = lineOfFast(lineIndex, offset);
            const namespace = activeNamespaceAtFast(nsEvents, offset, declCursor, declStack);
            // Statement = text between the end of the match and the next `:=`.
            const matchEnd = offset + matched.length;
            ASSIGN_RE.lastIndex = matchEnd;
            const assignHit = ASSIGN_RE.exec(source);
            let statement = "";
            if (assignHit) {
                statement = source.slice(matchEnd, assignHit.index);
            }
            statement = statement.trim().slice(0, 500);
            const body = extractProofBody(source, matchEnd);
            const hasSorry = containsSorry(body);
            const tactics = extractTactics(body);
            const endLine = estimateEndLine(startLine, body);
            // Domain concepts are mirrored into stringLiterals so the generic
            // literal index can surface them without Lean-specific knowledge.
            const stringLiterals = [];
            if (hasSorry) {
                stringLiterals.push("sorry");
            }
            for (const tactic of tactics) {
                stringLiterals.push(`tactic:${tactic}`);
            }
            functions.push({
                id: `${file}#${name}:${startLine}`,
                name,
                file,
                lines: [startLine, endLine],
                calls: [],
                stringLiterals,
                regexPatterns: [],
                isAsync: false,
                language: "lean4",
                kind: declKinds[rawKind] ?? "def",
                namespace,
                statement,
                hasSorry,
                tactics,
                mathlibDeps,
            });
        }
        // ── Pass 2: structures / classes / instances ──────────────────────────
        // A fresh cursor/stack is needed: this pass restarts from offset 0.
        const structCursor = { i: 0 };
        const structStack = [];
        STRUCT_RE.lastIndex = 0;
        for (let hit = STRUCT_RE.exec(source); hit !== null; hit = STRUCT_RE.exec(source)) {
            const structKind = hit[1];
            const name = hit[2];
            // Anonymous declarations (e.g. unnamed instances) are not recorded.
            if (name) {
                models.push({
                    name,
                    file,
                    framework: structKind,
                    fields: [],
                    relationships: [],
                });
                // Also emitted as a function record so it shows up in function search.
                const startLine = lineOfFast(lineIndex, hit.index);
                functions.push({
                    id: `${file}#${name}:${startLine}`,
                    name,
                    file,
                    lines: [startLine, startLine],
                    calls: [],
                    stringLiterals: [],
                    regexPatterns: [],
                    isAsync: false,
                    language: "lean4",
                    kind: structKinds[structKind] ?? "structure",
                    namespace: activeNamespaceAtFast(nsEvents, hit.index, structCursor, structStack),
                    statement: "",
                    hasSorry: false,
                    tactics: [],
                    mathlibDeps,
                });
            }
        }
        return { endpoints: [], models, components: [], tests: [], functions };
    },
};