@danielblomma/cortex-mcp 0.4.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -42
- package/bin/cortex.mjs +36 -63
- package/bin/wsl.mjs +30 -0
- package/package.json +15 -3
- package/scaffold/.context/ontology.cypher +47 -0
- package/scaffold/.githooks/post-commit +14 -0
- package/scaffold/.githooks/post-rewrite +23 -0
- package/scaffold/mcp/package-lock.json +16 -16
- package/scaffold/mcp/package.json +4 -1
- package/scaffold/mcp/src/contextEntities.ts +311 -0
- package/scaffold/mcp/src/defaults.ts +6 -0
- package/scaffold/mcp/src/embed.ts +163 -37
- package/scaffold/mcp/src/frontmatter.ts +39 -0
- package/scaffold/mcp/src/graph.ts +253 -130
- package/scaffold/mcp/src/graphMetrics.ts +12 -0
- package/scaffold/mcp/src/impactPresentation.ts +202 -0
- package/scaffold/mcp/src/impactRanking.ts +237 -0
- package/scaffold/mcp/src/impactResponse.ts +47 -0
- package/scaffold/mcp/src/impactResults.ts +173 -0
- package/scaffold/mcp/src/impactSeed.ts +33 -0
- package/scaffold/mcp/src/impactTraversal.ts +83 -0
- package/scaffold/mcp/src/jsonl.ts +34 -0
- package/scaffold/mcp/src/loadGraph.ts +345 -86
- package/scaffold/mcp/src/paths.ts +33 -2
- package/scaffold/mcp/src/presets.ts +137 -0
- package/scaffold/mcp/src/relatedResponse.ts +30 -0
- package/scaffold/mcp/src/relatedTraversal.ts +101 -0
- package/scaffold/mcp/src/rules.ts +27 -0
- package/scaffold/mcp/src/search.ts +186 -455
- package/scaffold/mcp/src/searchCore.ts +274 -0
- package/scaffold/mcp/src/searchResults.ts +133 -0
- package/scaffold/mcp/src/server.ts +95 -3
- package/scaffold/mcp/src/types.ts +82 -3
- package/scaffold/scripts/context.sh +12 -46
- package/scaffold/scripts/dashboard.mjs +797 -0
- package/scaffold/scripts/dashboard.sh +13 -0
- package/scaffold/scripts/ingest.mjs +2227 -59
- package/scaffold/scripts/install-git-hooks.sh +3 -1
- package/scaffold/scripts/memory-compile.mjs +241 -0
- package/scaffold/scripts/memory-compile.sh +20 -0
- package/scaffold/scripts/memory-lint.mjs +384 -0
- package/scaffold/scripts/memory-lint.sh +20 -0
- package/scaffold/scripts/parsers/config.mjs +178 -0
- package/scaffold/scripts/parsers/cpp.mjs +316 -0
- package/scaffold/scripts/parsers/dotnet/VbNetParser/Program.cs +374 -0
- package/scaffold/scripts/parsers/dotnet/VbNetParser/VbNetParser.csproj +13 -0
- package/scaffold/scripts/parsers/javascript/ast.mjs +61 -0
- package/scaffold/scripts/parsers/javascript/calls.mjs +53 -0
- package/scaffold/scripts/parsers/javascript/chunks.mjs +388 -0
- package/scaffold/scripts/parsers/javascript/imports.mjs +162 -0
- package/scaffold/scripts/parsers/javascript/patterns.mjs +82 -0
- package/scaffold/scripts/parsers/javascript/scope-analysis.mjs +3 -0
- package/scaffold/scripts/parsers/javascript/scope-builder.mjs +305 -0
- package/scaffold/scripts/parsers/javascript/scope-resolver.mjs +82 -0
- package/scaffold/scripts/parsers/javascript.mjs +27 -350
- package/scaffold/scripts/parsers/resources.mjs +166 -0
- package/scaffold/scripts/parsers/rust.mjs +515 -0
- package/scaffold/scripts/parsers/sql.mjs +137 -0
- package/scaffold/scripts/parsers/vbnet.mjs +143 -0
- package/scaffold/scripts/status.sh +0 -7
- package/scaffold/scripts/watch.sh +9 -1
- package/scaffold/scripts/capture-note.sh +0 -55
- package/scaffold/scripts/plan-state-engine.cjs +0 -310
- package/scaffold/scripts/plan-state.sh +0 -71
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Regex-based Rust parser for Cortex.
|
|
4
|
+
*
|
|
5
|
+
* Extracts semantic chunks from Rust source files: functions, structs, enums,
|
|
6
|
+
* traits, impl blocks (with methods), inline modules, macro_rules! definitions,
|
|
7
|
+
* use imports, and call relationships.
|
|
8
|
+
*
|
|
9
|
+
* No external dependencies — pure regex, always available.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// Names that collectCallNames must never report as a call reference even when
// they are directly followed by "(" or "!".
const CALL_KEYWORDS = new Set([
  // Control flow that is commonly written with a following parenthesis.
  "if", "for", "while", "loop", "match", "return",
  // Rust keywords that can legally precede "(": `let (a, b) = ...`,
  // `pub(crate)`, `for x in (0..n)`, `fn(i32) -> i32`, `Self(value)`, ...
  "as", "async", "await", "break", "const", "continue", "crate", "dyn",
  "else", "enum", "extern", "false", "fn", "impl", "in", "let", "mod",
  "move", "mut", "pub", "ref", "self", "Self", "static", "struct", "super",
  "trait", "true", "type", "unsafe", "use", "where",
  // Prelude constructors and ubiquitous std types.
  "Some", "None", "Ok", "Err", "Box", "Vec", "String",
  // Formatting / output macros.
  "println", "eprintln", "format", "write", "writeln",
  // Diagnostics and assertion macros.
  "panic", "todo", "unimplemented", "unreachable",
  "assert", "assert_eq", "assert_ne", "debug_assert",
  "debug_assert_eq", "debug_assert_ne",
  // Attribute names that appear as `name(...)` inside `#[...]`.
  "cfg", "derive", "allow", "warn", "deny"
]);
|
|
21
|
+
|
|
22
|
+
// Optional visibility qualifier: `pub`, `pub(crate)`, `pub(in path)`, ...
const VIS_PREFIX = /(?:pub(?:\s*\([^)]*\))?\s+)?/;
const VIS_PREFIX_SRC = VIS_PREFIX.source;
// Start of a line followed by horizontal whitespace only (never a newline),
// so a match's index is always the first column of the declaration's line.
const LINE_START = "^[^\\S\\n]*";
// A generic parameter/argument list with at most one level of nested angle
// brackets, e.g. `<T: Into<String>>`.
const GENERICS_SRC = "<(?:[^<>]|<[^<>]*>)*>";
// Zero or more leading path segments, e.g. the `std::fmt::` in `std::fmt::Display`.
const PATH_PREFIX_SRC = "(?:[A-Za-z_]\\w*::)*";

// Function item. Qualifiers are accepted in any order (e.g. `const unsafe fn`)
// and `extern` may appear with or without an ABI string. Group 1: function name.
const FN_PATTERN = new RegExp(
  `${LINE_START}${VIS_PREFIX_SRC}(?:(?:default|const|async|unsafe)\\s+)*(?:extern\\s+(?:"[^"]*"\\s+)?)?fn\\s+([A-Za-z_]\\w*)`,
  "gm"
);

// Group 1: struct name.
const STRUCT_PATTERN = new RegExp(
  `${LINE_START}${VIS_PREFIX_SRC}struct\\s+([A-Za-z_]\\w*)`,
  "gm"
);

// Group 1: enum name.
const ENUM_PATTERN = new RegExp(
  `${LINE_START}${VIS_PREFIX_SRC}enum\\s+([A-Za-z_]\\w*)`,
  "gm"
);

// Group 1: trait name.
const TRAIT_PATTERN = new RegExp(
  `${LINE_START}${VIS_PREFIX_SRC}(?:unsafe\\s+)?trait\\s+([A-Za-z_]\\w*)`,
  "gm"
);

// impl header. Group 1 (optional): trait, including any path prefix and generic
// arguments (`fmt::Display`, `From<io::Error>`). Group 2: the final path
// segment of the implementing type (`Point` for `geometry::Point`).
const IMPL_PATTERN = new RegExp(
  `${LINE_START}(?:unsafe\\s+)?impl(?:${GENERICS_SRC})?\\s+(?:(${PATH_PREFIX_SRC}[A-Za-z_]\\w*(?:${GENERICS_SRC})?)\\s+for\\s+)?${PATH_PREFIX_SRC}([A-Za-z_]\\w*)`,
  "gm"
);

// Group 1: module name.
const MOD_PATTERN = new RegExp(
  `${LINE_START}${VIS_PREFIX_SRC}mod\\s+([A-Za-z_]\\w*)`,
  "gm"
);

// Group 1: macro name.
const MACRO_PATTERN = new RegExp(
  `${LINE_START}${VIS_PREFIX_SRC}macro_rules!\\s+([A-Za-z_]\\w*)`,
  "gm"
);

// use declaration. Group 1: the imported path or group. The capture may span
// several lines (`use a::{\n  b,\n  c\n};`); it is restricted to characters
// that occur in use paths so that prose such as "use this function ..." inside
// a comment cannot run on to an unrelated semicolon.
const USE_PATTERN = new RegExp(
  `^\\s*${VIS_PREFIX_SRC}use\\s+([\\w:{},*#$\\s]+?)\\s*;`,
  "gm"
);
|
|
62
|
+
|
|
63
|
+
/**
 * Returns the 1-based line number of the character at `index`, i.e. one plus
 * the number of "\n" characters found at positions strictly before `index`.
 *
 * @param {string} text - Text to scan.
 * @param {number} index - Exclusive end offset of the scan.
 * @returns {number} 1-based line number.
 */
function countLinesBefore(text, index) {
  let newlineCount = 0;
  let cursor = 0;
  while (cursor < index) {
    if (text[cursor] === "\n") {
      newlineCount += 1;
    }
    cursor += 1;
  }
  return newlineCount + 1;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Collapses every run of whitespace into a single space and strips leading
 * and trailing whitespace.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Single-line, single-spaced text.
 */
function normalizeWhitespace(value) {
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
76
|
+
|
|
77
|
+
/**
 * Finds the `}` that closes the `{` at `openBraceIndex`.
 *
 * Braces inside line comments, block comments (which nest in Rust), ordinary
 * string/char literals and raw strings (`r"..."`, `r#"..."#`, ...) are ignored.
 *
 * @param {string} text - Rust source text.
 * @param {number} openBraceIndex - Index of the opening brace.
 * @returns {number} Index of the matching closing brace, or -1 when
 *   `openBraceIndex` does not point at `{` or no matching brace exists.
 */
function findMatchingBrace(text, openBraceIndex) {
  if (openBraceIndex < 0 || text[openBraceIndex] !== "{") {
    return -1;
  }

  let depth = 0;
  let inSingleLineComment = false;
  // Rust block comments nest, so track a depth rather than a flag.
  let blockCommentDepth = 0;
  let inString = false;
  let stringChar = "";
  let inRawString = false;
  let rawHashCount = 0;

  for (let index = openBraceIndex; index < text.length; index += 1) {
    const current = text[index];
    const next = text[index + 1];

    if (inSingleLineComment) {
      if (current === "\n") {
        inSingleLineComment = false;
      }
      continue;
    }

    if (blockCommentDepth > 0) {
      if (current === "/" && next === "*") {
        blockCommentDepth += 1;
        index += 1;
      } else if (current === "*" && next === "/") {
        blockCommentDepth -= 1;
        index += 1;
      }
      continue;
    }

    if (inRawString) {
      // A raw string ends at a quote followed by as many hashes as opened it.
      if (current === '"') {
        let hashes = 0;
        while (hashes < rawHashCount && text[index + 1 + hashes] === "#") {
          hashes += 1;
        }
        if (hashes === rawHashCount) {
          inRawString = false;
          index += hashes;
        }
      }
      continue;
    }

    if (inString) {
      // Skip the character after a backslash so an escaped quote does not
      // terminate the literal.
      if (current === "\\" && next) {
        index += 1;
        continue;
      }
      if (current === stringChar) {
        inString = false;
        stringChar = "";
      }
      continue;
    }

    if (current === "/" && next === "/") {
      inSingleLineComment = true;
      index += 1;
      continue;
    }

    if (current === "/" && next === "*") {
      blockCommentDepth = 1;
      index += 1;
      continue;
    }

    // Rust raw strings: r"...", r#"..."#, r##"..."##, etc.
    if (current === "r" && (next === '"' || next === "#")) {
      let hashes = 0;
      let pos = index + 1;
      while (text[pos] === "#") {
        hashes += 1;
        pos += 1;
      }
      if (text[pos] === '"') {
        inRawString = true;
        rawHashCount = hashes;
        index = pos;
        continue;
      }
    }

    if (current === '"' || current === "'") {
      // A tick followed by an identifier character is either a lifetime ('a)
      // or a char literal ('x'). It is treated as a lifetime when there is no
      // later tick, when the next tick is on a later line, or when the next
      // tick is more than four characters away.
      if (current === "'" && next && /[a-zA-Z_]/.test(next)) {
        const afterIdent = text.indexOf("'", index + 2);
        const nextNewline = text.indexOf("\n", index + 1);
        if (afterIdent === -1 || (nextNewline !== -1 && afterIdent > nextNewline) || afterIdent > index + 4) {
          continue;
        }
      }
      inString = true;
      stringChar = current;
      continue;
    }

    if (current === "{") {
      depth += 1;
      continue;
    }

    if (current === "}") {
      depth -= 1;
      if (depth === 0) {
        return index;
      }
    }
  }

  return -1;
}
|
|
194
|
+
|
|
195
|
+
/**
 * Scans forward from `matchEnd` for the `{` that opens an item's body.
 *
 * Braces and semicolons inside line comments, block comments (which nest in
 * Rust), string literals and char literals are ignored. A tick that does not
 * start a char literal is treated as a lifetime or loop label and skipped, so
 * headers such as `impl<'a> Parser<'a> {` are handled.
 *
 * @param {string} code - Rust source text.
 * @param {number} matchEnd - Index at which to start scanning.
 * @returns {number} Index of the opening brace, or -1 when a `;` is reached
 *   first (declaration without a body) or the end of `code` is reached.
 */
function findOpenBraceAfterMatch(code, matchEnd) {
  let inLineComment = false;
  let blockCommentDepth = 0;
  let inString = false;

  for (let i = matchEnd; i < code.length; i += 1) {
    const ch = code[i];
    const next = code[i + 1];

    if (inLineComment) {
      if (ch === "\n") inLineComment = false;
      continue;
    }
    if (blockCommentDepth > 0) {
      if (ch === "/" && next === "*") {
        blockCommentDepth += 1;
        i += 1;
      } else if (ch === "*" && next === "/") {
        blockCommentDepth -= 1;
        i += 1;
      }
      continue;
    }
    if (inString) {
      // Skip the character after a backslash so `\"` does not end the string.
      if (ch === "\\" && next) { i += 1; continue; }
      if (ch === '"') inString = false;
      continue;
    }
    if (ch === "/" && next === "/") { inLineComment = true; i += 1; continue; }
    if (ch === "/" && next === "*") { blockCommentDepth = 1; i += 1; continue; }
    if (ch === '"') { inString = true; continue; }

    if (ch === "'") {
      if (next === "\\") {
        // Escaped char literal ('\n', '\'', '\u{41}'): the closing tick is at
        // least three characters after the opening one.
        const closingTick = code.indexOf("'", i + 3);
        if (closingTick !== -1) i = closingTick;
        continue;
      }
      // Plain char literal: exactly one code point between the ticks. Code
      // points above U+FFFF occupy two UTF-16 units.
      const codePoint = code.codePointAt(i + 1);
      const width = codePoint > 0xffff ? 2 : 1;
      if (code[i + 1 + width] === "'") {
        i += width + 1;
      }
      // Otherwise this is a lifetime or label; only the tick is skipped.
      continue;
    }

    if (ch === "{") return i;
    if (ch === ";") return -1; // Declaration without body
  }
  return -1;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Derives a one-line signature from an item's source: whitespace is
 * normalized and everything from the first `{` onwards is dropped.
 *
 * @param {string} source - Item source text, starting at its declaration.
 * @returns {string} Trimmed text preceding the first `{`, or the whole
 *   normalized text when there is no `{`.
 */
function buildSignature(source) {
  const flattened = normalizeWhitespace(source);
  const [header] = flattened.split("{", 1);
  return header.trim();
}
|
|
233
|
+
|
|
234
|
+
/**
 * Collects the distinct paths named by `use` declarations in `code`.
 *
 * Each captured path is passed through normalizeWhitespace so that imports
 * differing only in internal spacing or line breaks are reported once, on a
 * single line.
 *
 * @param {string} code - Rust source text.
 * @returns {string[]} Unique import paths in order of first appearance.
 */
function extractUseImports(code) {
  const imports = new Set();
  // USE_PATTERN is a shared global regex; reset its cursor before scanning.
  USE_PATTERN.lastIndex = 0;
  let match;
  while ((match = USE_PATTERN.exec(code)) !== null) {
    imports.add(normalizeWhitespace(match[1]));
  }
  return [...imports];
}
|
|
243
|
+
|
|
244
|
+
/**
 * Collects the names that `body` appears to call or invoke as macros.
 *
 * A reference is an identifier path (`foo`, `util::foo`) followed by `(`, or
 * followed by `!` and an opening delimiter (`(`, `[` or `{`). Requiring the
 * delimiter after `!` keeps comparisons such as `a != b` from being reported.
 * Only the last path segment is recorded. Names in CALL_KEYWORDS and the
 * chunk's own name are skipped.
 *
 * @param {string} body - Source text of the chunk.
 * @param {string} chunkName - Name of the chunk, possibly `Type::method`.
 * @returns {string[]} Unique referenced names in order of first appearance.
 */
function collectCallNames(body, chunkName) {
  const refs = new Set();
  const ownTailName = chunkName.split("::").pop() || chunkName;
  const pattern = /\b([A-Za-z_]\w*(?:::\w+)*)\s*(?:\(|!\s*[(\[{])/g;

  for (const match of body.matchAll(pattern)) {
    const name = match[1];
    const tailName = name.split("::").pop() || name;
    if (CALL_KEYWORDS.has(tailName) || tailName === ownTailName) {
      continue;
    }
    refs.add(tailName);
  }
  return [...refs];
}
|
|
263
|
+
|
|
264
|
+
/**
 * Extracts one chunk per declaration matched by `pattern`.
 *
 * A declaration with a braced body yields a chunk spanning from the start of
 * the match to the matching closing brace. A declaration without a body is
 * only kept when `kind` is "struct" and the rest of its line contains `;`
 * (e.g. `struct Foo;`), in which case the chunk is that single line.
 *
 * @param {string} code - Rust source text.
 * @param {RegExp} pattern - Global regex whose group 1 is the item name.
 * @param {string} kind - Chunk kind to record.
 * @param {string} language - Language label to record.
 * @returns {object[]} Extracted chunks in source order.
 */
function extractBlockChunks(code, pattern, kind, language) {
  const results = [];
  const collectsCalls = kind === "function";
  pattern.lastIndex = 0;

  for (let found = pattern.exec(code); found !== null; found = pattern.exec(code)) {
    const declStart = found.index;
    const name = found[1];
    const braceStart = findOpenBraceAfterMatch(code, declStart + found[0].length);

    if (braceStart !== -1) {
      const braceEnd = findMatchingBrace(code, braceStart);
      if (braceEnd === -1) {
        continue;
      }

      const body = code.slice(declStart, braceEnd + 1);
      results.push({
        name,
        kind,
        signature: buildSignature(body),
        body,
        startLine: countLinesBefore(code, declStart),
        endLine: countLinesBefore(code, Math.max(declStart, braceEnd)),
        language,
        calls: collectsCalls ? collectCallNames(body, name) : [],
        imports: []
      });
      continue;
    }

    // No body: only unit/tuple structs written on one line are kept.
    if (kind !== "struct") {
      continue;
    }
    const newlineAt = code.indexOf("\n", declStart);
    const lineStop = newlineAt === -1 ? code.length : newlineAt;
    const body = code.slice(declStart, lineStop).trimEnd();
    if (!body.includes(";")) {
      continue;
    }

    const line = countLinesBefore(code, declStart);
    results.push({
      name,
      kind,
      signature: normalizeWhitespace(body),
      body,
      startLine: line,
      endLine: line,
      language,
      calls: [],
      imports: []
    });
  }

  return results;
}
|
|
316
|
+
|
|
317
|
+
/**
 * Extracts every `impl` block and the functions declared inside it.
 *
 * Each impl yields one "impl" chunk named `Trait for Type` (or just `Type`),
 * followed by one "method" chunk per function with a body found between the
 * impl's braces, named `Type::method`.
 *
 * @param {string} code - Rust source text.
 * @param {string} language - Language label to record.
 * @param {string[]} imports - Import list attached to every method chunk.
 * @returns {object[]} Impl and method chunks in source order.
 */
function extractImplBlocks(code, language, imports) {
  const collected = [];
  IMPL_PATTERN.lastIndex = 0;

  for (
    let implMatch = IMPL_PATTERN.exec(code);
    implMatch !== null;
    implMatch = IMPL_PATTERN.exec(code)
  ) {
    const [header, traitCapture, typeName] = implMatch;
    const traitName = traitCapture || null;
    const implStart = implMatch.index;

    const blockOpen = findOpenBraceAfterMatch(code, implStart + header.length);
    if (blockOpen === -1) {
      continue;
    }
    const blockClose = findMatchingBrace(code, blockOpen);
    if (blockClose === -1) {
      continue;
    }

    // The impl block itself.
    const implSource = code.slice(implStart, blockClose + 1);
    collected.push({
      name: traitName ? `${traitName} for ${typeName}` : typeName,
      kind: "impl",
      signature: buildSignature(implSource),
      body: implSource,
      startLine: countLinesBefore(code, implStart),
      endLine: countLinesBefore(code, blockClose),
      language,
      calls: [],
      imports: []
    });

    // Functions between the impl's braces. Offsets found in `methodsSource`
    // are relative to it, so `methodsOffset` is added back for line numbers.
    const methodsOffset = blockOpen + 1;
    const methodsSource = code.slice(methodsOffset, blockClose);
    FN_PATTERN.lastIndex = 0;

    for (
      let fnMatch = FN_PATTERN.exec(methodsSource);
      fnMatch !== null;
      fnMatch = FN_PATTERN.exec(methodsSource)
    ) {
      const fnStart = fnMatch.index;
      const bodyOpen = findOpenBraceAfterMatch(methodsSource, fnStart + fnMatch[0].length);
      if (bodyOpen === -1) {
        continue;
      }
      const bodyClose = findMatchingBrace(methodsSource, bodyOpen);
      if (bodyClose === -1) {
        continue;
      }

      const qualifiedName = `${typeName}::${fnMatch[1]}`;
      const methodSource = methodsSource.slice(fnStart, bodyClose + 1);
      collected.push({
        name: qualifiedName,
        kind: "method",
        signature: buildSignature(methodSource),
        body: methodSource,
        startLine: countLinesBefore(code, methodsOffset + fnStart),
        endLine: countLinesBefore(code, methodsOffset + Math.max(fnStart, bodyClose)),
        language,
        calls: collectCallNames(methodSource, qualifiedName),
        imports
      });
    }
  }

  return collected;
}
|
|
380
|
+
|
|
381
|
+
/**
 * Extracts one "macro" chunk per `macro_rules!` definition.
 *
 * The definition body may be delimited by `{}`, `()` or `[]`. Braced bodies
 * are closed with findMatchingBrace; parenthesized and bracketed bodies use a
 * plain depth count of the delimiter pair. Definitions whose first
 * non-whitespace character after the name is not a delimiter, or whose
 * closing delimiter is not found, are skipped.
 *
 * @param {string} code - Rust source text.
 * @param {string} language - Language label to record.
 * @returns {object[]} Macro chunks in source order.
 */
function extractMacroChunks(code, language) {
  const closerFor = new Map([
    ["{", "}"],
    ["(", ")"],
    ["[", "]"]
  ]);
  const results = [];
  MACRO_PATTERN.lastIndex = 0;

  for (
    let found = MACRO_PATTERN.exec(code);
    found !== null;
    found = MACRO_PATTERN.exec(code)
  ) {
    const name = found[1];
    const declStart = found.index;
    const headerEnd = declStart + found[0].length;

    // Locate the first non-whitespace character after the macro name.
    const rest = code.slice(headerEnd);
    const trimmed = rest.trimStart();
    const opener = trimmed[0];
    const closer = closerFor.get(opener);
    if (closer === undefined) {
      continue;
    }
    const openAt = headerEnd + (rest.length - trimmed.length);

    let stop = -1;
    if (opener === "{") {
      const braceEnd = findMatchingBrace(code, openAt);
      if (braceEnd !== -1) {
        stop = braceEnd + 1;
      }
    } else {
      let nesting = 0;
      for (let cursor = openAt; cursor < code.length && stop === -1; cursor += 1) {
        const ch = code[cursor];
        if (ch === opener) {
          nesting += 1;
        } else if (ch === closer) {
          nesting -= 1;
          if (nesting === 0) {
            stop = cursor + 1;
          }
        }
      }
    }
    if (stop === -1) {
      continue;
    }

    results.push({
      name,
      kind: "macro",
      signature: `macro_rules! ${name}`,
      body: code.slice(declStart, stop),
      startLine: countLinesBefore(code, declStart),
      endLine: countLinesBefore(code, Math.max(declStart, stop - 1)),
      language,
      calls: [],
      imports: []
    });
  }

  return results;
}
|
|
443
|
+
|
|
444
|
+
/**
 * Extracts "function" chunks for every function with a body that does not lie
 * within the line range of an impl chunk (those are reported as methods by
 * extractImplBlocks).
 *
 * @param {string} code - Rust source text.
 * @param {string} language - Language label to record.
 * @param {object[]} implChunks - Chunks from extractImplBlocks; only entries
 *   with kind "impl" are consulted.
 * @param {string[]} imports - Import list attached to every function chunk.
 * @returns {object[]} Function chunks in source order.
 */
function extractTopLevelFunctions(code, language, implChunks, imports) {
  const implRanges = implChunks.filter((chunk) => chunk.kind === "impl");
  const isInsideImpl = (firstLine, lastLine) =>
    implRanges.some((impl) => firstLine >= impl.startLine && lastLine <= impl.endLine);

  const results = [];
  FN_PATTERN.lastIndex = 0;

  for (
    let found = FN_PATTERN.exec(code);
    found !== null;
    found = FN_PATTERN.exec(code)
  ) {
    const declStart = found.index;
    const bodyOpen = findOpenBraceAfterMatch(code, declStart + found[0].length);
    if (bodyOpen === -1) {
      continue;
    }
    const bodyClose = findMatchingBrace(code, bodyOpen);
    if (bodyClose === -1) {
      continue;
    }

    const startLine = countLinesBefore(code, declStart);
    const endLine = countLinesBefore(code, bodyClose);
    if (isInsideImpl(startLine, endLine)) {
      continue;
    }

    const name = found[1];
    const body = code.slice(declStart, bodyClose + 1);
    results.push({
      name,
      kind: "function",
      signature: buildSignature(body),
      body,
      startLine,
      endLine,
      language,
      calls: collectCallNames(body, name),
      imports
    });
  }

  return results;
}
|
|
481
|
+
|
|
482
|
+
/**
 * Parses Rust source into semantic chunks.
 *
 * Chunks are returned grouped by kind in this order: structs, enums, traits,
 * impl blocks (each followed by its methods), modules, macros, functions.
 * Chunks sharing the same kind, name, start line and end line are reported
 * once.
 *
 * @param {string} code - Rust source text.
 * @param {string} filePath - Path of the source file; not used by this parser.
 * @param {string} [language="rust"] - Language label stored on every chunk.
 * @returns {{chunks: object[], errors: Array}} Extracted chunks and an
 *   always-empty error list.
 */
export function parseCode(code, filePath, language = "rust") {
  const imports = extractUseImports(code);
  const impls = extractImplBlocks(code, language, imports);
  const structs = extractBlockChunks(code, STRUCT_PATTERN, "struct", language);
  const enums = extractBlockChunks(code, ENUM_PATTERN, "enum", language);
  const traits = extractBlockChunks(code, TRAIT_PATTERN, "trait", language);
  const modules = extractBlockChunks(code, MOD_PATTERN, "module", language);
  const macros = extractMacroChunks(code, language);
  const functions = extractTopLevelFunctions(code, language, impls, imports);

  const ordered = [structs, enums, traits, impls, modules, macros, functions].flat();

  // Keep the first chunk for each kind/name/line-range combination.
  const seenKeys = new Set();
  const chunks = [];
  for (const chunk of ordered) {
    const key = `${chunk.kind}|${chunk.name}|${chunk.startLine}|${chunk.endLine}`;
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      chunks.push(chunk);
    }
  }

  return { chunks, errors: [] };
}
|
|
502
|
+
|
|
503
|
+
// CLI entry point: when this module is the script passed to node, parse the
// file named by the first argument and print the result as JSON.
// The script path is converted with pathToFileURL so the comparison also holds
// for Windows paths and for paths containing characters that are
// percent-encoded in URLs (spaces, "#", ...).
if (
  process.argv[1] &&
  import.meta.url === (await import("node:url")).pathToFileURL(process.argv[1]).href
) {
  const fs = await import("node:fs");
  const filePath = process.argv[2];

  if (!filePath) {
    console.error("Usage: rust.mjs <file.rs>");
    process.exit(1);
  }

  const code = fs.readFileSync(filePath, "utf8");
  const result = parseCode(code, filePath, "rust");
  console.log(JSON.stringify(result, null, 2));
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* SQL parser for Cortex.
|
|
4
|
+
* Extracts stored procedures, views, functions, tables, and triggers as chunks.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Matches the header of a CREATE statement for a supported object type.
// Accepts the `OR ALTER` and `OR REPLACE` modifiers and an optional
// `IF NOT EXISTS` clause before the name.
// Group 1: object type keyword. Group 2: raw object name (everything up to
// the next whitespace or "(").
const SQL_OBJECT_PATTERN =
  /\bcreate\s+(?:or\s+(?:alter|replace)\s+)?(procedure|proc|view|function|table|trigger)\s+(?:if\s+not\s+exists\s+)?([^\s(]+)/gi;
|
|
9
|
+
|
|
10
|
+
// One global, case-insensitive regex per statement keyword that is followed
// by an object name. Group 1 of each regex is the raw name: an optional "#"
// or "@" prefix, then letters, digits, "_", "[", "]" and ".".
const SQL_REFERENCE_PATTERNS = [
  "exec(?:ute)?",
  "from",
  "join",
  "update",
  "insert\\s+into",
  "delete\\s+from",
  "merge\\s+into"
].map((keyword) => new RegExp(`\\b${keyword}\\s+([#@]?[A-Za-z0-9_[\\].]+)`, "gi"));

// Maps the object type keyword of a CREATE statement to the chunk kind.
const OBJECT_KIND_MAP = new Map(
  Object.entries({
    proc: "procedure",
    procedure: "procedure",
    view: "view",
    function: "function",
    table: "table",
    trigger: "trigger"
  })
);
|
|
28
|
+
|
|
29
|
+
/**
 * Returns the 1-based line number of the character at `index`, i.e. one plus
 * the number of "\n" characters found at positions strictly before `index`.
 *
 * @param {string} text - Text to scan.
 * @param {number} index - Exclusive end offset of the scan.
 * @returns {number} 1-based line number.
 */
function countLinesBefore(text, index) {
  let line = 1;
  let newlineAt = text.indexOf("\n");
  // Hop from newline to newline instead of visiting every character.
  while (newlineAt !== -1 && newlineAt < index) {
    line += 1;
    newlineAt = text.indexOf("\n", newlineAt + 1);
  }
  return line;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Canonicalizes a SQL object name for comparison.
 *
 * Steps, in order: trim; remove `;`, `"` and backticks; unwrap `[bracketed]`
 * parts; remove all whitespace; strip leading and trailing dots; collapse
 * runs of dots into one; lowercase.
 *
 * @param {string} value - Raw name as captured from SQL text.
 * @returns {string} Canonical name, or "" for a falsy input.
 */
function normalizeSqlName(value) {
  if (!value) {
    return "";
  }

  const rewrites = [
    [/[;"`]/g, ""],
    [/\[(.+?)\]/g, "$1"],
    [/\s+/g, ""],
    [/^\.+|\.+$/g, ""],
    [/\.\.+/g, "."]
  ];

  let name = value.trim();
  for (const [pattern, replacement] of rewrites) {
    name = name.replace(pattern, replacement);
  }
  return name.toLowerCase();
}
|
|
53
|
+
|
|
54
|
+
/**
 * Lists the names under which a SQL object may be referenced: its canonical
 * name and, when that name is dot-qualified, its last segment.
 *
 * @param {string} name - Raw or canonical object name.
 * @returns {string[]} Distinct aliases, canonical name first; empty when the
 *   name normalizes to "".
 */
function sqlNameAliases(name) {
  const canonical = normalizeSqlName(name);
  if (!canonical) {
    return [];
  }

  const segments = canonical.split(".").filter((segment) => segment.length > 0);
  const candidates = segments.length > 1
    ? [canonical, segments[segments.length - 1]]
    : [canonical];
  return [...new Set(candidates)];
}
|
|
67
|
+
|
|
68
|
+
/**
 * Collects the canonical names of objects that `body` references through any
 * of SQL_REFERENCE_PATTERNS.
 *
 * Names that normalize to "" or start with "@" or "#" are skipped, as is any
 * name one of whose aliases is in `selfAliases`.
 *
 * @param {string} body - SQL text of one object definition.
 * @param {Set<string>} selfAliases - Aliases of the object being defined.
 * @returns {string[]} Unique referenced names in order of first discovery
 *   (pattern by pattern).
 */
function extractReferenceNames(body, selfAliases) {
  const found = new Set();

  for (const pattern of SQL_REFERENCE_PATTERNS) {
    for (const [, rawName] of body.matchAll(pattern)) {
      const name = normalizeSqlName(rawName);
      const isVariableOrTemp = name.startsWith("@") || name.startsWith("#");
      if (!name || isVariableOrTemp) {
        continue;
      }

      const refersToSelf = sqlNameAliases(name).some((alias) => selfAliases.has(alias));
      if (!refersToSelf) {
        found.add(name);
      }
    }
  }

  return [...found];
}
|
|
90
|
+
|
|
91
|
+
/**
 * Parses SQL text into one chunk per CREATE statement matched by
 * SQL_OBJECT_PATTERN.
 *
 * Each chunk's body runs from the start of its CREATE match up to the start
 * of the next match (or the end of `code`), with trailing whitespace removed.
 * Matches whose name normalizes to "" produce no chunk.
 *
 * @param {string} code - SQL source text.
 * @param {string} filePath - Path of the source file; not used by this parser.
 * @param {string} [language="sql"] - Language label stored on every chunk.
 * @returns {{chunks: object[], errors: Array}} Extracted chunks and an
 *   always-empty error list.
 */
export function parseCode(code, filePath, language = "sql") {
  const objectMatches = Array.from(code.matchAll(SQL_OBJECT_PATTERN));
  const chunks = [];

  objectMatches.forEach((match, position) => {
    const kind = OBJECT_KIND_MAP.get((match[1] || "").toLowerCase()) || "sql_object";
    const objectName = normalizeSqlName(match[2] || "");
    if (!objectName) {
      return;
    }

    const start = match.index ?? 0;
    const following = objectMatches[position + 1];
    const end = following ? following.index ?? code.length : code.length;

    const body = code.slice(start, end).trimEnd();
    // The first line of the body is the signature; fall back to "kind name".
    const signature = body.split(/\r?\n/, 1)[0]?.trim() || `${kind} ${objectName}`;
    const selfAliases = new Set(sqlNameAliases(objectName));

    chunks.push({
      name: objectName,
      kind,
      signature,
      body,
      startLine: countLinesBefore(code, start),
      endLine: countLinesBefore(code, Math.max(start, end - 1)),
      language,
      calls: extractReferenceNames(body, selfAliases),
      imports: []
    });
  });

  return { chunks, errors: [] };
}
|
|
124
|
+
|
|
125
|
+
// CLI entry point: when this module is the script passed to node, parse the
// file named by the first argument and print the result as JSON.
// The script path is converted with pathToFileURL so the comparison also holds
// for Windows paths and for paths containing characters that are
// percent-encoded in URLs (spaces, "#", ...).
if (
  process.argv[1] &&
  import.meta.url === (await import("node:url")).pathToFileURL(process.argv[1]).href
) {
  const fs = await import("node:fs");
  const filePath = process.argv[2];

  if (!filePath) {
    console.error("Usage: sql.mjs <file.sql>");
    process.exit(1);
  }

  const code = fs.readFileSync(filePath, "utf8");
  const result = parseCode(code, filePath, "sql");
  console.log(JSON.stringify(result, null, 2));
}
|