@optave/codegraph 3.9.4 → 3.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +3 -2
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/rules/csharp.d.ts.map +1 -1
- package/dist/ast-analysis/rules/csharp.js +8 -1
- package/dist/ast-analysis/rules/csharp.js.map +1 -1
- package/dist/ast-analysis/rules/go.d.ts.map +1 -1
- package/dist/ast-analysis/rules/go.js +4 -1
- package/dist/ast-analysis/rules/go.js.map +1 -1
- package/dist/ast-analysis/rules/index.d.ts +6 -0
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +151 -4
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/ast-analysis/rules/java.d.ts.map +1 -1
- package/dist/ast-analysis/rules/java.js +5 -1
- package/dist/ast-analysis/rules/java.js.map +1 -1
- package/dist/ast-analysis/rules/php.d.ts.map +1 -1
- package/dist/ast-analysis/rules/php.js +6 -1
- package/dist/ast-analysis/rules/php.js.map +1 -1
- package/dist/ast-analysis/rules/python.d.ts.map +1 -1
- package/dist/ast-analysis/rules/python.js +5 -1
- package/dist/ast-analysis/rules/python.js.map +1 -1
- package/dist/ast-analysis/rules/ruby.d.ts.map +1 -1
- package/dist/ast-analysis/rules/ruby.js +4 -1
- package/dist/ast-analysis/rules/ruby.js.map +1 -1
- package/dist/ast-analysis/rules/rust.d.ts.map +1 -1
- package/dist/ast-analysis/rules/rust.js +5 -1
- package/dist/ast-analysis/rules/rust.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts +2 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +129 -37
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/cli/commands/watch.d.ts.map +1 -1
- package/dist/cli/commands/watch.js +2 -0
- package/dist/cli/commands/watch.js.map +1 -1
- package/dist/cli.js +24 -1
- package/dist/cli.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +2 -0
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +13 -2
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +30 -4
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +141 -3
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +58 -26
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +54 -45
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +17 -0
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/journal.d.ts +15 -0
- package/dist/domain/graph/journal.d.ts.map +1 -1
- package/dist/domain/graph/journal.js +283 -28
- package/dist/domain/graph/journal.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts +17 -0
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +23 -7
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts +53 -4
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +278 -80
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/generator.d.ts.map +1 -1
- package/dist/domain/search/generator.js +28 -2
- package/dist/domain/search/generator.js.map +1 -1
- package/dist/domain/search/models.js +1 -1
- package/dist/domain/wasm-worker-entry.d.ts +24 -0
- package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
- package/dist/domain/wasm-worker-entry.js +644 -0
- package/dist/domain/wasm-worker-entry.js.map +1 -0
- package/dist/domain/wasm-worker-pool.d.ts +59 -0
- package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
- package/dist/domain/wasm-worker-pool.js +312 -0
- package/dist/domain/wasm-worker-pool.js.map +1 -0
- package/dist/domain/wasm-worker-protocol.d.ts +65 -0
- package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
- package/dist/domain/wasm-worker-protocol.js +13 -0
- package/dist/domain/wasm-worker-protocol.js.map +1 -0
- package/dist/extractors/javascript.js +146 -2
- package/dist/extractors/javascript.js.map +1 -1
- package/dist/features/ast.d.ts.map +1 -1
- package/dist/features/ast.js +11 -9
- package/dist/features/ast.js.map +1 -1
- package/dist/features/boundaries.d.ts +2 -2
- package/dist/features/boundaries.d.ts.map +1 -1
- package/dist/features/boundaries.js +2 -31
- package/dist/features/boundaries.js.map +1 -1
- package/dist/features/snapshot.d.ts.map +1 -1
- package/dist/features/snapshot.js +99 -13
- package/dist/features/snapshot.js.map +1 -1
- package/dist/graph/algorithms/louvain.d.ts.map +1 -1
- package/dist/graph/algorithms/louvain.js +2 -4
- package/dist/graph/algorithms/louvain.js.map +1 -1
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +12 -2
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/shared/globs.d.ts +40 -0
- package/dist/shared/globs.d.ts.map +1 -0
- package/dist/shared/globs.js +126 -0
- package/dist/shared/globs.js.map +1 -0
- package/dist/types.d.ts +26 -1
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/package.json +7 -7
- package/src/ast-analysis/engine.ts +11 -1
- package/src/ast-analysis/rules/csharp.ts +8 -1
- package/src/ast-analysis/rules/go.ts +4 -1
- package/src/ast-analysis/rules/index.ts +181 -4
- package/src/ast-analysis/rules/java.ts +5 -1
- package/src/ast-analysis/rules/php.ts +6 -1
- package/src/ast-analysis/rules/python.ts +5 -1
- package/src/ast-analysis/rules/ruby.ts +4 -1
- package/src/ast-analysis/rules/rust.ts +5 -1
- package/src/ast-analysis/visitors/ast-store-visitor.ts +129 -34
- package/src/cli/commands/watch.ts +2 -0
- package/src/cli.ts +31 -8
- package/src/domain/graph/builder/context.ts +2 -0
- package/src/domain/graph/builder/helpers.ts +53 -3
- package/src/domain/graph/builder/pipeline.ts +162 -3
- package/src/domain/graph/builder/stages/collect-files.ts +56 -26
- package/src/domain/graph/builder/stages/detect-changes.ts +57 -49
- package/src/domain/graph/builder/stages/finalize.ts +16 -0
- package/src/domain/graph/journal.ts +284 -27
- package/src/domain/graph/watcher.ts +29 -9
- package/src/domain/parser.ts +288 -73
- package/src/domain/search/generator.ts +34 -2
- package/src/domain/search/models.ts +1 -1
- package/src/domain/wasm-worker-entry.ts +798 -0
- package/src/domain/wasm-worker-pool.ts +330 -0
- package/src/domain/wasm-worker-protocol.ts +81 -0
- package/src/extractors/javascript.ts +149 -2
- package/src/features/ast.ts +22 -9
- package/src/features/boundaries.ts +2 -27
- package/src/features/snapshot.ts +93 -14
- package/src/graph/algorithms/louvain.ts +2 -4
- package/src/infrastructure/config.ts +12 -2
- package/src/shared/globs.ts +121 -0
- package/src/types.ts +26 -1
|
@@ -5,9 +5,42 @@ import type {
|
|
|
5
5
|
Visitor,
|
|
6
6
|
VisitorContext,
|
|
7
7
|
} from '../../types.js';
|
|
8
|
+
import type { AstStringConfig } from '../rules/index.js';
|
|
8
9
|
|
|
9
10
|
const TEXT_MAX = 200;
|
|
10
11
|
|
|
12
|
+
// ── Cross-language node-type constants (mirror Rust `helpers.rs`) ────────
|
|
13
|
+
const IDENT_TYPES = new Set<string>([
|
|
14
|
+
'identifier',
|
|
15
|
+
'type_identifier',
|
|
16
|
+
'name',
|
|
17
|
+
'qualified_name',
|
|
18
|
+
'scoped_identifier',
|
|
19
|
+
'qualified_identifier',
|
|
20
|
+
'member_expression',
|
|
21
|
+
'member_access_expression',
|
|
22
|
+
'field_expression',
|
|
23
|
+
'attribute',
|
|
24
|
+
'scoped_type_identifier',
|
|
25
|
+
]);
|
|
26
|
+
|
|
27
|
+
const CALL_TYPES = new Set<string>([
|
|
28
|
+
'call_expression',
|
|
29
|
+
'call',
|
|
30
|
+
'invocation_expression',
|
|
31
|
+
'method_invocation',
|
|
32
|
+
'function_call_expression',
|
|
33
|
+
'member_call_expression',
|
|
34
|
+
'scoped_call_expression',
|
|
35
|
+
]);
|
|
36
|
+
|
|
37
|
+
const DEFAULT_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"`', stringPrefixes: '' };
|
|
38
|
+
|
|
39
|
+
// Keyword tokens skipped when extracting the inner expression text of a
|
|
40
|
+
// throw/raise/await/new node. Module-level constant avoids reallocating on
|
|
41
|
+
// every call (can be hot in large files).
|
|
42
|
+
const CHILD_EXPR_SKIP_KEYWORDS = new Set<string>(['throw', 'raise', 'await', 'new']);
|
|
43
|
+
|
|
11
44
|
interface AstStoreRow {
|
|
12
45
|
file: string;
|
|
13
46
|
line: number;
|
|
@@ -20,69 +53,122 @@ interface AstStoreRow {
|
|
|
20
53
|
|
|
21
54
|
function truncate(s: string | null | undefined, max: number = TEXT_MAX): string | null {
|
|
22
55
|
if (!s) return null;
|
|
23
|
-
return s.length <= max ? s : `${s.slice(0, max - 1)}
|
|
56
|
+
return s.length <= max ? s : `${s.slice(0, max - 1)}…`;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function trimLeadingChars(s: string, chars: string): string {
|
|
60
|
+
if (!chars) return s;
|
|
61
|
+
let i = 0;
|
|
62
|
+
while (i < s.length && chars.includes(s[i]!)) i++;
|
|
63
|
+
return i === 0 ? s : s.slice(i);
|
|
24
64
|
}
|
|
25
65
|
|
|
26
|
-
function
|
|
66
|
+
function trimTrailingChars(s: string, chars: string): string {
|
|
67
|
+
if (!chars) return s;
|
|
68
|
+
let i = s.length;
|
|
69
|
+
while (i > 0 && chars.includes(s[i - 1]!)) i--;
|
|
70
|
+
return i === s.length ? s : s.slice(0, i);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/** Extract constructor name from a `new_expression` / `object_creation_expression`. */
|
|
74
|
+
function extractConstructorName(node: TreeSitterNode): string {
|
|
75
|
+
for (const field of ['type', 'class', 'constructor']) {
|
|
76
|
+
const f = node.childForFieldName(field);
|
|
77
|
+
if (f?.text) return f.text;
|
|
78
|
+
}
|
|
27
79
|
for (let i = 0; i < node.childCount; i++) {
|
|
28
80
|
const child = node.child(i);
|
|
29
81
|
if (!child) continue;
|
|
30
|
-
if (child.type
|
|
31
|
-
|
|
82
|
+
if (IDENT_TYPES.has(child.type)) return child.text;
|
|
83
|
+
}
|
|
84
|
+
const raw = node.text || '';
|
|
85
|
+
const beforeParen = raw.split('(')[0] || raw;
|
|
86
|
+
return beforeParen.replace(/^new\s+/, '').trim() || '?';
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/** Extract function name from a call node. */
|
|
90
|
+
function extractCallName(node: TreeSitterNode): string {
|
|
91
|
+
for (const field of ['function', 'method', 'name']) {
|
|
92
|
+
const f = node.childForFieldName(field);
|
|
93
|
+
if (f?.text) return f.text;
|
|
32
94
|
}
|
|
33
|
-
|
|
95
|
+
const text = node.text || '';
|
|
96
|
+
return text.split('(')[0] || '?';
|
|
34
97
|
}
|
|
35
98
|
|
|
36
|
-
|
|
99
|
+
/** Extract name from a throw/raise statement — matches native `extract_throw_target`. */
|
|
100
|
+
function extractThrowName(node: TreeSitterNode, newTypes: Set<string>): string {
|
|
37
101
|
for (let i = 0; i < node.childCount; i++) {
|
|
38
102
|
const child = node.child(i);
|
|
39
103
|
if (!child) continue;
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
104
|
+
const ck = child.type;
|
|
105
|
+
if (newTypes.has(ck)) return extractConstructorName(child);
|
|
106
|
+
if (CALL_TYPES.has(ck)) return extractCallName(child);
|
|
107
|
+
if (IDENT_TYPES.has(ck)) return child.text;
|
|
43
108
|
}
|
|
44
|
-
return truncate(node.text);
|
|
109
|
+
return truncate(node.text) ?? node.text ?? '';
|
|
45
110
|
}
|
|
46
111
|
|
|
47
|
-
/** Extract
|
|
48
|
-
function
|
|
112
|
+
/** Extract name from an await expression — matches native `extract_awaited_name`. */
|
|
113
|
+
function extractAwaitName(node: TreeSitterNode): string {
|
|
49
114
|
for (let i = 0; i < node.childCount; i++) {
|
|
50
115
|
const child = node.child(i);
|
|
51
116
|
if (!child) continue;
|
|
52
|
-
|
|
53
|
-
if (
|
|
54
|
-
|
|
55
|
-
return fn ? fn.text : child.text?.split('(')[0] || '?';
|
|
56
|
-
}
|
|
57
|
-
if (child.type === 'identifier') return child.text;
|
|
117
|
+
const ck = child.type;
|
|
118
|
+
if (CALL_TYPES.has(ck)) return extractCallName(child);
|
|
119
|
+
if (IDENT_TYPES.has(ck)) return child.text;
|
|
58
120
|
}
|
|
59
|
-
return truncate(node.text);
|
|
121
|
+
return truncate(node.text) ?? node.text ?? '';
|
|
60
122
|
}
|
|
61
123
|
|
|
62
|
-
/** Extract the
|
|
63
|
-
function
|
|
124
|
+
/** Extract text of the expression inside a throw/await, skipping the keyword. */
|
|
125
|
+
function extractChildExpressionText(node: TreeSitterNode): string | null {
|
|
64
126
|
for (let i = 0; i < node.childCount; i++) {
|
|
65
127
|
const child = node.child(i);
|
|
66
128
|
if (!child) continue;
|
|
67
|
-
if (child.type
|
|
68
|
-
const fn = child.childForFieldName('function');
|
|
69
|
-
return fn ? fn.text : child.text?.split('(')[0] || '?';
|
|
70
|
-
}
|
|
71
|
-
if (child.type === 'identifier' || child.type === 'member_expression') {
|
|
72
|
-
return child.text;
|
|
73
|
-
}
|
|
129
|
+
if (!CHILD_EXPR_SKIP_KEYWORDS.has(child.type)) return truncate(child.text);
|
|
74
130
|
}
|
|
75
131
|
return truncate(node.text);
|
|
76
132
|
}
|
|
77
133
|
|
|
134
|
+
/**
|
|
135
|
+
* Extract string content from a string-literal node, mirroring the native
|
|
136
|
+
* engine's `build_string_node` (`helpers.rs`). Returns `null` when the
|
|
137
|
+
* content is shorter than 2 Unicode code points.
|
|
138
|
+
*/
|
|
139
|
+
function extractStringContent(node: TreeSitterNode, cfg: AstStringConfig): string | null {
|
|
140
|
+
const raw = node.text ?? '';
|
|
141
|
+
const isRawString = node.type.includes('raw_string');
|
|
142
|
+
|
|
143
|
+
let s = raw;
|
|
144
|
+
s = trimLeadingChars(s, '@');
|
|
145
|
+
s = trimLeadingChars(s, cfg.stringPrefixes);
|
|
146
|
+
if (isRawString) s = trimLeadingChars(s, 'r#');
|
|
147
|
+
s = trimLeadingChars(s, cfg.quoteChars);
|
|
148
|
+
if (isRawString) s = trimTrailingChars(s, '#');
|
|
149
|
+
s = trimTrailingChars(s, cfg.quoteChars);
|
|
150
|
+
|
|
151
|
+
// Count code points, not UTF-16 code units — matches Rust `chars().count()`.
|
|
152
|
+
const codePointCount = [...s].length;
|
|
153
|
+
if (codePointCount < 2) return null;
|
|
154
|
+
return s;
|
|
155
|
+
}
|
|
156
|
+
|
|
78
157
|
export function createAstStoreVisitor(
|
|
79
158
|
astTypeMap: Record<string, string>,
|
|
80
159
|
defs: Definition[],
|
|
81
160
|
relPath: string,
|
|
82
161
|
nodeIdMap: Map<string, number>,
|
|
162
|
+
stringConfig: AstStringConfig = DEFAULT_STRING_CONFIG,
|
|
163
|
+
stopRecurseKinds: ReadonlySet<string> = new Set(),
|
|
83
164
|
): Visitor {
|
|
84
165
|
const rows: AstStoreRow[] = [];
|
|
85
166
|
const matched = new Set<number>();
|
|
167
|
+
const newTypes = new Set<string>(
|
|
168
|
+
Object.entries(astTypeMap)
|
|
169
|
+
.filter(([, kind]) => kind === 'new')
|
|
170
|
+
.map(([type]) => type),
|
|
171
|
+
);
|
|
86
172
|
|
|
87
173
|
function findParentDef(line: number): Definition | null {
|
|
88
174
|
let best: Definition | null = null;
|
|
@@ -106,12 +192,15 @@ export function createAstStoreVisitor(
|
|
|
106
192
|
type KindHandler = (node: TreeSitterNode) => NameTextResult;
|
|
107
193
|
|
|
108
194
|
const kindHandlers: Record<string, KindHandler> = {
|
|
109
|
-
new: (node) => ({ name:
|
|
110
|
-
throw: (node) => ({
|
|
111
|
-
|
|
195
|
+
new: (node) => ({ name: extractConstructorName(node), text: truncate(node.text) }),
|
|
196
|
+
throw: (node) => ({
|
|
197
|
+
name: extractThrowName(node, newTypes),
|
|
198
|
+
text: extractChildExpressionText(node),
|
|
199
|
+
}),
|
|
200
|
+
await: (node) => ({ name: extractAwaitName(node), text: extractChildExpressionText(node) }),
|
|
112
201
|
string: (node) => {
|
|
113
|
-
const content = node
|
|
114
|
-
if (content
|
|
202
|
+
const content = extractStringContent(node, stringConfig);
|
|
203
|
+
if (content == null) return { name: null, text: null, skip: true };
|
|
115
204
|
return { name: truncate(content, 100), text: truncate(node.text) };
|
|
116
205
|
},
|
|
117
206
|
regex: (node) => ({ name: node.text || '?', text: truncate(node.text) }),
|
|
@@ -156,7 +245,13 @@ export function createAstStoreVisitor(
|
|
|
156
245
|
|
|
157
246
|
collectNode(node, kind);
|
|
158
247
|
|
|
159
|
-
|
|
248
|
+
// Mirror the native walker's recursion policy. In JS/TS, the native
|
|
249
|
+
// javascript.rs walker returns after collecting `new` or `throw` to
|
|
250
|
+
// avoid double-counting the wrapped expression (e.g. `throw new
|
|
251
|
+
// Error('x')` emits one `throw` row, not throw+new+string). Other
|
|
252
|
+
// languages go through helpers.rs::walk_ast_nodes_with_config_depth
|
|
253
|
+
// which always recurses — so `stopRecurseKinds` is empty for them.
|
|
254
|
+
if (stopRecurseKinds.has(kind)) {
|
|
160
255
|
return { skipChildren: true };
|
|
161
256
|
}
|
|
162
257
|
},
|
|
@@ -6,6 +6,7 @@ export const command: CommandDefinition = {
|
|
|
6
6
|
name: 'watch [dir]',
|
|
7
7
|
description: 'Watch project for file changes and incrementally update the graph',
|
|
8
8
|
options: [
|
|
9
|
+
['-d, --db <path>', 'Path to graph.db'],
|
|
9
10
|
['--poll', 'Use stat-based polling (default on Windows to avoid ReFS/Dev Drive crashes)'],
|
|
10
11
|
['--native', 'Force native OS file watchers instead of polling'],
|
|
11
12
|
['--poll-interval <ms>', 'Polling interval in milliseconds (default: 2000)'],
|
|
@@ -22,6 +23,7 @@ export const command: CommandDefinition = {
|
|
|
22
23
|
engine,
|
|
23
24
|
poll,
|
|
24
25
|
pollInterval: opts.pollInterval ? Number(opts.pollInterval) : undefined,
|
|
26
|
+
dbPath: opts.db ? path.resolve(opts.db) : undefined,
|
|
25
27
|
});
|
|
26
28
|
},
|
|
27
29
|
};
|
package/src/cli.ts
CHANGED
|
@@ -1,14 +1,37 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
import { run } from './cli/index.js';
|
|
4
|
+
import { disposeParsers } from './domain/parser.js';
|
|
4
5
|
import { CodegraphError, toErrorMessage } from './shared/errors.js';
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
/**
|
|
8
|
+
* After the CLI command finishes, tear down any cached WASM parsers and the
|
|
9
|
+
* worker thread pool. The WASM parse worker (see `domain/wasm-worker-pool.ts`)
|
|
10
|
+
* keeps the event loop alive until `worker.terminate()` is called, so without
|
|
11
|
+
* this teardown short-lived commands like `codegraph build` would hang for
|
|
12
|
+
* minutes before Node gives up — surfacing in CI as `spawnSync ETIMEDOUT`
|
|
13
|
+
* even though the command's work is already complete.
|
|
14
|
+
*
|
|
15
|
+
* `disposeParsers` is safe to call when the pool was never instantiated
|
|
16
|
+
* (e.g. native engine, or commands that never parse): it no-ops cleanly.
|
|
17
|
+
*/
|
|
18
|
+
async function shutdown(): Promise<void> {
|
|
19
|
+
try {
|
|
20
|
+
await disposeParsers();
|
|
21
|
+
} catch {
|
|
22
|
+
/* don't mask the real exit status over a teardown failure */
|
|
12
23
|
}
|
|
13
|
-
|
|
14
|
-
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
run()
|
|
27
|
+
.then(shutdown)
|
|
28
|
+
.catch(async (err: unknown) => {
|
|
29
|
+
if (err instanceof CodegraphError) {
|
|
30
|
+
console.error(`codegraph [${err.code}]: ${err.message}`);
|
|
31
|
+
if (err.file) console.error(` file: ${err.file}`);
|
|
32
|
+
} else {
|
|
33
|
+
console.error(`codegraph: fatal error — ${toErrorMessage(err)}`);
|
|
34
|
+
}
|
|
35
|
+
await shutdown();
|
|
36
|
+
process.exit(1);
|
|
37
|
+
});
|
|
@@ -8,7 +8,8 @@ import fs from 'node:fs';
|
|
|
8
8
|
import path from 'node:path';
|
|
9
9
|
import { purgeFilesData } from '../../../db/index.js';
|
|
10
10
|
import { warn } from '../../../infrastructure/logger.js';
|
|
11
|
-
import { EXTENSIONS, IGNORE_DIRS } from '../../../shared/constants.js';
|
|
11
|
+
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../../shared/constants.js';
|
|
12
|
+
import { compileGlobs, matchesAny } from '../../../shared/globs.js';
|
|
12
13
|
import type {
|
|
13
14
|
BetterSqlite3Database,
|
|
14
15
|
CodegraphConfig,
|
|
@@ -58,9 +59,29 @@ function shouldSkipEntry(entry: fs.Dirent, extraIgnore: Set<string> | null): boo
|
|
|
58
59
|
return false;
|
|
59
60
|
}
|
|
60
61
|
|
|
62
|
+
/**
|
|
63
|
+
* Check whether a source file passes the configured include/exclude globs.
|
|
64
|
+
*
|
|
65
|
+
* Patterns are matched against the path relative to the project root,
|
|
66
|
+
* normalized to forward slashes (e.g. `src/foo/bar.ts`). When both lists
|
|
67
|
+
* are set, a file must match at least one include and no exclude.
|
|
68
|
+
*/
|
|
69
|
+
export function passesIncludeExclude(
|
|
70
|
+
relPath: string,
|
|
71
|
+
includeRegexes: readonly RegExp[],
|
|
72
|
+
excludeRegexes: readonly RegExp[],
|
|
73
|
+
): boolean {
|
|
74
|
+
if (includeRegexes.length > 0 && !matchesAny(includeRegexes, relPath)) return false;
|
|
75
|
+
if (excludeRegexes.length > 0 && matchesAny(excludeRegexes, relPath)) return false;
|
|
76
|
+
return true;
|
|
77
|
+
}
|
|
78
|
+
|
|
61
79
|
/**
|
|
62
80
|
* Recursively collect all source files under `dir`.
|
|
63
81
|
* When `directories` is a Set, also tracks which directories contain files.
|
|
82
|
+
*
|
|
83
|
+
* The first invocation establishes `dir` as the project root against which
|
|
84
|
+
* `config.include` / `config.exclude` globs are matched.
|
|
64
85
|
*/
|
|
65
86
|
export function collectFiles(
|
|
66
87
|
dir: string,
|
|
@@ -68,6 +89,9 @@ export function collectFiles(
|
|
|
68
89
|
config: Partial<CodegraphConfig>,
|
|
69
90
|
directories: Set<string>,
|
|
70
91
|
_visited?: Set<string>,
|
|
92
|
+
_rootDir?: string,
|
|
93
|
+
_includeRegexes?: readonly RegExp[],
|
|
94
|
+
_excludeRegexes?: readonly RegExp[],
|
|
71
95
|
): { files: string[]; directories: Set<string> };
|
|
72
96
|
export function collectFiles(
|
|
73
97
|
dir: string,
|
|
@@ -75,6 +99,9 @@ export function collectFiles(
|
|
|
75
99
|
config?: Partial<CodegraphConfig>,
|
|
76
100
|
directories?: null,
|
|
77
101
|
_visited?: Set<string>,
|
|
102
|
+
_rootDir?: string,
|
|
103
|
+
_includeRegexes?: readonly RegExp[],
|
|
104
|
+
_excludeRegexes?: readonly RegExp[],
|
|
78
105
|
): string[];
|
|
79
106
|
export function collectFiles(
|
|
80
107
|
dir: string,
|
|
@@ -82,10 +109,20 @@ export function collectFiles(
|
|
|
82
109
|
config: Partial<CodegraphConfig> = {},
|
|
83
110
|
directories: Set<string> | null = null,
|
|
84
111
|
_visited: Set<string> = new Set(),
|
|
112
|
+
_rootDir?: string,
|
|
113
|
+
_includeRegexes?: readonly RegExp[],
|
|
114
|
+
_excludeRegexes?: readonly RegExp[],
|
|
85
115
|
): string[] | { files: string[]; directories: Set<string> } {
|
|
86
116
|
const trackDirs = directories instanceof Set;
|
|
87
117
|
let hasFiles = false;
|
|
88
118
|
|
|
119
|
+
// First call: compute root and compile include/exclude patterns once,
|
|
120
|
+
// then pass them down recursive calls so we don't recompile per directory.
|
|
121
|
+
const rootDir = _rootDir ?? dir;
|
|
122
|
+
const includeRegexes = _includeRegexes ?? compileGlobs(config.include);
|
|
123
|
+
const excludeRegexes = _excludeRegexes ?? compileGlobs(config.exclude);
|
|
124
|
+
const hasGlobFilters = includeRegexes.length > 0 || excludeRegexes.length > 0;
|
|
125
|
+
|
|
89
126
|
// Merge config ignoreDirs with defaults
|
|
90
127
|
const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
|
|
91
128
|
|
|
@@ -116,11 +153,24 @@ export function collectFiles(
|
|
|
116
153
|
const full = path.join(dir, entry.name);
|
|
117
154
|
if (entry.isDirectory()) {
|
|
118
155
|
if (trackDirs) {
|
|
119
|
-
collectFiles(
|
|
156
|
+
collectFiles(
|
|
157
|
+
full,
|
|
158
|
+
files,
|
|
159
|
+
config,
|
|
160
|
+
directories as Set<string>,
|
|
161
|
+
_visited,
|
|
162
|
+
rootDir,
|
|
163
|
+
includeRegexes,
|
|
164
|
+
excludeRegexes,
|
|
165
|
+
);
|
|
120
166
|
} else {
|
|
121
|
-
collectFiles(full, files, config, null, _visited);
|
|
167
|
+
collectFiles(full, files, config, null, _visited, rootDir, includeRegexes, excludeRegexes);
|
|
122
168
|
}
|
|
123
169
|
} else if (EXTENSIONS.has(path.extname(entry.name))) {
|
|
170
|
+
if (hasGlobFilters) {
|
|
171
|
+
const rel = normalizePath(path.relative(rootDir, full));
|
|
172
|
+
if (!passesIncludeExclude(rel, includeRegexes, excludeRegexes)) continue;
|
|
173
|
+
}
|
|
124
174
|
files.push(full);
|
|
125
175
|
hasFiles = true;
|
|
126
176
|
}
|
|
@@ -21,6 +21,7 @@ import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js'
|
|
|
21
21
|
import { debug, info, warn } from '../../../infrastructure/logger.js';
|
|
22
22
|
import { loadNative } from '../../../infrastructure/native.js';
|
|
23
23
|
import { semverCompare } from '../../../infrastructure/update-check.js';
|
|
24
|
+
import { normalizePath } from '../../../shared/constants.js';
|
|
24
25
|
import { toErrorMessage } from '../../../shared/errors.js';
|
|
25
26
|
import { CODEGRAPH_VERSION } from '../../../shared/version.js';
|
|
26
27
|
import type {
|
|
@@ -29,11 +30,18 @@ import type {
|
|
|
29
30
|
BuildResult,
|
|
30
31
|
Definition,
|
|
31
32
|
ExtractorOutput,
|
|
33
|
+
SqliteStatement,
|
|
32
34
|
} from '../../../types.js';
|
|
33
|
-
import {
|
|
35
|
+
import {
|
|
36
|
+
classifyNativeDrops,
|
|
37
|
+
formatDropExtensionSummary,
|
|
38
|
+
getActiveEngine,
|
|
39
|
+
getInstalledWasmExtensions,
|
|
40
|
+
parseFilesAuto,
|
|
41
|
+
} from '../../parser.js';
|
|
34
42
|
import { setWorkspaces } from '../resolve.js';
|
|
35
43
|
import { PipelineContext } from './context.js';
|
|
36
|
-
import { loadPathAliases } from './helpers.js';
|
|
44
|
+
import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
|
|
37
45
|
import { NativeDbProxy } from './native-db-proxy.js';
|
|
38
46
|
import { buildEdges } from './stages/build-edges.js';
|
|
39
47
|
import { buildStructure } from './stages/build-structure.js';
|
|
@@ -104,6 +112,21 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
|
|
|
104
112
|
}
|
|
105
113
|
}
|
|
106
114
|
|
|
115
|
+
function warnOnEmbeddingsWipe(ctx: PipelineContext): void {
|
|
116
|
+
const willBeFullBuild = !ctx.incremental || ctx.forceFullRebuild;
|
|
117
|
+
if (!willBeFullBuild) return;
|
|
118
|
+
let count = 0;
|
|
119
|
+
try {
|
|
120
|
+
count = (ctx.db.prepare('SELECT COUNT(*) AS c FROM embeddings').get() as { c: number }).c;
|
|
121
|
+
} catch {
|
|
122
|
+
return; // embeddings table missing — nothing to warn about
|
|
123
|
+
}
|
|
124
|
+
if (count === 0) return;
|
|
125
|
+
warn(
|
|
126
|
+
`Full rebuild will discard ${count} embedding${count === 1 ? '' : 's'}; re-run \`codegraph embed\` after the build.`,
|
|
127
|
+
);
|
|
128
|
+
}
|
|
129
|
+
|
|
107
130
|
function loadAliases(ctx: PipelineContext): void {
|
|
108
131
|
ctx.aliases = loadPathAliases(ctx.rootDir);
|
|
109
132
|
if (ctx.config.aliases) {
|
|
@@ -149,6 +172,7 @@ function setupPipeline(ctx: PipelineContext): void {
|
|
|
149
172
|
|
|
150
173
|
initializeEngine(ctx);
|
|
151
174
|
checkEngineSchemaMismatch(ctx);
|
|
175
|
+
warnOnEmbeddingsWipe(ctx);
|
|
152
176
|
loadAliases(ctx);
|
|
153
177
|
|
|
154
178
|
// Workspace packages (monorepo)
|
|
@@ -166,6 +190,8 @@ function formatTimingResult(ctx: PipelineContext): BuildResult {
|
|
|
166
190
|
return {
|
|
167
191
|
phases: {
|
|
168
192
|
setupMs: +(t.setupMs ?? 0).toFixed(1),
|
|
193
|
+
collectMs: +(t.collectMs ?? 0).toFixed(1),
|
|
194
|
+
detectMs: +(t.detectMs ?? 0).toFixed(1),
|
|
169
195
|
parseMs: +(t.parseMs ?? 0).toFixed(1),
|
|
170
196
|
insertMs: +(t.insertMs ?? 0).toFixed(1),
|
|
171
197
|
resolveMs: +(t.resolveMs ?? 0).toFixed(1),
|
|
@@ -540,7 +566,9 @@ function formatNativeTimingResult(
|
|
|
540
566
|
): BuildResult {
|
|
541
567
|
return {
|
|
542
568
|
phases: {
|
|
543
|
-
setupMs: +(
|
|
569
|
+
setupMs: +(p.setupMs ?? 0).toFixed(1),
|
|
570
|
+
collectMs: +(p.collectMs ?? 0).toFixed(1),
|
|
571
|
+
detectMs: +(p.detectMs ?? 0).toFixed(1),
|
|
544
572
|
parseMs: +(p.parseMs ?? 0).toFixed(1),
|
|
545
573
|
insertMs: +(p.insertMs ?? 0).toFixed(1),
|
|
546
574
|
resolveMs: +(p.resolveMs ?? 0).toFixed(1),
|
|
@@ -696,10 +724,137 @@ async function tryNativeOrchestrator(
|
|
|
696
724
|
}
|
|
697
725
|
}
|
|
698
726
|
|
|
727
|
+
// Engine parity: the native orchestrator silently drops files whose
|
|
728
|
+
// Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
|
|
729
|
+
// stale native binaries). WASM handles those — backfill via WASM so both
|
|
730
|
+
// engines process the same file set (#967).
|
|
731
|
+
//
|
|
732
|
+
// Only runs on full builds: incremental builds only touch changed files,
|
|
733
|
+
// which are parsed through parseFilesAuto (which has its own per-file
|
|
734
|
+
// backfill), so a full filesystem scan here would be wasted work.
|
|
735
|
+
if (result.isFullBuild) {
|
|
736
|
+
await backfillNativeDroppedFiles(ctx);
|
|
737
|
+
}
|
|
738
|
+
|
|
699
739
|
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
|
|
700
740
|
return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
|
|
701
741
|
}
|
|
702
742
|
|
|
743
|
+
/**
|
|
744
|
+
* Backfill files that the native orchestrator silently dropped during parse.
|
|
745
|
+
* Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
|
|
746
|
+
*/
|
|
747
|
+
async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
  // Compares the files found by file collection against the `file` rows
  // already present in the nodes table, re-parses the absent ones with the
  // WASM engine, and inserts their file/definition/export rows.

  // The inserts below need a real better-sqlite3 connection, so close the
  // native handle and reopen the database before doing anything else.
  if (ctx.nativeFirstProxy) {
    closeNativeDb(ctx, 'pre-parity-backfill');
    ctx.db = openDb(ctx.dbPath);
    ctx.nativeFirstProxy = false;
  }

  // Root-relative, normalized paths of every collected file.
  const discovery = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
  const expected = new Set(
    discovery.files.map((absPath) => normalizePath(path.relative(ctx.rootDir, absPath))),
  );

  // Paths that already have a `file` node.
  const fileRows = ctx.db
    .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
    .all() as Array<{ file: string }>;
  const alreadyIndexed = new Set<string>();
  for (const { file } of fileRows) alreadyIndexed.add(file);

  // Only files with an installed WASM grammar are candidates. Extensions
  // listed in LANGUAGE_REGISTRY whose grammar file is not shipped (e.g.
  // groovy, erlang on minimal installs) cannot be parsed by either engine,
  // so they are not a native regression and would only inflate the counts.
  const parseableExts = getInstalledWasmExtensions();
  const droppedRel = [...expected].filter(
    (rel) => !alreadyIndexed.has(rel) && parseableExts.has(path.extname(rel).toLowerCase()),
  );
  if (droppedRel.length === 0) return;
  const droppedAbs = droppedRel.map((rel) => path.join(ctx.rootDir, rel));

  // Report drops per extension and per reason rather than as a bare count
  // (#1011). `unsupported-by-native` is an expected parser limit (no Rust
  // extractor), whereas `native-extractor-failure` means the addon supports
  // the language yet still dropped the file, which points at a native bug.
  const { byReason, totals } = classifyNativeDrops(droppedRel);
  if (totals['unsupported-by-native'] > 0) {
    info(
      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`,
    );
  }
  if (totals['native-extractor-failure'] > 0) {
    warn(
      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
    );
  }

  const reparsed = await parseFilesAuto(droppedAbs, ctx.rootDir, { engine: 'wasm' });

  const nodeRows: unknown[][] = [];
  const exportedKeys: unknown[][] = [];
  for (const [file, parsed] of reparsed) {
    // File row: qualified_name stays null, as in insertDefinitionsAndExports.
    nodeRows.push([file, 'file', file, 0, null, null, null, null, null]);

    // Definitions carry qualified_name and scope, matching the JS fallback,
    // so downstream lookups (cross-file references, "go to definition") can
    // resolve them. Scope is everything before the last '.' in the name.
    for (const def of parsed.definitions ?? []) {
      const lastDot = def.name.lastIndexOf('.');
      const scope = lastDot === -1 ? null : def.name.slice(0, lastDot);
      nodeRows.push([
        def.name,
        def.kind,
        file,
        def.line,
        def.endLine ?? null,
        null,
        def.name,
        scope,
        def.visibility ?? null,
      ]);
    }

    // Exports: queue a row (INSERT OR IGNORE, so one that duplicates a
    // definition row is a no-op) and remember its key so the second pass
    // can set exported=1 for queries that filter on it (#970).
    for (const exp of parsed.exports ?? []) {
      nodeRows.push([exp.name, exp.kind, file, exp.line, null, null, exp.name, null, null]);
      exportedKeys.push([exp.name, exp.kind, file, exp.line]);
    }
  }

  const db = ctx.db as unknown as BetterSqlite3Database;
  batchInsertNodes(db, nodeRows);

  // Second pass: flag exported symbols in batches, as
  // insertDefinitionsAndExports does.
  if (exportedKeys.length === 0) return;

  const EXPORT_CHUNK = 500;
  const KEY_MATCH = '(name = ? AND kind = ? AND file = ? AND line = ?)';
  // One prepared statement per distinct chunk length.
  const stmtBySize = new Map<number, SqliteStatement>();
  for (let start = 0; start < exportedKeys.length; start += EXPORT_CHUNK) {
    const chunk = exportedKeys.slice(start, start + EXPORT_CHUNK);
    let stmt = stmtBySize.get(chunk.length);
    if (stmt === undefined) {
      const where = new Array<string>(chunk.length).fill(KEY_MATCH).join(' OR ');
      stmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${where}`);
      stmtBySize.set(chunk.length, stmt);
    }
    const params: unknown[] = [];
    for (const key of chunk) {
      params.push(key[0], key[1], key[2], key[3]);
    }
    stmt.run(...params);
  }
}
|
|
857
|
+
|
|
703
858
|
// ── Pipeline stages execution ───────────────────────────────────────────
|
|
704
859
|
|
|
705
860
|
async function runPipelineStages(ctx: PipelineContext): Promise<void> {
|
|
@@ -845,6 +1000,10 @@ export async function buildGraph(
|
|
|
845
1000
|
`Codegraph version changed (${prevVersion} → ${CODEGRAPH_VERSION}), promoting to full rebuild.`,
|
|
846
1001
|
);
|
|
847
1002
|
ctx.forceFullRebuild = true;
|
|
1003
|
+
// Re-check embeddings: the initial warnOnEmbeddingsWipe ran before
|
|
1004
|
+
// forceFullRebuild was set here, so the silent-data-loss guard
|
|
1005
|
+
// would otherwise miss this late-promotion path (#986 follow-up).
|
|
1006
|
+
warnOnEmbeddingsWipe(ctx);
|
|
848
1007
|
}
|
|
849
1008
|
}
|
|
850
1009
|
}
|