@optave/codegraph 3.9.5 → 3.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +3 -2
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/rules/csharp.d.ts.map +1 -1
- package/dist/ast-analysis/rules/csharp.js +8 -1
- package/dist/ast-analysis/rules/csharp.js.map +1 -1
- package/dist/ast-analysis/rules/go.d.ts.map +1 -1
- package/dist/ast-analysis/rules/go.js +4 -1
- package/dist/ast-analysis/rules/go.js.map +1 -1
- package/dist/ast-analysis/rules/index.d.ts +6 -0
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +151 -4
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/ast-analysis/rules/java.d.ts.map +1 -1
- package/dist/ast-analysis/rules/java.js +5 -1
- package/dist/ast-analysis/rules/java.js.map +1 -1
- package/dist/ast-analysis/rules/php.d.ts.map +1 -1
- package/dist/ast-analysis/rules/php.js +6 -1
- package/dist/ast-analysis/rules/php.js.map +1 -1
- package/dist/ast-analysis/rules/python.d.ts.map +1 -1
- package/dist/ast-analysis/rules/python.js +5 -1
- package/dist/ast-analysis/rules/python.js.map +1 -1
- package/dist/ast-analysis/rules/ruby.d.ts.map +1 -1
- package/dist/ast-analysis/rules/ruby.js +4 -1
- package/dist/ast-analysis/rules/ruby.js.map +1 -1
- package/dist/ast-analysis/rules/rust.d.ts.map +1 -1
- package/dist/ast-analysis/rules/rust.js +5 -1
- package/dist/ast-analysis/rules/rust.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts +2 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +129 -37
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +14 -2
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/parser.d.ts +40 -0
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +104 -0
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/models.js +1 -1
- package/dist/domain/wasm-worker-entry.js +3 -2
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/features/ast.d.ts.map +1 -1
- package/dist/features/ast.js +11 -9
- package/dist/features/ast.js.map +1 -1
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/package.json +7 -7
- package/src/ast-analysis/engine.ts +11 -1
- package/src/ast-analysis/rules/csharp.ts +8 -1
- package/src/ast-analysis/rules/go.ts +4 -1
- package/src/ast-analysis/rules/index.ts +181 -4
- package/src/ast-analysis/rules/java.ts +5 -1
- package/src/ast-analysis/rules/php.ts +6 -1
- package/src/ast-analysis/rules/python.ts +5 -1
- package/src/ast-analysis/rules/ruby.ts +4 -1
- package/src/ast-analysis/rules/rust.ts +5 -1
- package/src/ast-analysis/visitors/ast-store-visitor.ts +129 -34
- package/src/domain/graph/builder/pipeline.ts +24 -4
- package/src/domain/parser.ts +122 -0
- package/src/domain/search/models.ts +1 -1
- package/src/domain/wasm-worker-entry.ts +11 -1
- package/src/features/ast.ts +22 -9
|
@@ -73,10 +73,187 @@ export const DATAFLOW_RULES: Map<string, DataflowRulesConfig> = new Map([
|
|
|
73
73
|
['ruby', ruby.dataflow],
|
|
74
74
|
]);
|
|
75
75
|
|
|
76
|
-
// ─── AST Type Maps
|
|
76
|
+
// ─── AST Node Type Maps ──────────────────────────────────────────────────
|
|
77
|
+
//
|
|
78
|
+
// These mirror the per-language `LangAstConfig` constants in the native Rust
|
|
79
|
+
// engine (`crates/codegraph-core/src/extractors/helpers.rs`). WASM and native
|
|
80
|
+
// must agree on which tree-sitter node types to emit as `ast_nodes` rows.
|
|
81
|
+
// Languages without a dedicated rules/*.ts file have their maps inlined here.
|
|
82
|
+
|
|
83
|
+
const JS_AST_TYPES = javascript.astTypes as Record<string, string>;
|
|
84
|
+
const PY_AST_TYPES = python.astTypes as Record<string, string>;
|
|
85
|
+
const GO_AST_TYPES = go.astTypes as Record<string, string>;
|
|
86
|
+
const RS_AST_TYPES = rust.astTypes as Record<string, string>;
|
|
87
|
+
const JAVA_AST_TYPES = java.astTypes as Record<string, string>;
|
|
88
|
+
const CS_AST_TYPES = csharp.astTypes as Record<string, string>;
|
|
89
|
+
const RB_AST_TYPES = ruby.astTypes as Record<string, string>;
|
|
90
|
+
const PHP_AST_TYPES = php.astTypes as Record<string, string>;
|
|
91
|
+
|
|
92
|
+
const C_AST_TYPES: Record<string, string> = {
|
|
93
|
+
string_literal: 'string',
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
const CPP_AST_TYPES: Record<string, string> = {
|
|
97
|
+
new_expression: 'new',
|
|
98
|
+
throw_statement: 'throw',
|
|
99
|
+
co_await_expression: 'await',
|
|
100
|
+
string_literal: 'string',
|
|
101
|
+
raw_string_literal: 'string',
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
const KOTLIN_AST_TYPES: Record<string, string> = {
|
|
105
|
+
throw_expression: 'throw',
|
|
106
|
+
string_literal: 'string',
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
const SWIFT_AST_TYPES: Record<string, string> = {
|
|
110
|
+
throw_statement: 'throw',
|
|
111
|
+
await_expression: 'await',
|
|
112
|
+
string_literal: 'string',
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
const SCALA_AST_TYPES: Record<string, string> = {
|
|
116
|
+
object_creation_expression: 'new',
|
|
117
|
+
throw_expression: 'throw',
|
|
118
|
+
string_literal: 'string',
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
const BASH_AST_TYPES: Record<string, string> = {
|
|
122
|
+
string: 'string',
|
|
123
|
+
expansion: 'string',
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
const ELIXIR_AST_TYPES: Record<string, string> = {
|
|
127
|
+
string: 'string',
|
|
128
|
+
sigil: 'regex',
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
const LUA_AST_TYPES: Record<string, string> = {
|
|
132
|
+
string: 'string',
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
const DART_AST_TYPES: Record<string, string> = {
|
|
136
|
+
new_expression: 'new',
|
|
137
|
+
constructor_invocation: 'new',
|
|
138
|
+
throw_expression: 'throw',
|
|
139
|
+
await_expression: 'await',
|
|
140
|
+
string_literal: 'string',
|
|
141
|
+
};
|
|
142
|
+
|
|
143
|
+
const ZIG_AST_TYPES: Record<string, string> = {
|
|
144
|
+
string_literal: 'string',
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
const HASKELL_AST_TYPES: Record<string, string> = {
|
|
148
|
+
string: 'string',
|
|
149
|
+
char: 'string',
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
const OCAML_AST_TYPES: Record<string, string> = {
|
|
153
|
+
string: 'string',
|
|
154
|
+
};
|
|
77
155
|
|
|
78
156
|
export const AST_TYPE_MAPS: Map<string, Record<string, string>> = new Map([
|
|
79
|
-
['javascript',
|
|
80
|
-
['typescript',
|
|
81
|
-
['tsx',
|
|
157
|
+
['javascript', JS_AST_TYPES],
|
|
158
|
+
['typescript', JS_AST_TYPES],
|
|
159
|
+
['tsx', JS_AST_TYPES],
|
|
160
|
+
['python', PY_AST_TYPES],
|
|
161
|
+
['go', GO_AST_TYPES],
|
|
162
|
+
['rust', RS_AST_TYPES],
|
|
163
|
+
['java', JAVA_AST_TYPES],
|
|
164
|
+
['csharp', CS_AST_TYPES],
|
|
165
|
+
['ruby', RB_AST_TYPES],
|
|
166
|
+
['php', PHP_AST_TYPES],
|
|
167
|
+
['c', C_AST_TYPES],
|
|
168
|
+
['cpp', CPP_AST_TYPES],
|
|
169
|
+
['kotlin', KOTLIN_AST_TYPES],
|
|
170
|
+
['swift', SWIFT_AST_TYPES],
|
|
171
|
+
['scala', SCALA_AST_TYPES],
|
|
172
|
+
['bash', BASH_AST_TYPES],
|
|
173
|
+
['elixir', ELIXIR_AST_TYPES],
|
|
174
|
+
['lua', LUA_AST_TYPES],
|
|
175
|
+
['dart', DART_AST_TYPES],
|
|
176
|
+
['zig', ZIG_AST_TYPES],
|
|
177
|
+
['haskell', HASKELL_AST_TYPES],
|
|
178
|
+
['ocaml', OCAML_AST_TYPES],
|
|
179
|
+
['ocaml-interface', OCAML_AST_TYPES],
|
|
180
|
+
]);
|
|
181
|
+
|
|
182
|
+
// ─── Per-language string-extraction config ───────────────────────────────
|
|
183
|
+
//
|
|
184
|
+
// Mirrors `quote_chars` + `string_prefixes` in the native `LangAstConfig`.
|
|
185
|
+
// Used by the AST-store visitor to strip quote characters and language-
|
|
186
|
+
// specific prefix sigils (Python `r"..."`, C# verbatim `@"..."`, Rust raw
|
|
187
|
+
// `r#"..."#`, etc.) when computing string content for the `name` column.
|
|
188
|
+
|
|
189
|
+
export interface AstStringConfig {
|
|
190
|
+
quoteChars: string;
|
|
191
|
+
stringPrefixes: string;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
const JS_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"`', stringPrefixes: '' };
|
|
195
|
+
const PY_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: 'rbfuRBFU' };
|
|
196
|
+
const GO_STRING_CONFIG: AstStringConfig = { quoteChars: '"`', stringPrefixes: '' };
|
|
197
|
+
const RS_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
198
|
+
const JAVA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
199
|
+
const CS_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
200
|
+
const RB_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' };
|
|
201
|
+
const PHP_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' };
|
|
202
|
+
const C_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
203
|
+
const CPP_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: 'LuUR' };
|
|
204
|
+
const KOTLIN_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
205
|
+
const SWIFT_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
206
|
+
const SCALA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
207
|
+
const BASH_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' };
|
|
208
|
+
const ELIXIR_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
209
|
+
const LUA_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' };
|
|
210
|
+
const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' };
|
|
211
|
+
const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
212
|
+
const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' };
|
|
213
|
+
const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
|
|
214
|
+
|
|
215
|
+
export const AST_STRING_CONFIGS: Map<string, AstStringConfig> = new Map([
|
|
216
|
+
['javascript', JS_STRING_CONFIG],
|
|
217
|
+
['typescript', JS_STRING_CONFIG],
|
|
218
|
+
['tsx', JS_STRING_CONFIG],
|
|
219
|
+
['python', PY_STRING_CONFIG],
|
|
220
|
+
['go', GO_STRING_CONFIG],
|
|
221
|
+
['rust', RS_STRING_CONFIG],
|
|
222
|
+
['java', JAVA_STRING_CONFIG],
|
|
223
|
+
['csharp', CS_STRING_CONFIG],
|
|
224
|
+
['ruby', RB_STRING_CONFIG],
|
|
225
|
+
['php', PHP_STRING_CONFIG],
|
|
226
|
+
['c', C_STRING_CONFIG],
|
|
227
|
+
['cpp', CPP_STRING_CONFIG],
|
|
228
|
+
['kotlin', KOTLIN_STRING_CONFIG],
|
|
229
|
+
['swift', SWIFT_STRING_CONFIG],
|
|
230
|
+
['scala', SCALA_STRING_CONFIG],
|
|
231
|
+
['bash', BASH_STRING_CONFIG],
|
|
232
|
+
['elixir', ELIXIR_STRING_CONFIG],
|
|
233
|
+
['lua', LUA_STRING_CONFIG],
|
|
234
|
+
['dart', DART_STRING_CONFIG],
|
|
235
|
+
['zig', ZIG_STRING_CONFIG],
|
|
236
|
+
['haskell', HASKELL_STRING_CONFIG],
|
|
237
|
+
['ocaml', OCAML_STRING_CONFIG],
|
|
238
|
+
['ocaml-interface', OCAML_STRING_CONFIG],
|
|
82
239
|
]);
|
|
240
|
+
|
|
241
|
+
// ─── Per-language "stop-after-collect" kinds ─────────────────────────────
|
|
242
|
+
//
|
|
243
|
+
// Mirrors the subtle difference between the native JS walker
|
|
244
|
+
// (`extractors/javascript.rs::walk_ast_nodes_depth`) — which *returns* after
|
|
245
|
+
// collecting `new_expression` and `throw_statement` to avoid double-counting
|
|
246
|
+
// the wrapped expression — and the generic walker
|
|
247
|
+
// (`helpers.rs::walk_ast_nodes_with_config_depth`), which always recurses. For WASM/native parity
|
|
248
|
+
// the JS family must skip recursion on `new` and `throw`; every other
|
|
249
|
+
// language recurses normally.
|
|
250
|
+
|
|
251
|
+
/** AST kinds after which the JS-family walker does not descend into children. */
const JS_STOP_RECURSE: ReadonlySet<string> = new Set(['new', 'throw']);

/** Shared empty set returned for every language outside the JS family. */
const EMPTY_STOP_RECURSE: ReadonlySet<string> = new Set();

/**
 * Look up the AST kinds whose subtree is skipped once the node has been collected.
 *
 * @param langId - Language identifier such as `'javascript'` or `'python'`.
 * @returns The `new`/`throw` set for `javascript`, `typescript` and `tsx`;
 *   the shared empty set for any other identifier.
 */
export function astStopRecurseKinds(langId: string): ReadonlySet<string> {
  switch (langId) {
    case 'javascript':
    case 'typescript':
    case 'tsx':
      return JS_STOP_RECURSE;
    default:
      return EMPTY_STOP_RECURSE;
  }
}
|
|
@@ -174,4 +174,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({
|
|
|
174
174
|
|
|
175
175
|
// ─── AST Node Types ───────────────────────────────────────────────────────
|
|
176
176
|
|
|
177
|
-
export const astTypes: Record<string, string> | null =
|
|
177
|
+
export const astTypes: Record<string, string> | null = {
|
|
178
|
+
object_creation_expression: 'new',
|
|
179
|
+
throw_statement: 'throw',
|
|
180
|
+
string_literal: 'string',
|
|
181
|
+
};
|
|
@@ -218,4 +218,9 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({
|
|
|
218
218
|
|
|
219
219
|
// ─── AST Node Types ───────────────────────────────────────────────────────
|
|
220
220
|
|
|
221
|
-
export const astTypes: Record<string, string> | null =
|
|
221
|
+
export const astTypes: Record<string, string> | null = {
|
|
222
|
+
object_creation_expression: 'new',
|
|
223
|
+
throw_expression: 'throw',
|
|
224
|
+
string: 'string',
|
|
225
|
+
encapsed_string: 'string',
|
|
226
|
+
};
|
|
@@ -195,4 +195,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({
|
|
|
195
195
|
|
|
196
196
|
// ─── AST Node Types ───────────────────────────────────────────────────────
|
|
197
197
|
|
|
198
|
-
export const astTypes: Record<string, string> | null =
|
|
198
|
+
export const astTypes: Record<string, string> | null = {
|
|
199
|
+
raise_statement: 'throw',
|
|
200
|
+
await: 'await',
|
|
201
|
+
string: 'string',
|
|
202
|
+
};
|
|
@@ -203,4 +203,7 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({
|
|
|
203
203
|
|
|
204
204
|
// ─── AST Node Types ───────────────────────────────────────────────────────
|
|
205
205
|
|
|
206
|
-
export const astTypes: Record<string, string> | null =
|
|
206
|
+
export const astTypes: Record<string, string> | null = {
|
|
207
|
+
string: 'string',
|
|
208
|
+
regex: 'regex',
|
|
209
|
+
};
|
|
@@ -172,4 +172,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({
|
|
|
172
172
|
|
|
173
173
|
// ─── AST Node Types ───────────────────────────────────────────────────────
|
|
174
174
|
|
|
175
|
-
export const astTypes: Record<string, string> | null =
|
|
175
|
+
export const astTypes: Record<string, string> | null = {
|
|
176
|
+
await_expression: 'await',
|
|
177
|
+
string_literal: 'string',
|
|
178
|
+
raw_string_literal: 'string',
|
|
179
|
+
};
|
|
@@ -5,9 +5,42 @@ import type {
|
|
|
5
5
|
Visitor,
|
|
6
6
|
VisitorContext,
|
|
7
7
|
} from '../../types.js';
|
|
8
|
+
import type { AstStringConfig } from '../rules/index.js';
|
|
8
9
|
|
|
9
10
|
const TEXT_MAX = 200;
|
|
10
11
|
|
|
12
|
+
// ── Cross-language node-type constants (mirror Rust `helpers.rs`) ────────
|
|
13
|
+
const IDENT_TYPES = new Set<string>([
|
|
14
|
+
'identifier',
|
|
15
|
+
'type_identifier',
|
|
16
|
+
'name',
|
|
17
|
+
'qualified_name',
|
|
18
|
+
'scoped_identifier',
|
|
19
|
+
'qualified_identifier',
|
|
20
|
+
'member_expression',
|
|
21
|
+
'member_access_expression',
|
|
22
|
+
'field_expression',
|
|
23
|
+
'attribute',
|
|
24
|
+
'scoped_type_identifier',
|
|
25
|
+
]);
|
|
26
|
+
|
|
27
|
+
const CALL_TYPES = new Set<string>([
|
|
28
|
+
'call_expression',
|
|
29
|
+
'call',
|
|
30
|
+
'invocation_expression',
|
|
31
|
+
'method_invocation',
|
|
32
|
+
'function_call_expression',
|
|
33
|
+
'member_call_expression',
|
|
34
|
+
'scoped_call_expression',
|
|
35
|
+
]);
|
|
36
|
+
|
|
37
|
+
const DEFAULT_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"`', stringPrefixes: '' };
|
|
38
|
+
|
|
39
|
+
// Keyword tokens skipped when extracting the inner expression text of a
|
|
40
|
+
// throw/raise/await/new node. Module-level constant avoids reallocating on
|
|
41
|
+
// every call (can be hot in large files).
|
|
42
|
+
const CHILD_EXPR_SKIP_KEYWORDS = new Set<string>(['throw', 'raise', 'await', 'new']);
|
|
43
|
+
|
|
11
44
|
interface AstStoreRow {
|
|
12
45
|
file: string;
|
|
13
46
|
line: number;
|
|
@@ -20,69 +53,122 @@ interface AstStoreRow {
|
|
|
20
53
|
|
|
21
54
|
/**
 * Shorten `s` to at most `max` UTF-16 code units, ending with `…` when cut.
 *
 * @param s - Text to shorten; `null`, `undefined` and `''` all yield `null`.
 * @param max - Maximum length of the result, ellipsis included.
 * @returns `s` unchanged when it already fits, the shortened text otherwise,
 *   or `null` for empty input.
 */
function truncate(s: string | null | undefined, max: number = TEXT_MAX): string | null {
  if (!s) return null;
  if (s.length <= max) return s;

  // Reserve one unit for the ellipsis; never let the cut index go negative
  // (a negative `slice` end would count from the end of the string).
  let end = Math.max(0, max - 1);

  // If the last kept unit is a high surrogate, its low surrogate would be
  // chopped off and a lone surrogate would be emitted — drop it as well.
  if (end > 0) {
    const last = s.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }
  return `${s.slice(0, end)}…`;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Remove every leading character of `s` that appears in `chars`.
 *
 * @param s - Text to trim.
 * @param chars - Characters to strip, given as a string; empty means no-op.
 * @returns `s` itself when nothing is stripped, otherwise the remaining suffix.
 */
function trimLeadingChars(s: string, chars: string): string {
  if (chars.length === 0) return s;
  let start = 0;
  for (; start < s.length; start++) {
    if (!chars.includes(s.charAt(start))) break;
  }
  return start > 0 ? s.slice(start) : s;
}
|
|
25
65
|
|
|
26
|
-
function
|
|
66
|
+
/**
 * Remove every trailing character of `s` that appears in `chars`.
 *
 * @param s - Text to trim.
 * @param chars - Characters to strip, given as a string; empty means no-op.
 * @returns `s` itself when nothing is stripped, otherwise the remaining prefix.
 */
function trimTrailingChars(s: string, chars: string): string {
  if (chars.length === 0) return s;
  let end = s.length;
  for (; end > 0; end--) {
    if (!chars.includes(s.charAt(end - 1))) break;
  }
  return end < s.length ? s.slice(0, end) : s;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Resolve the constructor name of a `new_expression` /
 * `object_creation_expression` node.
 *
 * Lookup order: the `type`, `class`, `constructor` fields (first with
 * non-empty text), then the first direct child whose type is in
 * `IDENT_TYPES`, then the node text before the first `(` with a leading
 * `new ` removed. Yields `'?'` when nothing usable remains.
 */
function extractConstructorName(node: TreeSitterNode): string {
  for (const fieldName of ['type', 'class', 'constructor']) {
    const fieldText = node.childForFieldName(fieldName)?.text;
    if (fieldText) return fieldText;
  }

  let idx = 0;
  while (idx < node.childCount) {
    const kid = node.child(idx++);
    if (kid && IDENT_TYPES.has(kid.type)) return kid.text;
  }

  // Textual fallback: keep what precedes the argument list, drop the keyword.
  const source = node.text || '';
  const head = source.split('(')[0] || source;
  const stripped = head.replace(/^new\s+/, '').trim();
  return stripped === '' ? '?' : stripped;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Resolve the callee name of a call node.
 *
 * Uses the first of the `function`, `method`, `name` fields that has
 * non-empty text; otherwise falls back to the node text before the first
 * `(`, or `'?'` when that is empty.
 */
function extractCallName(node: TreeSitterNode): string {
  for (const fieldName of ['function', 'method', 'name']) {
    const calleeText = node.childForFieldName(fieldName)?.text;
    if (calleeText) return calleeText;
  }
  const [beforeParen] = (node.text || '').split('(');
  return beforeParen || '?';
}
|
|
35
98
|
|
|
36
|
-
|
|
99
|
+
/**
 * Extract name from a throw/raise statement — matches native `extract_throw_target`.
 *
 * Scans direct children in order and names the statement after the first one
 * that is a constructor expression (type in `newTypes`), a call (`CALL_TYPES`)
 * or an identifier-like node (`IDENT_TYPES`). Falls back to the truncated
 * node text, or `''` when the node has no text.
 */
function extractThrowName(node: TreeSitterNode, newTypes: Set<string>): string {
  let idx = 0;
  while (idx < node.childCount) {
    const operand = node.child(idx++);
    if (!operand) continue;
    const kind = operand.type;
    if (newTypes.has(kind)) return extractConstructorName(operand);
    if (CALL_TYPES.has(kind)) return extractCallName(operand);
    if (IDENT_TYPES.has(kind)) return operand.text;
  }
  const fallback = truncate(node.text);
  return fallback ?? node.text ?? '';
}
|
|
46
111
|
|
|
47
|
-
/** Extract
|
|
48
|
-
function
|
|
112
|
+
/**
 * Extract name from an await expression — matches native `extract_awaited_name`.
 *
 * Names the expression after its first direct child that is a call
 * (`CALL_TYPES`) or an identifier-like node (`IDENT_TYPES`). Falls back to
 * the truncated node text, or `''` when the node has no text.
 */
function extractAwaitName(node: TreeSitterNode): string {
  let idx = 0;
  while (idx < node.childCount) {
    const awaited = node.child(idx++);
    if (!awaited) continue;
    const kind = awaited.type;
    if (CALL_TYPES.has(kind)) return extractCallName(awaited);
    if (IDENT_TYPES.has(kind)) return awaited.text;
  }
  const fallback = truncate(node.text);
  return fallback ?? node.text ?? '';
}
|
|
61
123
|
|
|
62
|
-
/** Extract the
|
|
63
|
-
function
|
|
124
|
+
/**
 * Extract text of the expression inside a throw/await, skipping the keyword.
 *
 * Returns the truncated text of the first direct child whose type is not in
 * `CHILD_EXPR_SKIP_KEYWORDS`; when every child is skipped (or there are
 * none), returns the truncated text of the node itself.
 */
function extractChildExpressionText(node: TreeSitterNode): string | null {
  let idx = 0;
  while (idx < node.childCount) {
    const candidate = node.child(idx++);
    if (candidate && !CHILD_EXPR_SKIP_KEYWORDS.has(candidate.type)) {
      return truncate(candidate.text);
    }
  }
  return truncate(node.text);
}
|
|
77
133
|
|
|
134
|
+
/**
 * Extract string content from a string-literal node, mirroring the native
 * engine's `build_string_node` (`helpers.rs`).
 *
 * Stripping order (leading side first, then trailing side):
 *  1. leading `@`,
 *  2. leading characters from `cfg.stringPrefixes`,
 *  3. for node types containing `raw_string`: leading `r` / `#`,
 *  4. leading quote characters from `cfg.quoteChars`,
 *  5. for raw strings: trailing `#`,
 *  6. trailing quote characters.
 *
 * @param node - String-literal node; only `text` and `type` are read.
 * @param cfg - Quote and prefix characters for the node's language.
 * @returns The stripped content, or `null` when it is shorter than 2 Unicode
 *   code points.
 */
function extractStringContent(node: TreeSitterNode, cfg: AstStringConfig): string | null {
  const isRawString = node.type.includes('raw_string');

  let s = node.text ?? '';
  s = trimLeadingChars(s, '@');
  s = trimLeadingChars(s, cfg.stringPrefixes);
  if (isRawString) s = trimLeadingChars(s, 'r#');
  s = trimLeadingChars(s, cfg.quoteChars);
  if (isRawString) s = trimTrailingChars(s, '#');
  s = trimTrailingChars(s, cfg.quoteChars);

  // Count code points, not UTF-16 code units — matches Rust `chars().count()`.
  // No need to materialise every code point: a code point spans at most two
  // units, so three or more units always hold at least two code points, and
  // exactly two units hold a single code point only when they form one
  // surrogate pair (i.e. the first code point is above U+FFFF).
  if (s.length < 2) return null;
  if (s.length === 2 && (s.codePointAt(0) ?? 0) > 0xffff) return null;
  return s;
}
|
|
156
|
+
|
|
78
157
|
export function createAstStoreVisitor(
|
|
79
158
|
astTypeMap: Record<string, string>,
|
|
80
159
|
defs: Definition[],
|
|
81
160
|
relPath: string,
|
|
82
161
|
nodeIdMap: Map<string, number>,
|
|
162
|
+
stringConfig: AstStringConfig = DEFAULT_STRING_CONFIG,
|
|
163
|
+
stopRecurseKinds: ReadonlySet<string> = new Set(),
|
|
83
164
|
): Visitor {
|
|
84
165
|
const rows: AstStoreRow[] = [];
|
|
85
166
|
const matched = new Set<number>();
|
|
167
|
+
const newTypes = new Set<string>(
|
|
168
|
+
Object.entries(astTypeMap)
|
|
169
|
+
.filter(([, kind]) => kind === 'new')
|
|
170
|
+
.map(([type]) => type),
|
|
171
|
+
);
|
|
86
172
|
|
|
87
173
|
function findParentDef(line: number): Definition | null {
|
|
88
174
|
let best: Definition | null = null;
|
|
@@ -106,12 +192,15 @@ export function createAstStoreVisitor(
|
|
|
106
192
|
type KindHandler = (node: TreeSitterNode) => NameTextResult;
|
|
107
193
|
|
|
108
194
|
const kindHandlers: Record<string, KindHandler> = {
|
|
109
|
-
new: (node) => ({ name:
|
|
110
|
-
throw: (node) => ({
|
|
111
|
-
|
|
195
|
+
new: (node) => ({ name: extractConstructorName(node), text: truncate(node.text) }),
|
|
196
|
+
throw: (node) => ({
|
|
197
|
+
name: extractThrowName(node, newTypes),
|
|
198
|
+
text: extractChildExpressionText(node),
|
|
199
|
+
}),
|
|
200
|
+
await: (node) => ({ name: extractAwaitName(node), text: extractChildExpressionText(node) }),
|
|
112
201
|
string: (node) => {
|
|
113
|
-
const content = node
|
|
114
|
-
if (content
|
|
202
|
+
const content = extractStringContent(node, stringConfig);
|
|
203
|
+
if (content == null) return { name: null, text: null, skip: true };
|
|
115
204
|
return { name: truncate(content, 100), text: truncate(node.text) };
|
|
116
205
|
},
|
|
117
206
|
regex: (node) => ({ name: node.text || '?', text: truncate(node.text) }),
|
|
@@ -156,7 +245,13 @@ export function createAstStoreVisitor(
|
|
|
156
245
|
|
|
157
246
|
collectNode(node, kind);
|
|
158
247
|
|
|
159
|
-
|
|
248
|
+
// Mirror the native walker's recursion policy. In JS/TS, the native
|
|
249
|
+
// javascript.rs walker returns after collecting `new` or `throw` to
|
|
250
|
+
// avoid double-counting the wrapped expression (e.g. `throw new
|
|
251
|
+
// Error('x')` emits one `throw` row, not throw+new+string). Other
|
|
252
|
+
// languages go through helpers.rs::walk_ast_nodes_with_config_depth
|
|
253
|
+
// which always recurses — so `stopRecurseKinds` is empty for them.
|
|
254
|
+
if (stopRecurseKinds.has(kind)) {
|
|
160
255
|
return { skipChildren: true };
|
|
161
256
|
}
|
|
162
257
|
},
|
|
@@ -32,7 +32,13 @@ import type {
|
|
|
32
32
|
ExtractorOutput,
|
|
33
33
|
SqliteStatement,
|
|
34
34
|
} from '../../../types.js';
|
|
35
|
-
import {
|
|
35
|
+
import {
|
|
36
|
+
classifyNativeDrops,
|
|
37
|
+
formatDropExtensionSummary,
|
|
38
|
+
getActiveEngine,
|
|
39
|
+
getInstalledWasmExtensions,
|
|
40
|
+
parseFilesAuto,
|
|
41
|
+
} from '../../parser.js';
|
|
36
42
|
import { setWorkspaces } from '../resolve.js';
|
|
37
43
|
import { PipelineContext } from './context.js';
|
|
38
44
|
import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
|
|
@@ -761,18 +767,32 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
761
767
|
// minimal installs) can't be parsed by either engine, so they're not a
|
|
762
768
|
// native regression — excluding them keeps the warn count meaningful.
|
|
763
769
|
const installedExts = getInstalledWasmExtensions();
|
|
770
|
+
const missingRel: string[] = [];
|
|
764
771
|
const missingAbs: string[] = [];
|
|
765
772
|
for (const rel of expected) {
|
|
766
773
|
if (existing.has(rel)) continue;
|
|
767
774
|
const ext = path.extname(rel).toLowerCase();
|
|
768
775
|
if (!installedExts.has(ext)) continue;
|
|
776
|
+
missingRel.push(rel);
|
|
769
777
|
missingAbs.push(path.join(ctx.rootDir, rel));
|
|
770
778
|
}
|
|
771
779
|
if (missingAbs.length === 0) return;
|
|
772
780
|
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
);
|
|
781
|
+
// Classify drops so users see per-extension reasons instead of just a count
|
|
782
|
+
// (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
|
|
783
|
+
// extractor); `native-extractor-failure` indicates a real native bug since
|
|
784
|
+
// the language IS supported by the addon yet the file was dropped anyway.
|
|
785
|
+
const { byReason, totals } = classifyNativeDrops(missingRel);
|
|
786
|
+
if (totals['unsupported-by-native'] > 0) {
|
|
787
|
+
info(
|
|
788
|
+
`Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`,
|
|
789
|
+
);
|
|
790
|
+
}
|
|
791
|
+
if (totals['native-extractor-failure'] > 0) {
|
|
792
|
+
warn(
|
|
793
|
+
`Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
|
|
794
|
+
);
|
|
795
|
+
}
|
|
776
796
|
const wasmResults = await parseFilesAuto(missingAbs, ctx.rootDir, { engine: 'wasm' });
|
|
777
797
|
|
|
778
798
|
const rows: unknown[][] = [];
|
package/src/domain/parser.ts
CHANGED
|
@@ -412,6 +412,128 @@ export function getInstalledWasmExtensions(): Set<string> {
|
|
|
412
412
|
return exts;
|
|
413
413
|
}
|
|
414
414
|
|
|
415
|
+
/**
|
|
416
|
+
* Lowercase file extensions covered by the native Rust addon.
|
|
417
|
+
*
|
|
418
|
+
* Mirrors `LanguageKind::from_extension` in
|
|
419
|
+
* `crates/codegraph-core/src/parser_registry.rs`. Used to classify why the
|
|
420
|
+
* native orchestrator dropped a file: extensions outside this set are a
|
|
421
|
+
* legitimate parser limit (no Rust extractor exists), while extensions inside
|
|
422
|
+
* it indicate a real native bug (parse/read/extract failure).
|
|
423
|
+
*
|
|
424
|
+
* Keep this list in sync with the Rust enum — the native addon is a separate
|
|
425
|
+
* npm package, so JS has no runtime way to discover its language coverage.
|
|
426
|
+
*/
|
|
427
|
+
export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
|
|
428
|
+
'.js',
|
|
429
|
+
'.jsx',
|
|
430
|
+
'.mjs',
|
|
431
|
+
'.cjs',
|
|
432
|
+
'.ts',
|
|
433
|
+
'.tsx',
|
|
434
|
+
'.py',
|
|
435
|
+
'.pyi',
|
|
436
|
+
'.tf',
|
|
437
|
+
'.hcl',
|
|
438
|
+
'.go',
|
|
439
|
+
'.rs',
|
|
440
|
+
'.java',
|
|
441
|
+
'.cs',
|
|
442
|
+
'.rb',
|
|
443
|
+
'.rake',
|
|
444
|
+
'.gemspec',
|
|
445
|
+
'.php',
|
|
446
|
+
'.phtml',
|
|
447
|
+
'.c',
|
|
448
|
+
'.h',
|
|
449
|
+
'.cpp',
|
|
450
|
+
'.cc',
|
|
451
|
+
'.cxx',
|
|
452
|
+
'.hpp',
|
|
453
|
+
'.kt',
|
|
454
|
+
'.kts',
|
|
455
|
+
'.swift',
|
|
456
|
+
'.scala',
|
|
457
|
+
'.sh',
|
|
458
|
+
'.bash',
|
|
459
|
+
'.ex',
|
|
460
|
+
'.exs',
|
|
461
|
+
'.lua',
|
|
462
|
+
'.dart',
|
|
463
|
+
'.zig',
|
|
464
|
+
'.hs',
|
|
465
|
+
'.ml',
|
|
466
|
+
'.mli',
|
|
467
|
+
]);
|
|
468
|
+
|
|
469
|
+
/**
|
|
470
|
+
* Classification for a file the native orchestrator dropped.
|
|
471
|
+
* - `unsupported-by-native`: extension has no Rust extractor (legitimate parser limit).
|
|
472
|
+
* - `native-extractor-failure`: extension is supported by native but the file was
|
|
473
|
+
* still dropped — points at a real bug (read error, parse failure, extractor crash).
|
|
474
|
+
*/
|
|
475
|
+
export type NativeDropReason = 'unsupported-by-native' | 'native-extractor-failure';
|
|
476
|
+
|
|
477
|
+
export interface NativeDropClassification {
|
|
478
|
+
/** Per-reason → per-extension → list of relative paths that hit that bucket. */
|
|
479
|
+
byReason: Record<NativeDropReason, Map<string, string[]>>;
|
|
480
|
+
/** Total file count per reason. */
|
|
481
|
+
totals: Record<NativeDropReason, number>;
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
/**
 * Bucket dropped files by drop reason and then by lowercase extension, so the
 * caller can log per-extension counts together with sample paths.
 *
 * A path whose extension is in `NATIVE_SUPPORTED_EXTENSIONS` is filed under
 * `native-extractor-failure`; every other path under `unsupported-by-native`.
 * Performs no I/O.
 *
 * @param relPaths - Relative paths of the files missing from the native output.
 * @returns Per-reason extension → paths maps plus per-reason totals.
 */
export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClassification {
  const result: NativeDropClassification = {
    byReason: {
      'unsupported-by-native': new Map<string, string[]>(),
      'native-extractor-failure': new Map<string, string[]>(),
    },
    totals: {
      'unsupported-by-native': 0,
      'native-extractor-failure': 0,
    },
  };

  for (const relPath of relPaths) {
    const extension = path.extname(relPath).toLowerCase();

    let reason: NativeDropReason = 'unsupported-by-native';
    if (NATIVE_SUPPORTED_EXTENSIONS.has(extension)) reason = 'native-extractor-failure';

    const perExtension = result.byReason[reason];
    const known = perExtension.get(extension);
    if (known) {
      known.push(relPath);
    } else {
      perExtension.set(extension, [relPath]);
    }
    result.totals[reason] += 1;
  }

  return result;
}
|
|
514
|
+
|
|
515
|
+
/**
 * Render `{ ext → paths[] }` as `ext (n: sample.ext, ...)` slices for log lines.
 *
 * Caps at 3 sample paths per extension and 6 extensions total to keep warnings
 * readable when many languages are dropped at once. Extensions are sorted by
 * descending file count so the loudest offender shows up first; ties keep
 * insertion order. Files without an extension (empty-string key) are labelled
 * `(no extension)` instead of rendering a blank label.
 *
 * @param buckets - Extension → relative paths; not mutated.
 * @returns `; `-joined slices, or `''` for an empty map.
 */
export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
  const MAX_EXTS = 6;
  const MAX_SAMPLES = 3;

  // `Array.from` copies, so sorting never touches the caller's map.
  const entries = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);

  const shown = entries.slice(0, MAX_EXTS).map(([ext, paths]) => {
    const label = ext === '' ? '(no extension)' : ext;
    const sample = paths.slice(0, MAX_SAMPLES).join(', ');
    const more = paths.length > MAX_SAMPLES ? `, +${paths.length - MAX_SAMPLES} more` : '';
    return `${label} (${paths.length}: ${sample}${more})`;
  });

  if (entries.length > MAX_EXTS) {
    shown.push(`+${entries.length - MAX_EXTS} more extension(s)`);
  }
  return shown.join('; ');
}
|
|
536
|
+
|
|
415
537
|
// ── Unified API ──────────────────────────────────────────────────────────────
|
|
416
538
|
|
|
417
539
|
function resolveEngine(opts: ParseEngineOpts = {}): ResolvedEngine {
|
|
@@ -253,7 +253,7 @@ export async function embed(
|
|
|
253
253
|
}
|
|
254
254
|
|
|
255
255
|
if (texts.length > batchSize) {
|
|
256
|
-
process.
|
|
256
|
+
process.stderr.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
|
|
257
257
|
}
|
|
258
258
|
}
|
|
259
259
|
|