@optave/codegraph 3.0.1 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -17
- package/package.json +5 -5
- package/src/ast.js +9 -11
- package/src/builder.js +18 -5
- package/src/complexity.js +0 -3
- package/src/dataflow.js +760 -272
- package/src/parser.js +54 -0
package/src/dataflow.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* - returns: a call's return value is captured and used in the caller
|
|
7
7
|
* - mutates: a parameter-derived value is mutated (e.g. arr.push())
|
|
8
8
|
*
|
|
9
|
-
* Opt-in via `build --dataflow`.
|
|
9
|
+
* Opt-in via `build --dataflow`. Supports all languages with DATAFLOW_RULES.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import fs from 'node:fs';
|
|
@@ -14,10 +14,91 @@ import path from 'node:path';
|
|
|
14
14
|
import { openReadonlyOrFail } from './db.js';
|
|
15
15
|
import { info } from './logger.js';
|
|
16
16
|
import { paginateResult } from './paginate.js';
|
|
17
|
+
import { LANGUAGE_REGISTRY } from './parser.js';
|
|
17
18
|
import { ALL_SYMBOL_KINDS, isTestFile, normalizeSymbol } from './queries.js';
|
|
18
19
|
|
|
19
|
-
//
|
|
20
|
-
|
|
20
|
+
// ─── Language-Specific Dataflow Rules ────────────────────────────────────
|
|
21
|
+
|
|
22
|
+
/**
 * Baseline dataflow rule set. Every language rule set is this object with
 * language-specific overrides layered on top (see makeDataflowRules).
 * The keys of this object are also the complete list of accepted override names.
 */
const DATAFLOW_DEFAULTS = {
  // Scope entry
  functionNodes: new Set(), // REQUIRED: non-empty

  // Function name extraction
  nameField: 'name',
  varAssignedFnParent: null, // parent type for `const fn = ...` (JS only)
  assignmentFnParent: null, // parent type for `x = function...` (JS only)
  pairFnParent: null, // parent type for `{ key: function }` (JS only)

  // Parameters
  paramListField: 'parameters',
  paramIdentifier: 'identifier',
  paramWrapperTypes: new Set(),
  defaultParamType: null,
  restParamType: null,
  objectDestructType: null,
  arrayDestructType: null,
  shorthandPropPattern: null,
  pairPatternType: null,
  extractParamName: null, // override: (node) => string[]

  // Return
  returnNode: null,

  // Variable declarations
  varDeclaratorNode: null,
  varDeclaratorNodes: null,
  varNameField: 'name',
  varValueField: 'value',
  assignmentNode: null,
  assignLeftField: 'left',
  assignRightField: 'right',

  // Calls
  callNode: null,
  callNodes: null,
  callFunctionField: 'function',
  callArgsField: 'arguments',
  spreadType: null,

  // Member access
  memberNode: null,
  memberObjectField: 'object',
  memberPropertyField: 'property',
  optionalChainNode: null,

  // Await
  awaitNode: null,

  // Mutation
  mutatingMethods: new Set(),
  expressionStmtNode: 'expression_statement',
  callObjectField: null, // Java: combined call+member has [object] field on call node

  // Structural wrappers
  expressionListType: null, // Go: expression_list wraps LHS/RHS of short_var_declaration
  equalsClauseType: null, // C#: equals_value_clause wraps variable initializer
  argumentWrapperType: null, // PHP: individual args wrapped in 'argument' nodes
  extraIdentifierTypes: null, // Set of additional identifier-like types (PHP: variable_name, name)
};

const DATAFLOW_RULE_KEYS = new Set(Object.keys(DATAFLOW_DEFAULTS));

/**
 * Build a complete rule set from DATAFLOW_DEFAULTS plus language overrides.
 *
 * @param {object} overrides - rule values keyed by DATAFLOW_DEFAULTS key names
 * @returns {object} a new rule object containing every default key
 * @throws {Error} if an override key is unknown, or if the resulting
 *   `functionNodes` is not a non-empty Set
 */
export function makeDataflowRules(overrides) {
  // Treat a missing argument as "no overrides" so the caller gets the
  // descriptive functionNodes error below instead of a TypeError from Object.keys.
  const supplied = overrides ?? {};

  for (const key of Object.keys(supplied)) {
    if (!DATAFLOW_RULE_KEYS.has(key)) {
      throw new Error(`Dataflow rules: unknown key "${key}"`);
    }
  }

  const rules = { ...DATAFLOW_DEFAULTS, ...supplied };

  // The spread copies Set-valued defaults by reference. Give each rule set its
  // own copy so mutating one language's Set cannot leak into the defaults or
  // into other languages. Sets supplied by the caller are kept as-is.
  for (const [key, value] of Object.entries(DATAFLOW_DEFAULTS)) {
    if (value instanceof Set && rules[key] === value) {
      rules[key] = new Set(value);
    }
  }

  if (!(rules.functionNodes instanceof Set) || rules.functionNodes.size === 0) {
    throw new Error('Dataflow rules: functionNodes must be a non-empty Set');
  }
  return rules;
}
|
|
98
|
+
|
|
99
|
+
// ── JS / TS / TSX ────────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
const JS_TS_MUTATING = new Set([
|
|
21
102
|
'push',
|
|
22
103
|
'pop',
|
|
23
104
|
'shift',
|
|
@@ -32,8 +113,325 @@ const MUTATING_METHODS = new Set([
|
|
|
32
113
|
'clear',
|
|
33
114
|
]);
|
|
34
115
|
|
|
35
|
-
|
|
36
|
-
|
|
116
|
+
// Rule set registered for the javascript, typescript and tsx language ids.
const JS_TS_DATAFLOW = makeDataflowRules({
  // Node types that open a new function scope
  functionNodes: new Set([
    'function_declaration',
    'method_definition',
    'arrow_function',
    'function_expression',
    'function',
  ]),

  // Parent node types used to name a function that has no name field of its own
  varAssignedFnParent: 'variable_declarator',
  assignmentFnParent: 'assignment_expression',
  pairFnParent: 'pair',

  // Parameter shapes
  paramWrapperTypes: new Set(['required_parameter', 'optional_parameter']),
  defaultParamType: 'assignment_pattern',
  restParamType: 'rest_pattern',
  objectDestructType: 'object_pattern',
  arrayDestructType: 'array_pattern',
  shorthandPropPattern: 'shorthand_property_identifier_pattern',
  pairPatternType: 'pair_pattern',

  // Statements and expressions
  returnNode: 'return_statement',
  varDeclaratorNode: 'variable_declarator',
  assignmentNode: 'assignment_expression',
  callNode: 'call_expression',
  spreadType: 'spread_element',
  memberNode: 'member_expression',
  optionalChainNode: 'optional_chain_expression',
  awaitNode: 'await_expression',

  // Method names treated as mutating their receiver
  mutatingMethods: JS_TS_MUTATING,
});
|
|
144
|
+
|
|
145
|
+
// ── Python ───────────────────────────────────────────────────────────────
|
|
146
|
+
|
|
147
|
+
// Rule set registered for the python language id.
const PYTHON_DATAFLOW = makeDataflowRules({
  functionNodes: new Set(['function_definition', 'lambda']),
  defaultParamType: 'default_parameter',
  restParamType: 'list_splat_pattern',
  returnNode: 'return_statement',
  varDeclaratorNode: null,
  assignmentNode: 'assignment',
  assignLeftField: 'left',
  assignRightField: 'right',
  callNode: 'call',
  callFunctionField: 'function',
  callArgsField: 'arguments',
  spreadType: 'list_splat',
  memberNode: 'attribute',
  memberObjectField: 'object',
  memberPropertyField: 'attribute',
  awaitNode: 'await',
  mutatingMethods: new Set([
    'append',
    'extend',
    'insert',
    'pop',
    'remove',
    'clear',
    'sort',
    'reverse',
    'add',
    'discard',
    'update',
  ]),
  /**
   * Resolve the declared name(s) of one Python parameter node.
   * @param {object} node - a child of the parameter list
   * @returns {string[]|null} the names, or null when this node type is not handled here
   */
  extractParamName(node) {
    const isSplat = (n) =>
      n.type === 'list_splat_pattern' || n.type === 'dictionary_splat_pattern';
    // Text of the first direct `identifier` child, wrapped in an array, or null.
    const firstIdentifier = (n) => {
      for (const c of n.namedChildren) {
        if (c.type === 'identifier') return [c.text];
      }
      return null;
    };

    // typed_parameter / typed_default_parameter: first identifier child is the name
    if (node.type === 'typed_parameter' || node.type === 'typed_default_parameter') {
      const direct = firstIdentifier(node);
      if (direct) return direct;
      // `*args: T` / `**kwargs: T`: the name is nested inside a splat-pattern
      // child rather than being a direct identifier child.
      for (const c of node.namedChildren) {
        if (isSplat(c)) return firstIdentifier(c);
      }
      return null;
    }
    if (node.type === 'default_parameter') {
      const nameNode = node.childForFieldName('name');
      return nameNode ? [nameNode.text] : null;
    }
    if (isSplat(node)) {
      return firstIdentifier(node);
    }
    return null;
  },
});
|
|
198
|
+
|
|
199
|
+
// ── Go ───────────────────────────────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
// Rule set registered for the go language id.
const GO_DATAFLOW = makeDataflowRules({
  functionNodes: new Set(['function_declaration', 'method_declaration', 'func_literal']),
  returnNode: 'return_statement',

  // Declarations and assignments both expose their sides as left / right
  varDeclaratorNodes: new Set(['short_var_declaration', 'var_declaration']),
  varNameField: 'left',
  varValueField: 'right',
  assignmentNode: 'assignment_statement',
  assignLeftField: 'left',
  assignRightField: 'right',
  expressionListType: 'expression_list',

  // Calls and member access
  callNode: 'call_expression',
  callFunctionField: 'function',
  callArgsField: 'arguments',
  memberNode: 'selector_expression',
  memberObjectField: 'operand',
  memberPropertyField: 'field',

  mutatingMethods: new Set(),

  /**
   * Resolve the declared name(s) of one Go parameter node.
   * @param {object} node - a child of the parameter list
   * @returns {string[]|null} the names, or null when none can be read here
   */
  extractParamName(node) {
    switch (node.type) {
      // Go: parameter_declaration has name(s) + type; e.g. `a, b int`
      case 'parameter_declaration': {
        const names = node.namedChildren
          .filter((child) => child.type === 'identifier')
          .map((child) => child.text);
        return names.length > 0 ? names : null;
      }
      case 'variadic_parameter_declaration': {
        const nameNode = node.childForFieldName('name');
        if (!nameNode) return null;
        return [nameNode.text];
      }
      default:
        return null;
    }
  },
});
|
|
234
|
+
|
|
235
|
+
// ── Rust ─────────────────────────────────────────────────────────────────
|
|
236
|
+
|
|
237
|
+
// Rule set registered for the rust language id.
const RUST_DATAFLOW = makeDataflowRules({
  functionNodes: new Set(['function_item', 'closure_expression']),
  returnNode: 'return_expression',

  // `let` bindings and assignments
  varDeclaratorNode: 'let_declaration',
  varNameField: 'pattern',
  varValueField: 'value',
  assignmentNode: 'assignment_expression',

  // Calls, member access, await
  callNode: 'call_expression',
  callFunctionField: 'function',
  callArgsField: 'arguments',
  memberNode: 'field_expression',
  memberObjectField: 'value',
  memberPropertyField: 'field',
  awaitNode: 'await_expression',

  mutatingMethods: new Set(['push', 'pop', 'insert', 'remove', 'clear', 'sort', 'reverse']),

  /**
   * Resolve the declared name of one Rust parameter node.
   * @param {object} node - a child of the parameter list
   * @returns {string[]|null} a one-element name list, or null when not handled here
   */
  extractParamName(node) {
    switch (node.type) {
      case 'parameter': {
        // Only a plain identifier pattern is named here; other patterns yield null.
        const pattern = node.childForFieldName('pattern');
        return pattern?.type === 'identifier' ? [pattern.text] : null;
      }
      case 'identifier':
        return [node.text];
      default:
        return null;
    }
  },
});
|
|
262
|
+
|
|
263
|
+
// ── Java ─────────────────────────────────────────────────────────────────
|
|
264
|
+
|
|
265
|
+
// Rule set registered for the java language id.
const JAVA_DATAFLOW = makeDataflowRules({
  functionNodes: new Set(['method_declaration', 'constructor_declaration', 'lambda_expression']),
  returnNode: 'return_statement',
  varDeclaratorNode: 'variable_declarator',
  assignmentNode: 'assignment_expression',

  // Calls: two node types count as calls; the callee is read from the `name` field
  callNodes: new Set(['method_invocation', 'object_creation_expression']),
  callFunctionField: 'name',
  callArgsField: 'arguments',
  callObjectField: 'object',
  argumentWrapperType: 'argument',

  // Member access
  memberNode: 'field_access',
  memberObjectField: 'object',
  memberPropertyField: 'field',

  mutatingMethods: new Set(['add', 'remove', 'clear', 'put', 'set', 'push', 'pop', 'sort']),

  /**
   * Resolve the declared name of one Java parameter node.
   * @param {object} node - a child of the parameter list
   * @returns {string[]|null} a one-element name list, or null when not handled here
   */
  extractParamName(node) {
    const kind = node.type;
    if (kind === 'identifier') return [node.text];
    if (kind !== 'formal_parameter' && kind !== 'spread_parameter') return null;

    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;
    return [nameNode.text];
  },
});
|
|
288
|
+
|
|
289
|
+
// ── C# ───────────────────────────────────────────────────────────────────
|
|
290
|
+
|
|
291
|
+
// Rule set registered for the csharp language id.
const CSHARP_DATAFLOW = makeDataflowRules({
  functionNodes: new Set([
    'method_declaration',
    'constructor_declaration',
    'lambda_expression',
    'local_function_statement',
  ]),
  returnNode: 'return_statement',

  // Declarations and assignments
  varDeclaratorNode: 'variable_declarator',
  varNameField: 'name',
  assignmentNode: 'assignment_expression',

  // Calls, member access, await
  callNode: 'invocation_expression',
  callFunctionField: 'function',
  callArgsField: 'arguments',
  argumentWrapperType: 'argument',
  memberNode: 'member_access_expression',
  memberObjectField: 'expression',
  memberPropertyField: 'name',
  awaitNode: 'await_expression',

  mutatingMethods: new Set(['Add', 'Remove', 'Clear', 'Insert', 'Sort', 'Reverse', 'Push', 'Pop']),

  /**
   * Resolve the declared name of one C# parameter node.
   * @param {object} node - a child of the parameter list
   * @returns {string[]|null} a one-element name list, or null when not handled here
   */
  extractParamName(node) {
    switch (node.type) {
      case 'parameter': {
        const nameNode = node.childForFieldName('name');
        if (!nameNode) return null;
        return [nameNode.text];
      }
      case 'identifier':
        return [node.text];
      default:
        return null;
    }
  },
});
|
|
320
|
+
|
|
321
|
+
// ── PHP ──────────────────────────────────────────────────────────────────
|
|
322
|
+
|
|
323
|
+
// Rule set registered for the php language id.
const PHP_DATAFLOW = makeDataflowRules({
  functionNodes: new Set([
    'function_definition',
    'method_declaration',
    'anonymous_function_creation_expression',
    'arrow_function',
  ]),

  // Parameters and identifier-like node types
  paramListField: 'parameters',
  paramIdentifier: 'variable_name',
  extraIdentifierTypes: new Set(['variable_name', 'name']),

  returnNode: 'return_statement',

  // No declarator node: variables are introduced by assignment
  varDeclaratorNode: null,
  assignmentNode: 'assignment_expression',
  assignLeftField: 'left',
  assignRightField: 'right',

  // Calls
  callNodes: new Set([
    'function_call_expression',
    'member_call_expression',
    'scoped_call_expression',
  ]),
  callFunctionField: 'function',
  callArgsField: 'arguments',
  argumentWrapperType: 'argument',
  spreadType: 'spread_expression',

  // Member access
  memberNode: 'member_access_expression',
  memberObjectField: 'object',
  memberPropertyField: 'name',

  mutatingMethods: new Set(['push', 'pop', 'shift', 'unshift', 'splice', 'sort', 'reverse']),

  /**
   * Resolve the declared name of one PHP parameter node.
   * @param {object} node - a child of the parameter list
   * @returns {string[]|null} a one-element name list, or null when not handled here
   */
  extractParamName(node) {
    const kind = node.type;
    if (kind === 'variable_name') return [node.text];
    // PHP: simple_parameter → $name or &$name
    if (kind !== 'simple_parameter' && kind !== 'variadic_parameter') return null;

    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;
    return [nameNode.text];
  },
});
|
|
361
|
+
|
|
362
|
+
// ── Ruby ─────────────────────────────────────────────────────────────────
|
|
363
|
+
|
|
364
|
+
// Rule set registered for the ruby language id.
const RUBY_DATAFLOW = makeDataflowRules({
  functionNodes: new Set(['method', 'singleton_method', 'lambda']),
  paramListField: 'parameters',
  returnNode: 'return',
  varDeclaratorNode: null,
  assignmentNode: 'assignment',
  assignLeftField: 'left',
  assignRightField: 'right',
  callNode: 'call',
  callFunctionField: 'method',
  callArgsField: 'arguments',
  // NOTE(review): 'splat_parameter' is a parameter-list node type; a splat at a
  // call site may parse as a different type (e.g. 'splat_argument') — confirm
  // against the Ruby grammar and against how spreadType is consumed.
  spreadType: 'splat_parameter',
  // Member access reuses the `call` node: receiver + method
  memberNode: 'call',
  memberObjectField: 'receiver',
  memberPropertyField: 'method',
  mutatingMethods: new Set([
    'push',
    'pop',
    'shift',
    'unshift',
    'delete',
    'clear',
    'sort!',
    'reverse!',
    'map!',
    'select!',
    'reject!',
    'compact!',
    'flatten!',
    'concat',
    'replace',
    'insert',
  ]),
  /**
   * Resolve the declared name of one Ruby parameter node.
   * @param {object} node - a child of the parameter list
   * @returns {string[]|null} a one-element name list, or null when the node
   *   type is not handled here or the parameter is anonymous
   */
  extractParamName(node) {
    if (node.type === 'identifier') return [node.text];
    if (
      node.type === 'optional_parameter' ||
      node.type === 'keyword_parameter' ||
      node.type === 'splat_parameter' ||
      node.type === 'hash_splat_parameter' ||
      // `&blk` is a parameter too and carries its identifier in the `name` field
      node.type === 'block_parameter'
    ) {
      const nameNode = node.childForFieldName('name');
      return nameNode ? [nameNode.text] : null;
    }
    return null;
  },
});
|
|
411
|
+
|
|
412
|
+
// ── Rules Map + Extensions Set ───────────────────────────────────────────
|
|
413
|
+
|
|
414
|
+
/** Language id → dataflow rule set. javascript, typescript and tsx share one rule object. */
export const DATAFLOW_RULES = new Map([
  ['javascript', JS_TS_DATAFLOW],
  ['typescript', JS_TS_DATAFLOW],
  ['tsx', JS_TS_DATAFLOW],
  ['python', PYTHON_DATAFLOW],
  ['go', GO_DATAFLOW],
  ['rust', RUST_DATAFLOW],
  ['java', JAVA_DATAFLOW],
  ['csharp', CSHARP_DATAFLOW],
  ['php', PHP_DATAFLOW],
  ['ruby', RUBY_DATAFLOW],
]);

/** Ids of every language that has a dataflow rule set. */
const DATAFLOW_LANG_IDS = new Set(DATAFLOW_RULES.keys());

/** Extensions of every LANGUAGE_REGISTRY entry whose id has a dataflow rule set. */
export const DATAFLOW_EXTENSIONS = new Set();
for (const entry of LANGUAGE_REGISTRY) {
  if (!DATAFLOW_RULES.has(entry.id)) continue;
  for (const extension of entry.extensions) {
    DATAFLOW_EXTENSIONS.add(extension);
  }
}
|
|
37
435
|
|
|
38
436
|
// ── AST helpers ──────────────────────────────────────────────────────────────
|
|
39
437
|
|
|
@@ -43,32 +441,27 @@ function truncate(str, max = 120) {
|
|
|
43
441
|
}
|
|
44
442
|
|
|
45
443
|
/**
|
|
46
|
-
* Get the name of a function node from the AST.
|
|
444
|
+
* Get the name of a function node from the AST using rules.
|
|
47
445
|
*/
|
|
48
|
-
function functionName(fnNode) {
|
|
446
|
+
function functionName(fnNode, rules) {
|
|
49
447
|
if (!fnNode) return null;
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
if (t === 'arrow_function' || t === 'function_expression') {
|
|
61
|
-
const parent = fnNode.parent;
|
|
62
|
-
if (parent?.type === 'variable_declarator') {
|
|
63
|
-
const nameNode = parent.childForFieldName('name');
|
|
64
|
-
return nameNode ? nameNode.text : null;
|
|
448
|
+
// Try the standard name field first (works for most languages)
|
|
449
|
+
const nameNode = fnNode.childForFieldName(rules.nameField);
|
|
450
|
+
if (nameNode) return nameNode.text;
|
|
451
|
+
|
|
452
|
+
// JS-specific: arrow_function/function_expression assigned to variable, pair, or assignment
|
|
453
|
+
const parent = fnNode.parent;
|
|
454
|
+
if (parent) {
|
|
455
|
+
if (rules.varAssignedFnParent && parent.type === rules.varAssignedFnParent) {
|
|
456
|
+
const n = parent.childForFieldName('name');
|
|
457
|
+
return n ? n.text : null;
|
|
65
458
|
}
|
|
66
|
-
if (parent
|
|
459
|
+
if (rules.pairFnParent && parent.type === rules.pairFnParent) {
|
|
67
460
|
const keyNode = parent.childForFieldName('key');
|
|
68
461
|
return keyNode ? keyNode.text : null;
|
|
69
462
|
}
|
|
70
|
-
if (parent
|
|
71
|
-
const left = parent.childForFieldName(
|
|
463
|
+
if (rules.assignmentFnParent && parent.type === rules.assignmentFnParent) {
|
|
464
|
+
const left = parent.childForFieldName(rules.assignLeftField);
|
|
72
465
|
return left ? left.text : null;
|
|
73
466
|
}
|
|
74
467
|
}
|
|
@@ -77,14 +470,13 @@ function functionName(fnNode) {
|
|
|
77
470
|
|
|
78
471
|
/**
|
|
79
472
|
* Extract parameter names and indices from a formal_parameters node.
|
|
80
|
-
* Handles: simple identifiers, destructured objects/arrays, defaults, rest, TS typed params.
|
|
81
473
|
*/
|
|
82
|
-
function extractParams(paramsNode) {
|
|
474
|
+
function extractParams(paramsNode, rules) {
|
|
83
475
|
if (!paramsNode) return [];
|
|
84
476
|
const result = [];
|
|
85
477
|
let index = 0;
|
|
86
478
|
for (const child of paramsNode.namedChildren) {
|
|
87
|
-
const names = extractParamNames(child);
|
|
479
|
+
const names = extractParamNames(child, rules);
|
|
88
480
|
for (const name of names) {
|
|
89
481
|
result.push({ name, index });
|
|
90
482
|
}
|
|
@@ -93,81 +485,113 @@ function extractParams(paramsNode) {
|
|
|
93
485
|
return result;
|
|
94
486
|
}
|
|
95
487
|
|
|
96
|
-
function extractParamNames(node) {
|
|
488
|
+
/**
 * Collect every binding name introduced by a single parameter node.
 *
 * Resolution order: the language override (`rules.extractParamName`), a plain
 * identifier, wrapper types, default parameters, rest parameters, object
 * destructuring, then array destructuring.
 *
 * @param {object} node - parameter (or nested pattern) node; may be null/undefined
 * @param {object} rules - dataflow rule set for the language being analysed
 * @returns {string[]} the names bound by the node; empty when none are recognised
 */
function extractParamNames(node, rules) {
  if (!node) return [];
  const t = node.type;

  // Language-specific override (Go, Rust, Java, C#, PHP, Ruby).
  // A falsy result means "not handled here": fall through to the generic rules.
  if (rules.extractParamName) {
    const result = rules.extractParamName(node);
    if (result) return result;
  }

  // Leaf identifier
  if (t === rules.paramIdentifier) return [node.text];

  // Wrapper types (TS required_parameter, Python typed_parameter, etc.)
  if (rules.paramWrapperTypes.has(t)) {
    const pattern = node.childForFieldName('pattern') || node.childForFieldName('name');
    return pattern ? extractParamNames(pattern, rules) : [];
  }

  // Default parameter (assignment_pattern / default_parameter)
  if (rules.defaultParamType && t === rules.defaultParamType) {
    const left = node.childForFieldName('left') || node.childForFieldName('name');
    return left ? extractParamNames(left, rules) : [];
  }

  // Rest / splat parameter
  if (rules.restParamType && t === rules.restParamType) {
    // Try name field first, then fall back to scanning children
    const nameNode = node.childForFieldName('name');
    if (nameNode) return [nameNode.text];
    for (const child of node.namedChildren) {
      if (child.type === rules.paramIdentifier) return [child.text];
    }
    // No identifier child: the rest element may wrap a destructuring pattern
    // (e.g. `...[a, b]`). Only recurse into pattern types so that unrelated
    // children can never contribute names.
    for (const child of node.namedChildren) {
      const isPattern =
        (rules.objectDestructType && child.type === rules.objectDestructType) ||
        (rules.arrayDestructType && child.type === rules.arrayDestructType);
      if (isPattern) return extractParamNames(child, rules);
    }
    return [];
  }

  // Object destructuring (JS only)
  if (rules.objectDestructType && t === rules.objectDestructType) {
    const names = [];
    for (const child of node.namedChildren) {
      if (rules.shorthandPropPattern && child.type === rules.shorthandPropPattern) {
        names.push(child.text);
      } else if (rules.pairPatternType && child.type === rules.pairPatternType) {
        // `{ key: target }` binds the value side, which may itself be a pattern
        const value = child.childForFieldName('value');
        if (value) names.push(...extractParamNames(value, rules));
      } else if (rules.restParamType && child.type === rules.restParamType) {
        names.push(...extractParamNames(child, rules));
      }
    }
    return names;
  }

  // Array destructuring (JS only)
  if (rules.arrayDestructType && t === rules.arrayDestructType) {
    const names = [];
    for (const child of node.namedChildren) {
      names.push(...extractParamNames(child, rules));
    }
    return names;
  }

  return [];
}
|
|
139
551
|
|
|
552
|
+
/**
 * Check if a node type is identifier-like for this language.
 *
 * @param {string} nodeType - AST node type to classify
 * @param {object} rules - dataflow rule set for the language being analysed
 * @returns {boolean} true for 'identifier', `rules.paramIdentifier`, or any
 *   member of `rules.extraIdentifierTypes`
 */
function isIdent(nodeType, rules) {
  if (nodeType === 'identifier' || nodeType === rules.paramIdentifier) return true;
  return rules.extraIdentifierTypes ? rules.extraIdentifierTypes.has(nodeType) : false;
}

/**
 * Resolve the name a call expression is calling using rules.
 *
 * @param {object} callNode - call expression node
 * @param {object} rules - dataflow rule set for the language being analysed
 * @returns {string|null} the bare callee name (for a member call, the property
 *   name only), or null when it cannot be determined
 */
function resolveCalleeName(callNode, rules) {
  const fn = callNode.childForFieldName(rules.callFunctionField);
  if (!fn) {
    // Some languages (Java method_invocation, Ruby call) use 'name' field directly
    const nameNode = callNode.childForFieldName('name') || callNode.childForFieldName('method');
    return nameNode ? nameNode.text : null;
  }

  if (isIdent(fn.type, rules)) return fn.text;

  if (fn.type === rules.memberNode) {
    const prop = fn.childForFieldName(rules.memberPropertyField);
    return prop ? prop.text : null;
  }

  if (rules.optionalChainNode && fn.type === rules.optionalChainNode) {
    // The first named child of the optional-chain wrapper is the call target.
    const target = fn.namedChildren[0];
    if (!target) return null;
    if (target.type === rules.memberNode) {
      const prop = target.childForFieldName(rules.memberPropertyField);
      return prop ? prop.text : null;
    }
    // Use the same identifier test as the direct-call path above, so the
    // language's extra identifier types are honoured here as well.
    if (isIdent(target.type, rules)) return target.text;
    const prop = fn.childForFieldName(rules.memberPropertyField);
    return prop ? prop.text : null;
  }

  return null;
}
|
|
162
586
|
|
|
163
587
|
/**
 * Find the root receiver of a (possibly nested) member expression using rules.
 *
 * Follows the object side of the expression until it reaches an
 * identifier-like node.
 *
 * @param {object} memberExpr - member expression node
 * @param {object} rules - dataflow rule set for the language being analysed
 * @returns {string|null} text of the root identifier, or null when the chain
 *   has no object or bottoms out in something that is not identifier-like
 */
function memberReceiver(memberExpr, rules) {
  let current = memberExpr;
  for (;;) {
    const receiver = current.childForFieldName(rules.memberObjectField);
    if (!receiver) return null;
    if (isIdent(receiver.type, rules)) return receiver.text;
    if (receiver.type !== rules.memberNode) return null;
    // Nested member access: keep walking towards the root object.
    current = receiver;
  }
}
|
|
173
597
|
|
|
@@ -179,17 +603,21 @@ function memberReceiver(memberExpr) {
|
|
|
179
603
|
* @param {object} tree - tree-sitter parse tree
|
|
180
604
|
* @param {string} filePath - relative file path
|
|
181
605
|
* @param {object[]} definitions - symbol definitions from the parser
|
|
606
|
+
* @param {string} [langId='javascript'] - language identifier for rules lookup
|
|
182
607
|
* @returns {{ parameters, returns, assignments, argFlows, mutations }}
|
|
183
608
|
*/
|
|
184
|
-
export function extractDataflow(tree, _filePath, _definitions) {
|
|
609
|
+
export function extractDataflow(tree, _filePath, _definitions, langId = 'javascript') {
|
|
610
|
+
const rules = DATAFLOW_RULES.get(langId);
|
|
611
|
+
if (!rules) return { parameters: [], returns: [], assignments: [], argFlows: [], mutations: [] };
|
|
612
|
+
|
|
613
|
+
const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode;
|
|
614
|
+
|
|
185
615
|
const parameters = [];
|
|
186
616
|
const returns = [];
|
|
187
617
|
const assignments = [];
|
|
188
618
|
const argFlows = [];
|
|
189
619
|
const mutations = [];
|
|
190
620
|
|
|
191
|
-
// Build a scope stack as we traverse
|
|
192
|
-
// Each scope: { funcName, funcNode, params: Map<name, index>, locals: Map<name, source> }
|
|
193
621
|
const scopeStack = [];
|
|
194
622
|
|
|
195
623
|
function currentScope() {
|
|
@@ -197,7 +625,6 @@ export function extractDataflow(tree, _filePath, _definitions) {
|
|
|
197
625
|
}
|
|
198
626
|
|
|
199
627
|
function findBinding(name) {
|
|
200
|
-
// Search from innermost scope outward
|
|
201
628
|
for (let i = scopeStack.length - 1; i >= 0; i--) {
|
|
202
629
|
const scope = scopeStack[i];
|
|
203
630
|
if (scope.params.has(name))
|
|
@@ -209,9 +636,9 @@ export function extractDataflow(tree, _filePath, _definitions) {
|
|
|
209
636
|
}
|
|
210
637
|
|
|
211
638
|
function enterScope(fnNode) {
|
|
212
|
-
const name = functionName(fnNode);
|
|
213
|
-
const paramsNode = fnNode.childForFieldName(
|
|
214
|
-
const paramList = extractParams(paramsNode);
|
|
639
|
+
const name = functionName(fnNode, rules);
|
|
640
|
+
const paramsNode = fnNode.childForFieldName(rules.paramListField);
|
|
641
|
+
const paramList = extractParams(paramsNode, rules);
|
|
215
642
|
const paramMap = new Map();
|
|
216
643
|
for (const p of paramList) {
|
|
217
644
|
paramMap.set(p.name, p.index);
|
|
@@ -231,14 +658,10 @@ export function extractDataflow(tree, _filePath, _definitions) {
|
|
|
231
658
|
scopeStack.pop();
|
|
232
659
|
}
|
|
233
660
|
|
|
234
|
-
/**
|
|
235
|
-
* Determine confidence for a variable binding flowing as an argument.
|
|
236
|
-
*/
|
|
237
661
|
function bindingConfidence(binding) {
|
|
238
662
|
if (!binding) return 0.5;
|
|
239
663
|
if (binding.type === 'param') return 1.0;
|
|
240
664
|
if (binding.type === 'local') {
|
|
241
|
-
// Local from a call return → 0.9, from destructuring → 0.8
|
|
242
665
|
if (binding.source?.type === 'call_return') return 0.9;
|
|
243
666
|
if (binding.source?.type === 'destructured') return 0.8;
|
|
244
667
|
return 0.9;
|
|
@@ -246,21 +669,235 @@ export function extractDataflow(tree, _filePath, _definitions) {
|
|
|
246
669
|
return 0.5;
|
|
247
670
|
}
|
|
248
671
|
|
|
672
|
+
/**
 * Strip one await wrapper from a node, if the language has an await node type.
 * Returns the first named child of the await node (or the await node itself
 * when it has none); any other node is returned unchanged.
 */
function unwrapAwait(node) {
  const isAwait = rules.awaitNode && node.type === rules.awaitNode;
  if (!isAwait) return node;
  return node.namedChildren[0] || node;
}
|
|
679
|
+
|
|
680
|
+
/**
 * Test whether a node is a call for the current language (single or multi-type).
 * A falsy node is handed back unchanged, so the result is always falsy for it.
 */
function isCall(node) {
  if (!node) return node;
  return isCallNode(node.type);
}
|
|
684
|
+
|
|
685
|
+
/**
 * Handle a variable declarator / short_var_declaration node.
 *
 * When the declared variable is initialised from a call (optionally awaited)
 * inside a named function scope, records one entry in `assignments` per bound
 * name and registers each name in the current scope's locals as either
 * 'call_return' or, for destructuring targets, 'destructured'.
 *
 * @param {object} node - declarator node for the current language
 */
function handleVarDeclarator(node) {
  let nameNode = node.childForFieldName(rules.varNameField);
  let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null;

  // C#: initializer is inside equals_value_clause child
  if (!valueNode && rules.equalsClauseType) {
    for (const child of node.namedChildren) {
      if (child.type === rules.equalsClauseType) {
        valueNode = child.childForFieldName('value') || child.namedChildren[0];
        break;
      }
    }
  }

  // Fallback: initializer is a direct unnamed child (C# variable_declarator)
  if (!valueNode) {
    for (const child of node.namedChildren) {
      if (child !== nameNode && isCall(unwrapAwait(child))) {
        valueNode = child;
        break;
      }
    }
  }

  // Go: expression_list wraps LHS/RHS — unwrap to first named child
  if (rules.expressionListType) {
    if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0];
    if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0];
  }

  const scope = currentScope();
  if (!nameNode || !valueNode || !scope) return;

  // Only initialisers that are calls (after stripping await) are tracked.
  const callExpr = unwrapAwait(valueNode);
  if (!isCall(callExpr)) return;

  const callee = resolveCalleeName(callExpr, rules);
  if (!callee || !scope.funcName) return;

  // Record one captured call result and remember the binding for later lookups.
  const record = (varName, bindingType) => {
    assignments.push({
      varName,
      callerFunc: scope.funcName,
      sourceCallName: callee,
      expression: truncate(node.text),
      line: node.startPosition.row + 1,
    });
    scope.locals.set(varName, { type: bindingType, callee });
  };

  // Destructuring: const { a, b } = foo()
  const isDestructuring =
    (rules.objectDestructType && nameNode.type === rules.objectDestructType) ||
    (rules.arrayDestructType && nameNode.type === rules.arrayDestructType);

  if (isDestructuring) {
    for (const n of extractParamNames(nameNode, rules)) {
      record(n, 'destructured');
    }
    return;
  }

  // Any other target is recorded under its source text.
  record(nameNode.text, 'call_return');
}
|
|
758
|
+
|
|
759
|
+
/** Handle assignment expressions (mutation detection + call captures). */
|
|
760
|
+
function handleAssignment(node) {
|
|
761
|
+
const left = node.childForFieldName(rules.assignLeftField);
|
|
762
|
+
const right = node.childForFieldName(rules.assignRightField);
|
|
763
|
+
const scope = currentScope();
|
|
764
|
+
if (!scope?.funcName) return;
|
|
765
|
+
|
|
766
|
+
// Mutation: obj.prop = value
|
|
767
|
+
if (left && rules.memberNode && left.type === rules.memberNode) {
|
|
768
|
+
const receiver = memberReceiver(left, rules);
|
|
769
|
+
if (receiver) {
|
|
770
|
+
const binding = findBinding(receiver);
|
|
771
|
+
if (binding) {
|
|
772
|
+
mutations.push({
|
|
773
|
+
funcName: scope.funcName,
|
|
774
|
+
receiverName: receiver,
|
|
775
|
+
binding,
|
|
776
|
+
mutatingExpr: truncate(node.text),
|
|
777
|
+
line: node.startPosition.row + 1,
|
|
778
|
+
});
|
|
779
|
+
}
|
|
780
|
+
}
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
// Non-declaration assignment: x = foo()
|
|
784
|
+
if (left && isIdent(left.type, rules) && right) {
|
|
785
|
+
const unwrapped = unwrapAwait(right);
|
|
786
|
+
const callExpr = isCall(unwrapped) ? unwrapped : null;
|
|
787
|
+
if (callExpr) {
|
|
788
|
+
const callee = resolveCalleeName(callExpr, rules);
|
|
789
|
+
if (callee) {
|
|
790
|
+
assignments.push({
|
|
791
|
+
varName: left.text,
|
|
792
|
+
callerFunc: scope.funcName,
|
|
793
|
+
sourceCallName: callee,
|
|
794
|
+
expression: truncate(node.text),
|
|
795
|
+
line: node.startPosition.row + 1,
|
|
796
|
+
});
|
|
797
|
+
scope.locals.set(left.text, { type: 'call_return', callee });
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
|
|
803
|
+
/** Handle call expressions: track argument flows. */
|
|
804
|
+
function handleCallExpr(node) {
|
|
805
|
+
const callee = resolveCalleeName(node, rules);
|
|
806
|
+
const argsNode = node.childForFieldName(rules.callArgsField);
|
|
807
|
+
const scope = currentScope();
|
|
808
|
+
if (!callee || !argsNode || !scope?.funcName) return;
|
|
809
|
+
|
|
810
|
+
let argIndex = 0;
|
|
811
|
+
for (let arg of argsNode.namedChildren) {
|
|
812
|
+
// PHP/Java: unwrap argument wrapper
|
|
813
|
+
if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) {
|
|
814
|
+
arg = arg.namedChildren[0] || arg;
|
|
815
|
+
}
|
|
816
|
+
const unwrapped =
|
|
817
|
+
rules.spreadType && arg.type === rules.spreadType ? arg.namedChildren[0] || arg : arg;
|
|
818
|
+
if (!unwrapped) {
|
|
819
|
+
argIndex++;
|
|
820
|
+
continue;
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null;
|
|
824
|
+
const argMember =
|
|
825
|
+
rules.memberNode && unwrapped.type === rules.memberNode
|
|
826
|
+
? memberReceiver(unwrapped, rules)
|
|
827
|
+
: null;
|
|
828
|
+
const trackedName = argName || argMember;
|
|
829
|
+
|
|
830
|
+
if (trackedName) {
|
|
831
|
+
const binding = findBinding(trackedName);
|
|
832
|
+
if (binding) {
|
|
833
|
+
argFlows.push({
|
|
834
|
+
callerFunc: scope.funcName,
|
|
835
|
+
calleeName: callee,
|
|
836
|
+
argIndex,
|
|
837
|
+
argName: trackedName,
|
|
838
|
+
binding,
|
|
839
|
+
confidence: bindingConfidence(binding),
|
|
840
|
+
expression: truncate(arg.text),
|
|
841
|
+
line: node.startPosition.row + 1,
|
|
842
|
+
});
|
|
843
|
+
}
|
|
844
|
+
}
|
|
845
|
+
argIndex++;
|
|
846
|
+
}
|
|
847
|
+
}
|
|
848
|
+
|
|
849
|
+
/** Detect mutating method calls in expression statements. */
|
|
850
|
+
function handleExprStmtMutation(node) {
|
|
851
|
+
if (rules.mutatingMethods.size === 0) return;
|
|
852
|
+
const expr = node.namedChildren[0];
|
|
853
|
+
if (!expr || !isCall(expr)) return;
|
|
854
|
+
|
|
855
|
+
let methodName = null;
|
|
856
|
+
let receiver = null;
|
|
857
|
+
|
|
858
|
+
// Standard pattern: call(fn: member(obj, prop))
|
|
859
|
+
const fn = expr.childForFieldName(rules.callFunctionField);
|
|
860
|
+
if (fn && fn.type === rules.memberNode) {
|
|
861
|
+
const prop = fn.childForFieldName(rules.memberPropertyField);
|
|
862
|
+
methodName = prop ? prop.text : null;
|
|
863
|
+
receiver = memberReceiver(fn, rules);
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
// Java/combined pattern: call node itself has object + name fields
|
|
867
|
+
if (!receiver && rules.callObjectField) {
|
|
868
|
+
const obj = expr.childForFieldName(rules.callObjectField);
|
|
869
|
+
const name = expr.childForFieldName(rules.callFunctionField);
|
|
870
|
+
if (obj && name) {
|
|
871
|
+
methodName = name.text;
|
|
872
|
+
receiver = isIdent(obj.type, rules) ? obj.text : null;
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
if (!methodName || !rules.mutatingMethods.has(methodName)) return;
|
|
877
|
+
|
|
878
|
+
const scope = currentScope();
|
|
879
|
+
if (!receiver || !scope?.funcName) return;
|
|
880
|
+
|
|
881
|
+
const binding = findBinding(receiver);
|
|
882
|
+
if (binding) {
|
|
883
|
+
mutations.push({
|
|
884
|
+
funcName: scope.funcName,
|
|
885
|
+
receiverName: receiver,
|
|
886
|
+
binding,
|
|
887
|
+
mutatingExpr: truncate(expr.text),
|
|
888
|
+
line: node.startPosition.row + 1,
|
|
889
|
+
});
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
|
|
249
893
|
// Recursive AST walk
|
|
250
894
|
function visit(node) {
|
|
251
895
|
if (!node) return;
|
|
252
896
|
const t = node.type;
|
|
253
897
|
|
|
254
898
|
// Enter function scopes
|
|
255
|
-
if (
|
|
256
|
-
t === 'function_declaration' ||
|
|
257
|
-
t === 'method_definition' ||
|
|
258
|
-
t === 'arrow_function' ||
|
|
259
|
-
t === 'function_expression' ||
|
|
260
|
-
t === 'function'
|
|
261
|
-
) {
|
|
899
|
+
if (rules.functionNodes.has(t)) {
|
|
262
900
|
enterScope(node);
|
|
263
|
-
// Visit body
|
|
264
901
|
for (const child of node.namedChildren) {
|
|
265
902
|
visit(child);
|
|
266
903
|
}
|
|
@@ -269,12 +906,12 @@ export function extractDataflow(tree, _filePath, _definitions) {
|
|
|
269
906
|
}
|
|
270
907
|
|
|
271
908
|
// Return statements
|
|
272
|
-
if (t ===
|
|
909
|
+
if (rules.returnNode && t === rules.returnNode) {
|
|
273
910
|
const scope = currentScope();
|
|
274
911
|
if (scope?.funcName) {
|
|
275
912
|
const expr = node.namedChildren[0];
|
|
276
913
|
const referencedNames = [];
|
|
277
|
-
if (expr) collectIdentifiers(expr, referencedNames);
|
|
914
|
+
if (expr) collectIdentifiers(expr, referencedNames, rules);
|
|
278
915
|
returns.push({
|
|
279
916
|
funcName: scope.funcName,
|
|
280
917
|
expression: truncate(expr ? expr.text : ''),
|
|
@@ -282,192 +919,49 @@ export function extractDataflow(tree, _filePath, _definitions) {
|
|
|
282
919
|
line: node.startPosition.row + 1,
|
|
283
920
|
});
|
|
284
921
|
}
|
|
285
|
-
// Still visit children for nested expressions
|
|
286
922
|
for (const child of node.namedChildren) {
|
|
287
923
|
visit(child);
|
|
288
924
|
}
|
|
289
925
|
return;
|
|
290
926
|
}
|
|
291
927
|
|
|
292
|
-
// Variable declarations
|
|
293
|
-
if (t ===
|
|
294
|
-
|
|
295
|
-
const valueNode = node.childForFieldName('value');
|
|
296
|
-
const scope = currentScope();
|
|
297
|
-
|
|
298
|
-
if (nameNode && valueNode && scope) {
|
|
299
|
-
// Resolve the call expression from the value (handles await wrapping)
|
|
300
|
-
let callExpr = null;
|
|
301
|
-
if (valueNode.type === 'call_expression') {
|
|
302
|
-
callExpr = valueNode;
|
|
303
|
-
} else if (valueNode.type === 'await_expression') {
|
|
304
|
-
const awaitChild = valueNode.namedChildren[0];
|
|
305
|
-
if (awaitChild?.type === 'call_expression') callExpr = awaitChild;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
if (callExpr) {
|
|
309
|
-
const callee = resolveCalleeName(callExpr);
|
|
310
|
-
if (callee && scope.funcName) {
|
|
311
|
-
// Destructuring: const { a, b } = foo()
|
|
312
|
-
if (nameNode.type === 'object_pattern' || nameNode.type === 'array_pattern') {
|
|
313
|
-
const names = extractParamNames(nameNode);
|
|
314
|
-
for (const n of names) {
|
|
315
|
-
assignments.push({
|
|
316
|
-
varName: n,
|
|
317
|
-
callerFunc: scope.funcName,
|
|
318
|
-
sourceCallName: callee,
|
|
319
|
-
expression: truncate(node.text),
|
|
320
|
-
line: node.startPosition.row + 1,
|
|
321
|
-
});
|
|
322
|
-
scope.locals.set(n, { type: 'destructured', callee });
|
|
323
|
-
}
|
|
324
|
-
} else {
|
|
325
|
-
// Simple: const x = foo()
|
|
326
|
-
assignments.push({
|
|
327
|
-
varName: nameNode.text,
|
|
328
|
-
callerFunc: scope.funcName,
|
|
329
|
-
sourceCallName: callee,
|
|
330
|
-
expression: truncate(node.text),
|
|
331
|
-
line: node.startPosition.row + 1,
|
|
332
|
-
});
|
|
333
|
-
scope.locals.set(nameNode.text, { type: 'call_return', callee });
|
|
334
|
-
}
|
|
335
|
-
}
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
// Visit children
|
|
928
|
+
// Variable declarations
|
|
929
|
+
if (rules.varDeclaratorNode && t === rules.varDeclaratorNode) {
|
|
930
|
+
handleVarDeclarator(node);
|
|
339
931
|
for (const child of node.namedChildren) {
|
|
340
932
|
visit(child);
|
|
341
933
|
}
|
|
342
934
|
return;
|
|
343
935
|
}
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
if (t === 'call_expression') {
|
|
347
|
-
const callee = resolveCalleeName(node);
|
|
348
|
-
const argsNode = node.childForFieldName('arguments');
|
|
349
|
-
const scope = currentScope();
|
|
350
|
-
|
|
351
|
-
if (callee && argsNode && scope?.funcName) {
|
|
352
|
-
let argIndex = 0;
|
|
353
|
-
for (const arg of argsNode.namedChildren) {
|
|
354
|
-
// Handle spread arguments: foo(...args)
|
|
355
|
-
const unwrapped = arg.type === 'spread_element' ? arg.namedChildren[0] : arg;
|
|
356
|
-
if (!unwrapped) {
|
|
357
|
-
argIndex++;
|
|
358
|
-
continue;
|
|
359
|
-
}
|
|
360
|
-
const argName = unwrapped.type === 'identifier' ? unwrapped.text : null;
|
|
361
|
-
const argMember =
|
|
362
|
-
unwrapped.type === 'member_expression' ? memberReceiver(unwrapped) : null;
|
|
363
|
-
const trackedName = argName || argMember;
|
|
364
|
-
|
|
365
|
-
if (trackedName) {
|
|
366
|
-
const binding = findBinding(trackedName);
|
|
367
|
-
if (binding) {
|
|
368
|
-
argFlows.push({
|
|
369
|
-
callerFunc: scope.funcName,
|
|
370
|
-
calleeName: callee,
|
|
371
|
-
argIndex,
|
|
372
|
-
argName: trackedName,
|
|
373
|
-
binding,
|
|
374
|
-
confidence: bindingConfidence(binding),
|
|
375
|
-
expression: truncate(arg.text),
|
|
376
|
-
line: node.startPosition.row + 1,
|
|
377
|
-
});
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
|
-
argIndex++;
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
// Visit children (but not arguments again — we handled them)
|
|
936
|
+
if (rules.varDeclaratorNodes?.has(t)) {
|
|
937
|
+
handleVarDeclarator(node);
|
|
384
938
|
for (const child of node.namedChildren) {
|
|
385
939
|
visit(child);
|
|
386
940
|
}
|
|
387
941
|
return;
|
|
388
942
|
}
|
|
389
943
|
|
|
390
|
-
//
|
|
391
|
-
if (t
|
|
392
|
-
|
|
393
|
-
const
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
if (scope?.funcName) {
|
|
397
|
-
// Mutation: obj.prop = value
|
|
398
|
-
if (left?.type === 'member_expression') {
|
|
399
|
-
const receiver = memberReceiver(left);
|
|
400
|
-
if (receiver) {
|
|
401
|
-
const binding = findBinding(receiver);
|
|
402
|
-
if (binding) {
|
|
403
|
-
mutations.push({
|
|
404
|
-
funcName: scope.funcName,
|
|
405
|
-
receiverName: receiver,
|
|
406
|
-
binding,
|
|
407
|
-
mutatingExpr: truncate(node.text),
|
|
408
|
-
line: node.startPosition.row + 1,
|
|
409
|
-
});
|
|
410
|
-
}
|
|
411
|
-
}
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
// Non-declaration assignment: x = foo() (without const/let/var)
|
|
415
|
-
if (left?.type === 'identifier' && right) {
|
|
416
|
-
let callExpr = null;
|
|
417
|
-
if (right.type === 'call_expression') {
|
|
418
|
-
callExpr = right;
|
|
419
|
-
} else if (right.type === 'await_expression') {
|
|
420
|
-
const awaitChild = right.namedChildren[0];
|
|
421
|
-
if (awaitChild?.type === 'call_expression') callExpr = awaitChild;
|
|
422
|
-
}
|
|
423
|
-
if (callExpr) {
|
|
424
|
-
const callee = resolveCalleeName(callExpr);
|
|
425
|
-
if (callee) {
|
|
426
|
-
assignments.push({
|
|
427
|
-
varName: left.text,
|
|
428
|
-
callerFunc: scope.funcName,
|
|
429
|
-
sourceCallName: callee,
|
|
430
|
-
expression: truncate(node.text),
|
|
431
|
-
line: node.startPosition.row + 1,
|
|
432
|
-
});
|
|
433
|
-
scope.locals.set(left.text, { type: 'call_return', callee });
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
}
|
|
944
|
+
// Call expressions
|
|
945
|
+
if (isCallNode(t)) {
|
|
946
|
+
handleCallExpr(node);
|
|
947
|
+
for (const child of node.namedChildren) {
|
|
948
|
+
visit(child);
|
|
437
949
|
}
|
|
950
|
+
return;
|
|
951
|
+
}
|
|
438
952
|
|
|
439
|
-
|
|
953
|
+
// Assignment expressions
|
|
954
|
+
if (rules.assignmentNode && t === rules.assignmentNode) {
|
|
955
|
+
handleAssignment(node);
|
|
440
956
|
for (const child of node.namedChildren) {
|
|
441
957
|
visit(child);
|
|
442
958
|
}
|
|
443
959
|
return;
|
|
444
960
|
}
|
|
445
961
|
|
|
446
|
-
// Mutation detection
|
|
447
|
-
if (t ===
|
|
448
|
-
|
|
449
|
-
if (expr?.type === 'call_expression') {
|
|
450
|
-
const fn = expr.childForFieldName('function');
|
|
451
|
-
if (fn?.type === 'member_expression') {
|
|
452
|
-
const prop = fn.childForFieldName('property');
|
|
453
|
-
if (prop && MUTATING_METHODS.has(prop.text)) {
|
|
454
|
-
const receiver = memberReceiver(fn);
|
|
455
|
-
const scope = currentScope();
|
|
456
|
-
if (receiver && scope?.funcName) {
|
|
457
|
-
const binding = findBinding(receiver);
|
|
458
|
-
if (binding) {
|
|
459
|
-
mutations.push({
|
|
460
|
-
funcName: scope.funcName,
|
|
461
|
-
receiverName: receiver,
|
|
462
|
-
binding,
|
|
463
|
-
mutatingExpr: truncate(expr.text),
|
|
464
|
-
line: node.startPosition.row + 1,
|
|
465
|
-
});
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
}
|
|
469
|
-
}
|
|
470
|
-
}
|
|
962
|
+
// Mutation detection via expression_statement
|
|
963
|
+
if (rules.expressionStmtNode && t === rules.expressionStmtNode) {
|
|
964
|
+
handleExprStmtMutation(node);
|
|
471
965
|
}
|
|
472
966
|
|
|
473
967
|
// Default: visit all children
|
|
@@ -483,14 +977,17 @@ export function extractDataflow(tree, _filePath, _definitions) {
|
|
|
483
977
|
|
|
484
978
|
/**
|
|
485
979
|
* Collect all identifier names referenced within a node.
|
|
980
|
+
* Uses isIdent() to support language-specific identifier node types
|
|
981
|
+
* (e.g. PHP's `variable_name`).
|
|
486
982
|
*/
|
|
487
|
-
function collectIdentifiers(node, out) {
|
|
488
|
-
if (node
|
|
983
|
+
function collectIdentifiers(node, out, rules) {
|
|
984
|
+
if (!node) return;
|
|
985
|
+
if (isIdent(node.type, rules)) {
|
|
489
986
|
out.push(node.text);
|
|
490
987
|
return;
|
|
491
988
|
}
|
|
492
989
|
for (const child of node.namedChildren) {
|
|
493
|
-
collectIdentifiers(child, out);
|
|
990
|
+
collectIdentifiers(child, out, rules);
|
|
494
991
|
}
|
|
495
992
|
}
|
|
496
993
|
|
|
@@ -514,14 +1011,7 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
|
|
|
514
1011
|
for (const [relPath, symbols] of fileSymbols) {
|
|
515
1012
|
if (!symbols._tree) {
|
|
516
1013
|
const ext = path.extname(relPath).toLowerCase();
|
|
517
|
-
if (
|
|
518
|
-
ext === '.js' ||
|
|
519
|
-
ext === '.ts' ||
|
|
520
|
-
ext === '.tsx' ||
|
|
521
|
-
ext === '.jsx' ||
|
|
522
|
-
ext === '.mjs' ||
|
|
523
|
-
ext === '.cjs'
|
|
524
|
-
) {
|
|
1014
|
+
if (DATAFLOW_EXTENSIONS.has(ext)) {
|
|
525
1015
|
needsFallback = true;
|
|
526
1016
|
break;
|
|
527
1017
|
}
|
|
@@ -529,7 +1019,7 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
|
|
|
529
1019
|
}
|
|
530
1020
|
|
|
531
1021
|
if (needsFallback) {
|
|
532
|
-
const { createParsers
|
|
1022
|
+
const { createParsers } = await import('./parser.js');
|
|
533
1023
|
parsers = await createParsers();
|
|
534
1024
|
extToLang = new Map();
|
|
535
1025
|
for (const entry of LANGUAGE_REGISTRY) {
|
|
@@ -569,24 +1059,15 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
|
|
|
569
1059
|
const tx = db.transaction(() => {
|
|
570
1060
|
for (const [relPath, symbols] of fileSymbols) {
|
|
571
1061
|
const ext = path.extname(relPath).toLowerCase();
|
|
572
|
-
|
|
573
|
-
if (
|
|
574
|
-
ext !== '.js' &&
|
|
575
|
-
ext !== '.ts' &&
|
|
576
|
-
ext !== '.tsx' &&
|
|
577
|
-
ext !== '.jsx' &&
|
|
578
|
-
ext !== '.mjs' &&
|
|
579
|
-
ext !== '.cjs'
|
|
580
|
-
) {
|
|
581
|
-
continue;
|
|
582
|
-
}
|
|
1062
|
+
if (!DATAFLOW_EXTENSIONS.has(ext)) continue;
|
|
583
1063
|
|
|
584
1064
|
let tree = symbols._tree;
|
|
1065
|
+
let langId = symbols._langId;
|
|
585
1066
|
|
|
586
1067
|
// WASM fallback if no cached tree
|
|
587
1068
|
if (!tree) {
|
|
588
1069
|
if (!extToLang || !getParserFn) continue;
|
|
589
|
-
|
|
1070
|
+
langId = extToLang.get(ext);
|
|
590
1071
|
if (!langId || !DATAFLOW_LANG_IDS.has(langId)) continue;
|
|
591
1072
|
|
|
592
1073
|
const absPath = path.join(rootDir, relPath);
|
|
@@ -607,7 +1088,14 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
|
|
|
607
1088
|
}
|
|
608
1089
|
}
|
|
609
1090
|
|
|
610
|
-
|
|
1091
|
+
if (!langId) {
|
|
1092
|
+
langId = extToLang ? extToLang.get(ext) : null;
|
|
1093
|
+
if (!langId) continue;
|
|
1094
|
+
}
|
|
1095
|
+
|
|
1096
|
+
if (!DATAFLOW_RULES.has(langId)) continue;
|
|
1097
|
+
|
|
1098
|
+
const data = extractDataflow(tree, relPath, symbols.definitions, langId);
|
|
611
1099
|
|
|
612
1100
|
// Resolve function names to node IDs in this file first, then globally
|
|
613
1101
|
function resolveNode(funcName) {
|