@optave/codegraph 3.0.1 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/dataflow.js CHANGED
@@ -6,7 +6,7 @@
6
6
  * - returns: a call's return value is captured and used in the caller
7
7
  * - mutates: a parameter-derived value is mutated (e.g. arr.push())
8
8
  *
9
- * Opt-in via `build --dataflow`. JS/TS only for MVP.
9
+ * Opt-in via `build --dataflow`. Supports all languages with DATAFLOW_RULES.
10
10
  */
11
11
 
12
12
  import fs from 'node:fs';
@@ -14,10 +14,91 @@ import path from 'node:path';
14
14
  import { openReadonlyOrFail } from './db.js';
15
15
  import { info } from './logger.js';
16
16
  import { paginateResult } from './paginate.js';
17
+ import { LANGUAGE_REGISTRY } from './parser.js';
17
18
  import { ALL_SYMBOL_KINDS, isTestFile, normalizeSymbol } from './queries.js';
18
19
 
19
- // Methods that mutate their receiver in-place
20
- const MUTATING_METHODS = new Set([
20
+ // ─── Language-Specific Dataflow Rules ────────────────────────────────────
21
+
22
/**
 * Baseline value for every dataflow rule key.
 *
 * A language rule set is this object with some keys overridden (see
 * makeDataflowRules). `null` means "this language has no such node/feature".
 */
const DATAFLOW_DEFAULTS = {
  // Scope entry
  functionNodes: new Set(), // REQUIRED: non-empty

  // Function name extraction
  nameField: 'name',
  varAssignedFnParent: null, // parent type for `const fn = ...` (JS only)
  assignmentFnParent: null, // parent type for `x = function...` (JS only)
  pairFnParent: null, // parent type for `{ key: function }` (JS only)

  // Parameters
  paramListField: 'parameters',
  paramIdentifier: 'identifier',
  paramWrapperTypes: new Set(),
  defaultParamType: null,
  restParamType: null,
  objectDestructType: null,
  arrayDestructType: null,
  shorthandPropPattern: null,
  pairPatternType: null,
  extractParamName: null, // override: (node) => string[]

  // Return
  returnNode: null,

  // Variable declarations
  varDeclaratorNode: null,
  varDeclaratorNodes: null,
  varNameField: 'name',
  varValueField: 'value',
  assignmentNode: null,
  assignLeftField: 'left',
  assignRightField: 'right',

  // Calls
  callNode: null,
  callNodes: null,
  callFunctionField: 'function',
  callArgsField: 'arguments',
  spreadType: null,

  // Member access
  memberNode: null,
  memberObjectField: 'object',
  memberPropertyField: 'property',
  optionalChainNode: null,

  // Await
  awaitNode: null,

  // Mutation
  mutatingMethods: new Set(),
  expressionStmtNode: 'expression_statement',
  callObjectField: null, // Java: combined call+member has [object] field on call node

  // Structural wrappers
  expressionListType: null, // Go: expression_list wraps LHS/RHS of short_var_declaration
  equalsClauseType: null, // C#: equals_value_clause wraps variable initializer
  argumentWrapperType: null, // PHP: individual args wrapped in 'argument' nodes
  extraIdentifierTypes: null, // Set of additional identifier-like types (PHP: variable_name, name)
};

/** Every recognised rule key; makeDataflowRules rejects anything else. */
const DATAFLOW_RULE_KEYS = new Set(Object.keys(DATAFLOW_DEFAULTS));

/**
 * Build a complete dataflow rule set from the defaults plus per-language overrides.
 *
 * @param {object} [overrides] - rule keys to override; a missing or null value
 *   is treated as "no overrides" (and therefore fails the functionNodes check).
 * @returns {object} a new object holding every key of DATAFLOW_DEFAULTS.
 * @throws {Error} if an override key is not a known rule key, or if
 *   `functionNodes` is not a non-empty Set after merging.
 */
export function makeDataflowRules(overrides = {}) {
  const supplied = overrides ?? {};
  for (const key of Object.keys(supplied)) {
    if (!DATAFLOW_RULE_KEYS.has(key)) {
      throw new Error(`Dataflow rules: unknown key "${key}"`);
    }
  }

  const rules = { ...DATAFLOW_DEFAULTS, ...supplied };

  // The spread above is shallow: without this copy every rule set that keeps a
  // default Set would share one mutable instance with all the others.
  for (const [key, value] of Object.entries(DATAFLOW_DEFAULTS)) {
    if (value instanceof Set && rules[key] === value) {
      rules[key] = new Set(value);
    }
  }

  if (!(rules.functionNodes instanceof Set) || rules.functionNodes.size === 0) {
    throw new Error('Dataflow rules: functionNodes must be a non-empty Set');
  }
  return rules;
}
98
+
99
+ // ── JS / TS / TSX ────────────────────────────────────────────────────────
100
+
101
+ const JS_TS_MUTATING = new Set([
21
102
  'push',
22
103
  'pop',
23
104
  'shift',
@@ -32,8 +113,325 @@ const MUTATING_METHODS = new Set([
32
113
  'clear',
33
114
  ]);
34
115
 
35
- // JS/TS language IDs that support dataflow extraction
36
- const DATAFLOW_LANG_IDS = new Set(['javascript', 'typescript', 'tsx']);
116
/** Dataflow rules shared by JavaScript, TypeScript and TSX. */
const JS_TS_DATAFLOW = makeDataflowRules({
  // Nodes that open a function scope
  functionNodes: new Set([
    'function_declaration',
    'method_definition',
    'arrow_function',
    'function_expression',
    'function',
  ]),

  // Parents that give a name to an otherwise anonymous function
  varAssignedFnParent: 'variable_declarator',
  assignmentFnParent: 'assignment_expression',
  pairFnParent: 'pair',

  // Parameter shapes
  paramWrapperTypes: new Set(['required_parameter', 'optional_parameter']),
  defaultParamType: 'assignment_pattern',
  restParamType: 'rest_pattern',
  objectDestructType: 'object_pattern',
  arrayDestructType: 'array_pattern',
  shorthandPropPattern: 'shorthand_property_identifier_pattern',
  pairPatternType: 'pair_pattern',

  // Statements and expressions
  returnNode: 'return_statement',
  varDeclaratorNode: 'variable_declarator',
  assignmentNode: 'assignment_expression',
  awaitNode: 'await_expression',

  // Calls and member access
  callNode: 'call_expression',
  spreadType: 'spread_element',
  memberNode: 'member_expression',
  optionalChainNode: 'optional_chain_expression',

  // Receiver-mutating method names
  mutatingMethods: JS_TS_MUTATING,
});
144
+
145
+ // ── Python ───────────────────────────────────────────────────────────────
146
+
147
/** Dataflow rules for Python. */
const PYTHON_DATAFLOW = makeDataflowRules({
  functionNodes: new Set(['function_definition', 'lambda']),
  defaultParamType: 'default_parameter',
  restParamType: 'list_splat_pattern',
  returnNode: 'return_statement',
  varDeclaratorNode: null,
  assignmentNode: 'assignment',
  assignLeftField: 'left',
  assignRightField: 'right',
  callNode: 'call',
  callFunctionField: 'function',
  callArgsField: 'arguments',
  spreadType: 'list_splat',
  memberNode: 'attribute',
  memberObjectField: 'object',
  memberPropertyField: 'attribute',
  awaitNode: 'await',
  mutatingMethods: new Set([
    'append',
    'extend',
    'insert',
    'pop',
    'remove',
    'clear',
    'sort',
    'reverse',
    'add',
    'discard',
    'update',
  ]),
  /**
   * Resolve the bound name of a single parameter node.
   *
   * @param {object} node - one child of the parameter list
   * @returns {string[]|null} the parameter name, or null when this node type
   *   is not handled here (the caller then falls back to the generic logic).
   */
  extractParamName(node) {
    const isSplat = (type) => type === 'list_splat_pattern' || type === 'dictionary_splat_pattern';
    const firstIdentifier = (parent) => {
      for (const c of parent.namedChildren) {
        if (c.type === 'identifier') return [c.text];
      }
      return null;
    };

    // typed_parameter / typed_default_parameter: the name comes first, then the type
    if (node.type === 'typed_parameter' || node.type === 'typed_default_parameter') {
      for (const c of node.namedChildren) {
        if (c.type === 'identifier') return [c.text];
        // `*args: T` / `**kwargs: T`: the name sits inside a splat pattern,
        // not directly under the typed_parameter
        if (isSplat(c.type)) return firstIdentifier(c);
      }
      return null;
    }
    if (node.type === 'default_parameter') {
      const nameNode = node.childForFieldName('name');
      return nameNode ? [nameNode.text] : null;
    }
    if (isSplat(node.type)) return firstIdentifier(node);
    return null;
  },
});
198
+
199
+ // ── Go ───────────────────────────────────────────────────────────────────
200
+
201
/** Dataflow rules for Go. */
const GO_DATAFLOW = makeDataflowRules({
  // Scopes and returns
  functionNodes: new Set(['function_declaration', 'method_declaration', 'func_literal']),
  returnNode: 'return_statement',

  // Declarations and assignments
  varDeclaratorNodes: new Set(['short_var_declaration', 'var_declaration']),
  varNameField: 'left',
  varValueField: 'right',
  assignmentNode: 'assignment_statement',
  assignLeftField: 'left',
  assignRightField: 'right',
  expressionListType: 'expression_list',

  // Calls and selectors
  callNode: 'call_expression',
  callFunctionField: 'function',
  callArgsField: 'arguments',
  memberNode: 'selector_expression',
  memberObjectField: 'operand',
  memberPropertyField: 'field',

  // No receiver-mutating method names are registered for Go
  mutatingMethods: new Set(),

  /**
   * Names bound by one parameter node, or null for node types not handled here.
   * A single parameter_declaration may bind several names (`a, b int`).
   */
  extractParamName: (param) => {
    switch (param.type) {
      case 'parameter_declaration': {
        const bound = param.namedChildren
          .filter((child) => child.type === 'identifier')
          .map((child) => child.text);
        return bound.length > 0 ? bound : null;
      }
      case 'variadic_parameter_declaration': {
        const nameNode = param.childForFieldName('name');
        return nameNode ? [nameNode.text] : null;
      }
      default:
        return null;
    }
  },
});
234
+
235
+ // ── Rust ─────────────────────────────────────────────────────────────────
236
+
237
/** Dataflow rules for Rust. */
const RUST_DATAFLOW = makeDataflowRules({
  // Scopes and returns
  functionNodes: new Set(['function_item', 'closure_expression']),
  returnNode: 'return_expression',
  awaitNode: 'await_expression',

  // Declarations and assignments
  varDeclaratorNode: 'let_declaration',
  varNameField: 'pattern',
  varValueField: 'value',
  assignmentNode: 'assignment_expression',

  // Calls and field access
  callNode: 'call_expression',
  callFunctionField: 'function',
  callArgsField: 'arguments',
  memberNode: 'field_expression',
  memberObjectField: 'value',
  memberPropertyField: 'field',

  // Receiver-mutating method names
  mutatingMethods: new Set(['push', 'pop', 'insert', 'remove', 'clear', 'sort', 'reverse']),

  /**
   * Name bound by one parameter node. Only a `parameter` whose pattern is a
   * plain identifier, or a bare identifier, is resolved; anything else yields null.
   */
  extractParamName: (param) => {
    if (param.type === 'identifier') return [param.text];
    if (param.type !== 'parameter') return null;
    const pattern = param.childForFieldName('pattern');
    return pattern?.type === 'identifier' ? [pattern.text] : null;
  },
});
262
+
263
+ // ── Java ─────────────────────────────────────────────────────────────────
264
+
265
/** Dataflow rules for Java. */
const JAVA_DATAFLOW = makeDataflowRules({
  // Scopes and returns
  functionNodes: new Set(['method_declaration', 'constructor_declaration', 'lambda_expression']),
  returnNode: 'return_statement',

  // Declarations and assignments
  varDeclaratorNode: 'variable_declarator',
  assignmentNode: 'assignment_expression',

  // Calls: the call node itself carries the receiver (`object`) and the method (`name`)
  callNodes: new Set(['method_invocation', 'object_creation_expression']),
  callFunctionField: 'name',
  callArgsField: 'arguments',
  callObjectField: 'object',
  argumentWrapperType: 'argument',

  // Field access
  memberNode: 'field_access',
  memberObjectField: 'object',
  memberPropertyField: 'field',

  // Receiver-mutating method names
  mutatingMethods: new Set(['add', 'remove', 'clear', 'put', 'set', 'push', 'pop', 'sort']),

  /** Name bound by one parameter node, or null for node types not handled here. */
  extractParamName: (param) => {
    const { type } = param;
    if (type === 'identifier') return [param.text];
    if (type !== 'formal_parameter' && type !== 'spread_parameter') return null;
    const nameNode = param.childForFieldName('name');
    return nameNode ? [nameNode.text] : null;
  },
});
288
+
289
+ // ── C# ───────────────────────────────────────────────────────────────────
290
+
291
/** Dataflow rules for C#. */
const CSHARP_DATAFLOW = makeDataflowRules({
  // Scopes and returns
  functionNodes: new Set([
    'method_declaration',
    'constructor_declaration',
    'lambda_expression',
    'local_function_statement',
  ]),
  returnNode: 'return_statement',
  awaitNode: 'await_expression',

  // Declarations and assignments
  varDeclaratorNode: 'variable_declarator',
  varNameField: 'name',
  assignmentNode: 'assignment_expression',

  // Calls
  callNode: 'invocation_expression',
  callFunctionField: 'function',
  callArgsField: 'arguments',
  argumentWrapperType: 'argument',

  // Member access
  memberNode: 'member_access_expression',
  memberObjectField: 'expression',
  memberPropertyField: 'name',

  // Receiver-mutating method names
  mutatingMethods: new Set(['Add', 'Remove', 'Clear', 'Insert', 'Sort', 'Reverse', 'Push', 'Pop']),

  /** Name bound by one parameter node, or null for node types not handled here. */
  extractParamName: (param) => {
    if (param.type === 'identifier') return [param.text];
    if (param.type !== 'parameter') return null;
    const nameNode = param.childForFieldName('name');
    return nameNode ? [nameNode.text] : null;
  },
});
320
+
321
+ // ── PHP ──────────────────────────────────────────────────────────────────
322
+
323
/** Dataflow rules for PHP. */
const PHP_DATAFLOW = makeDataflowRules({
  // Scopes and returns
  functionNodes: new Set([
    'function_definition',
    'method_declaration',
    'anonymous_function_creation_expression',
    'arrow_function',
  ]),
  returnNode: 'return_statement',

  // Parameters and identifiers: PHP variables are `variable_name` nodes
  paramListField: 'parameters',
  paramIdentifier: 'variable_name',
  extraIdentifierTypes: new Set(['variable_name', 'name']),

  // Assignments (no separate declarator node)
  varDeclaratorNode: null,
  assignmentNode: 'assignment_expression',
  assignLeftField: 'left',
  assignRightField: 'right',

  // Calls
  callNodes: new Set([
    'function_call_expression',
    'member_call_expression',
    'scoped_call_expression',
  ]),
  callFunctionField: 'function',
  callArgsField: 'arguments',
  argumentWrapperType: 'argument',
  spreadType: 'spread_expression',

  // Member access
  memberNode: 'member_access_expression',
  memberObjectField: 'object',
  memberPropertyField: 'name',

  // Receiver-mutating method names
  mutatingMethods: new Set(['push', 'pop', 'shift', 'unshift', 'splice', 'sort', 'reverse']),

  /**
   * Name bound by one parameter node (simple_parameter → $name or &$name),
   * or null for node types not handled here.
   */
  extractParamName: (param) => {
    if (param.type === 'variable_name') return [param.text];
    if (param.type !== 'simple_parameter' && param.type !== 'variadic_parameter') return null;
    const nameNode = param.childForFieldName('name');
    return nameNode ? [nameNode.text] : null;
  },
});
361
+
362
+ // ── Ruby ─────────────────────────────────────────────────────────────────
363
+
364
/** Dataflow rules for Ruby. */
const RUBY_DATAFLOW = makeDataflowRules({
  functionNodes: new Set(['method', 'singleton_method', 'lambda']),
  paramListField: 'parameters',
  returnNode: 'return',
  varDeclaratorNode: null,
  assignmentNode: 'assignment',
  assignLeftField: 'left',
  assignRightField: 'right',
  callNode: 'call',
  callFunctionField: 'method',
  callArgsField: 'arguments',
  // spreadType is matched against call *arguments*: `foo(*xs)` parses as
  // splat_argument. splat_parameter is the definition-side node and is
  // handled by extractParamName below.
  spreadType: 'splat_argument',
  memberNode: 'call',
  memberObjectField: 'receiver',
  memberPropertyField: 'method',
  mutatingMethods: new Set([
    'push',
    'pop',
    'shift',
    'unshift',
    'delete',
    'clear',
    'sort!',
    'reverse!',
    'map!',
    'select!',
    'reject!',
    'compact!',
    'flatten!',
    'concat',
    'replace',
    'insert',
  ]),
  /**
   * Name bound by one parameter node.
   *
   * @param {object} node - one child of the parameter list
   * @returns {string[]|null} the parameter name, or null for node types not handled here.
   */
  extractParamName(node) {
    if (node.type === 'identifier') return [node.text];
    if (
      node.type === 'optional_parameter' ||
      node.type === 'keyword_parameter' ||
      node.type === 'splat_parameter' ||
      node.type === 'hash_splat_parameter'
    ) {
      const nameNode = node.childForFieldName('name');
      return nameNode ? [nameNode.text] : null;
    }
    return null;
  },
});
411
+
412
+ // ── Rules Map + Extensions Set ───────────────────────────────────────────
413
+
414
/** Rule set per language id; the three JS-family ids share one rule object. */
export const DATAFLOW_RULES = new Map([
  ...['javascript', 'typescript', 'tsx'].map((langId) => [langId, JS_TS_DATAFLOW]),
  ['python', PYTHON_DATAFLOW],
  ['go', GO_DATAFLOW],
  ['rust', RUST_DATAFLOW],
  ['java', JAVA_DATAFLOW],
  ['csharp', CSHARP_DATAFLOW],
  ['php', PHP_DATAFLOW],
  ['ruby', RUBY_DATAFLOW],
]);

/** Language ids that have a dataflow rule set. */
const DATAFLOW_LANG_IDS = new Set(DATAFLOW_RULES.keys());

/** File extensions of every LANGUAGE_REGISTRY entry whose id has a rule set. */
export const DATAFLOW_EXTENSIONS = new Set();
for (const entry of LANGUAGE_REGISTRY) {
  if (!DATAFLOW_RULES.has(entry.id)) continue;
  for (const ext of entry.extensions) {
    DATAFLOW_EXTENSIONS.add(ext);
  }
}
37
435
 
38
436
  // ── AST helpers ──────────────────────────────────────────────────────────────
39
437
 
@@ -43,32 +441,27 @@ function truncate(str, max = 120) {
43
441
  }
44
442
 
45
443
  /**
46
- * Get the name of a function node from the AST.
444
+ * Get the name of a function node from the AST using rules.
47
445
  */
48
- function functionName(fnNode) {
446
+ function functionName(fnNode, rules) {
49
447
  if (!fnNode) return null;
50
- const t = fnNode.type;
51
- if (t === 'function_declaration') {
52
- const nameNode = fnNode.childForFieldName('name');
53
- return nameNode ? nameNode.text : null;
54
- }
55
- if (t === 'method_definition') {
56
- const nameNode = fnNode.childForFieldName('name');
57
- return nameNode ? nameNode.text : null;
58
- }
59
- // arrow_function or function_expression assigned to a variable
60
- if (t === 'arrow_function' || t === 'function_expression') {
61
- const parent = fnNode.parent;
62
- if (parent?.type === 'variable_declarator') {
63
- const nameNode = parent.childForFieldName('name');
64
- return nameNode ? nameNode.text : null;
448
+ // Try the standard name field first (works for most languages)
449
+ const nameNode = fnNode.childForFieldName(rules.nameField);
450
+ if (nameNode) return nameNode.text;
451
+
452
+ // JS-specific: arrow_function/function_expression assigned to variable, pair, or assignment
453
+ const parent = fnNode.parent;
454
+ if (parent) {
455
+ if (rules.varAssignedFnParent && parent.type === rules.varAssignedFnParent) {
456
+ const n = parent.childForFieldName('name');
457
+ return n ? n.text : null;
65
458
  }
66
- if (parent?.type === 'pair') {
459
+ if (rules.pairFnParent && parent.type === rules.pairFnParent) {
67
460
  const keyNode = parent.childForFieldName('key');
68
461
  return keyNode ? keyNode.text : null;
69
462
  }
70
- if (parent?.type === 'assignment_expression') {
71
- const left = parent.childForFieldName('left');
463
+ if (rules.assignmentFnParent && parent.type === rules.assignmentFnParent) {
464
+ const left = parent.childForFieldName(rules.assignLeftField);
72
465
  return left ? left.text : null;
73
466
  }
74
467
  }
@@ -77,14 +470,13 @@ function functionName(fnNode) {
77
470
 
78
471
  /**
79
472
  * Extract parameter names and indices from a formal_parameters node.
80
- * Handles: simple identifiers, destructured objects/arrays, defaults, rest, TS typed params.
81
473
  */
82
- function extractParams(paramsNode) {
474
+ function extractParams(paramsNode, rules) {
83
475
  if (!paramsNode) return [];
84
476
  const result = [];
85
477
  let index = 0;
86
478
  for (const child of paramsNode.namedChildren) {
87
- const names = extractParamNames(child);
479
+ const names = extractParamNames(child, rules);
88
480
  for (const name of names) {
89
481
  result.push({ name, index });
90
482
  }
@@ -93,81 +485,113 @@ function extractParams(paramsNode) {
93
485
  return result;
94
486
  }
95
487
 
96
- function extractParamNames(node) {
488
/**
 * Collect the names bound by a single parameter (or destructuring pattern) node.
 *
 * @param {object|null} node - parameter / pattern node
 * @param {object} rules - dataflow rule set for the file's language
 * @returns {string[]} bound names in source order; empty when the node is
 *   missing or its type is not recognised.
 */
function extractParamNames(node, rules) {
  if (!node) return [];
  const t = node.type;

  // Language-specific override; a null/empty result falls through to the generic logic
  if (rules.extractParamName) {
    const result = rules.extractParamName(node);
    if (result) return result;
  }

  // Leaf identifier
  if (t === rules.paramIdentifier) return [node.text];

  // Wrapper types (e.g. TS required_parameter): the name is in `pattern` or `name`
  if (rules.paramWrapperTypes.has(t)) {
    const pattern = node.childForFieldName('pattern') || node.childForFieldName('name');
    return pattern ? extractParamNames(pattern, rules) : [];
  }

  // Default parameter (assignment_pattern / default_parameter)
  if (rules.defaultParamType && t === rules.defaultParamType) {
    const left = node.childForFieldName('left') || node.childForFieldName('name');
    return left ? extractParamNames(left, rules) : [];
  }

  // Rest / splat parameter
  if (rules.restParamType && t === rules.restParamType) {
    // Try name field first, then fall back to scanning children
    const nameNode = node.childForFieldName('name');
    if (nameNode) return [nameNode.text];
    for (const child of node.namedChildren) {
      if (child.type === rules.paramIdentifier) return [child.text];
    }
    return [];
  }

  // Object destructuring (JS only)
  if (rules.objectDestructType && t === rules.objectDestructType) {
    const names = [];
    for (const child of node.namedChildren) {
      if (rules.shorthandPropPattern && child.type === rules.shorthandPropPattern) {
        names.push(child.text);
      } else if (rules.pairPatternType && child.type === rules.pairPatternType) {
        const value = child.childForFieldName('value');
        if (value) names.push(...extractParamNames(value, rules));
      } else if (rules.restParamType && child.type === rules.restParamType) {
        names.push(...extractParamNames(child, rules));
      } else {
        // Defaulted shorthand entry, `{ a = 1 }` (object_assignment_pattern in
        // tree-sitter-javascript): the bound name is the `left` field.
        const left = child.childForFieldName('left');
        if (left) {
          if (rules.shorthandPropPattern && left.type === rules.shorthandPropPattern) {
            names.push(left.text);
          } else {
            names.push(...extractParamNames(left, rules));
          }
        }
      }
    }
    return names;
  }

  // Array destructuring (JS only)
  if (rules.arrayDestructType && t === rules.arrayDestructType) {
    const names = [];
    for (const child of node.namedChildren) {
      names.push(...extractParamNames(child, rules));
    }
    return names;
  }

  return [];
}
139
551
 
552
/**
 * Check if a node type is identifier-like for this language.
 *
 * @param {string} nodeType - tree-sitter node type
 * @param {object} rules - dataflow rule set
 * @returns {boolean} true for 'identifier', the language's paramIdentifier,
 *   or any type listed in rules.extraIdentifierTypes.
 */
function isIdent(nodeType, rules) {
  if (nodeType === 'identifier' || nodeType === rules.paramIdentifier) return true;
  return rules.extraIdentifierTypes ? rules.extraIdentifierTypes.has(nodeType) : false;
}

/**
 * Resolve the name a call expression is calling using rules.
 *
 * For a member call the property name is returned (`obj.method()` → `method`).
 *
 * @param {object} callNode - call node
 * @param {object} rules - dataflow rule set
 * @returns {string|null} callee name, or null when it cannot be determined.
 */
function resolveCalleeName(callNode, rules) {
  const fn = callNode.childForFieldName(rules.callFunctionField);
  if (!fn) {
    // No callee under the configured field: try the 'name' / 'method' fields directly
    const nameNode = callNode.childForFieldName('name') || callNode.childForFieldName('method');
    return nameNode ? nameNode.text : null;
  }
  if (isIdent(fn.type, rules)) return fn.text;
  if (fn.type === rules.memberNode) {
    const prop = fn.childForFieldName(rules.memberPropertyField);
    return prop ? prop.text : null;
  }
  if (rules.optionalChainNode && fn.type === rules.optionalChainNode) {
    // Optional chaining wraps the real callee as its first named child
    const target = fn.namedChildren[0];
    if (!target) return null;
    if (target.type === rules.memberNode) {
      const prop = target.childForFieldName(rules.memberPropertyField);
      return prop ? prop.text : null;
    }
    // Same identifier test as the non-chained path above
    if (isIdent(target.type, rules)) return target.text;
    const prop = fn.childForFieldName(rules.memberPropertyField);
    return prop ? prop.text : null;
  }
  return null;
}

/**
 * Get the receiver (object) of a member expression using rules.
 *
 * Nested member expressions are followed down to the root object
 * (`a.b.c` → `a`).
 *
 * @param {object} memberExpr - member-access node
 * @param {object} rules - dataflow rule set
 * @returns {string|null} root identifier text, or null when the root is not identifier-like.
 */
function memberReceiver(memberExpr, rules) {
  const obj = memberExpr.childForFieldName(rules.memberObjectField);
  if (!obj) return null;
  if (isIdent(obj.type, rules)) return obj.text;
  if (obj.type === rules.memberNode) return memberReceiver(obj, rules);
  return null;
}
173
597
 
@@ -179,17 +603,21 @@ function memberReceiver(memberExpr) {
179
603
  * @param {object} tree - tree-sitter parse tree
180
604
  * @param {string} filePath - relative file path
181
605
  * @param {object[]} definitions - symbol definitions from the parser
606
+ * @param {string} [langId='javascript'] - language identifier for rules lookup
182
607
  * @returns {{ parameters, returns, assignments, argFlows, mutations }}
183
608
  */
184
- export function extractDataflow(tree, _filePath, _definitions) {
609
+ export function extractDataflow(tree, _filePath, _definitions, langId = 'javascript') {
610
+ const rules = DATAFLOW_RULES.get(langId);
611
+ if (!rules) return { parameters: [], returns: [], assignments: [], argFlows: [], mutations: [] };
612
+
613
+ const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode;
614
+
185
615
  const parameters = [];
186
616
  const returns = [];
187
617
  const assignments = [];
188
618
  const argFlows = [];
189
619
  const mutations = [];
190
620
 
191
- // Build a scope stack as we traverse
192
- // Each scope: { funcName, funcNode, params: Map<name, index>, locals: Map<name, source> }
193
621
  const scopeStack = [];
194
622
 
195
623
  function currentScope() {
@@ -197,7 +625,6 @@ export function extractDataflow(tree, _filePath, _definitions) {
197
625
  }
198
626
 
199
627
  function findBinding(name) {
200
- // Search from innermost scope outward
201
628
  for (let i = scopeStack.length - 1; i >= 0; i--) {
202
629
  const scope = scopeStack[i];
203
630
  if (scope.params.has(name))
@@ -209,9 +636,9 @@ export function extractDataflow(tree, _filePath, _definitions) {
209
636
  }
210
637
 
211
638
  function enterScope(fnNode) {
212
- const name = functionName(fnNode);
213
- const paramsNode = fnNode.childForFieldName('parameters');
214
- const paramList = extractParams(paramsNode);
639
+ const name = functionName(fnNode, rules);
640
+ const paramsNode = fnNode.childForFieldName(rules.paramListField);
641
+ const paramList = extractParams(paramsNode, rules);
215
642
  const paramMap = new Map();
216
643
  for (const p of paramList) {
217
644
  paramMap.set(p.name, p.index);
@@ -231,14 +658,10 @@ export function extractDataflow(tree, _filePath, _definitions) {
231
658
  scopeStack.pop();
232
659
  }
233
660
 
234
- /**
235
- * Determine confidence for a variable binding flowing as an argument.
236
- */
237
661
  function bindingConfidence(binding) {
238
662
  if (!binding) return 0.5;
239
663
  if (binding.type === 'param') return 1.0;
240
664
  if (binding.type === 'local') {
241
- // Local from a call return → 0.9, from destructuring → 0.8
242
665
  if (binding.source?.type === 'call_return') return 0.9;
243
666
  if (binding.source?.type === 'destructured') return 0.8;
244
667
  return 0.9;
@@ -246,21 +669,235 @@ export function extractDataflow(tree, _filePath, _definitions) {
246
669
  return 0.5;
247
670
  }
248
671
 
672
+ /** Unwrap await if present, returning the inner expression. */
673
+ function unwrapAwait(node) {
674
+ if (rules.awaitNode && node.type === rules.awaitNode) {
675
+ return node.namedChildren[0] || node;
676
+ }
677
+ return node;
678
+ }
679
+
680
+ /** Check if a node is a call expression (single or multi-type). */
681
+ function isCall(node) {
682
+ return node && isCallNode(node.type);
683
+ }
684
+
685
+ /** Handle a variable declarator / short_var_declaration node. */
686
+ function handleVarDeclarator(node) {
687
+ let nameNode = node.childForFieldName(rules.varNameField);
688
+ let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null;
689
+
690
+ // C#: initializer is inside equals_value_clause child
691
+ if (!valueNode && rules.equalsClauseType) {
692
+ for (const child of node.namedChildren) {
693
+ if (child.type === rules.equalsClauseType) {
694
+ valueNode = child.childForFieldName('value') || child.namedChildren[0];
695
+ break;
696
+ }
697
+ }
698
+ }
699
+
700
+ // Fallback: initializer is a direct unnamed child (C# variable_declarator)
701
+ if (!valueNode) {
702
+ for (const child of node.namedChildren) {
703
+ if (child !== nameNode && isCall(unwrapAwait(child))) {
704
+ valueNode = child;
705
+ break;
706
+ }
707
+ }
708
+ }
709
+
710
+ // Go: expression_list wraps LHS/RHS — unwrap to first named child
711
+ if (rules.expressionListType) {
712
+ if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0];
713
+ if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0];
714
+ }
715
+
716
+ const scope = currentScope();
717
+ if (!nameNode || !valueNode || !scope) return;
718
+
719
+ const unwrapped = unwrapAwait(valueNode);
720
+ const callExpr = isCall(unwrapped) ? unwrapped : null;
721
+
722
+ if (callExpr) {
723
+ const callee = resolveCalleeName(callExpr, rules);
724
+ if (callee && scope.funcName) {
725
+ // Destructuring: const { a, b } = foo()
726
+ if (
727
+ (rules.objectDestructType && nameNode.type === rules.objectDestructType) ||
728
+ (rules.arrayDestructType && nameNode.type === rules.arrayDestructType)
729
+ ) {
730
+ const names = extractParamNames(nameNode, rules);
731
+ for (const n of names) {
732
+ assignments.push({
733
+ varName: n,
734
+ callerFunc: scope.funcName,
735
+ sourceCallName: callee,
736
+ expression: truncate(node.text),
737
+ line: node.startPosition.row + 1,
738
+ });
739
+ scope.locals.set(n, { type: 'destructured', callee });
740
+ }
741
+ } else {
742
+ const varName =
743
+ nameNode.type === 'identifier' || nameNode.type === rules.paramIdentifier
744
+ ? nameNode.text
745
+ : nameNode.text;
746
+ assignments.push({
747
+ varName,
748
+ callerFunc: scope.funcName,
749
+ sourceCallName: callee,
750
+ expression: truncate(node.text),
751
+ line: node.startPosition.row + 1,
752
+ });
753
+ scope.locals.set(varName, { type: 'call_return', callee });
754
+ }
755
+ }
756
+ }
757
+ }
758
+
759
+ /** Handle assignment expressions (mutation detection + call captures). */
760
+ function handleAssignment(node) {
761
+ const left = node.childForFieldName(rules.assignLeftField);
762
+ const right = node.childForFieldName(rules.assignRightField);
763
+ const scope = currentScope();
764
+ if (!scope?.funcName) return;
765
+
766
+ // Mutation: obj.prop = value
767
+ if (left && rules.memberNode && left.type === rules.memberNode) {
768
+ const receiver = memberReceiver(left, rules);
769
+ if (receiver) {
770
+ const binding = findBinding(receiver);
771
+ if (binding) {
772
+ mutations.push({
773
+ funcName: scope.funcName,
774
+ receiverName: receiver,
775
+ binding,
776
+ mutatingExpr: truncate(node.text),
777
+ line: node.startPosition.row + 1,
778
+ });
779
+ }
780
+ }
781
+ }
782
+
783
+ // Non-declaration assignment: x = foo()
784
+ if (left && isIdent(left.type, rules) && right) {
785
+ const unwrapped = unwrapAwait(right);
786
+ const callExpr = isCall(unwrapped) ? unwrapped : null;
787
+ if (callExpr) {
788
+ const callee = resolveCalleeName(callExpr, rules);
789
+ if (callee) {
790
+ assignments.push({
791
+ varName: left.text,
792
+ callerFunc: scope.funcName,
793
+ sourceCallName: callee,
794
+ expression: truncate(node.text),
795
+ line: node.startPosition.row + 1,
796
+ });
797
+ scope.locals.set(left.text, { type: 'call_return', callee });
798
+ }
799
+ }
800
+ }
801
+ }
802
+
803
+ /** Handle call expressions: track argument flows. */
804
+ function handleCallExpr(node) {
805
+ const callee = resolveCalleeName(node, rules);
806
+ const argsNode = node.childForFieldName(rules.callArgsField);
807
+ const scope = currentScope();
808
+ if (!callee || !argsNode || !scope?.funcName) return;
809
+
810
+ let argIndex = 0;
811
+ for (let arg of argsNode.namedChildren) {
812
+ // PHP/Java: unwrap argument wrapper
813
+ if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) {
814
+ arg = arg.namedChildren[0] || arg;
815
+ }
816
+ const unwrapped =
817
+ rules.spreadType && arg.type === rules.spreadType ? arg.namedChildren[0] || arg : arg;
818
+ if (!unwrapped) {
819
+ argIndex++;
820
+ continue;
821
+ }
822
+
823
+ const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null;
824
+ const argMember =
825
+ rules.memberNode && unwrapped.type === rules.memberNode
826
+ ? memberReceiver(unwrapped, rules)
827
+ : null;
828
+ const trackedName = argName || argMember;
829
+
830
+ if (trackedName) {
831
+ const binding = findBinding(trackedName);
832
+ if (binding) {
833
+ argFlows.push({
834
+ callerFunc: scope.funcName,
835
+ calleeName: callee,
836
+ argIndex,
837
+ argName: trackedName,
838
+ binding,
839
+ confidence: bindingConfidence(binding),
840
+ expression: truncate(arg.text),
841
+ line: node.startPosition.row + 1,
842
+ });
843
+ }
844
+ }
845
+ argIndex++;
846
+ }
847
+ }
848
+
849
+ /** Detect mutating method calls in expression statements. */
850
+ function handleExprStmtMutation(node) {
851
+ if (rules.mutatingMethods.size === 0) return;
852
+ const expr = node.namedChildren[0];
853
+ if (!expr || !isCall(expr)) return;
854
+
855
+ let methodName = null;
856
+ let receiver = null;
857
+
858
+ // Standard pattern: call(fn: member(obj, prop))
859
+ const fn = expr.childForFieldName(rules.callFunctionField);
860
+ if (fn && fn.type === rules.memberNode) {
861
+ const prop = fn.childForFieldName(rules.memberPropertyField);
862
+ methodName = prop ? prop.text : null;
863
+ receiver = memberReceiver(fn, rules);
864
+ }
865
+
866
+ // Java/combined pattern: call node itself has object + name fields
867
+ if (!receiver && rules.callObjectField) {
868
+ const obj = expr.childForFieldName(rules.callObjectField);
869
+ const name = expr.childForFieldName(rules.callFunctionField);
870
+ if (obj && name) {
871
+ methodName = name.text;
872
+ receiver = isIdent(obj.type, rules) ? obj.text : null;
873
+ }
874
+ }
875
+
876
+ if (!methodName || !rules.mutatingMethods.has(methodName)) return;
877
+
878
+ const scope = currentScope();
879
+ if (!receiver || !scope?.funcName) return;
880
+
881
+ const binding = findBinding(receiver);
882
+ if (binding) {
883
+ mutations.push({
884
+ funcName: scope.funcName,
885
+ receiverName: receiver,
886
+ binding,
887
+ mutatingExpr: truncate(expr.text),
888
+ line: node.startPosition.row + 1,
889
+ });
890
+ }
891
+ }
892
+
249
893
  // Recursive AST walk
250
894
  function visit(node) {
251
895
  if (!node) return;
252
896
  const t = node.type;
253
897
 
254
898
  // Enter function scopes
255
- if (
256
- t === 'function_declaration' ||
257
- t === 'method_definition' ||
258
- t === 'arrow_function' ||
259
- t === 'function_expression' ||
260
- t === 'function'
261
- ) {
899
+ if (rules.functionNodes.has(t)) {
262
900
  enterScope(node);
263
- // Visit body
264
901
  for (const child of node.namedChildren) {
265
902
  visit(child);
266
903
  }
@@ -269,12 +906,12 @@ export function extractDataflow(tree, _filePath, _definitions) {
269
906
  }
270
907
 
271
908
  // Return statements
272
- if (t === 'return_statement') {
909
+ if (rules.returnNode && t === rules.returnNode) {
273
910
  const scope = currentScope();
274
911
  if (scope?.funcName) {
275
912
  const expr = node.namedChildren[0];
276
913
  const referencedNames = [];
277
- if (expr) collectIdentifiers(expr, referencedNames);
914
+ if (expr) collectIdentifiers(expr, referencedNames, rules);
278
915
  returns.push({
279
916
  funcName: scope.funcName,
280
917
  expression: truncate(expr ? expr.text : ''),
@@ -282,192 +919,49 @@ export function extractDataflow(tree, _filePath, _definitions) {
282
919
  line: node.startPosition.row + 1,
283
920
  });
284
921
  }
285
- // Still visit children for nested expressions
286
922
  for (const child of node.namedChildren) {
287
923
  visit(child);
288
924
  }
289
925
  return;
290
926
  }
291
927
 
292
- // Variable declarations: track assignments from calls
293
- if (t === 'variable_declarator') {
294
- const nameNode = node.childForFieldName('name');
295
- const valueNode = node.childForFieldName('value');
296
- const scope = currentScope();
297
-
298
- if (nameNode && valueNode && scope) {
299
- // Resolve the call expression from the value (handles await wrapping)
300
- let callExpr = null;
301
- if (valueNode.type === 'call_expression') {
302
- callExpr = valueNode;
303
- } else if (valueNode.type === 'await_expression') {
304
- const awaitChild = valueNode.namedChildren[0];
305
- if (awaitChild?.type === 'call_expression') callExpr = awaitChild;
306
- }
307
-
308
- if (callExpr) {
309
- const callee = resolveCalleeName(callExpr);
310
- if (callee && scope.funcName) {
311
- // Destructuring: const { a, b } = foo()
312
- if (nameNode.type === 'object_pattern' || nameNode.type === 'array_pattern') {
313
- const names = extractParamNames(nameNode);
314
- for (const n of names) {
315
- assignments.push({
316
- varName: n,
317
- callerFunc: scope.funcName,
318
- sourceCallName: callee,
319
- expression: truncate(node.text),
320
- line: node.startPosition.row + 1,
321
- });
322
- scope.locals.set(n, { type: 'destructured', callee });
323
- }
324
- } else {
325
- // Simple: const x = foo()
326
- assignments.push({
327
- varName: nameNode.text,
328
- callerFunc: scope.funcName,
329
- sourceCallName: callee,
330
- expression: truncate(node.text),
331
- line: node.startPosition.row + 1,
332
- });
333
- scope.locals.set(nameNode.text, { type: 'call_return', callee });
334
- }
335
- }
336
- }
337
- }
338
- // Visit children
928
+ // Variable declarations
929
+ if (rules.varDeclaratorNode && t === rules.varDeclaratorNode) {
930
+ handleVarDeclarator(node);
339
931
  for (const child of node.namedChildren) {
340
932
  visit(child);
341
933
  }
342
934
  return;
343
935
  }
344
-
345
- // Call expressions: track argument flows
346
- if (t === 'call_expression') {
347
- const callee = resolveCalleeName(node);
348
- const argsNode = node.childForFieldName('arguments');
349
- const scope = currentScope();
350
-
351
- if (callee && argsNode && scope?.funcName) {
352
- let argIndex = 0;
353
- for (const arg of argsNode.namedChildren) {
354
- // Handle spread arguments: foo(...args)
355
- const unwrapped = arg.type === 'spread_element' ? arg.namedChildren[0] : arg;
356
- if (!unwrapped) {
357
- argIndex++;
358
- continue;
359
- }
360
- const argName = unwrapped.type === 'identifier' ? unwrapped.text : null;
361
- const argMember =
362
- unwrapped.type === 'member_expression' ? memberReceiver(unwrapped) : null;
363
- const trackedName = argName || argMember;
364
-
365
- if (trackedName) {
366
- const binding = findBinding(trackedName);
367
- if (binding) {
368
- argFlows.push({
369
- callerFunc: scope.funcName,
370
- calleeName: callee,
371
- argIndex,
372
- argName: trackedName,
373
- binding,
374
- confidence: bindingConfidence(binding),
375
- expression: truncate(arg.text),
376
- line: node.startPosition.row + 1,
377
- });
378
- }
379
- }
380
- argIndex++;
381
- }
382
- }
383
- // Visit children (but not arguments again — we handled them)
936
+ if (rules.varDeclaratorNodes?.has(t)) {
937
+ handleVarDeclarator(node);
384
938
  for (const child of node.namedChildren) {
385
939
  visit(child);
386
940
  }
387
941
  return;
388
942
  }
389
943
 
390
- // Assignment expressions: mutation detection + non-declaration call captures
391
- if (t === 'assignment_expression') {
392
- const left = node.childForFieldName('left');
393
- const right = node.childForFieldName('right');
394
- const scope = currentScope();
395
-
396
- if (scope?.funcName) {
397
- // Mutation: obj.prop = value
398
- if (left?.type === 'member_expression') {
399
- const receiver = memberReceiver(left);
400
- if (receiver) {
401
- const binding = findBinding(receiver);
402
- if (binding) {
403
- mutations.push({
404
- funcName: scope.funcName,
405
- receiverName: receiver,
406
- binding,
407
- mutatingExpr: truncate(node.text),
408
- line: node.startPosition.row + 1,
409
- });
410
- }
411
- }
412
- }
413
-
414
- // Non-declaration assignment: x = foo() (without const/let/var)
415
- if (left?.type === 'identifier' && right) {
416
- let callExpr = null;
417
- if (right.type === 'call_expression') {
418
- callExpr = right;
419
- } else if (right.type === 'await_expression') {
420
- const awaitChild = right.namedChildren[0];
421
- if (awaitChild?.type === 'call_expression') callExpr = awaitChild;
422
- }
423
- if (callExpr) {
424
- const callee = resolveCalleeName(callExpr);
425
- if (callee) {
426
- assignments.push({
427
- varName: left.text,
428
- callerFunc: scope.funcName,
429
- sourceCallName: callee,
430
- expression: truncate(node.text),
431
- line: node.startPosition.row + 1,
432
- });
433
- scope.locals.set(left.text, { type: 'call_return', callee });
434
- }
435
- }
436
- }
944
+ // Call expressions
945
+ if (isCallNode(t)) {
946
+ handleCallExpr(node);
947
+ for (const child of node.namedChildren) {
948
+ visit(child);
437
949
  }
950
+ return;
951
+ }
438
952
 
439
- // Visit children
953
+ // Assignment expressions
954
+ if (rules.assignmentNode && t === rules.assignmentNode) {
955
+ handleAssignment(node);
440
956
  for (const child of node.namedChildren) {
441
957
  visit(child);
442
958
  }
443
959
  return;
444
960
  }
445
961
 
446
- // Mutation detection: mutating method calls (push, pop, splice, etc.)
447
- if (t === 'expression_statement') {
448
- const expr = node.namedChildren[0];
449
- if (expr?.type === 'call_expression') {
450
- const fn = expr.childForFieldName('function');
451
- if (fn?.type === 'member_expression') {
452
- const prop = fn.childForFieldName('property');
453
- if (prop && MUTATING_METHODS.has(prop.text)) {
454
- const receiver = memberReceiver(fn);
455
- const scope = currentScope();
456
- if (receiver && scope?.funcName) {
457
- const binding = findBinding(receiver);
458
- if (binding) {
459
- mutations.push({
460
- funcName: scope.funcName,
461
- receiverName: receiver,
462
- binding,
463
- mutatingExpr: truncate(expr.text),
464
- line: node.startPosition.row + 1,
465
- });
466
- }
467
- }
468
- }
469
- }
470
- }
962
+ // Mutation detection via expression_statement
963
+ if (rules.expressionStmtNode && t === rules.expressionStmtNode) {
964
+ handleExprStmtMutation(node);
471
965
  }
472
966
 
473
967
  // Default: visit all children
@@ -483,14 +977,17 @@ export function extractDataflow(tree, _filePath, _definitions) {
483
977
 
484
978
  /**
485
979
  * Collect all identifier names referenced within a node.
980
+ * Uses isIdent() to support language-specific identifier node types
981
+ * (e.g. PHP's `variable_name`).
486
982
  */
487
- function collectIdentifiers(node, out) {
488
- if (node.type === 'identifier') {
983
+ function collectIdentifiers(node, out, rules) {
984
+ if (!node) return;
985
+ if (isIdent(node.type, rules)) {
489
986
  out.push(node.text);
490
987
  return;
491
988
  }
492
989
  for (const child of node.namedChildren) {
493
- collectIdentifiers(child, out);
990
+ collectIdentifiers(child, out, rules);
494
991
  }
495
992
  }
496
993
 
@@ -514,14 +1011,7 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
514
1011
  for (const [relPath, symbols] of fileSymbols) {
515
1012
  if (!symbols._tree) {
516
1013
  const ext = path.extname(relPath).toLowerCase();
517
- if (
518
- ext === '.js' ||
519
- ext === '.ts' ||
520
- ext === '.tsx' ||
521
- ext === '.jsx' ||
522
- ext === '.mjs' ||
523
- ext === '.cjs'
524
- ) {
1014
+ if (DATAFLOW_EXTENSIONS.has(ext)) {
525
1015
  needsFallback = true;
526
1016
  break;
527
1017
  }
@@ -529,7 +1019,7 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
529
1019
  }
530
1020
 
531
1021
  if (needsFallback) {
532
- const { createParsers, LANGUAGE_REGISTRY } = await import('./parser.js');
1022
+ const { createParsers } = await import('./parser.js');
533
1023
  parsers = await createParsers();
534
1024
  extToLang = new Map();
535
1025
  for (const entry of LANGUAGE_REGISTRY) {
@@ -569,24 +1059,15 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
569
1059
  const tx = db.transaction(() => {
570
1060
  for (const [relPath, symbols] of fileSymbols) {
571
1061
  const ext = path.extname(relPath).toLowerCase();
572
- // Only JS/TS for MVP
573
- if (
574
- ext !== '.js' &&
575
- ext !== '.ts' &&
576
- ext !== '.tsx' &&
577
- ext !== '.jsx' &&
578
- ext !== '.mjs' &&
579
- ext !== '.cjs'
580
- ) {
581
- continue;
582
- }
1062
+ if (!DATAFLOW_EXTENSIONS.has(ext)) continue;
583
1063
 
584
1064
  let tree = symbols._tree;
1065
+ let langId = symbols._langId;
585
1066
 
586
1067
  // WASM fallback if no cached tree
587
1068
  if (!tree) {
588
1069
  if (!extToLang || !getParserFn) continue;
589
- const langId = extToLang.get(ext);
1070
+ langId = extToLang.get(ext);
590
1071
  if (!langId || !DATAFLOW_LANG_IDS.has(langId)) continue;
591
1072
 
592
1073
  const absPath = path.join(rootDir, relPath);
@@ -607,7 +1088,14 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
607
1088
  }
608
1089
  }
609
1090
 
610
- const data = extractDataflow(tree, relPath, symbols.definitions);
1091
+ if (!langId) {
1092
+ langId = extToLang ? extToLang.get(ext) : null;
1093
+ if (!langId) continue;
1094
+ }
1095
+
1096
+ if (!DATAFLOW_RULES.has(langId)) continue;
1097
+
1098
+ const data = extractDataflow(tree, relPath, symbols.definitions, langId);
611
1099
 
612
1100
  // Resolve function names to node IDs in this file first, then globally
613
1101
  function resolveNode(funcName) {