@optave/codegraph 3.1.0 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/README.md +5 -5
  2. package/grammars/tree-sitter-go.wasm +0 -0
  3. package/package.json +8 -9
  4. package/src/ast-analysis/engine.js +365 -0
  5. package/src/ast-analysis/metrics.js +118 -0
  6. package/src/ast-analysis/rules/csharp.js +201 -0
  7. package/src/ast-analysis/rules/go.js +182 -0
  8. package/src/ast-analysis/rules/index.js +82 -0
  9. package/src/ast-analysis/rules/java.js +175 -0
  10. package/src/ast-analysis/rules/javascript.js +246 -0
  11. package/src/ast-analysis/rules/php.js +219 -0
  12. package/src/ast-analysis/rules/python.js +196 -0
  13. package/src/ast-analysis/rules/ruby.js +204 -0
  14. package/src/ast-analysis/rules/rust.js +173 -0
  15. package/src/ast-analysis/shared.js +223 -0
  16. package/src/ast-analysis/visitor-utils.js +176 -0
  17. package/src/ast-analysis/visitor.js +162 -0
  18. package/src/ast-analysis/visitors/ast-store-visitor.js +150 -0
  19. package/src/ast-analysis/visitors/cfg-visitor.js +792 -0
  20. package/src/ast-analysis/visitors/complexity-visitor.js +243 -0
  21. package/src/ast-analysis/visitors/dataflow-visitor.js +358 -0
  22. package/src/ast.js +26 -166
  23. package/src/audit.js +2 -88
  24. package/src/batch.js +0 -25
  25. package/src/boundaries.js +1 -1
  26. package/src/branch-compare.js +82 -172
  27. package/src/builder.js +48 -184
  28. package/src/cfg.js +148 -1174
  29. package/src/check.js +1 -84
  30. package/src/cli.js +118 -197
  31. package/src/cochange.js +1 -39
  32. package/src/commands/audit.js +88 -0
  33. package/src/commands/batch.js +26 -0
  34. package/src/commands/branch-compare.js +97 -0
  35. package/src/commands/cfg.js +55 -0
  36. package/src/commands/check.js +82 -0
  37. package/src/commands/cochange.js +37 -0
  38. package/src/commands/communities.js +69 -0
  39. package/src/commands/complexity.js +77 -0
  40. package/src/commands/dataflow.js +110 -0
  41. package/src/commands/flow.js +70 -0
  42. package/src/commands/manifesto.js +77 -0
  43. package/src/commands/owners.js +52 -0
  44. package/src/commands/query.js +21 -0
  45. package/src/commands/sequence.js +33 -0
  46. package/src/commands/structure.js +64 -0
  47. package/src/commands/triage.js +49 -0
  48. package/src/communities.js +22 -96
  49. package/src/complexity.js +234 -1591
  50. package/src/cycles.js +1 -1
  51. package/src/dataflow.js +274 -1352
  52. package/src/db/connection.js +88 -0
  53. package/src/db/migrations.js +312 -0
  54. package/src/db/query-builder.js +280 -0
  55. package/src/db/repository/build-stmts.js +104 -0
  56. package/src/db/repository/cfg.js +83 -0
  57. package/src/db/repository/cochange.js +41 -0
  58. package/src/db/repository/complexity.js +15 -0
  59. package/src/db/repository/dataflow.js +12 -0
  60. package/src/db/repository/edges.js +259 -0
  61. package/src/db/repository/embeddings.js +40 -0
  62. package/src/db/repository/graph-read.js +39 -0
  63. package/src/db/repository/index.js +42 -0
  64. package/src/db/repository/nodes.js +236 -0
  65. package/src/db.js +58 -399
  66. package/src/embedder.js +158 -174
  67. package/src/export.js +1 -1
  68. package/src/extractors/javascript.js +130 -5
  69. package/src/flow.js +153 -222
  70. package/src/index.js +53 -16
  71. package/src/infrastructure/result-formatter.js +21 -0
  72. package/src/infrastructure/test-filter.js +7 -0
  73. package/src/kinds.js +50 -0
  74. package/src/manifesto.js +1 -82
  75. package/src/mcp.js +37 -20
  76. package/src/owners.js +127 -182
  77. package/src/queries-cli.js +866 -0
  78. package/src/queries.js +1271 -2416
  79. package/src/sequence.js +179 -223
  80. package/src/structure.js +211 -269
  81. package/src/triage.js +117 -212
  82. package/src/viewer.js +1 -1
  83. package/src/watcher.js +7 -4
package/src/dataflow.js CHANGED
@@ -11,594 +11,33 @@
11
11
 
12
12
  import fs from 'node:fs';
13
13
  import path from 'node:path';
14
- import { openReadonlyOrFail } from './db.js';
14
+ import { DATAFLOW_RULES } from './ast-analysis/rules/index.js';
15
+ import {
16
+ makeDataflowRules as _makeDataflowRules,
17
+ buildExtensionSet,
18
+ buildExtToLangMap,
19
+ } from './ast-analysis/shared.js';
20
+ import { walkWithVisitors } from './ast-analysis/visitor.js';
21
+ import { createDataflowVisitor } from './ast-analysis/visitors/dataflow-visitor.js';
22
+ import { hasDataflowTable, openReadonlyOrFail } from './db.js';
23
+ import { isTestFile } from './infrastructure/test-filter.js';
15
24
  import { info } from './logger.js';
16
25
  import { paginateResult } from './paginate.js';
17
- import { LANGUAGE_REGISTRY } from './parser.js';
18
- import { ALL_SYMBOL_KINDS, isTestFile, normalizeSymbol } from './queries.js';
19
-
20
- // ─── Language-Specific Dataflow Rules ────────────────────────────────────
21
-
22
- const DATAFLOW_DEFAULTS = {
23
- // Scope entry
24
- functionNodes: new Set(), // REQUIRED: non-empty
25
-
26
- // Function name extraction
27
- nameField: 'name',
28
- varAssignedFnParent: null, // parent type for `const fn = ...` (JS only)
29
- assignmentFnParent: null, // parent type for `x = function...` (JS only)
30
- pairFnParent: null, // parent type for `{ key: function }` (JS only)
31
-
32
- // Parameters
33
- paramListField: 'parameters',
34
- paramIdentifier: 'identifier',
35
- paramWrapperTypes: new Set(),
36
- defaultParamType: null,
37
- restParamType: null,
38
- objectDestructType: null,
39
- arrayDestructType: null,
40
- shorthandPropPattern: null,
41
- pairPatternType: null,
42
- extractParamName: null, // override: (node) => string[]
43
-
44
- // Return
45
- returnNode: null,
46
-
47
- // Variable declarations
48
- varDeclaratorNode: null,
49
- varDeclaratorNodes: null,
50
- varNameField: 'name',
51
- varValueField: 'value',
52
- assignmentNode: null,
53
- assignLeftField: 'left',
54
- assignRightField: 'right',
55
-
56
- // Calls
57
- callNode: null,
58
- callNodes: null,
59
- callFunctionField: 'function',
60
- callArgsField: 'arguments',
61
- spreadType: null,
62
-
63
- // Member access
64
- memberNode: null,
65
- memberObjectField: 'object',
66
- memberPropertyField: 'property',
67
- optionalChainNode: null,
68
-
69
- // Await
70
- awaitNode: null,
71
-
72
- // Mutation
73
- mutatingMethods: new Set(),
74
- expressionStmtNode: 'expression_statement',
75
- callObjectField: null, // Java: combined call+member has [object] field on call node
76
-
77
- // Structural wrappers
78
- expressionListType: null, // Go: expression_list wraps LHS/RHS of short_var_declaration
79
- equalsClauseType: null, // C#: equals_value_clause wraps variable initializer
80
- argumentWrapperType: null, // PHP: individual args wrapped in 'argument' nodes
81
- extraIdentifierTypes: null, // Set of additional identifier-like types (PHP: variable_name, name)
82
- };
83
-
84
- const DATAFLOW_RULE_KEYS = new Set(Object.keys(DATAFLOW_DEFAULTS));
85
-
86
- export function makeDataflowRules(overrides) {
87
- for (const key of Object.keys(overrides)) {
88
- if (!DATAFLOW_RULE_KEYS.has(key)) {
89
- throw new Error(`Dataflow rules: unknown key "${key}"`);
90
- }
91
- }
92
- const rules = { ...DATAFLOW_DEFAULTS, ...overrides };
93
- if (!(rules.functionNodes instanceof Set) || rules.functionNodes.size === 0) {
94
- throw new Error('Dataflow rules: functionNodes must be a non-empty Set');
95
- }
96
- return rules;
97
- }
98
-
99
- // ── JS / TS / TSX ────────────────────────────────────────────────────────
100
-
101
- const JS_TS_MUTATING = new Set([
102
- 'push',
103
- 'pop',
104
- 'shift',
105
- 'unshift',
106
- 'splice',
107
- 'sort',
108
- 'reverse',
109
- 'fill',
110
- 'set',
111
- 'delete',
112
- 'add',
113
- 'clear',
114
- ]);
115
-
116
- const JS_TS_DATAFLOW = makeDataflowRules({
117
- functionNodes: new Set([
118
- 'function_declaration',
119
- 'method_definition',
120
- 'arrow_function',
121
- 'function_expression',
122
- 'function',
123
- ]),
124
- varAssignedFnParent: 'variable_declarator',
125
- assignmentFnParent: 'assignment_expression',
126
- pairFnParent: 'pair',
127
- paramWrapperTypes: new Set(['required_parameter', 'optional_parameter']),
128
- defaultParamType: 'assignment_pattern',
129
- restParamType: 'rest_pattern',
130
- objectDestructType: 'object_pattern',
131
- arrayDestructType: 'array_pattern',
132
- shorthandPropPattern: 'shorthand_property_identifier_pattern',
133
- pairPatternType: 'pair_pattern',
134
- returnNode: 'return_statement',
135
- varDeclaratorNode: 'variable_declarator',
136
- assignmentNode: 'assignment_expression',
137
- callNode: 'call_expression',
138
- spreadType: 'spread_element',
139
- memberNode: 'member_expression',
140
- optionalChainNode: 'optional_chain_expression',
141
- awaitNode: 'await_expression',
142
- mutatingMethods: JS_TS_MUTATING,
143
- });
144
-
145
- // ── Python ───────────────────────────────────────────────────────────────
146
-
147
- const PYTHON_DATAFLOW = makeDataflowRules({
148
- functionNodes: new Set(['function_definition', 'lambda']),
149
- defaultParamType: 'default_parameter',
150
- restParamType: 'list_splat_pattern',
151
- returnNode: 'return_statement',
152
- varDeclaratorNode: null,
153
- assignmentNode: 'assignment',
154
- assignLeftField: 'left',
155
- assignRightField: 'right',
156
- callNode: 'call',
157
- callFunctionField: 'function',
158
- callArgsField: 'arguments',
159
- spreadType: 'list_splat',
160
- memberNode: 'attribute',
161
- memberObjectField: 'object',
162
- memberPropertyField: 'attribute',
163
- awaitNode: 'await',
164
- mutatingMethods: new Set([
165
- 'append',
166
- 'extend',
167
- 'insert',
168
- 'pop',
169
- 'remove',
170
- 'clear',
171
- 'sort',
172
- 'reverse',
173
- 'add',
174
- 'discard',
175
- 'update',
176
- ]),
177
- extractParamName(node) {
178
- // typed_parameter / typed_default_parameter: first identifier child is the name
179
- if (node.type === 'typed_parameter' || node.type === 'typed_default_parameter') {
180
- for (const c of node.namedChildren) {
181
- if (c.type === 'identifier') return [c.text];
182
- }
183
- return null;
184
- }
185
- if (node.type === 'default_parameter') {
186
- const nameNode = node.childForFieldName('name');
187
- return nameNode ? [nameNode.text] : null;
188
- }
189
- if (node.type === 'list_splat_pattern' || node.type === 'dictionary_splat_pattern') {
190
- for (const c of node.namedChildren) {
191
- if (c.type === 'identifier') return [c.text];
192
- }
193
- return null;
194
- }
195
- return null;
196
- },
197
- });
198
-
199
- // ── Go ───────────────────────────────────────────────────────────────────
200
-
201
- const GO_DATAFLOW = makeDataflowRules({
202
- functionNodes: new Set(['function_declaration', 'method_declaration', 'func_literal']),
203
- returnNode: 'return_statement',
204
- varDeclaratorNodes: new Set(['short_var_declaration', 'var_declaration']),
205
- varNameField: 'left',
206
- varValueField: 'right',
207
- assignmentNode: 'assignment_statement',
208
- assignLeftField: 'left',
209
- assignRightField: 'right',
210
- callNode: 'call_expression',
211
- callFunctionField: 'function',
212
- callArgsField: 'arguments',
213
- memberNode: 'selector_expression',
214
- memberObjectField: 'operand',
215
- memberPropertyField: 'field',
216
- mutatingMethods: new Set(),
217
- expressionListType: 'expression_list',
218
- extractParamName(node) {
219
- // Go: parameter_declaration has name(s) + type; e.g. `a, b int`
220
- if (node.type === 'parameter_declaration') {
221
- const names = [];
222
- for (const c of node.namedChildren) {
223
- if (c.type === 'identifier') names.push(c.text);
224
- }
225
- return names.length > 0 ? names : null;
226
- }
227
- if (node.type === 'variadic_parameter_declaration') {
228
- const nameNode = node.childForFieldName('name');
229
- return nameNode ? [nameNode.text] : null;
230
- }
231
- return null;
232
- },
233
- });
234
-
235
- // ── Rust ─────────────────────────────────────────────────────────────────
236
-
237
- const RUST_DATAFLOW = makeDataflowRules({
238
- functionNodes: new Set(['function_item', 'closure_expression']),
239
- returnNode: 'return_expression',
240
- varDeclaratorNode: 'let_declaration',
241
- varNameField: 'pattern',
242
- varValueField: 'value',
243
- assignmentNode: 'assignment_expression',
244
- callNode: 'call_expression',
245
- callFunctionField: 'function',
246
- callArgsField: 'arguments',
247
- memberNode: 'field_expression',
248
- memberObjectField: 'value',
249
- memberPropertyField: 'field',
250
- awaitNode: 'await_expression',
251
- mutatingMethods: new Set(['push', 'pop', 'insert', 'remove', 'clear', 'sort', 'reverse']),
252
- extractParamName(node) {
253
- if (node.type === 'parameter') {
254
- const pat = node.childForFieldName('pattern');
255
- if (pat?.type === 'identifier') return [pat.text];
256
- return null;
257
- }
258
- if (node.type === 'identifier') return [node.text];
259
- return null;
260
- },
261
- });
262
-
263
- // ── Java ─────────────────────────────────────────────────────────────────
264
-
265
- const JAVA_DATAFLOW = makeDataflowRules({
266
- functionNodes: new Set(['method_declaration', 'constructor_declaration', 'lambda_expression']),
267
- returnNode: 'return_statement',
268
- varDeclaratorNode: 'variable_declarator',
269
- assignmentNode: 'assignment_expression',
270
- callNodes: new Set(['method_invocation', 'object_creation_expression']),
271
- callFunctionField: 'name',
272
- callArgsField: 'arguments',
273
- memberNode: 'field_access',
274
- memberObjectField: 'object',
275
- memberPropertyField: 'field',
276
- callObjectField: 'object',
277
- argumentWrapperType: 'argument',
278
- mutatingMethods: new Set(['add', 'remove', 'clear', 'put', 'set', 'push', 'pop', 'sort']),
279
- extractParamName(node) {
280
- if (node.type === 'formal_parameter' || node.type === 'spread_parameter') {
281
- const nameNode = node.childForFieldName('name');
282
- return nameNode ? [nameNode.text] : null;
283
- }
284
- if (node.type === 'identifier') return [node.text];
285
- return null;
286
- },
287
- });
288
-
289
- // ── C# ───────────────────────────────────────────────────────────────────
290
-
291
- const CSHARP_DATAFLOW = makeDataflowRules({
292
- functionNodes: new Set([
293
- 'method_declaration',
294
- 'constructor_declaration',
295
- 'lambda_expression',
296
- 'local_function_statement',
297
- ]),
298
- returnNode: 'return_statement',
299
- varDeclaratorNode: 'variable_declarator',
300
- varNameField: 'name',
301
- assignmentNode: 'assignment_expression',
302
- callNode: 'invocation_expression',
303
- callFunctionField: 'function',
304
- callArgsField: 'arguments',
305
- memberNode: 'member_access_expression',
306
- memberObjectField: 'expression',
307
- memberPropertyField: 'name',
308
- awaitNode: 'await_expression',
309
- argumentWrapperType: 'argument',
310
- mutatingMethods: new Set(['Add', 'Remove', 'Clear', 'Insert', 'Sort', 'Reverse', 'Push', 'Pop']),
311
- extractParamName(node) {
312
- if (node.type === 'parameter') {
313
- const nameNode = node.childForFieldName('name');
314
- return nameNode ? [nameNode.text] : null;
315
- }
316
- if (node.type === 'identifier') return [node.text];
317
- return null;
318
- },
319
- });
320
-
321
- // ── PHP ──────────────────────────────────────────────────────────────────
322
-
323
- const PHP_DATAFLOW = makeDataflowRules({
324
- functionNodes: new Set([
325
- 'function_definition',
326
- 'method_declaration',
327
- 'anonymous_function_creation_expression',
328
- 'arrow_function',
329
- ]),
330
- paramListField: 'parameters',
331
- paramIdentifier: 'variable_name',
332
- returnNode: 'return_statement',
333
- varDeclaratorNode: null,
334
- assignmentNode: 'assignment_expression',
335
- assignLeftField: 'left',
336
- assignRightField: 'right',
337
- callNodes: new Set([
338
- 'function_call_expression',
339
- 'member_call_expression',
340
- 'scoped_call_expression',
341
- ]),
342
- callFunctionField: 'function',
343
- callArgsField: 'arguments',
344
- spreadType: 'spread_expression',
345
- memberNode: 'member_access_expression',
346
- memberObjectField: 'object',
347
- memberPropertyField: 'name',
348
- argumentWrapperType: 'argument',
349
- extraIdentifierTypes: new Set(['variable_name', 'name']),
350
- mutatingMethods: new Set(['push', 'pop', 'shift', 'unshift', 'splice', 'sort', 'reverse']),
351
- extractParamName(node) {
352
- // PHP: simple_parameter → $name or &$name
353
- if (node.type === 'simple_parameter' || node.type === 'variadic_parameter') {
354
- const nameNode = node.childForFieldName('name');
355
- return nameNode ? [nameNode.text] : null;
356
- }
357
- if (node.type === 'variable_name') return [node.text];
358
- return null;
359
- },
360
- });
361
-
362
- // ── Ruby ─────────────────────────────────────────────────────────────────
363
-
364
- const RUBY_DATAFLOW = makeDataflowRules({
365
- functionNodes: new Set(['method', 'singleton_method', 'lambda']),
366
- paramListField: 'parameters',
367
- returnNode: 'return',
368
- varDeclaratorNode: null,
369
- assignmentNode: 'assignment',
370
- assignLeftField: 'left',
371
- assignRightField: 'right',
372
- callNode: 'call',
373
- callFunctionField: 'method',
374
- callArgsField: 'arguments',
375
- spreadType: 'splat_parameter',
376
- memberNode: 'call',
377
- memberObjectField: 'receiver',
378
- memberPropertyField: 'method',
379
- mutatingMethods: new Set([
380
- 'push',
381
- 'pop',
382
- 'shift',
383
- 'unshift',
384
- 'delete',
385
- 'clear',
386
- 'sort!',
387
- 'reverse!',
388
- 'map!',
389
- 'select!',
390
- 'reject!',
391
- 'compact!',
392
- 'flatten!',
393
- 'concat',
394
- 'replace',
395
- 'insert',
396
- ]),
397
- extractParamName(node) {
398
- if (node.type === 'identifier') return [node.text];
399
- if (
400
- node.type === 'optional_parameter' ||
401
- node.type === 'keyword_parameter' ||
402
- node.type === 'splat_parameter' ||
403
- node.type === 'hash_splat_parameter'
404
- ) {
405
- const nameNode = node.childForFieldName('name');
406
- return nameNode ? [nameNode.text] : null;
407
- }
408
- return null;
409
- },
410
- });
411
-
412
- // ── Rules Map + Extensions Set ───────────────────────────────────────────
413
-
414
- export const DATAFLOW_RULES = new Map([
415
- ['javascript', JS_TS_DATAFLOW],
416
- ['typescript', JS_TS_DATAFLOW],
417
- ['tsx', JS_TS_DATAFLOW],
418
- ['python', PYTHON_DATAFLOW],
419
- ['go', GO_DATAFLOW],
420
- ['rust', RUST_DATAFLOW],
421
- ['java', JAVA_DATAFLOW],
422
- ['csharp', CSHARP_DATAFLOW],
423
- ['php', PHP_DATAFLOW],
424
- ['ruby', RUBY_DATAFLOW],
425
- ]);
426
-
427
- const DATAFLOW_LANG_IDS = new Set(DATAFLOW_RULES.keys());
428
-
429
- export const DATAFLOW_EXTENSIONS = new Set();
430
- for (const entry of LANGUAGE_REGISTRY) {
431
- if (DATAFLOW_RULES.has(entry.id)) {
432
- for (const ext of entry.extensions) DATAFLOW_EXTENSIONS.add(ext);
433
- }
434
- }
435
-
436
- // ── AST helpers ──────────────────────────────────────────────────────────────
437
-
438
- function truncate(str, max = 120) {
439
- if (!str) return '';
440
- return str.length > max ? `${str.slice(0, max)}…` : str;
441
- }
442
-
443
- /**
444
- * Get the name of a function node from the AST using rules.
445
- */
446
- function functionName(fnNode, rules) {
447
- if (!fnNode) return null;
448
- // Try the standard name field first (works for most languages)
449
- const nameNode = fnNode.childForFieldName(rules.nameField);
450
- if (nameNode) return nameNode.text;
451
-
452
- // JS-specific: arrow_function/function_expression assigned to variable, pair, or assignment
453
- const parent = fnNode.parent;
454
- if (parent) {
455
- if (rules.varAssignedFnParent && parent.type === rules.varAssignedFnParent) {
456
- const n = parent.childForFieldName('name');
457
- return n ? n.text : null;
458
- }
459
- if (rules.pairFnParent && parent.type === rules.pairFnParent) {
460
- const keyNode = parent.childForFieldName('key');
461
- return keyNode ? keyNode.text : null;
462
- }
463
- if (rules.assignmentFnParent && parent.type === rules.assignmentFnParent) {
464
- const left = parent.childForFieldName(rules.assignLeftField);
465
- return left ? left.text : null;
466
- }
467
- }
468
- return null;
469
- }
470
-
471
- /**
472
- * Extract parameter names and indices from a formal_parameters node.
473
- */
474
- function extractParams(paramsNode, rules) {
475
- if (!paramsNode) return [];
476
- const result = [];
477
- let index = 0;
478
- for (const child of paramsNode.namedChildren) {
479
- const names = extractParamNames(child, rules);
480
- for (const name of names) {
481
- result.push({ name, index });
482
- }
483
- index++;
484
- }
485
- return result;
486
- }
487
-
488
- function extractParamNames(node, rules) {
489
- if (!node) return [];
490
- const t = node.type;
491
-
492
- // Language-specific override (Go, Rust, Java, C#, PHP, Ruby)
493
- if (rules.extractParamName) {
494
- const result = rules.extractParamName(node);
495
- if (result) return result;
496
- }
497
-
498
- // Leaf identifier
499
- if (t === rules.paramIdentifier) return [node.text];
500
-
501
- // Wrapper types (TS required_parameter, Python typed_parameter, etc.)
502
- if (rules.paramWrapperTypes.has(t)) {
503
- const pattern = node.childForFieldName('pattern') || node.childForFieldName('name');
504
- return pattern ? extractParamNames(pattern, rules) : [];
505
- }
506
-
507
- // Default parameter (assignment_pattern / default_parameter)
508
- if (rules.defaultParamType && t === rules.defaultParamType) {
509
- const left = node.childForFieldName('left') || node.childForFieldName('name');
510
- return left ? extractParamNames(left, rules) : [];
511
- }
512
-
513
- // Rest / splat parameter
514
- if (rules.restParamType && t === rules.restParamType) {
515
- // Try name field first, then fall back to scanning children
516
- const nameNode = node.childForFieldName('name');
517
- if (nameNode) return [nameNode.text];
518
- for (const child of node.namedChildren) {
519
- if (child.type === rules.paramIdentifier) return [child.text];
520
- }
521
- return [];
522
- }
523
-
524
- // Object destructuring (JS only)
525
- if (rules.objectDestructType && t === rules.objectDestructType) {
526
- const names = [];
527
- for (const child of node.namedChildren) {
528
- if (rules.shorthandPropPattern && child.type === rules.shorthandPropPattern) {
529
- names.push(child.text);
530
- } else if (rules.pairPatternType && child.type === rules.pairPatternType) {
531
- const value = child.childForFieldName('value');
532
- if (value) names.push(...extractParamNames(value, rules));
533
- } else if (rules.restParamType && child.type === rules.restParamType) {
534
- names.push(...extractParamNames(child, rules));
535
- }
536
- }
537
- return names;
538
- }
26
+ import { ALL_SYMBOL_KINDS, normalizeSymbol } from './queries.js';
539
27
 
540
- // Array destructuring (JS only)
541
- if (rules.arrayDestructType && t === rules.arrayDestructType) {
542
- const names = [];
543
- for (const child of node.namedChildren) {
544
- names.push(...extractParamNames(child, rules));
545
- }
546
- return names;
547
- }
28
+ // Re-export for backward compatibility
29
+ export { DATAFLOW_RULES };
30
+ export { _makeDataflowRules as makeDataflowRules };
548
31
 
549
- return [];
550
- }
32
+ export const DATAFLOW_EXTENSIONS = buildExtensionSet(DATAFLOW_RULES);
551
33
 
552
- /** Check if a node type is identifier-like for this language. */
553
- function isIdent(nodeType, rules) {
554
- if (nodeType === 'identifier' || nodeType === rules.paramIdentifier) return true;
555
- return rules.extraIdentifierTypes ? rules.extraIdentifierTypes.has(nodeType) : false;
556
- }
557
-
558
- /**
559
- * Resolve the name a call expression is calling using rules.
560
- */
561
- function resolveCalleeName(callNode, rules) {
562
- const fn = callNode.childForFieldName(rules.callFunctionField);
563
- if (!fn) {
564
- // Some languages (Java method_invocation, Ruby call) use 'name' field directly
565
- const nameNode = callNode.childForFieldName('name') || callNode.childForFieldName('method');
566
- return nameNode ? nameNode.text : null;
567
- }
568
- if (isIdent(fn.type, rules)) return fn.text;
569
- if (fn.type === rules.memberNode) {
570
- const prop = fn.childForFieldName(rules.memberPropertyField);
571
- return prop ? prop.text : null;
572
- }
573
- if (rules.optionalChainNode && fn.type === rules.optionalChainNode) {
574
- const target = fn.namedChildren[0];
575
- if (!target) return null;
576
- if (target.type === rules.memberNode) {
577
- const prop = target.childForFieldName(rules.memberPropertyField);
578
- return prop ? prop.text : null;
579
- }
580
- if (target.type === 'identifier') return target.text;
581
- const prop = fn.childForFieldName(rules.memberPropertyField);
582
- return prop ? prop.text : null;
583
- }
584
- return null;
585
- }
586
-
587
- /**
588
- * Get the receiver (object) of a member expression using rules.
589
- */
590
- function memberReceiver(memberExpr, rules) {
591
- const obj = memberExpr.childForFieldName(rules.memberObjectField);
592
- if (!obj) return null;
593
- if (isIdent(obj.type, rules)) return obj.text;
594
- if (obj.type === rules.memberNode) return memberReceiver(obj, rules);
595
- return null;
596
- }
34
+ // ── AST helpers (now in ast-analysis/visitor-utils.js, kept as re-exports) ──
597
35
 
598
36
  // ── extractDataflow ──────────────────────────────────────────────────────────
599
37
 
600
38
  /**
601
39
  * Extract dataflow information from a parsed AST.
40
+ * Delegates to the dataflow visitor via the unified walker.
602
41
  *
603
42
  * @param {object} tree - tree-sitter parse tree
604
43
  * @param {string} filePath - relative file path
@@ -610,385 +49,13 @@ export function extractDataflow(tree, _filePath, _definitions, langId = 'javascr
610
49
  const rules = DATAFLOW_RULES.get(langId);
611
50
  if (!rules) return { parameters: [], returns: [], assignments: [], argFlows: [], mutations: [] };
612
51
 
613
- const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode;
614
-
615
- const parameters = [];
616
- const returns = [];
617
- const assignments = [];
618
- const argFlows = [];
619
- const mutations = [];
620
-
621
- const scopeStack = [];
622
-
623
- function currentScope() {
624
- return scopeStack.length > 0 ? scopeStack[scopeStack.length - 1] : null;
625
- }
626
-
627
- function findBinding(name) {
628
- for (let i = scopeStack.length - 1; i >= 0; i--) {
629
- const scope = scopeStack[i];
630
- if (scope.params.has(name))
631
- return { type: 'param', index: scope.params.get(name), funcName: scope.funcName };
632
- if (scope.locals.has(name))
633
- return { type: 'local', source: scope.locals.get(name), funcName: scope.funcName };
634
- }
635
- return null;
636
- }
637
-
638
- function enterScope(fnNode) {
639
- const name = functionName(fnNode, rules);
640
- const paramsNode = fnNode.childForFieldName(rules.paramListField);
641
- const paramList = extractParams(paramsNode, rules);
642
- const paramMap = new Map();
643
- for (const p of paramList) {
644
- paramMap.set(p.name, p.index);
645
- if (name) {
646
- parameters.push({
647
- funcName: name,
648
- paramName: p.name,
649
- paramIndex: p.index,
650
- line: (paramsNode?.startPosition?.row ?? fnNode.startPosition.row) + 1,
651
- });
652
- }
653
- }
654
- scopeStack.push({ funcName: name, funcNode: fnNode, params: paramMap, locals: new Map() });
655
- }
656
-
657
- function exitScope() {
658
- scopeStack.pop();
659
- }
660
-
661
- function bindingConfidence(binding) {
662
- if (!binding) return 0.5;
663
- if (binding.type === 'param') return 1.0;
664
- if (binding.type === 'local') {
665
- if (binding.source?.type === 'call_return') return 0.9;
666
- if (binding.source?.type === 'destructured') return 0.8;
667
- return 0.9;
668
- }
669
- return 0.5;
670
- }
671
-
672
- /** Unwrap await if present, returning the inner expression. */
673
- function unwrapAwait(node) {
674
- if (rules.awaitNode && node.type === rules.awaitNode) {
675
- return node.namedChildren[0] || node;
676
- }
677
- return node;
678
- }
679
-
680
- /** Check if a node is a call expression (single or multi-type). */
681
- function isCall(node) {
682
- return node && isCallNode(node.type);
683
- }
684
-
685
- /** Handle a variable declarator / short_var_declaration node. */
686
- function handleVarDeclarator(node) {
687
- let nameNode = node.childForFieldName(rules.varNameField);
688
- let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null;
689
-
690
- // C#: initializer is inside equals_value_clause child
691
- if (!valueNode && rules.equalsClauseType) {
692
- for (const child of node.namedChildren) {
693
- if (child.type === rules.equalsClauseType) {
694
- valueNode = child.childForFieldName('value') || child.namedChildren[0];
695
- break;
696
- }
697
- }
698
- }
699
-
700
- // Fallback: initializer is a direct unnamed child (C# variable_declarator)
701
- if (!valueNode) {
702
- for (const child of node.namedChildren) {
703
- if (child !== nameNode && isCall(unwrapAwait(child))) {
704
- valueNode = child;
705
- break;
706
- }
707
- }
708
- }
709
-
710
- // Go: expression_list wraps LHS/RHS — unwrap to first named child
711
- if (rules.expressionListType) {
712
- if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0];
713
- if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0];
714
- }
715
-
716
- const scope = currentScope();
717
- if (!nameNode || !valueNode || !scope) return;
718
-
719
- const unwrapped = unwrapAwait(valueNode);
720
- const callExpr = isCall(unwrapped) ? unwrapped : null;
721
-
722
- if (callExpr) {
723
- const callee = resolveCalleeName(callExpr, rules);
724
- if (callee && scope.funcName) {
725
- // Destructuring: const { a, b } = foo()
726
- if (
727
- (rules.objectDestructType && nameNode.type === rules.objectDestructType) ||
728
- (rules.arrayDestructType && nameNode.type === rules.arrayDestructType)
729
- ) {
730
- const names = extractParamNames(nameNode, rules);
731
- for (const n of names) {
732
- assignments.push({
733
- varName: n,
734
- callerFunc: scope.funcName,
735
- sourceCallName: callee,
736
- expression: truncate(node.text),
737
- line: node.startPosition.row + 1,
738
- });
739
- scope.locals.set(n, { type: 'destructured', callee });
740
- }
741
- } else {
742
- const varName =
743
- nameNode.type === 'identifier' || nameNode.type === rules.paramIdentifier
744
- ? nameNode.text
745
- : nameNode.text;
746
- assignments.push({
747
- varName,
748
- callerFunc: scope.funcName,
749
- sourceCallName: callee,
750
- expression: truncate(node.text),
751
- line: node.startPosition.row + 1,
752
- });
753
- scope.locals.set(varName, { type: 'call_return', callee });
754
- }
755
- }
756
- }
757
- }
758
-
759
- /** Handle assignment expressions (mutation detection + call captures). */
760
- function handleAssignment(node) {
761
- const left = node.childForFieldName(rules.assignLeftField);
762
- const right = node.childForFieldName(rules.assignRightField);
763
- const scope = currentScope();
764
- if (!scope?.funcName) return;
765
-
766
- // Mutation: obj.prop = value
767
- if (left && rules.memberNode && left.type === rules.memberNode) {
768
- const receiver = memberReceiver(left, rules);
769
- if (receiver) {
770
- const binding = findBinding(receiver);
771
- if (binding) {
772
- mutations.push({
773
- funcName: scope.funcName,
774
- receiverName: receiver,
775
- binding,
776
- mutatingExpr: truncate(node.text),
777
- line: node.startPosition.row + 1,
778
- });
779
- }
780
- }
781
- }
782
-
783
- // Non-declaration assignment: x = foo()
784
- if (left && isIdent(left.type, rules) && right) {
785
- const unwrapped = unwrapAwait(right);
786
- const callExpr = isCall(unwrapped) ? unwrapped : null;
787
- if (callExpr) {
788
- const callee = resolveCalleeName(callExpr, rules);
789
- if (callee) {
790
- assignments.push({
791
- varName: left.text,
792
- callerFunc: scope.funcName,
793
- sourceCallName: callee,
794
- expression: truncate(node.text),
795
- line: node.startPosition.row + 1,
796
- });
797
- scope.locals.set(left.text, { type: 'call_return', callee });
798
- }
799
- }
800
- }
801
- }
802
-
803
- /** Handle call expressions: track argument flows. */
804
- function handleCallExpr(node) {
805
- const callee = resolveCalleeName(node, rules);
806
- const argsNode = node.childForFieldName(rules.callArgsField);
807
- const scope = currentScope();
808
- if (!callee || !argsNode || !scope?.funcName) return;
809
-
810
- let argIndex = 0;
811
- for (let arg of argsNode.namedChildren) {
812
- // PHP/Java: unwrap argument wrapper
813
- if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) {
814
- arg = arg.namedChildren[0] || arg;
815
- }
816
- const unwrapped =
817
- rules.spreadType && arg.type === rules.spreadType ? arg.namedChildren[0] || arg : arg;
818
- if (!unwrapped) {
819
- argIndex++;
820
- continue;
821
- }
822
-
823
- const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null;
824
- const argMember =
825
- rules.memberNode && unwrapped.type === rules.memberNode
826
- ? memberReceiver(unwrapped, rules)
827
- : null;
828
- const trackedName = argName || argMember;
829
-
830
- if (trackedName) {
831
- const binding = findBinding(trackedName);
832
- if (binding) {
833
- argFlows.push({
834
- callerFunc: scope.funcName,
835
- calleeName: callee,
836
- argIndex,
837
- argName: trackedName,
838
- binding,
839
- confidence: bindingConfidence(binding),
840
- expression: truncate(arg.text),
841
- line: node.startPosition.row + 1,
842
- });
843
- }
844
- }
845
- argIndex++;
846
- }
847
- }
848
-
849
- /** Detect mutating method calls in expression statements. */
850
- function handleExprStmtMutation(node) {
851
- if (rules.mutatingMethods.size === 0) return;
852
- const expr = node.namedChildren[0];
853
- if (!expr || !isCall(expr)) return;
854
-
855
- let methodName = null;
856
- let receiver = null;
857
-
858
- // Standard pattern: call(fn: member(obj, prop))
859
- const fn = expr.childForFieldName(rules.callFunctionField);
860
- if (fn && fn.type === rules.memberNode) {
861
- const prop = fn.childForFieldName(rules.memberPropertyField);
862
- methodName = prop ? prop.text : null;
863
- receiver = memberReceiver(fn, rules);
864
- }
865
-
866
- // Java/combined pattern: call node itself has object + name fields
867
- if (!receiver && rules.callObjectField) {
868
- const obj = expr.childForFieldName(rules.callObjectField);
869
- const name = expr.childForFieldName(rules.callFunctionField);
870
- if (obj && name) {
871
- methodName = name.text;
872
- receiver = isIdent(obj.type, rules) ? obj.text : null;
873
- }
874
- }
875
-
876
- if (!methodName || !rules.mutatingMethods.has(methodName)) return;
877
-
878
- const scope = currentScope();
879
- if (!receiver || !scope?.funcName) return;
880
-
881
- const binding = findBinding(receiver);
882
- if (binding) {
883
- mutations.push({
884
- funcName: scope.funcName,
885
- receiverName: receiver,
886
- binding,
887
- mutatingExpr: truncate(expr.text),
888
- line: node.startPosition.row + 1,
889
- });
890
- }
891
- }
892
-
893
- // Recursive AST walk
894
- function visit(node) {
895
- if (!node) return;
896
- const t = node.type;
897
-
898
- // Enter function scopes
899
- if (rules.functionNodes.has(t)) {
900
- enterScope(node);
901
- for (const child of node.namedChildren) {
902
- visit(child);
903
- }
904
- exitScope();
905
- return;
906
- }
907
-
908
- // Return statements
909
- if (rules.returnNode && t === rules.returnNode) {
910
- const scope = currentScope();
911
- if (scope?.funcName) {
912
- const expr = node.namedChildren[0];
913
- const referencedNames = [];
914
- if (expr) collectIdentifiers(expr, referencedNames, rules);
915
- returns.push({
916
- funcName: scope.funcName,
917
- expression: truncate(expr ? expr.text : ''),
918
- referencedNames,
919
- line: node.startPosition.row + 1,
920
- });
921
- }
922
- for (const child of node.namedChildren) {
923
- visit(child);
924
- }
925
- return;
926
- }
927
-
928
- // Variable declarations
929
- if (rules.varDeclaratorNode && t === rules.varDeclaratorNode) {
930
- handleVarDeclarator(node);
931
- for (const child of node.namedChildren) {
932
- visit(child);
933
- }
934
- return;
935
- }
936
- if (rules.varDeclaratorNodes?.has(t)) {
937
- handleVarDeclarator(node);
938
- for (const child of node.namedChildren) {
939
- visit(child);
940
- }
941
- return;
942
- }
943
-
944
- // Call expressions
945
- if (isCallNode(t)) {
946
- handleCallExpr(node);
947
- for (const child of node.namedChildren) {
948
- visit(child);
949
- }
950
- return;
951
- }
952
-
953
- // Assignment expressions
954
- if (rules.assignmentNode && t === rules.assignmentNode) {
955
- handleAssignment(node);
956
- for (const child of node.namedChildren) {
957
- visit(child);
958
- }
959
- return;
960
- }
961
-
962
- // Mutation detection via expression_statement
963
- if (rules.expressionStmtNode && t === rules.expressionStmtNode) {
964
- handleExprStmtMutation(node);
965
- }
966
-
967
- // Default: visit all children
968
- for (const child of node.namedChildren) {
969
- visit(child);
970
- }
971
- }
972
-
973
- visit(tree.rootNode);
974
-
975
- return { parameters, returns, assignments, argFlows, mutations };
976
- }
52
+ const visitor = createDataflowVisitor(rules);
53
+ const results = walkWithVisitors(tree.rootNode, [visitor], langId, {
54
+ functionNodeTypes: rules.functionNodes,
55
+ getFunctionName: () => null, // dataflow visitor handles its own name extraction
56
+ });
977
57
 
978
- /**
979
- * Collect all identifier names referenced within a node.
980
- * Uses isIdent() to support language-specific identifier node types
981
- * (e.g. PHP's `variable_name`).
982
- */
983
- function collectIdentifiers(node, out, rules) {
984
- if (!node) return;
985
- if (isIdent(node.type, rules)) {
986
- out.push(node.text);
987
- return;
988
- }
989
- for (const child of node.namedChildren) {
990
- collectIdentifiers(child, out, rules);
991
- }
58
+ return results.dataflow;
992
59
  }
993
60
 
994
61
  // ── buildDataflowEdges ──────────────────────────────────────────────────────
@@ -1009,12 +76,7 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
1009
76
 
1010
77
  // Always build ext→langId map so native-only builds (where _langId is unset)
1011
78
  // can still derive the language from the file extension.
1012
- const extToLang = new Map();
1013
- for (const entry of LANGUAGE_REGISTRY) {
1014
- for (const ext of entry.extensions) {
1015
- extToLang.set(ext, entry.id);
1016
- }
1017
- }
79
+ const extToLang = buildExtToLangMap();
1018
80
 
1019
81
  for (const [relPath, symbols] of fileSymbols) {
1020
82
  if (!symbols._tree && !symbols.dataflow) {
@@ -1073,7 +135,7 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts)
1073
135
  if (!tree) {
1074
136
  if (!getParserFn) continue;
1075
137
  langId = extToLang.get(ext);
1076
- if (!langId || !DATAFLOW_LANG_IDS.has(langId)) continue;
138
+ if (!langId || !DATAFLOW_RULES.has(langId)) continue;
1077
139
 
1078
140
  const absPath = path.join(rootDir, relPath);
1079
141
  let code;
@@ -1199,18 +261,6 @@ function findNodes(db, name, opts = {}) {
1199
261
  return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows;
1200
262
  }
1201
263
 
1202
- /**
1203
- * Check if the dataflow table exists and has data.
1204
- */
1205
- function hasDataflowTable(db) {
1206
- try {
1207
- const row = db.prepare('SELECT COUNT(*) as c FROM dataflow').get();
1208
- return row.c > 0;
1209
- } catch {
1210
- return false;
1211
- }
1212
- }
1213
-
1214
264
  /**
1215
265
  * Return all dataflow edges for a symbol.
1216
266
  *
@@ -1221,134 +271,135 @@ function hasDataflowTable(db) {
1221
271
  */
1222
272
  export function dataflowData(name, customDbPath, opts = {}) {
1223
273
  const db = openReadonlyOrFail(customDbPath);
1224
- const noTests = opts.noTests || false;
274
+ try {
275
+ const noTests = opts.noTests || false;
1225
276
 
1226
- if (!hasDataflowTable(db)) {
1227
- db.close();
1228
- return {
1229
- name,
1230
- results: [],
1231
- warning:
1232
- 'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
1233
- };
1234
- }
277
+ if (!hasDataflowTable(db)) {
278
+ return {
279
+ name,
280
+ results: [],
281
+ warning:
282
+ 'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
283
+ };
284
+ }
1235
285
 
1236
- const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind });
1237
- if (nodes.length === 0) {
1238
- db.close();
1239
- return { name, results: [] };
1240
- }
286
+ const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind });
287
+ if (nodes.length === 0) {
288
+ return { name, results: [] };
289
+ }
1241
290
 
1242
- const flowsToOut = db.prepare(
1243
- `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line
291
+ const flowsToOut = db.prepare(
292
+ `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line
1244
293
  FROM dataflow d JOIN nodes n ON d.target_id = n.id
1245
294
  WHERE d.source_id = ? AND d.kind = 'flows_to'`,
1246
- );
1247
- const flowsToIn = db.prepare(
1248
- `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line
295
+ );
296
+ const flowsToIn = db.prepare(
297
+ `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line
1249
298
  FROM dataflow d JOIN nodes n ON d.source_id = n.id
1250
299
  WHERE d.target_id = ? AND d.kind = 'flows_to'`,
1251
- );
1252
- const returnsOut = db.prepare(
1253
- `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line
300
+ );
301
+ const returnsOut = db.prepare(
302
+ `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line
1254
303
  FROM dataflow d JOIN nodes n ON d.target_id = n.id
1255
304
  WHERE d.source_id = ? AND d.kind = 'returns'`,
1256
- );
1257
- const returnsIn = db.prepare(
1258
- `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line
305
+ );
306
+ const returnsIn = db.prepare(
307
+ `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line
1259
308
  FROM dataflow d JOIN nodes n ON d.source_id = n.id
1260
309
  WHERE d.target_id = ? AND d.kind = 'returns'`,
1261
- );
1262
- const mutatesOut = db.prepare(
1263
- `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line
310
+ );
311
+ const mutatesOut = db.prepare(
312
+ `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line
1264
313
  FROM dataflow d JOIN nodes n ON d.target_id = n.id
1265
314
  WHERE d.source_id = ? AND d.kind = 'mutates'`,
1266
- );
1267
- const mutatesIn = db.prepare(
1268
- `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line
315
+ );
316
+ const mutatesIn = db.prepare(
317
+ `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line
1269
318
  FROM dataflow d JOIN nodes n ON d.source_id = n.id
1270
319
  WHERE d.target_id = ? AND d.kind = 'mutates'`,
1271
- );
320
+ );
321
+
322
+ const hc = new Map();
323
+ const results = nodes.map((node) => {
324
+ const sym = normalizeSymbol(node, db, hc);
325
+
326
+ const flowsTo = flowsToOut.all(node.id).map((r) => ({
327
+ target: r.target_name,
328
+ kind: r.target_kind,
329
+ file: r.target_file,
330
+ line: r.line,
331
+ paramIndex: r.param_index,
332
+ expression: r.expression,
333
+ confidence: r.confidence,
334
+ }));
335
+
336
+ const flowsFrom = flowsToIn.all(node.id).map((r) => ({
337
+ source: r.source_name,
338
+ kind: r.source_kind,
339
+ file: r.source_file,
340
+ line: r.line,
341
+ paramIndex: r.param_index,
342
+ expression: r.expression,
343
+ confidence: r.confidence,
344
+ }));
345
+
346
+ const returnConsumers = returnsOut.all(node.id).map((r) => ({
347
+ consumer: r.target_name,
348
+ kind: r.target_kind,
349
+ file: r.target_file,
350
+ line: r.line,
351
+ expression: r.expression,
352
+ }));
353
+
354
+ const returnedBy = returnsIn.all(node.id).map((r) => ({
355
+ producer: r.source_name,
356
+ kind: r.source_kind,
357
+ file: r.source_file,
358
+ line: r.line,
359
+ expression: r.expression,
360
+ }));
361
+
362
+ const mutatesTargets = mutatesOut.all(node.id).map((r) => ({
363
+ target: r.target_name,
364
+ expression: r.expression,
365
+ line: r.line,
366
+ }));
367
+
368
+ const mutatedBy = mutatesIn.all(node.id).map((r) => ({
369
+ source: r.source_name,
370
+ expression: r.expression,
371
+ line: r.line,
372
+ }));
373
+
374
+ if (noTests) {
375
+ const filter = (arr) => arr.filter((r) => !isTestFile(r.file));
376
+ return {
377
+ ...sym,
378
+ flowsTo: filter(flowsTo),
379
+ flowsFrom: filter(flowsFrom),
380
+ returns: returnConsumers.filter((r) => !isTestFile(r.file)),
381
+ returnedBy: returnedBy.filter((r) => !isTestFile(r.file)),
382
+ mutates: mutatesTargets,
383
+ mutatedBy,
384
+ };
385
+ }
1272
386
 
1273
- const hc = new Map();
1274
- const results = nodes.map((node) => {
1275
- const sym = normalizeSymbol(node, db, hc);
1276
-
1277
- const flowsTo = flowsToOut.all(node.id).map((r) => ({
1278
- target: r.target_name,
1279
- kind: r.target_kind,
1280
- file: r.target_file,
1281
- line: r.line,
1282
- paramIndex: r.param_index,
1283
- expression: r.expression,
1284
- confidence: r.confidence,
1285
- }));
1286
-
1287
- const flowsFrom = flowsToIn.all(node.id).map((r) => ({
1288
- source: r.source_name,
1289
- kind: r.source_kind,
1290
- file: r.source_file,
1291
- line: r.line,
1292
- paramIndex: r.param_index,
1293
- expression: r.expression,
1294
- confidence: r.confidence,
1295
- }));
1296
-
1297
- const returnConsumers = returnsOut.all(node.id).map((r) => ({
1298
- consumer: r.target_name,
1299
- kind: r.target_kind,
1300
- file: r.target_file,
1301
- line: r.line,
1302
- expression: r.expression,
1303
- }));
1304
-
1305
- const returnedBy = returnsIn.all(node.id).map((r) => ({
1306
- producer: r.source_name,
1307
- kind: r.source_kind,
1308
- file: r.source_file,
1309
- line: r.line,
1310
- expression: r.expression,
1311
- }));
1312
-
1313
- const mutatesTargets = mutatesOut.all(node.id).map((r) => ({
1314
- target: r.target_name,
1315
- expression: r.expression,
1316
- line: r.line,
1317
- }));
1318
-
1319
- const mutatedBy = mutatesIn.all(node.id).map((r) => ({
1320
- source: r.source_name,
1321
- expression: r.expression,
1322
- line: r.line,
1323
- }));
1324
-
1325
- if (noTests) {
1326
- const filter = (arr) => arr.filter((r) => !isTestFile(r.file));
1327
387
  return {
1328
388
  ...sym,
1329
- flowsTo: filter(flowsTo),
1330
- flowsFrom: filter(flowsFrom),
1331
- returns: returnConsumers.filter((r) => !isTestFile(r.file)),
1332
- returnedBy: returnedBy.filter((r) => !isTestFile(r.file)),
389
+ flowsTo,
390
+ flowsFrom,
391
+ returns: returnConsumers,
392
+ returnedBy,
1333
393
  mutates: mutatesTargets,
1334
394
  mutatedBy,
1335
395
  };
1336
- }
1337
-
1338
- return {
1339
- ...sym,
1340
- flowsTo,
1341
- flowsFrom,
1342
- returns: returnConsumers,
1343
- returnedBy,
1344
- mutates: mutatesTargets,
1345
- mutatedBy,
1346
- };
1347
- });
396
+ });
1348
397
 
1349
- db.close();
1350
- const base = { name, results };
1351
- return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset });
398
+ const base = { name, results };
399
+ return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset });
400
+ } finally {
401
+ db.close();
402
+ }
1352
403
  }
1353
404
 
1354
405
  /**
@@ -1362,125 +413,123 @@ export function dataflowData(name, customDbPath, opts = {}) {
1362
413
  */
1363
414
  export function dataflowPathData(from, to, customDbPath, opts = {}) {
1364
415
  const db = openReadonlyOrFail(customDbPath);
1365
- const noTests = opts.noTests || false;
1366
- const maxDepth = opts.maxDepth || 10;
416
+ try {
417
+ const noTests = opts.noTests || false;
418
+ const maxDepth = opts.maxDepth || 10;
1367
419
 
1368
- if (!hasDataflowTable(db)) {
1369
- db.close();
1370
- return {
1371
- from,
1372
- to,
1373
- found: false,
1374
- warning:
1375
- 'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
1376
- };
1377
- }
420
+ if (!hasDataflowTable(db)) {
421
+ return {
422
+ from,
423
+ to,
424
+ found: false,
425
+ warning:
426
+ 'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
427
+ };
428
+ }
1378
429
 
1379
- const fromNodes = findNodes(db, from, { noTests, file: opts.fromFile, kind: opts.kind });
1380
- if (fromNodes.length === 0) {
1381
- db.close();
1382
- return { from, to, found: false, error: `No symbol matching "${from}"` };
1383
- }
430
+ const fromNodes = findNodes(db, from, { noTests, file: opts.fromFile, kind: opts.kind });
431
+ if (fromNodes.length === 0) {
432
+ return { from, to, found: false, error: `No symbol matching "${from}"` };
433
+ }
1384
434
 
1385
- const toNodes = findNodes(db, to, { noTests, file: opts.toFile, kind: opts.kind });
1386
- if (toNodes.length === 0) {
1387
- db.close();
1388
- return { from, to, found: false, error: `No symbol matching "${to}"` };
1389
- }
435
+ const toNodes = findNodes(db, to, { noTests, file: opts.toFile, kind: opts.kind });
436
+ if (toNodes.length === 0) {
437
+ return { from, to, found: false, error: `No symbol matching "${to}"` };
438
+ }
1390
439
 
1391
- const sourceNode = fromNodes[0];
1392
- const targetNode = toNodes[0];
440
+ const sourceNode = fromNodes[0];
441
+ const targetNode = toNodes[0];
1393
442
 
1394
- if (sourceNode.id === targetNode.id) {
1395
- const hc = new Map();
1396
- const sym = normalizeSymbol(sourceNode, db, hc);
1397
- db.close();
1398
- return {
1399
- from,
1400
- to,
1401
- found: true,
1402
- hops: 0,
1403
- path: [{ ...sym, edgeKind: null }],
1404
- };
1405
- }
443
+ if (sourceNode.id === targetNode.id) {
444
+ const hc = new Map();
445
+ const sym = normalizeSymbol(sourceNode, db, hc);
446
+ return {
447
+ from,
448
+ to,
449
+ found: true,
450
+ hops: 0,
451
+ path: [{ ...sym, edgeKind: null }],
452
+ };
453
+ }
1406
454
 
1407
- // BFS through flows_to and returns edges
1408
- const neighborStmt = db.prepare(
1409
- `SELECT n.id, n.name, n.kind, n.file, n.line, d.kind AS edge_kind, d.expression
455
+ // BFS through flows_to and returns edges
456
+ const neighborStmt = db.prepare(
457
+ `SELECT n.id, n.name, n.kind, n.file, n.line, d.kind AS edge_kind, d.expression
1410
458
  FROM dataflow d JOIN nodes n ON d.target_id = n.id
1411
459
  WHERE d.source_id = ? AND d.kind IN ('flows_to', 'returns')`,
1412
- );
460
+ );
1413
461
 
1414
- const visited = new Set([sourceNode.id]);
1415
- const parent = new Map();
1416
- let queue = [sourceNode.id];
1417
- let found = false;
1418
-
1419
- for (let depth = 1; depth <= maxDepth; depth++) {
1420
- const nextQueue = [];
1421
- for (const currentId of queue) {
1422
- const neighbors = neighborStmt.all(currentId);
1423
- for (const n of neighbors) {
1424
- if (noTests && isTestFile(n.file)) continue;
1425
- if (n.id === targetNode.id) {
1426
- if (!found) {
1427
- found = true;
462
+ const visited = new Set([sourceNode.id]);
463
+ const parent = new Map();
464
+ let queue = [sourceNode.id];
465
+ let found = false;
466
+
467
+ for (let depth = 1; depth <= maxDepth; depth++) {
468
+ const nextQueue = [];
469
+ for (const currentId of queue) {
470
+ const neighbors = neighborStmt.all(currentId);
471
+ for (const n of neighbors) {
472
+ if (noTests && isTestFile(n.file)) continue;
473
+ if (n.id === targetNode.id) {
474
+ if (!found) {
475
+ found = true;
476
+ parent.set(n.id, {
477
+ parentId: currentId,
478
+ edgeKind: n.edge_kind,
479
+ expression: n.expression,
480
+ });
481
+ }
482
+ continue;
483
+ }
484
+ if (!visited.has(n.id)) {
485
+ visited.add(n.id);
1428
486
  parent.set(n.id, {
1429
487
  parentId: currentId,
1430
488
  edgeKind: n.edge_kind,
1431
489
  expression: n.expression,
1432
490
  });
491
+ nextQueue.push(n.id);
1433
492
  }
1434
- continue;
1435
- }
1436
- if (!visited.has(n.id)) {
1437
- visited.add(n.id);
1438
- parent.set(n.id, {
1439
- parentId: currentId,
1440
- edgeKind: n.edge_kind,
1441
- expression: n.expression,
1442
- });
1443
- nextQueue.push(n.id);
1444
493
  }
1445
494
  }
495
+ if (found) break;
496
+ queue = nextQueue;
497
+ if (queue.length === 0) break;
1446
498
  }
1447
- if (found) break;
1448
- queue = nextQueue;
1449
- if (queue.length === 0) break;
1450
- }
1451
499
 
1452
- if (!found) {
1453
- db.close();
1454
- return { from, to, found: false };
1455
- }
500
+ if (!found) {
501
+ return { from, to, found: false };
502
+ }
1456
503
 
1457
- // Reconstruct path
1458
- const nodeById = db.prepare('SELECT * FROM nodes WHERE id = ?');
1459
- const hc = new Map();
1460
- const pathItems = [];
1461
- let cur = targetNode.id;
1462
- while (cur !== undefined) {
1463
- const nodeRow = nodeById.get(cur);
1464
- const parentInfo = parent.get(cur);
1465
- pathItems.unshift({
1466
- ...normalizeSymbol(nodeRow, db, hc),
1467
- edgeKind: parentInfo?.edgeKind ?? null,
1468
- expression: parentInfo?.expression ?? null,
1469
- });
1470
- cur = parentInfo?.parentId;
1471
- if (cur === sourceNode.id) {
1472
- const srcRow = nodeById.get(cur);
504
+ // Reconstruct path
505
+ const nodeById = db.prepare('SELECT * FROM nodes WHERE id = ?');
506
+ const hc = new Map();
507
+ const pathItems = [];
508
+ let cur = targetNode.id;
509
+ while (cur !== undefined) {
510
+ const nodeRow = nodeById.get(cur);
511
+ const parentInfo = parent.get(cur);
1473
512
  pathItems.unshift({
1474
- ...normalizeSymbol(srcRow, db, hc),
1475
- edgeKind: null,
1476
- expression: null,
513
+ ...normalizeSymbol(nodeRow, db, hc),
514
+ edgeKind: parentInfo?.edgeKind ?? null,
515
+ expression: parentInfo?.expression ?? null,
1477
516
  });
1478
- break;
517
+ cur = parentInfo?.parentId;
518
+ if (cur === sourceNode.id) {
519
+ const srcRow = nodeById.get(cur);
520
+ pathItems.unshift({
521
+ ...normalizeSymbol(srcRow, db, hc),
522
+ edgeKind: null,
523
+ expression: null,
524
+ });
525
+ break;
526
+ }
1479
527
  }
1480
- }
1481
528
 
1482
- db.close();
1483
- return { from, to, found: true, hops: pathItems.length - 1, path: pathItems };
529
+ return { from, to, found: true, hops: pathItems.length - 1, path: pathItems };
530
+ } finally {
531
+ db.close();
532
+ }
1484
533
  }
1485
534
 
1486
535
  /**
@@ -1493,192 +542,65 @@ export function dataflowPathData(from, to, customDbPath, opts = {}) {
1493
542
  */
1494
543
  export function dataflowImpactData(name, customDbPath, opts = {}) {
1495
544
  const db = openReadonlyOrFail(customDbPath);
1496
- const maxDepth = opts.depth || 5;
1497
- const noTests = opts.noTests || false;
545
+ try {
546
+ const maxDepth = opts.depth || 5;
547
+ const noTests = opts.noTests || false;
1498
548
 
1499
- if (!hasDataflowTable(db)) {
1500
- db.close();
1501
- return {
1502
- name,
1503
- results: [],
1504
- warning:
1505
- 'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
1506
- };
1507
- }
549
+ if (!hasDataflowTable(db)) {
550
+ return {
551
+ name,
552
+ results: [],
553
+ warning:
554
+ 'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
555
+ };
556
+ }
1508
557
 
1509
- const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind });
1510
- if (nodes.length === 0) {
1511
- db.close();
1512
- return { name, results: [] };
1513
- }
558
+ const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind });
559
+ if (nodes.length === 0) {
560
+ return { name, results: [] };
561
+ }
1514
562
 
1515
- // Forward BFS: who consumes this function's return value (directly or transitively)?
1516
- const consumersStmt = db.prepare(
1517
- `SELECT DISTINCT n.*
563
+ // Forward BFS: who consumes this function's return value (directly or transitively)?
564
+ const consumersStmt = db.prepare(
565
+ `SELECT DISTINCT n.*
1518
566
  FROM dataflow d JOIN nodes n ON d.target_id = n.id
1519
567
  WHERE d.source_id = ? AND d.kind = 'returns'`,
1520
- );
568
+ );
1521
569
 
1522
- const hc = new Map();
1523
- const results = nodes.map((node) => {
1524
- const sym = normalizeSymbol(node, db, hc);
1525
- const visited = new Set([node.id]);
1526
- const levels = {};
1527
- let frontier = [node.id];
1528
-
1529
- for (let d = 1; d <= maxDepth; d++) {
1530
- const nextFrontier = [];
1531
- for (const fid of frontier) {
1532
- const consumers = consumersStmt.all(fid);
1533
- for (const c of consumers) {
1534
- if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) {
1535
- visited.add(c.id);
1536
- nextFrontier.push(c.id);
1537
- if (!levels[d]) levels[d] = [];
1538
- levels[d].push(normalizeSymbol(c, db, hc));
570
+ const hc = new Map();
571
+ const results = nodes.map((node) => {
572
+ const sym = normalizeSymbol(node, db, hc);
573
+ const visited = new Set([node.id]);
574
+ const levels = {};
575
+ let frontier = [node.id];
576
+
577
+ for (let d = 1; d <= maxDepth; d++) {
578
+ const nextFrontier = [];
579
+ for (const fid of frontier) {
580
+ const consumers = consumersStmt.all(fid);
581
+ for (const c of consumers) {
582
+ if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) {
583
+ visited.add(c.id);
584
+ nextFrontier.push(c.id);
585
+ if (!levels[d]) levels[d] = [];
586
+ levels[d].push(normalizeSymbol(c, db, hc));
587
+ }
1539
588
  }
1540
589
  }
590
+ frontier = nextFrontier;
591
+ if (frontier.length === 0) break;
1541
592
  }
1542
- frontier = nextFrontier;
1543
- if (frontier.length === 0) break;
1544
- }
1545
-
1546
- return {
1547
- ...sym,
1548
- levels,
1549
- totalAffected: visited.size - 1,
1550
- };
1551
- });
1552
-
1553
- db.close();
1554
- const base = { name, results };
1555
- return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset });
1556
- }
1557
-
1558
- // ── Display formatters ──────────────────────────────────────────────────────
1559
-
1560
- /**
1561
- * CLI display for dataflow command.
1562
- */
1563
- export function dataflow(name, customDbPath, opts = {}) {
1564
- if (opts.impact) {
1565
- return dataflowImpact(name, customDbPath, opts);
1566
- }
1567
-
1568
- const data = dataflowData(name, customDbPath, opts);
1569
-
1570
- if (opts.json) {
1571
- console.log(JSON.stringify(data, null, 2));
1572
- return;
1573
- }
1574
- if (opts.ndjson) {
1575
- for (const r of data.results) {
1576
- console.log(JSON.stringify(r));
1577
- }
1578
- return;
1579
- }
1580
-
1581
- if (data.warning) {
1582
- console.log(`⚠ ${data.warning}`);
1583
- return;
1584
- }
1585
- if (data.results.length === 0) {
1586
- console.log(`No symbols matching "${name}".`);
1587
- return;
1588
- }
1589
-
1590
- for (const r of data.results) {
1591
- console.log(`\n${r.kind} ${r.name} (${r.file}:${r.line})`);
1592
- console.log('─'.repeat(60));
1593
593
 
1594
- if (r.flowsTo.length > 0) {
1595
- console.log('\n Data flows TO:');
1596
- for (const f of r.flowsTo) {
1597
- const conf = f.confidence < 1.0 ? ` [${(f.confidence * 100).toFixed(0)}%]` : '';
1598
- console.log(` → ${f.target} (${f.file}:${f.line}) arg[${f.paramIndex}]${conf}`);
1599
- }
1600
- }
1601
-
1602
- if (r.flowsFrom.length > 0) {
1603
- console.log('\n Data flows FROM:');
1604
- for (const f of r.flowsFrom) {
1605
- const conf = f.confidence < 1.0 ? ` [${(f.confidence * 100).toFixed(0)}%]` : '';
1606
- console.log(` ← ${f.source} (${f.file}:${f.line}) arg[${f.paramIndex}]${conf}`);
1607
- }
1608
- }
1609
-
1610
- if (r.returns.length > 0) {
1611
- console.log('\n Return value consumed by:');
1612
- for (const c of r.returns) {
1613
- console.log(` → ${c.consumer} (${c.file}:${c.line}) ${c.expression}`);
1614
- }
1615
- }
1616
-
1617
- if (r.returnedBy.length > 0) {
1618
- console.log('\n Uses return value of:');
1619
- for (const p of r.returnedBy) {
1620
- console.log(` ← ${p.producer} (${p.file}:${p.line}) ${p.expression}`);
1621
- }
1622
- }
1623
-
1624
- if (r.mutates.length > 0) {
1625
- console.log('\n Mutates:');
1626
- for (const m of r.mutates) {
1627
- console.log(` ✎ ${m.expression} (line ${m.line})`);
1628
- }
1629
- }
1630
-
1631
- if (r.mutatedBy.length > 0) {
1632
- console.log('\n Mutated by:');
1633
- for (const m of r.mutatedBy) {
1634
- console.log(` ✎ ${m.source} — ${m.expression} (line ${m.line})`);
1635
- }
1636
- }
1637
- }
1638
- }
1639
-
1640
- /**
1641
- * CLI display for dataflow --impact.
1642
- */
1643
- function dataflowImpact(name, customDbPath, opts = {}) {
1644
- const data = dataflowImpactData(name, customDbPath, {
1645
- noTests: opts.noTests,
1646
- depth: opts.depth ? Number(opts.depth) : 5,
1647
- file: opts.file,
1648
- kind: opts.kind,
1649
- limit: opts.limit,
1650
- offset: opts.offset,
1651
- });
1652
-
1653
- if (opts.json) {
1654
- console.log(JSON.stringify(data, null, 2));
1655
- return;
1656
- }
1657
- if (opts.ndjson) {
1658
- for (const r of data.results) {
1659
- console.log(JSON.stringify(r));
1660
- }
1661
- return;
1662
- }
1663
-
1664
- if (data.warning) {
1665
- console.log(`⚠ ${data.warning}`);
1666
- return;
1667
- }
1668
- if (data.results.length === 0) {
1669
- console.log(`No symbols matching "${name}".`);
1670
- return;
1671
- }
594
+ return {
595
+ ...sym,
596
+ levels,
597
+ totalAffected: visited.size - 1,
598
+ };
599
+ });
1672
600
 
1673
- for (const r of data.results) {
1674
- console.log(
1675
- `\n${r.kind} ${r.name} (${r.file}:${r.line}) — ${r.totalAffected} data-dependent consumer${r.totalAffected !== 1 ? 's' : ''}`,
1676
- );
1677
- for (const [level, items] of Object.entries(r.levels)) {
1678
- console.log(` Level ${level}:`);
1679
- for (const item of items) {
1680
- console.log(` ${item.name} (${item.file}:${item.line})`);
1681
- }
1682
- }
601
+ const base = { name, results };
602
+ return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset });
603
+ } finally {
604
+ db.close();
1683
605
  }
1684
606
  }