@optave/codegraph 2.6.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1190 @@
1
+ /**
2
+ * Dataflow analysis — define/use chains and data movement edges.
3
+ *
4
+ * Adds three edge types to track how data moves through functions:
5
+ * - flows_to: parameter/variable flows into another function as an argument
6
+ * - returns: a call's return value is captured and used in the caller
7
+ * - mutates: a parameter-derived value is mutated (e.g. arr.push())
8
+ *
9
+ * Opt-in via `build --dataflow`. JS/TS only for MVP.
10
+ */
11
+
12
+ import fs from 'node:fs';
13
+ import path from 'node:path';
14
+ import { openReadonlyOrFail } from './db.js';
15
+ import { info } from './logger.js';
16
+ import { paginateResult } from './paginate.js';
17
+ import { ALL_SYMBOL_KINDS, isTestFile, normalizeSymbol } from './queries.js';
18
+
19
// Names of methods that modify their receiver in place. A statement-level call
// to one of these on a tracked binding is recorded as a mutation.
const MUTATING_METHODS = new Set([
  // Array-style mutators
  'push', 'pop', 'shift', 'unshift', 'splice', 'sort', 'reverse', 'fill',
  // Map / Set-style mutators
  'set', 'delete', 'add', 'clear',
]);

// Language IDs for which dataflow extraction is supported (JS/TS family only).
const DATAFLOW_LANG_IDS = new Set(['javascript', 'typescript', 'tsx']);
37
+
38
+ // ── AST helpers ──────────────────────────────────────────────────────────────
39
+
40
/**
 * Shorten a string for storage/display.
 *
 * Falsy input yields ''. Input longer than `max` is cut to its first `max`
 * characters and suffixed with an ellipsis; anything else is returned as-is.
 */
function truncate(str, max = 120) {
  if (!str) {
    return '';
  }
  if (str.length > max) {
    return `${str.slice(0, max)}…`;
  }
  return str;
}
44
+
45
/**
 * Derive a name for a function-like AST node.
 *
 * - Declarations and methods use their own `name` field.
 * - Arrow functions / function expressions borrow the name from their parent:
 *   the declared variable, the object key, or the assignment target text.
 *
 * Returns null when the node is missing or no name can be determined.
 */
function functionName(fnNode) {
  if (!fnNode) return null;

  // Text of a named field on `node`, or null when the field is absent.
  const fieldText = (node, field) => {
    const child = node.childForFieldName(field);
    return child ? child.text : null;
  };

  switch (fnNode.type) {
    case 'function_declaration':
    case 'method_definition':
      return fieldText(fnNode, 'name');

    case 'arrow_function':
    case 'function_expression': {
      const owner = fnNode.parent;
      switch (owner?.type) {
        case 'variable_declarator':
          return fieldText(owner, 'name');
        case 'pair':
          return fieldText(owner, 'key');
        case 'assignment_expression':
          return fieldText(owner, 'left');
        default:
          return null;
      }
    }

    default:
      return null;
  }
}
77
+
78
/**
 * List the binding names introduced by a formal-parameters node.
 *
 * Each entry is `{ name, index }`, where `index` is the position of the
 * parameter slot the name came from. A destructured parameter can contribute
 * several names that all share the same index.
 */
function extractParams(paramsNode) {
  if (!paramsNode) return [];
  return Array.from(paramsNode.namedChildren).flatMap((paramNode, index) =>
    extractParamNames(paramNode).map((name) => ({ name, index })),
  );
}
95
+
96
/**
 * Collect every binding name introduced by a parameter/pattern node.
 *
 * Handles plain identifiers, TS `required_parameter` / `optional_parameter`
 * wrappers, default values (`a = 1`), rest elements, and nested object/array
 * destructuring — including object members with defaults (`{ a = 1 }`) and
 * rest elements that wrap a pattern (`...[a, b]`).
 *
 * @param {object} node - AST node exposing `type`, `text`, `namedChildren`
 *   and `childForFieldName`
 * @returns {string[]} names in source order; empty for unknown node types
 */
function extractParamNames(node) {
  if (!node) return [];

  switch (node.type) {
    case 'identifier':
    case 'shorthand_property_identifier_pattern':
      return [node.text];

    // TS wrappers: the binding lives in the `pattern` field.
    case 'required_parameter':
    case 'optional_parameter': {
      const pattern = node.childForFieldName('pattern');
      return pattern ? extractParamNames(pattern) : [];
    }

    // `a = 1` and, inside an object pattern, `{ a = 1 }`: the binding is on the left.
    case 'assignment_pattern':
    case 'object_assignment_pattern': {
      const left = node.childForFieldName('left');
      return left ? extractParamNames(left) : [];
    }

    // `...rest`, `...[a, b]`, `...{ a }`: use the first child that yields names.
    case 'rest_pattern': {
      for (const child of node.namedChildren) {
        const names = extractParamNames(child);
        if (names.length > 0) return names;
      }
      return [];
    }

    case 'object_pattern': {
      const names = [];
      for (const member of node.namedChildren) {
        if (member.type === 'pair_pattern') {
          // `{ key: binding }` — only the value side introduces a name.
          const value = member.childForFieldName('value');
          if (value) names.push(...extractParamNames(value));
        } else {
          // Shorthand, defaulted shorthand, or rest; other node types yield [].
          names.push(...extractParamNames(member));
        }
      }
      return names;
    }

    case 'array_pattern': {
      const names = [];
      for (const element of node.namedChildren) {
        names.push(...extractParamNames(element));
      }
      return names;
    }

    default:
      return [];
  }
}
139
+
140
/**
 * Work out the simple name being invoked by a call expression.
 *
 * `foo()` → "foo"; `obj.method()` and `a.b.method()` → "method"; optional
 * chains (`foo?.bar()`, `foo?.()`) are unwrapped first. Returns null when the
 * callee is any other kind of expression.
 */
function resolveCalleeName(callNode) {
  const callee = callNode.childForFieldName('function');
  if (!callee) return null;

  // Text of the `property` field of `n`, or null when absent.
  const propertyText = (n) => {
    const property = n.childForFieldName('property');
    return property ? property.text : null;
  };

  switch (callee.type) {
    case 'identifier':
      return callee.text;

    case 'member_expression':
      return propertyText(callee);

    case 'optional_chain_expression': {
      const inner = callee.namedChildren[0];
      if (!inner) return null;
      if (inner.type === 'member_expression') return propertyText(inner);
      if (inner.type === 'identifier') return inner.text;
      // Fall back to a property on the chain node itself.
      return propertyText(callee);
    }

    default:
      return null;
  }
}
162
+
163
/**
 * Find the root identifier of a member-expression chain.
 *
 * For `a.b.c` this walks the `object` fields down to `a` and returns "a".
 * Returns null when the chain bottoms out in anything other than an
 * identifier (e.g. a call or `this`).
 */
function memberReceiver(memberExpr) {
  let current = memberExpr;
  for (;;) {
    const base = current.childForFieldName('object');
    if (!base) return null;
    if (base.type === 'identifier') return base.text;
    if (base.type !== 'member_expression') return null;
    current = base;
  }
}
173
+
174
+ // ── extractDataflow ──────────────────────────────────────────────────────────
175
+
176
/**
 * Walk a parsed AST and collect raw dataflow facts.
 *
 * A stack of function scopes is maintained during the walk. Facts are only
 * recorded while the innermost scope belongs to a function whose name could be
 * determined; names are looked up from the innermost scope outward.
 *
 * @param {object} tree - parse tree exposing `rootNode`
 * @param {string} _filePath - relative file path (currently unused)
 * @param {object[]} _definitions - symbol definitions from the parser (currently unused)
 * @returns {{ parameters: object[], returns: object[], assignments: object[], argFlows: object[], mutations: object[] }}
 */
export function extractDataflow(tree, _filePath, _definitions) {
  const parameters = [];
  const returns = [];
  const assignments = [];
  const argFlows = [];
  const mutations = [];

  // Node types that open a new function scope.
  const FUNCTION_TYPES = new Set([
    'function_declaration',
    'method_definition',
    'arrow_function',
    'function_expression',
    'function',
  ]);

  // Innermost scope last. Entry shape:
  // { funcName, funcNode, params: Map<name, index>, locals: Map<name, source> }
  const scopeStack = [];

  const currentScope = () => scopeStack[scopeStack.length - 1] ?? null;

  // 1-based source line of a node.
  const lineOf = (node) => node.startPosition.row + 1;

  // Look a name up as a parameter or tracked local, innermost scope first.
  const findBinding = (name) => {
    for (let depth = scopeStack.length - 1; depth >= 0; depth--) {
      const { params, locals, funcName } = scopeStack[depth];
      if (params.has(name)) {
        return { type: 'param', index: params.get(name), funcName };
      }
      if (locals.has(name)) {
        return { type: 'local', source: locals.get(name), funcName };
      }
    }
    return null;
  };

  // Push a scope for `fnNode`, recording its parameters when it has a name.
  const enterScope = (fnNode) => {
    const funcName = functionName(fnNode);
    const paramsNode = fnNode.childForFieldName('parameters');
    const params = new Map();
    for (const { name, index } of extractParams(paramsNode)) {
      params.set(name, index);
      if (!funcName) continue;
      parameters.push({
        funcName,
        paramName: name,
        paramIndex: index,
        line: (paramsNode?.startPosition?.row ?? fnNode.startPosition.row) + 1,
      });
    }
    scopeStack.push({ funcName, funcNode: fnNode, params, locals: new Map() });
  };

  // Confidence that a binding passed as an argument really carries the data:
  // parameters 1.0, destructured locals 0.8, other locals 0.9, unknown 0.5.
  const bindingConfidence = (binding) => {
    switch (binding?.type) {
      case 'param':
        return 1.0;
      case 'local':
        return binding.source?.type === 'destructured' ? 0.8 : 0.9;
      default:
        return 0.5;
    }
  };

  // Return the call expression behind `valueNode` (`foo()` or `await foo()`), else null.
  const unwrapCall = (valueNode) => {
    if (valueNode.type === 'call_expression') return valueNode;
    if (valueNode.type === 'await_expression') {
      const awaited = valueNode.namedChildren[0];
      if (awaited?.type === 'call_expression') return awaited;
    }
    return null;
  };

  // Record "varName receives the result of callee()" and track it as a local.
  const recordCallCapture = (scope, varName, callee, node, sourceType) => {
    assignments.push({
      varName,
      callerFunc: scope.funcName,
      sourceCallName: callee,
      expression: truncate(node.text),
      line: lineOf(node),
    });
    scope.locals.set(varName, { type: sourceType, callee });
  };

  const recordMutation = (scope, receiverName, binding, exprNode, lineNode) => {
    mutations.push({
      funcName: scope.funcName,
      receiverName,
      binding,
      mutatingExpr: truncate(exprNode.text),
      line: lineOf(lineNode),
    });
  };

  // return <expr>: remember the expression and the identifiers it mentions.
  const onReturn = (node) => {
    const scope = currentScope();
    if (!scope?.funcName) return;
    const expr = node.namedChildren[0];
    const referencedNames = [];
    if (expr) collectIdentifiers(expr, referencedNames);
    returns.push({
      funcName: scope.funcName,
      expression: truncate(expr ? expr.text : ''),
      referencedNames,
      line: lineOf(node),
    });
  };

  // const x = foo() / const { a, b } = await foo()
  const onDeclarator = (node) => {
    const nameNode = node.childForFieldName('name');
    const valueNode = node.childForFieldName('value');
    const scope = currentScope();
    if (!nameNode || !valueNode || !scope) return;

    const callExpr = unwrapCall(valueNode);
    if (!callExpr) return;
    const callee = resolveCalleeName(callExpr);
    if (!callee || !scope.funcName) return;

    const isPattern = nameNode.type === 'object_pattern' || nameNode.type === 'array_pattern';
    if (isPattern) {
      for (const boundName of extractParamNames(nameNode)) {
        recordCallCapture(scope, boundName, callee, node, 'destructured');
      }
    } else {
      recordCallCapture(scope, nameNode.text, callee, node, 'call_return');
    }
  };

  // callee(arg0, obj.prop, ...rest): record each argument rooted in a known binding.
  const onCall = (node) => {
    const callee = resolveCalleeName(node);
    const argsNode = node.childForFieldName('arguments');
    const scope = currentScope();
    if (!callee || !argsNode || !scope?.funcName) return;

    let nextIndex = 0;
    for (const arg of argsNode.namedChildren) {
      const argIndex = nextIndex++;
      // Spread arguments are tracked by the expression being spread.
      const inner = arg.type === 'spread_element' ? arg.namedChildren[0] : arg;
      if (!inner) continue;

      let trackedName = null;
      if (inner.type === 'identifier') {
        trackedName = inner.text;
      } else if (inner.type === 'member_expression') {
        trackedName = memberReceiver(inner);
      }
      if (!trackedName) continue;

      const binding = findBinding(trackedName);
      if (!binding) continue;

      argFlows.push({
        callerFunc: scope.funcName,
        calleeName: callee,
        argIndex,
        argName: trackedName,
        binding,
        confidence: bindingConfidence(binding),
        expression: truncate(arg.text),
        line: lineOf(node),
      });
    }
  };

  // obj.prop = value (mutation) and x = foo() (capture without a declaration).
  const onAssignment = (node) => {
    const left = node.childForFieldName('left');
    const right = node.childForFieldName('right');
    const scope = currentScope();
    if (!scope?.funcName) return;

    if (left?.type === 'member_expression') {
      const receiver = memberReceiver(left);
      const binding = receiver ? findBinding(receiver) : null;
      if (binding) recordMutation(scope, receiver, binding, node, node);
    }

    if (left?.type === 'identifier' && right) {
      const callExpr = unwrapCall(right);
      const callee = callExpr ? resolveCalleeName(callExpr) : null;
      if (callee) recordCallCapture(scope, left.text, callee, node, 'call_return');
    }
  };

  // Statement-level `receiver.push(...)` style calls on a known binding.
  const onExpressionStatement = (node) => {
    const expr = node.namedChildren[0];
    if (expr?.type !== 'call_expression') return;
    const fn = expr.childForFieldName('function');
    if (fn?.type !== 'member_expression') return;
    const prop = fn.childForFieldName('property');
    if (!prop || !MUTATING_METHODS.has(prop.text)) return;

    const receiver = memberReceiver(fn);
    const scope = currentScope();
    if (!receiver || !scope?.funcName) return;

    const binding = findBinding(receiver);
    if (binding) recordMutation(scope, receiver, binding, expr, node);
  };

  const visitChildren = (node) => {
    for (const child of node.namedChildren) {
      visit(child);
    }
  };

  // Depth-first walk: handle the node itself, then always descend into its children.
  function visit(node) {
    if (!node) return;

    if (FUNCTION_TYPES.has(node.type)) {
      enterScope(node);
      visitChildren(node);
      scopeStack.pop();
      return;
    }

    switch (node.type) {
      case 'return_statement':
        onReturn(node);
        break;
      case 'variable_declarator':
        onDeclarator(node);
        break;
      case 'call_expression':
        onCall(node);
        break;
      case 'assignment_expression':
        onAssignment(node);
        break;
      case 'expression_statement':
        onExpressionStatement(node);
        break;
      default:
        break;
    }
    visitChildren(node);
  }

  visit(tree.rootNode);

  return { parameters, returns, assignments, argFlows, mutations };
}
483
+
484
/**
 * Append the text of every identifier found under `node` to `out`.
 *
 * Traversal is pre-order, left to right, and does not descend into an
 * identifier node itself. Nothing is returned; `out` is mutated.
 */
function collectIdentifiers(node, out) {
  // Explicit stack; children are pushed right-to-left so they pop left-to-right.
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === 'identifier') {
      out.push(current.text);
      continue;
    }
    const kids = current.namedChildren;
    for (let i = kids.length - 1; i >= 0; i--) {
      pending.push(kids[i]);
    }
  }
}
496
+
497
+ // ── buildDataflowEdges ──────────────────────────────────────────────────────
498
+
499
/**
 * Extract dataflow facts for every JS/TS file and store them as rows in the
 * `dataflow` table.
 *
 * Files that already carry a cached parse tree (`symbols._tree`) use it
 * directly. Otherwise the file is re-read and parsed with parsers loaded on
 * demand from `./parser.js`; files that cannot be read or parsed are skipped.
 * All inserts run inside a single transaction, and the total edge count is
 * logged at the end.
 *
 * @param {object} db - database handle exposing `prepare` and `transaction`
 * @param {Map<string, object>} fileSymbols - map of relPath → symbols
 * @param {string} rootDir - absolute root directory
 * @param {object} _engineOpts - engine options (currently unused)
 */
export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) {
  // Only JS/TS for MVP.
  const SUPPORTED_EXTENSIONS = new Set(['.js', '.ts', '.tsx', '.jsx', '.mjs', '.cjs']);
  const extensionOf = (relPath) => path.extname(relPath).toLowerCase();

  // Parsers are only loaded when at least one supported file lacks a cached tree.
  let needsFallback = false;
  for (const [relPath, symbols] of fileSymbols) {
    if (!symbols._tree && SUPPORTED_EXTENSIONS.has(extensionOf(relPath))) {
      needsFallback = true;
      break;
    }
  }

  let parsers = null;
  let extToLang = null;
  let getParserFn = null;
  if (needsFallback) {
    const { createParsers, LANGUAGE_REGISTRY, getParser } = await import('./parser.js');
    parsers = await createParsers();
    extToLang = new Map();
    for (const { id, extensions } of LANGUAGE_REGISTRY) {
      for (const ext of extensions) {
        extToLang.set(ext, id);
      }
    }
    if (parsers) {
      getParserFn = getParser;
    }
  }

  const insert = db.prepare(
    `INSERT INTO dataflow (source_id, target_id, kind, param_index, expression, line, confidence)
     VALUES (?, ?, ?, ?, ?, ?, ?)`,
  );

  // MVP scope: only resolve function/method nodes for dataflow edges.
  // Future expansion: add 'parameter', 'property', 'constant' kinds to track
  // data flow through property accessors or constant references.
  const getNodeByNameAndFile = db.prepare(
    `SELECT id, name, kind, file, line FROM nodes
     WHERE name = ? AND file = ? AND kind IN ('function', 'method')`,
  );

  const getNodeByName = db.prepare(
    `SELECT id, name, kind, file, line FROM nodes
     WHERE name = ? AND kind IN ('function', 'method')
     ORDER BY file, line LIMIT 10`,
  );

  let totalEdges = 0;
  const addEdge = (sourceId, targetId, kind, paramIndex, expression, line, confidence) => {
    insert.run(sourceId, targetId, kind, paramIndex, expression, line, confidence);
    totalEdges++;
  };

  const insertAll = db.transaction(() => {
    for (const [relPath, symbols] of fileSymbols) {
      const ext = extensionOf(relPath);
      if (!SUPPORTED_EXTENSIONS.has(ext)) continue;

      let tree = symbols._tree;

      // No cached tree: read and parse the file, skipping it on any failure.
      if (!tree) {
        if (!extToLang || !getParserFn) continue;
        const langId = extToLang.get(ext);
        if (!langId || !DATAFLOW_LANG_IDS.has(langId)) continue;

        const absPath = path.join(rootDir, relPath);
        let code;
        try {
          code = fs.readFileSync(absPath, 'utf-8');
        } catch {
          // Best effort: an unreadable file just contributes no edges.
          continue;
        }

        const parser = getParserFn(parsers, absPath);
        if (!parser) continue;

        try {
          tree = parser.parse(code);
        } catch {
          // Best effort: an unparsable file just contributes no edges.
          continue;
        }
      }

      const data = extractDataflow(tree, relPath, symbols.definitions);

      // Prefer a node defined in this file; otherwise take the first global match.
      const resolveNode = (funcName) => {
        const [sameFile] = getNodeByNameAndFile.all(funcName, relPath);
        if (sameFile !== undefined) return sameFile;
        const [anywhere] = getNodeByName.all(funcName);
        return anywhere !== undefined ? anywhere : null;
      };

      // flows_to: parameter/variable passed as argument to another function
      for (const flow of data.argFlows) {
        const sourceNode = resolveNode(flow.callerFunc);
        const targetNode = resolveNode(flow.calleeName);
        if (!sourceNode || !targetNode) continue;
        addEdge(
          sourceNode.id,
          targetNode.id,
          'flows_to',
          flow.argIndex,
          flow.expression,
          flow.line,
          flow.confidence,
        );
      }

      // returns: call return value captured in caller (producer → consumer)
      for (const assignment of data.assignments) {
        const producerNode = resolveNode(assignment.sourceCallName);
        const consumerNode = resolveNode(assignment.callerFunc);
        if (!producerNode || !consumerNode) continue;
        addEdge(
          producerNode.id,
          consumerNode.id,
          'returns',
          null,
          assignment.expression,
          assignment.line,
          1.0,
        );
      }

      // mutates: only mutations of parameter bindings become edges, stored as
      // a self-edge on the mutating function.
      for (const mut of data.mutations) {
        const mutatorNode = resolveNode(mut.funcName);
        if (!mutatorNode || mut.binding?.type !== 'param') continue;
        addEdge(mutatorNode.id, mutatorNode.id, 'mutates', null, mut.mutatingExpr, mut.line, 1.0);
      }
    }
  });

  insertAll();
  info(`Dataflow: ${totalEdges} edges inserted`);
}
679
+
680
+ // ── Query functions ─────────────────────────────────────────────────────────
681
+
682
/**
 * Find rows in the `nodes` table whose name contains `name`.
 *
 * Supported options:
 * - `kind`: restrict to a single symbol kind (defaults to ALL_SYMBOL_KINDS)
 * - `file`: keep only nodes whose file path contains this substring
 * - `noTests`: drop nodes located in test files
 *
 * Rows come back ordered by file, then line.
 */
function findNodes(db, name, opts = {}) {
  const kinds = opts.kind ? [opts.kind] : ALL_SYMBOL_KINDS;
  const kindSlots = kinds.map(() => '?').join(', ');
  const bindings = [`%${name}%`, ...kinds];

  const fileCondition = opts.file ? ' AND file LIKE ?' : '';
  if (opts.file) {
    bindings.push(`%${opts.file}%`);
  }

  const statement = db.prepare(
    `SELECT * FROM nodes
     WHERE name LIKE ? AND kind IN (${kindSlots})${fileCondition}
     ORDER BY file, line`,
  );
  const matches = statement.all(...bindings);

  if (!opts.noTests) {
    return matches;
  }
  return matches.filter((row) => !isTestFile(row.file));
}
707
+
708
/**
 * Report whether the `dataflow` table contains at least one row.
 *
 * Any failure while counting (for example the table not existing) is treated
 * as "no data" and yields false.
 */
function hasDataflowTable(db) {
  let count;
  try {
    ({ c: count } = db.prepare('SELECT COUNT(*) as c FROM dataflow').get());
  } catch {
    return false;
  }
  return count > 0;
}
719
+
720
/**
 * Return all dataflow edges for every symbol matching `name`.
 *
 * For each matching node the result lists outgoing and incoming edges of the
 * three kinds: `flowsTo` / `flowsFrom`, `returns` / `returnedBy`, and
 * `mutates` / `mutatedBy`. With `noTests`, flow and return entries located in
 * test files are dropped.
 *
 * The database handle is always closed before returning, including when a
 * query throws.
 *
 * @param {string} name - symbol name (partial match)
 * @param {string} [customDbPath] - path to graph.db
 * @param {object} [opts] - { noTests, file, kind, limit, offset }
 * @returns {{ name, results: object[], warning?: string }}
 */
export function dataflowData(name, customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);
  const noTests = opts.noTests || false;

  let results;
  try {
    if (!hasDataflowTable(db)) {
      return {
        name,
        results: [],
        warning:
          'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
      };
    }

    const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind });
    if (nodes.length === 0) {
      return { name, results: [] };
    }

    // One statement per direction; the edge kind is bound at call time.
    const outgoingStmt = db.prepare(
      `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line
       FROM dataflow d JOIN nodes n ON d.target_id = n.id
       WHERE d.source_id = ? AND d.kind = ?`,
    );
    const incomingStmt = db.prepare(
      `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line
       FROM dataflow d JOIN nodes n ON d.source_id = n.id
       WHERE d.target_id = ? AND d.kind = ?`,
    );

    // Drops entries located in test files when `noTests` is set.
    const dropTests = (entries) =>
      noTests ? entries.filter((entry) => !isTestFile(entry.file)) : entries;

    const hc = new Map();
    results = nodes.map((node) => {
      const sym = normalizeSymbol(node, db, hc);
      const outgoing = (kind) => outgoingStmt.all(node.id, kind);
      const incoming = (kind) => incomingStmt.all(node.id, kind);

      const flowsTo = outgoing('flows_to').map((r) => ({
        target: r.target_name,
        kind: r.target_kind,
        file: r.target_file,
        line: r.line,
        paramIndex: r.param_index,
        expression: r.expression,
        confidence: r.confidence,
      }));
      const flowsFrom = incoming('flows_to').map((r) => ({
        source: r.source_name,
        kind: r.source_kind,
        file: r.source_file,
        line: r.line,
        paramIndex: r.param_index,
        expression: r.expression,
        confidence: r.confidence,
      }));
      const returnConsumers = outgoing('returns').map((r) => ({
        consumer: r.target_name,
        kind: r.target_kind,
        file: r.target_file,
        line: r.line,
        expression: r.expression,
      }));
      const returnedBy = incoming('returns').map((r) => ({
        producer: r.source_name,
        kind: r.source_kind,
        file: r.source_file,
        line: r.line,
        expression: r.expression,
      }));
      // Mutation entries carry no file, so they are never test-filtered.
      const mutatesTargets = outgoing('mutates').map((r) => ({
        target: r.target_name,
        expression: r.expression,
        line: r.line,
      }));
      const mutatedBy = incoming('mutates').map((r) => ({
        source: r.source_name,
        expression: r.expression,
        line: r.line,
      }));

      return {
        ...sym,
        flowsTo: dropTests(flowsTo),
        flowsFrom: dropTests(flowsFrom),
        returns: dropTests(returnConsumers),
        returnedBy: dropTests(returnedBy),
        mutates: mutatesTargets,
        mutatedBy,
      };
    });
  } finally {
    // Release the handle on every path, including thrown query errors.
    db.close();
  }

  return paginateResult({ name, results }, 'results', { limit: opts.limit, offset: opts.offset });
}
859
+
860
/**
 * BFS through flows_to + returns edges to find how data gets from A to B.
 *
 * The first node matching `from` and the first node matching `to` are used as
 * the endpoints. The search follows edges from source to target, up to
 * `maxDepth` hops (default 10), and reports the first shortest path found.
 *
 * The database handle is always closed before returning, including when a
 * query throws.
 *
 * @param {string} from - source symbol name
 * @param {string} to - target symbol name
 * @param {string} [customDbPath]
 * @param {object} [opts] - { noTests, maxDepth, fromFile, toFile, kind }
 * @returns {{ from, to, found, hops?, path?, error?, warning? }}
 */
export function dataflowPathData(from, to, customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);
  const noTests = opts.noTests || false;
  const maxDepth = opts.maxDepth || 10;

  try {
    if (!hasDataflowTable(db)) {
      return {
        from,
        to,
        found: false,
        warning:
          'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
      };
    }

    const fromNodes = findNodes(db, from, { noTests, file: opts.fromFile, kind: opts.kind });
    if (fromNodes.length === 0) {
      return { from, to, found: false, error: `No symbol matching "${from}"` };
    }

    const toNodes = findNodes(db, to, { noTests, file: opts.toFile, kind: opts.kind });
    if (toNodes.length === 0) {
      return { from, to, found: false, error: `No symbol matching "${to}"` };
    }

    const sourceNode = fromNodes[0];
    const targetNode = toNodes[0];
    const hc = new Map();

    if (sourceNode.id === targetNode.id) {
      return {
        from,
        to,
        found: true,
        hops: 0,
        path: [{ ...normalizeSymbol(sourceNode, db, hc), edgeKind: null }],
      };
    }

    const neighborStmt = db.prepare(
      `SELECT n.id, n.name, n.kind, n.file, n.line, d.kind AS edge_kind, d.expression
       FROM dataflow d JOIN nodes n ON d.target_id = n.id
       WHERE d.source_id = ? AND d.kind IN ('flows_to', 'returns')`,
    );

    // Level-by-level BFS. `parent` maps each discovered node to the edge that
    // first reached it, which is enough to rebuild the path afterwards.
    const visited = new Set([sourceNode.id]);
    const parent = new Map();
    let frontier = [sourceNode.id];
    let found = false;

    search: for (let depth = 1; depth <= maxDepth && frontier.length > 0; depth++) {
      const nextFrontier = [];
      for (const currentId of frontier) {
        for (const n of neighborStmt.all(currentId)) {
          if (noTests && isTestFile(n.file)) continue;
          if (visited.has(n.id)) continue;
          visited.add(n.id);
          parent.set(n.id, {
            parentId: currentId,
            edgeKind: n.edge_kind,
            expression: n.expression,
          });
          if (n.id === targetNode.id) {
            // First arrival is a shortest path; no need to expand further.
            found = true;
            break search;
          }
          nextFrontier.push(n.id);
        }
      }
      frontier = nextFrontier;
    }

    if (!found) {
      return { from, to, found: false };
    }

    // Walk parent links back from the target. The source has no parent entry,
    // so it ends the walk with a null edgeKind/expression.
    const nodeById = db.prepare('SELECT * FROM nodes WHERE id = ?');
    const pathItems = [];
    for (let id = targetNode.id; id !== undefined; id = parent.get(id)?.parentId) {
      const link = parent.get(id);
      pathItems.unshift({
        ...normalizeSymbol(nodeById.get(id), db, hc),
        edgeKind: link?.edgeKind ?? null,
        expression: link?.expression ?? null,
      });
    }

    return { from, to, found: true, hops: pathItems.length - 1, path: pathItems };
  } finally {
    // Release the handle on every path, including thrown query errors.
    db.close();
  }
}
991
+
992
/**
 * Forward BFS through returns edges: "if I change this function's return value, what breaks?"
 *
 * For every symbol matching `name`, consumers of its return value are
 * collected level by level (direct consumers at level 1, their consumers at
 * level 2, …) up to `depth` levels (default 5). Each consumer is reported once,
 * at the first level it is reached.
 *
 * The database handle is always closed before returning, including when a
 * query throws.
 *
 * @param {string} name - symbol name
 * @param {string} [customDbPath]
 * @param {object} [opts] - { noTests, depth, file, kind, limit, offset }
 * @returns {{ name, results: object[], warning?: string }}
 */
export function dataflowImpactData(name, customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);
  const maxDepth = opts.depth || 5;
  const noTests = opts.noTests || false;

  let results;
  try {
    if (!hasDataflowTable(db)) {
      return {
        name,
        results: [],
        warning:
          'No dataflow data found. Rebuild with `codegraph build` (dataflow is now included by default).',
      };
    }

    const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind });
    if (nodes.length === 0) {
      return { name, results: [] };
    }

    // Who consumes this function's return value?
    const consumersStmt = db.prepare(
      `SELECT DISTINCT n.*
       FROM dataflow d JOIN nodes n ON d.target_id = n.id
       WHERE d.source_id = ? AND d.kind = 'returns'`,
    );

    const hc = new Map();
    results = nodes.map((node) => {
      const sym = normalizeSymbol(node, db, hc);
      const visited = new Set([node.id]);
      const levels = {};
      let frontier = [node.id];

      for (let depth = 1; depth <= maxDepth && frontier.length > 0; depth++) {
        const nextFrontier = [];
        for (const producerId of frontier) {
          for (const consumer of consumersStmt.all(producerId)) {
            if (visited.has(consumer.id)) continue;
            if (noTests && isTestFile(consumer.file)) continue;
            visited.add(consumer.id);
            nextFrontier.push(consumer.id);
            (levels[depth] ??= []).push(normalizeSymbol(consumer, db, hc));
          }
        }
        frontier = nextFrontier;
      }

      // `visited` also contains the starting node itself.
      return { ...sym, levels, totalAffected: visited.size - 1 };
    });
  } finally {
    // Release the handle on every path, including thrown query errors.
    db.close();
  }

  return paginateResult({ name, results }, 'results', { limit: opts.limit, offset: opts.offset });
}
1063
+
1064
+ // ── Display formatters ──────────────────────────────────────────────────────
1065
+
1066
/**
 * CLI entry point for the dataflow command.
 *
 * Hands off to `dataflowImpact` when `opts.impact` is truthy. Otherwise it
 * fetches the report via `dataflowData` and prints it as pretty JSON
 * (`opts.json`), one JSON document per result (`opts.ndjson`), or a
 * human-readable listing grouped by edge direction.
 *
 * @param {string} name - symbol name to look up
 * @param {string} [customDbPath] - forwarded to the data function
 * @param {object} [opts] - CLI options; forwarded unchanged to the data function
 */
export function dataflow(name, customDbPath, opts = {}) {
  if (opts.impact) {
    return dataflowImpact(name, customDbPath, opts);
  }

  const data = dataflowData(name, customDbPath, opts);

  // Machine-readable modes are checked first, so they also carry any warning.
  if (opts.json) {
    console.log(JSON.stringify(data, null, 2));
    return;
  }
  if (opts.ndjson) {
    data.results.forEach((row) => {
      console.log(JSON.stringify(row));
    });
    return;
  }

  if (data.warning) {
    console.log(`⚠ ${data.warning}`);
    return;
  }
  if (data.results.length === 0) {
    console.log(`No symbols matching "${name}".`);
    return;
  }

  // Confidence is shown only when below 1.0, as a whole-number percentage.
  const confidenceTag = (edge) =>
    edge.confidence < 1.0 ? ` [${(edge.confidence * 100).toFixed(0)}%]` : '';

  // [result property, heading, row formatter] — listed in print order.
  const sections = [
    [
      'flowsTo',
      '\n Data flows TO:',
      (f) => ` → ${f.target} (${f.file}:${f.line}) arg[${f.paramIndex}]${confidenceTag(f)}`,
    ],
    [
      'flowsFrom',
      '\n Data flows FROM:',
      (f) => ` ← ${f.source} (${f.file}:${f.line}) arg[${f.paramIndex}]${confidenceTag(f)}`,
    ],
    [
      'returns',
      '\n Return value consumed by:',
      (c) => ` → ${c.consumer} (${c.file}:${c.line}) ${c.expression}`,
    ],
    [
      'returnedBy',
      '\n Uses return value of:',
      (p) => ` ← ${p.producer} (${p.file}:${p.line}) ${p.expression}`,
    ],
    ['mutates', '\n Mutates:', (m) => ` ✎ ${m.expression} (line ${m.line})`],
    ['mutatedBy', '\n Mutated by:', (m) => ` ✎ ${m.source} — ${m.expression} (line ${m.line})`],
  ];

  for (const sym of data.results) {
    console.log(`\n${sym.kind} ${sym.name} (${sym.file}:${sym.line})`);
    console.log('─'.repeat(60));

    for (const [key, heading, formatRow] of sections) {
      const entries = sym[key];
      // Empty sections are omitted entirely, heading included.
      if (entries.length > 0) {
        console.log(heading);
        for (const entry of entries) {
          console.log(formatRow(entry));
        }
      }
    }
  }
}
1145
+
1146
/**
 * CLI display for `dataflow --impact`.
 *
 * Gets the report from `dataflowImpactData` and prints it as pretty JSON
 * (`opts.json`), one JSON document per result (`opts.ndjson`), or a
 * per-symbol listing of consumers grouped by BFS level.
 *
 * @param {string} name - symbol name to look up
 * @param {string} [customDbPath] - forwarded to `dataflowImpactData`
 * @param {object} [opts] - { noTests, depth, file, kind, limit, offset, json, ndjson }
 */
function dataflowImpact(name, customDbPath, opts = {}) {
  const { noTests, file, kind, limit, offset } = opts;
  // A supplied depth is coerced with Number(); a missing or falsy one becomes 5.
  const depth = opts.depth ? Number(opts.depth) : 5;

  const data = dataflowImpactData(name, customDbPath, {
    noTests,
    depth,
    file,
    kind,
    limit,
    offset,
  });

  if (opts.json) {
    console.log(JSON.stringify(data, null, 2));
    return;
  }
  if (opts.ndjson) {
    data.results.forEach((row) => {
      console.log(JSON.stringify(row));
    });
    return;
  }

  if (data.warning) {
    console.log(`⚠ ${data.warning}`);
    return;
  }
  if (data.results.length === 0) {
    console.log(`No symbols matching "${name}".`);
    return;
  }

  for (const sym of data.results) {
    const plural = sym.totalAffected === 1 ? '' : 's';
    console.log(
      `\n${sym.kind} ${sym.name} (${sym.file}:${sym.line}) — ${sym.totalAffected} data-dependent consumer${plural}`,
    );

    for (const [level, consumers] of Object.entries(sym.levels)) {
      console.log(` Level ${level}:`);
      for (const consumer of consumers) {
        console.log(` ${consumer.name} (${consumer.file}:${consumer.line})`);
      }
    }
  }
}