@optave/codegraph 3.11.2 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/README.md +8 -8
  2. package/dist/db/migrations.d.ts.map +1 -1
  3. package/dist/db/migrations.js +7 -0
  4. package/dist/db/migrations.js.map +1 -1
  5. package/dist/domain/analysis/module-map.d.ts +2 -0
  6. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  7. package/dist/domain/analysis/module-map.js +24 -2
  8. package/dist/domain/analysis/module-map.js.map +1 -1
  9. package/dist/domain/graph/builder/call-resolver.d.ts +4 -2
  10. package/dist/domain/graph/builder/call-resolver.d.ts.map +1 -1
  11. package/dist/domain/graph/builder/call-resolver.js +170 -8
  12. package/dist/domain/graph/builder/call-resolver.js.map +1 -1
  13. package/dist/domain/graph/builder/cha.d.ts +61 -0
  14. package/dist/domain/graph/builder/cha.d.ts.map +1 -0
  15. package/dist/domain/graph/builder/cha.js +143 -0
  16. package/dist/domain/graph/builder/cha.js.map +1 -0
  17. package/dist/domain/graph/builder/context.d.ts +3 -0
  18. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  19. package/dist/domain/graph/builder/context.js +2 -0
  20. package/dist/domain/graph/builder/context.js.map +1 -1
  21. package/dist/domain/graph/builder/helpers.d.ts +17 -1
  22. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  23. package/dist/domain/graph/builder/helpers.js +159 -5
  24. package/dist/domain/graph/builder/helpers.js.map +1 -1
  25. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  26. package/dist/domain/graph/builder/incremental.js +73 -1
  27. package/dist/domain/graph/builder/incremental.js.map +1 -1
  28. package/dist/domain/graph/builder/stages/build-edges.d.ts +2 -0
  29. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  30. package/dist/domain/graph/builder/stages/build-edges.js +926 -26
  31. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  32. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  33. package/dist/domain/graph/builder/stages/detect-changes.js +2 -1
  34. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  35. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -1
  36. package/dist/domain/graph/builder/stages/native-orchestrator.js +501 -14
  37. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -1
  38. package/dist/domain/graph/builder/stages/resolve-imports.d.ts +1 -0
  39. package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
  40. package/dist/domain/graph/builder/stages/resolve-imports.js +9 -0
  41. package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
  42. package/dist/domain/graph/journal.js +1 -1
  43. package/dist/domain/graph/journal.js.map +1 -1
  44. package/dist/domain/graph/resolver/points-to.d.ts +53 -0
  45. package/dist/domain/graph/resolver/points-to.d.ts.map +1 -0
  46. package/dist/domain/graph/resolver/points-to.js +213 -0
  47. package/dist/domain/graph/resolver/points-to.js.map +1 -0
  48. package/dist/domain/graph/resolver/ts-resolver.d.ts +9 -0
  49. package/dist/domain/graph/resolver/ts-resolver.d.ts.map +1 -0
  50. package/dist/domain/graph/resolver/ts-resolver.js +476 -0
  51. package/dist/domain/graph/resolver/ts-resolver.js.map +1 -0
  52. package/dist/domain/parser.d.ts +10 -1
  53. package/dist/domain/parser.d.ts.map +1 -1
  54. package/dist/domain/parser.js +39 -7
  55. package/dist/domain/parser.js.map +1 -1
  56. package/dist/domain/wasm-worker-entry.js +25 -0
  57. package/dist/domain/wasm-worker-entry.js.map +1 -1
  58. package/dist/domain/wasm-worker-pool.d.ts.map +1 -1
  59. package/dist/domain/wasm-worker-pool.js +32 -0
  60. package/dist/domain/wasm-worker-pool.js.map +1 -1
  61. package/dist/domain/wasm-worker-protocol.d.ts +14 -1
  62. package/dist/domain/wasm-worker-protocol.d.ts.map +1 -1
  63. package/dist/extractors/c.js +3 -3
  64. package/dist/extractors/c.js.map +1 -1
  65. package/dist/extractors/clojure.js +1 -1
  66. package/dist/extractors/clojure.js.map +1 -1
  67. package/dist/extractors/cpp.js +3 -3
  68. package/dist/extractors/cpp.js.map +1 -1
  69. package/dist/extractors/csharp.d.ts.map +1 -1
  70. package/dist/extractors/csharp.js +37 -8
  71. package/dist/extractors/csharp.js.map +1 -1
  72. package/dist/extractors/cuda.js +3 -3
  73. package/dist/extractors/cuda.js.map +1 -1
  74. package/dist/extractors/elixir.js +6 -6
  75. package/dist/extractors/elixir.js.map +1 -1
  76. package/dist/extractors/fsharp.js +1 -1
  77. package/dist/extractors/fsharp.js.map +1 -1
  78. package/dist/extractors/go.js +5 -5
  79. package/dist/extractors/go.js.map +1 -1
  80. package/dist/extractors/haskell.js +1 -1
  81. package/dist/extractors/haskell.js.map +1 -1
  82. package/dist/extractors/java.js +2 -2
  83. package/dist/extractors/java.js.map +1 -1
  84. package/dist/extractors/javascript.d.ts +2 -0
  85. package/dist/extractors/javascript.d.ts.map +1 -1
  86. package/dist/extractors/javascript.js +1674 -64
  87. package/dist/extractors/javascript.js.map +1 -1
  88. package/dist/extractors/kotlin.js +5 -5
  89. package/dist/extractors/kotlin.js.map +1 -1
  90. package/dist/extractors/lua.js +1 -1
  91. package/dist/extractors/lua.js.map +1 -1
  92. package/dist/extractors/objc.js +3 -3
  93. package/dist/extractors/objc.js.map +1 -1
  94. package/dist/extractors/ocaml.js +1 -1
  95. package/dist/extractors/ocaml.js.map +1 -1
  96. package/dist/extractors/php.js +2 -2
  97. package/dist/extractors/php.js.map +1 -1
  98. package/dist/extractors/python.js +7 -7
  99. package/dist/extractors/python.js.map +1 -1
  100. package/dist/extractors/ruby.js +2 -2
  101. package/dist/extractors/ruby.js.map +1 -1
  102. package/dist/extractors/scala.js +1 -1
  103. package/dist/extractors/scala.js.map +1 -1
  104. package/dist/extractors/solidity.js +1 -1
  105. package/dist/extractors/solidity.js.map +1 -1
  106. package/dist/extractors/swift.js +4 -4
  107. package/dist/extractors/swift.js.map +1 -1
  108. package/dist/extractors/zig.js +4 -4
  109. package/dist/extractors/zig.js.map +1 -1
  110. package/dist/infrastructure/config.d.ts +10 -0
  111. package/dist/infrastructure/config.d.ts.map +1 -1
  112. package/dist/infrastructure/config.js +15 -0
  113. package/dist/infrastructure/config.js.map +1 -1
  114. package/dist/infrastructure/native.d.ts +11 -0
  115. package/dist/infrastructure/native.d.ts.map +1 -1
  116. package/dist/infrastructure/native.js +78 -5
  117. package/dist/infrastructure/native.js.map +1 -1
  118. package/dist/presentation/queries-cli/overview.d.ts.map +1 -1
  119. package/dist/presentation/queries-cli/overview.js +5 -0
  120. package/dist/presentation/queries-cli/overview.js.map +1 -1
  121. package/dist/types.d.ts +184 -0
  122. package/dist/types.d.ts.map +1 -1
  123. package/package.json +7 -7
  124. package/src/db/migrations.ts +7 -0
  125. package/src/domain/analysis/module-map.ts +29 -1
  126. package/src/domain/graph/builder/call-resolver.ts +177 -7
  127. package/src/domain/graph/builder/cha.ts +175 -0
  128. package/src/domain/graph/builder/context.ts +3 -0
  129. package/src/domain/graph/builder/helpers.ts +175 -5
  130. package/src/domain/graph/builder/incremental.ts +79 -1
  131. package/src/domain/graph/builder/stages/build-edges.ts +1128 -24
  132. package/src/domain/graph/builder/stages/detect-changes.ts +3 -1
  133. package/src/domain/graph/builder/stages/native-orchestrator.ts +583 -20
  134. package/src/domain/graph/builder/stages/resolve-imports.ts +14 -0
  135. package/src/domain/graph/journal.ts +1 -1
  136. package/src/domain/graph/resolver/points-to.ts +254 -0
  137. package/src/domain/graph/resolver/ts-resolver.ts +536 -0
  138. package/src/domain/parser.ts +43 -5
  139. package/src/domain/wasm-worker-entry.ts +25 -0
  140. package/src/domain/wasm-worker-pool.ts +21 -0
  141. package/src/domain/wasm-worker-protocol.ts +14 -0
  142. package/src/extractors/c.ts +3 -3
  143. package/src/extractors/clojure.ts +1 -1
  144. package/src/extractors/cpp.ts +3 -3
  145. package/src/extractors/csharp.ts +33 -9
  146. package/src/extractors/cuda.ts +3 -3
  147. package/src/extractors/elixir.ts +6 -6
  148. package/src/extractors/fsharp.ts +1 -1
  149. package/src/extractors/go.ts +5 -5
  150. package/src/extractors/haskell.ts +1 -1
  151. package/src/extractors/java.ts +2 -2
  152. package/src/extractors/javascript.ts +1802 -66
  153. package/src/extractors/kotlin.ts +5 -5
  154. package/src/extractors/lua.ts +1 -1
  155. package/src/extractors/objc.ts +3 -3
  156. package/src/extractors/ocaml.ts +1 -1
  157. package/src/extractors/php.ts +2 -2
  158. package/src/extractors/python.ts +7 -7
  159. package/src/extractors/ruby.ts +2 -2
  160. package/src/extractors/scala.ts +1 -1
  161. package/src/extractors/solidity.ts +1 -1
  162. package/src/extractors/swift.ts +4 -4
  163. package/src/extractors/zig.ts +4 -4
  164. package/src/infrastructure/config.ts +15 -0
  165. package/src/infrastructure/native.ts +87 -5
  166. package/src/presentation/queries-cli/overview.ts +15 -1
  167. package/src/types.ts +194 -0
@@ -413,7 +413,7 @@ function purgeAndAddReverseDeps(
413
413
  const saveEdgesStmt = db.prepare(`
414
414
  SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind,
415
415
  n_tgt.file AS tgt_file, n_tgt.line AS tgt_line,
416
- e.kind AS edge_kind, e.confidence, e.dynamic,
416
+ e.kind AS edge_kind, e.confidence, e.dynamic, e.technique,
417
417
  n_src.file AS src_file
418
418
  FROM edges e
419
419
  JOIN nodes n_src ON e.source_id = n_src.id
@@ -430,6 +430,7 @@ function purgeAndAddReverseDeps(
430
430
  edge_kind: string;
431
431
  confidence: number;
432
432
  dynamic: number;
433
+ technique: string | null;
433
434
  src_file: string;
434
435
  }>) {
435
436
  // Skip edges whose source is also being purged — buildEdges will
@@ -444,6 +445,7 @@ function purgeAndAddReverseDeps(
444
445
  edgeKind: row.edge_kind,
445
446
  confidence: row.confidence,
446
447
  dynamic: row.dynamic,
448
+ technique: row.technique,
447
449
  });
448
450
  }
449
451
  }
@@ -41,8 +41,13 @@ import {
41
41
  NATIVE_SUPPORTED_EXTENSIONS,
42
42
  parseFilesWasmForBackfill,
43
43
  } from '../../../parser.js';
44
+ import { computeConfidence } from '../../resolve.js';
45
+ import type { CallNodeLookup } from '../call-resolver.js';
46
+ import type { ChaContext } from '../cha.js';
47
+ import { resolveThisDispatch } from '../cha.js';
44
48
  import type { PipelineContext } from '../context.js';
45
49
  import {
50
+ batchInsertEdges,
46
51
  batchInsertNodes,
47
52
  collectFiles as collectFilesUtil,
48
53
  fileHash,
@@ -50,6 +55,7 @@ import {
50
55
  readFileSafe,
51
56
  } from '../helpers.js';
52
57
  import { NativeDbProxy } from '../native-db-proxy.js';
58
+ import { CHA_DISPATCH_PENALTY } from './build-edges.js';
53
59
  import { closeNativeDb } from './native-db-lifecycle.js';
54
60
 
55
61
  // ── Native orchestrator types ──────────────────────────────────────────
@@ -382,11 +388,448 @@ async function runPostNativeAnalysis(
382
388
  return timing;
383
389
  }
384
390
 
391
/**
 * Phase 8.5: CHA expansion post-pass for the native orchestrator path.
 *
 * The Rust build pipeline resolves typed receiver calls (e.g. `worker.doWork()`
 * where `worker: IWorker`) to the interface method declaration only. This
 * post-pass reads the class hierarchy (via `implements`/`extends` edges) and
 * instantiated types (via `calls` edges to class nodes) from the DB and expands
 * each call to an interface/abstract method to ALL RTA-filtered concrete
 * implementations.
 *
 * Note: `this`/`super` dispatch is handled separately by `runPostNativeThisDispatch`,
 * which re-parses JS/TS files to obtain raw call site receiver info.
 *
 * @param db - Open database handle holding the `nodes` and `edges` tables.
 * @returns The count of newly inserted CHA edges plus the set of files containing
 * the new edges' endpoints, so the caller can scope role re-classification to the
 * nodes whose fan-in/out actually changed. A zero count means no edges were added
 * and role re-classification is unnecessary.
 */
function runPostNativeCha(db: BetterSqlite3Database): {
  newEdgeCount: number;
  affectedFiles: Set<string>;
} {
  const affectedFiles = new Set<string>();
  const empty = { newEdgeCount: 0, affectedFiles };

  // Fast guard: no hierarchy edges → no CHA work
  const hasHierarchy = db
    .prepare(`SELECT 1 FROM edges WHERE kind IN ('extends', 'implements') LIMIT 1`)
    .get();
  if (!hasHierarchy) return empty;

  // Build implementors map: parent/interface name → child/implementing class names.
  // A Set de-duplicates repeated (parent, child) rows while preserving first-seen order.
  const hierarchyRows = db
    .prepare(`
      SELECT src.name AS child_name, tgt.name AS parent_name
      FROM edges e
      JOIN nodes src ON e.source_id = src.id
      JOIN nodes tgt ON e.target_id = tgt.id
      WHERE e.kind IN ('extends', 'implements')
    `)
    .all() as Array<{ child_name: string; parent_name: string }>;

  const implementors = new Map<string, Set<string>>();
  for (const { child_name, parent_name } of hierarchyRows) {
    let children = implementors.get(parent_name);
    if (!children) {
      children = new Set<string>();
      implementors.set(parent_name, children);
    }
    children.add(child_name);
  }
  if (implementors.size === 0) return empty;

  // RTA: collect class names that are actually instantiated via `new X()`.
  // Primary query targets `class`-kind nodes (the canonical schema).
  // Fallback also matches `constructor`/`function`-kind nodes because some native
  // engine versions record constructor calls against those kinds instead of `class`.
  let rtaRows = db
    .prepare(`
      SELECT DISTINCT tgt.name
      FROM edges e
      JOIN nodes tgt ON e.target_id = tgt.id
      WHERE e.kind = 'calls' AND tgt.kind = 'class'
    `)
    .all() as Array<{ name: string }>;
  if (rtaRows.length === 0) {
    // Fallback: try constructor/function-kind nodes for older native engine schemas
    rtaRows = db
      .prepare(`
        SELECT DISTINCT tgt.name
        FROM edges e
        JOIN nodes tgt ON e.target_id = tgt.id
        WHERE e.kind = 'calls' AND tgt.kind IN ('constructor', 'function')
          AND INSTR(tgt.name, '.') = 0
      `)
      .all() as Array<{ name: string }>;
  }
  const instantiated = new Set(rtaRows.map((r) => r.name));
  // noRtaEvidence: true when neither query found any constructor-call evidence
  // (e.g. graph built by an older native engine that doesn't emit constructor call
  // edges at all). In that case RTA filtering is skipped so interface dispatch still
  // produces edges — every implementor is admitted rather than silently dropping
  // everything.
  const noRtaEvidence = instantiated.size === 0;
  if (noRtaEvidence) {
    debug('runPostNativeCha: no constructor-call evidence found — proceeding without RTA filter');
  }

  // Find existing call edges targeting qualified methods (e.g., 'IWorker.doWork').
  // Include the caller node's file so confidence can be computed file-pair-aware,
  // matching the WASM path's computeConfidence(callerFile, targetFile, null) - CHA_DISPATCH_PENALTY formula.
  const callToMethods = db
    .prepare(`
      SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file
      FROM edges e
      JOIN nodes tgt ON e.target_id = tgt.id
      JOIN nodes src ON e.source_id = src.id
      WHERE e.kind = 'calls' AND tgt.kind = 'method'
        AND INSTR(tgt.name, '.') > 0
    `)
    .all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>;

  // Seed seen-pairs only from the source_ids we'll be expanding — avoids loading every
  // call edge in the DB (which would be O(all edges)) for large codebases.
  const seen = new Set<string>();
  if (callToMethods.length > 0) {
    const sourceIds = [...new Set(callToMethods.map((r) => r.source_id))];
    const CHUNK_SIZE = 500;
    for (let i = 0; i < sourceIds.length; i += CHUNK_SIZE) {
      const chunk = sourceIds.slice(i, i + CHUNK_SIZE);
      const placeholders = chunk.map(() => '?').join(',');
      const existingPairs = db
        .prepare(
          `SELECT source_id, target_id FROM edges WHERE kind = 'calls' AND source_id IN (${placeholders})`,
        )
        .all(...chunk) as Array<{ source_id: number; target_id: number }>;
      for (const e of existingPairs) seen.add(`${e.source_id}|${e.target_id}`);
    }
  }

  // No LIMIT: multiple files can define the same qualified name in a monorepo.
  const findMethodStmt = db.prepare(
    `SELECT id, file AS method_file FROM nodes WHERE name = ? AND kind = 'method'`,
  );

  type ChaTarget = { id: number; method_file: string | null };

  // Dispatch targets depend only on the qualified method name, never on the call
  // site, so each name is resolved once and reused for every caller that targets it.
  const targetsByMethod = new Map<string, ChaTarget[]>();
  const resolveTargets = (methodName: string): ChaTarget[] => {
    const cached = targetsByMethod.get(methodName);
    if (cached) return cached;

    const targets: ChaTarget[] = [];
    targetsByMethod.set(methodName, targets);

    const dotIdx = methodName.indexOf('.');
    if (dotIdx === -1) return targets;
    const typeName = methodName.slice(0, dotIdx);
    const methodSuffix = methodName.slice(dotIdx + 1);

    // BFS over the implementors map — handles multi-level hierarchies where
    // abstract/non-instantiated classes sit between the call-site type and
    // the concrete leaf implementations (issue #1311). A head index walks the
    // queue so dequeuing stays O(1).
    const queue: string[] = [typeName];
    const visited = new Set<string>([typeName]);
    for (let head = 0; head < queue.length; head++) {
      const children = implementors.get(queue[head]!);
      if (!children) continue;

      for (const cls of children) {
        if (visited.has(cls)) continue;
        visited.add(cls);

        if (noRtaEvidence || instantiated.has(cls)) {
          targets.push(...(findMethodStmt.all(`${cls}.${methodSuffix}`) as ChaTarget[]));
        }

        // Always traverse children — non-instantiated classes may have instantiated subclasses.
        queue.push(cls);
      }
    }
    return targets;
  };

  // Tuple layout handed to batchInsertEdges; the positions used here are
  // [caller node id, target node id, 'calls', confidence, 0, 'cha'].
  const newEdges: Array<[number, number, string, number, number, string]> = [];

  for (const { source_id, method_name, caller_file } of callToMethods) {
    for (const target of resolveTargets(method_name)) {
      const key = `${source_id}|${target.id}`;
      if (seen.has(key)) continue;
      seen.add(key);
      // Compute confidence file-pair-aware (mirrors WASM path: computeConfidence - CHA_DISPATCH_PENALTY)
      // Skip zero-confidence edges to match buildFileCallEdges / buildChaPostPass behaviour.
      const conf =
        computeConfidence(caller_file ?? '', target.method_file ?? '', null) -
        CHA_DISPATCH_PENALTY;
      if (conf <= 0) continue;
      newEdges.push([source_id, target.id, 'calls', conf, 0, 'cha']);
      if (caller_file) affectedFiles.add(caller_file);
      if (target.method_file) affectedFiles.add(target.method_file);
    }
  }

  if (newEdges.length > 0) {
    db.transaction(() => batchInsertEdges(db, newEdges))();
  }
  return { newEdgeCount: newEdges.length, affectedFiles };
}
570
+
571
// Extensions where `this`/`super` dispatch can occur (JS/TS family)
const THIS_DISPATCH_EXTS = new Set(['.js', '.ts', '.tsx', '.jsx', '.mjs', '.cjs', '.mts', '.cts']);

/**
 * Phase 8.5: this/super dispatch post-pass for the native orchestrator path.
 *
 * The Rust build pipeline resolves typed receiver calls but does NOT persist raw
 * unresolved call site receiver info (e.g. `this`, `super`) to the DB. This
 * hybrid post-pass re-parses JS/TS-family files (native engine first, WASM as the
 * fallback) to collect call sites with `this`/`super` receivers, then resolves
 * them through the class hierarchy stored in DB `extends` edges — mirroring what
 * `buildChaPostPass` does on the WASM path.
 *
 * Only runs when `extends` edges exist in the DB; if there is no inheritance
 * hierarchy there is nothing to resolve via `this`/`super` dispatch.
 *
 * @param db - Open database handle holding the `nodes` and `edges` tables.
 * @param rootDir - Project root; DB file paths are joined onto it for re-parsing.
 * @param changedFiles - Root-relative changed files; only consulted on incremental builds.
 * @param isFullBuild - When true the file set is derived from `extends` edges instead.
 * @returns Elapsed wall time (reported as 0 on the early guard exits), the ids of
 * nodes that gained an incoming edge, and the files containing endpoints of the
 * newly inserted edges.
 */
async function runPostNativeThisDispatch(
  db: BetterSqlite3Database,
  rootDir: string,
  changedFiles: string[] | undefined,
  isFullBuild: boolean,
): Promise<{ elapsedMs: number; targetIds: Set<number>; affectedFiles: Set<string> }> {
  const t0 = Date.now();
  const targetIds = new Set<number>();
  // Files containing endpoints of newly inserted edges — lets the caller scope
  // role re-classification to the nodes whose fan-in/out actually changed.
  const affectedFiles = new Set<string>();

  // Fast guard: need at least one extends edge for this/super to have meaning
  const hasExtends = db.prepare(`SELECT 1 FROM edges WHERE kind = 'extends' LIMIT 1`).get();
  if (!hasExtends) return { elapsedMs: 0, targetIds, affectedFiles };

  // Build parents map: child class → direct parent class (from `extends` edges).
  // When a child name appears in several rows, the first row wins.
  const parentRows = db
    .prepare(`
      SELECT src.name AS child_name, tgt.name AS parent_name
      FROM edges e
      JOIN nodes src ON e.source_id = src.id
      JOIN nodes tgt ON e.target_id = tgt.id
      WHERE e.kind = 'extends'
    `)
    .all() as Array<{ child_name: string; parent_name: string }>;

  const parents = new Map<string, string>();
  for (const row of parentRows) {
    if (!parents.has(row.child_name)) parents.set(row.child_name, row.parent_name);
  }
  if (parents.size === 0) return { elapsedMs: 0, targetIds, affectedFiles };

  const chaCtx: ChaContext = {
    implementors: new Map(), // not needed for this/super resolution
    parents,
    instantiatedTypes: new Set(), // not needed for this/super resolution
  };

  // Determine which files to re-parse.
  //
  // On a full build we do NOT re-parse every JS/TS file — that would re-parse
  // the entire project on top of the native pass, causing a massive regression
  // (measured: +358% ms/file on codegraph itself). Instead we restrict to files
  // that are part of the class inheritance hierarchy: both subclass files (which
  // contain `super.X()` calls dispatching to a parent) and parent-class files
  // (whose method bodies contain `this.X()` calls that CHA must resolve). Any
  // file not in the hierarchy has no `extends` relationship, so `this`/`super`
  // calls in it either resolve locally (same-class dispatch, already handled by
  // the direct-call edge) or have no class context — and will be skipped by
  // `resolveThisDispatch` anyway.
  let relFiles: string[];
  if (isFullBuild || !changedFiles) {
    const rows = db
      .prepare(`
        SELECT DISTINCT file FROM (
          SELECT src.file AS file
          FROM edges e
          JOIN nodes src ON e.source_id = src.id
          WHERE e.kind = 'extends' AND src.file IS NOT NULL
          UNION
          SELECT tgt.file AS file
          FROM edges e
          JOIN nodes tgt ON e.target_id = tgt.id
          WHERE e.kind = 'extends' AND tgt.file IS NOT NULL
        )
      `)
      .all() as Array<{ file: string }>;
    relFiles = rows
      .map((r) => r.file)
      .filter((f) => THIS_DISPATCH_EXTS.has(path.extname(f).toLowerCase()));
  } else {
    // NOTE: Only files explicitly listed in changedFiles are re-parsed.
    // If a parent-class method is replaced (new node ID) but the child file is
    // unchanged, the stale super.method() edge is not refreshed here. A full
    // rebuild (isFullBuild=true) is required to recover in that scenario.
    relFiles = changedFiles.filter((f) => THIS_DISPATCH_EXTS.has(path.extname(f).toLowerCase()));
  }
  if (relFiles.length === 0) return { elapsedMs: 0, targetIds, affectedFiles };

  // DB-backed CallNodeLookup — resolveThisDispatch only calls byName()
  const findByNameStmt = db.prepare(`SELECT id, file, kind FROM nodes WHERE name = ?`);
  const lookup: CallNodeLookup = {
    byName: (name) => findByNameStmt.all(name) as Array<{ id: number; file: string; kind: string }>,
    byNameAndFile: (name, file) =>
      (findByNameStmt.all(name) as Array<{ id: number; file: string; kind: string }>).filter(
        (n) => n.file === file,
      ),
    isBarrel: () => false,
    resolveBarrel: () => null,
    nodeId: () => undefined,
  };

  // Seed seen-pairs from existing call edges on source nodes in our file set
  const seen = new Set<string>();
  const CHUNK = 500;
  for (let i = 0; i < relFiles.length; i += CHUNK) {
    const chunk = relFiles.slice(i, i + CHUNK);
    const ph = chunk.map(() => '?').join(',');
    const rows = db
      .prepare(
        `SELECT e.source_id, e.target_id
         FROM edges e
         JOIN nodes n ON e.source_id = n.id
         WHERE e.kind = 'calls' AND n.file IN (${ph})`,
      )
      .all(...chunk) as Array<{ source_id: number; target_id: number }>;
    for (const r of rows) seen.add(`${r.source_id}|${r.target_id}`);
  }

  // Find the innermost containing method/function for a call at `line` in `file`.
  // COALESCE maps NULL end_line to a large sentinel so unbounded nodes sort last
  // (SQLite ASC orders NULLs first, so a raw `end_line - line` would pick them first).
  const findCallerByLineStmt = db.prepare(`
    SELECT id, name FROM nodes
    WHERE file = ? AND kind IN ('method', 'function')
      AND line <= ? AND (end_line IS NULL OR end_line >= ?)
    ORDER BY COALESCE(end_line - line, 999999999) ASC
    LIMIT 1
  `);

  // Re-parse the files to obtain raw call sites with receiver info. Only
  // `calls` (with receivers) are consumed here.
  //
  // The native engine is preferred: this pass only runs after a native
  // orchestrator build, so the addon is already loaded and re-parses the
  // hierarchy file set in single-digit milliseconds with the same
  // receiver-annotated call sites as the WASM extractor. Booting the WASM
  // runtime here instead cost ~40–110ms per full build (in-process
  // web-tree-sitter + grammar init dominated) — part of the v3.12.0
  // publish-gate regression. Files the native engine cannot parse (extension
  // outside NATIVE_SUPPORTED_EXTENSIONS, e.g. .mts/.cts) and native parse
  // failures fall back to the WASM backfill path so the sweep stays complete.
  const absFiles = relFiles.map((f) => path.join(rootDir, f));
  const nativeAbs = absFiles.filter((f) =>
    NATIVE_SUPPORTED_EXTENSIONS.has(path.extname(f).toLowerCase()),
  );
  const callsByRel = new Map<string, { name: string; receiver?: string; line: number }[]>();
  // Track native-supported files that returned null (per-file parse error) so
  // they can be included in the WASM fallback set below, ensuring no file's
  // this/super call sites are silently discarded.
  const nativeNullFiles = new Set<string>();
  let nativeParsed = false;
  if (nativeAbs.length > 0) {
    const native = loadNative();
    if (native) {
      try {
        const results = native.parseFiles(nativeAbs, rootDir, false, false) as Array<{
          file: string;
          calls?: { name: string; receiver?: string; line: number }[];
        } | null>;
        for (let i = 0; i < results.length; i++) {
          const r = results[i];
          if (!r) {
            // Per-file parse failure — fall back to WASM for this file.
            const abs = nativeAbs[i];
            if (abs) nativeNullFiles.add(abs);
            continue;
          }
          callsByRel.set(normalizePath(path.relative(rootDir, r.file)), r.calls ?? []);
        }
        nativeParsed = true;
      } catch (e) {
        debug(`this-dispatch native re-parse failed, falling back to WASM: ${toErrorMessage(e)}`);
      }
    }
  }
  // WASM handles: (a) non-native extensions (e.g. .mts/.cts), (b) the entire
  // file list when the native batch threw or the addon was unavailable, and
  // (c) individual files where the native addon returned null (per-file parse error).
  const wasmAbs = nativeParsed
    ? [
        ...absFiles.filter((f) => !NATIVE_SUPPORTED_EXTENSIONS.has(path.extname(f).toLowerCase())),
        ...nativeNullFiles,
      ]
    : absFiles;
  const wasmResults =
    wasmAbs.length > 0
      ? await parseFilesWasmForBackfill(wasmAbs, rootDir, { symbolsOnly: true })
      : new Map<string, ExtractorOutput>();

  // Everything between obtaining the WASM results and returning runs under
  // try/finally so the parse trees are released even when resolution or the
  // edge insert throws.
  try {
    for (const [relPath, symbols] of wasmResults) {
      callsByRel.set(relPath, symbols.calls ?? []);
    }

    // Tuple layout handed to batchInsertEdges; the positions used here are
    // [caller node id, target node id, 'calls', confidence, 0, 'cha'].
    const newEdges: Array<[number, number, string, number, number, string]> = [];

    for (const [relPath, calls] of callsByRel) {
      for (const call of calls) {
        // Only 'this' and 'super' are class-instance receivers in JS/TS.
        // 'self' refers to WindowOrWorkerGlobalScope — not a class instance — so
        // filtering it here prevents spurious dispatch edges from Worker call sites.
        if (call.receiver !== 'this' && call.receiver !== 'super') continue;

        const callerRow = findCallerByLineStmt.get(relPath, call.line, call.line) as
          | { id: number; name: string }
          | undefined;
        if (!callerRow) continue;

        const targets = resolveThisDispatch(
          call.name,
          callerRow.name,
          call.receiver as 'this' | 'super',
          chaCtx,
          lookup,
        );

        for (const t of targets) {
          const key = `${callerRow.id}|${t.id}`;
          if (seen.has(key)) continue;
          seen.add(key);
          const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY;
          if (conf <= 0) continue;
          newEdges.push([callerRow.id, t.id, 'calls', conf, 0, 'cha']);
          targetIds.add(t.id);
          affectedFiles.add(relPath);
          if (t.file) affectedFiles.add(t.file);
        }
      }
    }

    if (newEdges.length > 0) {
      db.transaction(() => batchInsertEdges(db, newEdges))();
      debug(`this/super dispatch post-pass: inserted ${newEdges.length} edge(s)`);
    }
  } finally {
    // Free WASM parse trees — mirrors the cleanup in backfillNativeDroppedFiles
    for (const [, symbols] of wasmResults) {
      const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
      if (tree && typeof tree.delete === 'function') {
        try {
          tree.delete();
        } catch {
          /* ignore cleanup errors */
        }
      }
      (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
      (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
    }
  }

  return { elapsedMs: Date.now() - t0, targetIds, affectedFiles };
}
826
+
385
827
  /** Format timing result from native orchestrator phases + JS post-processing. */
386
828
  function formatNativeTimingResult(
387
829
  p: Record<string, number>,
388
830
  structurePatchMs: number,
389
831
  analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number },
832
+ thisDispatchMs: number,
390
833
  ): BuildResult {
391
834
  return {
392
835
  phases: {
@@ -399,6 +842,7 @@ function formatNativeTimingResult(
399
842
  edgesMs: +(p.edgesMs ?? 0).toFixed(1),
400
843
  structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1),
401
844
  rolesMs: +(p.rolesMs ?? 0).toFixed(1),
845
+ thisDispatchMs: +thisDispatchMs.toFixed(1),
402
846
  astMs: +(analysisTiming.astMs ?? 0).toFixed(1),
403
847
  complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1),
404
848
  cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1),
@@ -740,6 +1184,50 @@ async function backfillNativeDroppedFiles(
740
1184
  }
741
1185
  }
742
1186
 
1187
/**
 * Tag `calls` edges that still have a NULL `technique` as `'ts-native'`.
 *
 * Such edges are the ones written by the native Rust orchestrator, which does
 * not populate the `technique` column itself.
 *
 * How much of the table is touched depends on the build kind:
 * - Full build, or no changed-file list available: every `calls` edge in the DB
 *   is new, so one global UPDATE is correct.
 * - Incremental build with an empty changed-file list: nothing is updated. No
 *   new edges were inserted, and a global UPDATE would mis-tag pre-migration
 *   NULL-technique edges from unchanged files as 'ts-native'.
 * - Incremental build with changed files: only edges whose source node belongs
 *   to one of those files are updated, so technique values previously set on
 *   unchanged edges are left alone.
 *
 * @param db - Open database connection used to prepare and run the UPDATEs.
 * @param isFullBuild - Whether the build that just ran was a full build.
 * @param changedFiles - Files changed in this build, or `undefined` if unknown.
 */
function backfillEdgeTechniquesAfterNativeOrchestrator(
  db: BetterSqlite3Database,
  isFullBuild: boolean,
  changedFiles: string[] | undefined,
): void {
  // Unscoped case first: a full build (or an unknown change set) tags everything.
  if (isFullBuild || !changedFiles) {
    const tagAllStatement = db.prepare(
      "UPDATE edges SET technique = 'ts-native' WHERE kind = 'calls' AND technique IS NULL",
    );
    tagAllStatement.run();
    return;
  }

  // Quiet incremental build: no files changed, so there is nothing to tag.
  if (changedFiles.length === 0) return;

  // Keep each statement within SQLite's SQLITE_LIMIT_VARIABLE_NUMBER
  // (999 on older builds) by binding at most this many file paths at once.
  const MAX_FILES_PER_STATEMENT = 500;
  const files: string[] = changedFiles;
  const batchCount = Math.ceil(files.length / MAX_FILES_PER_STATEMENT);
  const batches = Array.from({ length: batchCount }, (_, batchIndex) =>
    files.slice(batchIndex * MAX_FILES_PER_STATEMENT, (batchIndex + 1) * MAX_FILES_PER_STATEMENT),
  );

  // All batches run inside a single transaction; each one restricts the UPDATE
  // to edges whose source node lives in one of the batch's files.
  db.transaction(() => {
    for (const batch of batches) {
      const placeholders = new Array<string>(batch.length).fill('?').join(',');
      const scopedUpdateSql = [
        "UPDATE edges SET technique = 'ts-native'",
        "WHERE kind = 'calls' AND technique IS NULL",
        'AND source_id IN (',
        `SELECT id FROM nodes WHERE file IN (${placeholders})`,
        ')',
      ].join('\n');
      db.prepare(scopedUpdateSql).run(...batch);
    }
  })();
}
1230
+
743
1231
  /**
744
1232
  * Try the native build orchestrator.
745
1233
  *
@@ -881,35 +1369,25 @@ export async function tryNativeOrchestrator(
881
1369
  ctx.opts.cfg !== false ||
882
1370
  ctx.opts.dataflow !== false);
883
1371
 
1372
+ // ── DB handoff ────────────────────────────────────────────────────────────
1373
+ // Ensure a proper better-sqlite3 connection is open before any post-pass that
1374
+ // writes edges (dropped-language backfill, CHA) and before structure/analysis.
1375
+ // When analysis fallback is needed the handoff already happened above; when
1376
+ // neither structure nor analysis is needed the proxy conversion is deferred to
1377
+ // here so CHA and technique-backfill can still write rows.
884
1378
  if (needsStructure || needsAnalysisFallback) {
885
- // When analysis fallback is needed, handoff to better-sqlite3 — the
886
- // analysis engine uses the suspend/resume WAL pattern that requires a
887
- // real better-sqlite3 connection, not the NativeDbProxy.
888
1379
  if (needsAnalysisFallback && ctx.nativeFirstProxy) {
889
1380
  closeNativeDb(ctx, 'pre-analysis-fallback');
890
1381
  ctx.db = openDb(ctx.dbPath);
891
1382
  ctx.nativeFirstProxy = false;
892
1383
  } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
893
1384
  // DB reopen failed — return partial result
894
- return formatNativeTimingResult(p, 0, analysisTiming);
895
- }
896
-
897
- const fileSymbols = reconstructFileSymbolsFromDb(ctx);
898
-
899
- if (needsStructure) {
900
- structurePatchMs = await runPostNativeStructure(
901
- ctx,
902
- fileSymbols,
903
- !!result.isFullBuild,
904
- result.changedFiles,
905
- );
906
- }
907
-
908
- if (needsAnalysisFallback) {
909
- analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
1385
+ return formatNativeTimingResult(p, 0, analysisTiming, 0);
910
1386
  }
911
1387
  }
912
1388
 
1389
+ // ── Edge-writing post-passes (run before structure so roles see full graph) ──
1390
+
913
1391
  // Engine parity: the native orchestrator silently drops files whose
914
1392
  // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
915
1393
  // stale native binaries). WASM handles those — backfill via WASM so both
@@ -937,6 +1415,91 @@ export async function tryNativeOrchestrator(
937
1415
  await backfillNativeDroppedFiles(ctx, gap);
938
1416
  }
939
1417
 
1418
+ // Phase 8.5: expand CHA call edges (interface dispatch → concrete implementations).
1419
+ // Returns the affected files so role re-classification below can be scoped to
1420
+ // the nodes whose fan-in/out actually changed.
1421
+ //
1422
+ // Function-as-object-property methods (`fn.method = function() {}`) are extracted
1423
+ // natively by the Rust engine (#1432) and resolved in-build by its edge builder, so
1424
+ // no WASM re-parse post-pass is needed for them. `Foo.prototype.bar = fn` likewise.
1425
+ const { newEdgeCount: chaEdgeCount, affectedFiles: chaAffectedFiles } = runPostNativeCha(
1426
+ ctx.db as unknown as BetterSqlite3Database,
1427
+ );
1428
+
1429
+ // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites
1430
+ // whose raw receiver info the Rust pipeline does not persist to DB.
1431
+ const {
1432
+ elapsedMs: thisDispatchMs,
1433
+ targetIds: thisDispatchTargetIds,
1434
+ affectedFiles: thisDispatchAffectedFiles,
1435
+ } = await runPostNativeThisDispatch(
1436
+ ctx.db as unknown as BetterSqlite3Database,
1437
+ ctx.rootDir,
1438
+ result.changedFiles,
1439
+ !!result.isFullBuild,
1440
+ );
1441
+
1442
+ // Role re-classification after JS edge-writing post-passes.
1443
+ // The Rust orchestrator classifies roles before these post-passes (CHA,
1444
+ // this-dispatch) add edges, so roles for the edge endpoints are stale.
1445
+ // Scoped to the files containing those endpoints: a new edge only changes
1446
+ // fan-in/out for its own source and target nodes, so re-classifying their
1447
+ // files restores correctness without re-running the classifier over the
1448
+ // whole graph (which cost ~130ms per build on codegraph itself and was a
1449
+ // major part of the v3.12.0 native full-build benchmark regression).
1450
+ if (chaEdgeCount > 0 || thisDispatchTargetIds.size > 0) {
1451
+ const affectedFiles = [...new Set([...chaAffectedFiles, ...thisDispatchAffectedFiles])];
1452
+ // When edges were inserted but all their endpoint nodes have null `file`
1453
+ // columns (rare but possible), affectedFiles stays empty even though
1454
+ // fan-in/out changed. Fall back to full-graph re-classification in that
1455
+ // case — scoped classification with an empty set would be a no-op, leaving
1456
+ // roles stale for those nodes.
1457
+ const scopedFiles = affectedFiles.length > 0 ? affectedFiles : null;
1458
+ try {
1459
+ const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
1460
+ classifyNodeRoles: (
1461
+ db: BetterSqlite3Database,
1462
+ changedFiles?: string[] | null,
1463
+ ) => Record<string, number>;
1464
+ };
1465
+ classifyNodeRoles(ctx.db as unknown as BetterSqlite3Database, scopedFiles);
1466
+ debug(
1467
+ scopedFiles
1468
+ ? `Post-pass role re-classification complete (${scopedFiles.length} file(s))`
1469
+ : 'Post-pass role re-classification complete (full graph — null-file endpoints)',
1470
+ );
1471
+ } catch (err) {
1472
+ debug(`Post-pass role re-classification failed: ${toErrorMessage(err)}`);
1473
+ }
1474
+ }
1475
+
1476
+ // Backfill the `technique` column on `calls` edges written by the Rust
1477
+ // orchestrator, which does not write the column. Runs after all edge-writing
1478
+ // phases (including the WASM dropped-language backfill, CHA post-pass, and
1479
+ // this/super dispatch) so every new edge in this build cycle gets a label.
1480
+ backfillEdgeTechniquesAfterNativeOrchestrator(ctx.db, !!result.isFullBuild, result.changedFiles);
1481
+
1482
+ // ── Structure and analysis fallback (run after edge-writing so roles see full graph) ──
1483
+ // Reconstruct fileSymbols once for both structure and analysis to avoid two
1484
+ // expensive DB scans. The DB handoff above already ensured ctx.db is a proper
1485
+ // better-sqlite3 connection when either flag is set.
1486
+ if (needsStructure || needsAnalysisFallback) {
1487
+ const fileSymbols = reconstructFileSymbolsFromDb(ctx);
1488
+
1489
+ if (needsStructure) {
1490
+ structurePatchMs = await runPostNativeStructure(
1491
+ ctx,
1492
+ fileSymbols,
1493
+ !!result.isFullBuild,
1494
+ result.changedFiles,
1495
+ );
1496
+ }
1497
+
1498
+ if (needsAnalysisFallback) {
1499
+ analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
1500
+ }
1501
+ }
1502
+
940
1503
  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
941
- return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
1504
+ return formatNativeTimingResult(p, structurePatchMs, analysisTiming, thisDispatchMs);
942
1505
  }