@optave/codegraph 3.11.1 → 3.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,12 @@ import type {
20
20
  TypeMapEntry,
21
21
  } from '../../../../types.js';
22
22
  import { computeConfidence } from '../../resolve.js';
23
+ import {
24
+ type CallNodeLookup,
25
+ findCaller,
26
+ resolveCallTargets,
27
+ resolveReceiverEdge,
28
+ } from '../call-resolver.js';
23
29
  import type { PipelineContext } from '../context.js';
24
30
  import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js';
25
31
 
@@ -486,6 +492,7 @@ function buildCallEdgesJS(
486
492
  allEdgeRows: EdgeRowTuple[],
487
493
  ): void {
488
494
  const { fileSymbols, barrelOnlyFiles, rootDir } = ctx;
495
+ const lookup = makeContextLookup(ctx, getNodeIdStmt);
489
496
 
490
497
  for (const [relPath, symbols] of fileSymbols) {
491
498
  if (barrelOnlyFiles.has(relPath)) continue;
@@ -497,13 +504,12 @@ function buildCallEdgesJS(
497
504
  const seenCallEdges = new Set<string>();
498
505
 
499
506
  buildFileCallEdges(
500
- ctx,
501
507
  relPath,
502
508
  symbols,
503
509
  fileNodeRow,
504
510
  importedNames,
505
511
  seenCallEdges,
506
- getNodeIdStmt,
512
+ lookup,
507
513
  allEdgeRows,
508
514
  typeMap,
509
515
  );
@@ -540,127 +546,37 @@ function buildImportedNamesMap(
540
546
  return importedNames;
541
547
  }
542
548
 
543
- function findCaller(
544
- call: Call,
545
- definitions: ReadonlyArray<{ name: string; kind: string; line: number; endLine?: number | null }>,
546
- relPath: string,
547
- getNodeIdStmt: NodeIdStmt,
548
- fileNodeRow: { id: number },
549
- ): { id: number } {
550
- let caller: { id: number } | null = null;
551
- let callerSpan = Infinity;
552
- for (const def of definitions) {
553
- if (def.line <= call.line) {
554
- const end = def.endLine || Infinity;
555
- if (call.line <= end) {
556
- const span = end - def.line;
557
- if (span < callerSpan) {
558
- const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line);
559
- if (row) {
560
- caller = row;
561
- callerSpan = span;
562
- }
563
- }
564
- }
565
- }
566
- }
567
- return caller || fileNodeRow;
568
- }
569
-
570
- function resolveCallTargets(
571
- ctx: PipelineContext,
572
- call: Call,
573
- relPath: string,
574
- importedNames: Map<string, string>,
575
- typeMap: Map<string, TypeMapEntry | string>,
576
- ): { targets: NodeRow[]; importedFrom: string | undefined } {
577
- const importedFrom = importedNames.get(call.name);
578
- let targets: NodeRow[] | undefined;
579
-
580
- if (importedFrom) {
581
- targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || [];
582
- if (targets.length === 0 && isBarrelFile(ctx, importedFrom)) {
583
- const actualSource = resolveBarrelExport(ctx, importedFrom, call.name);
584
- if (actualSource) {
585
- targets = ctx.nodesByNameAndFile.get(`${call.name}|${actualSource}`) || [];
586
- }
587
- }
588
- }
589
-
590
- if (!targets || targets.length === 0) {
591
- targets = ctx.nodesByNameAndFile.get(`${call.name}|${relPath}`) || [];
592
- if (targets.length === 0) {
593
- targets = resolveByMethodOrGlobal(ctx, call, relPath, typeMap);
594
- }
595
- }
596
-
597
- if (targets.length > 1) {
598
- targets.sort((a, b) => {
599
- const confA = computeConfidence(relPath, a.file, importedFrom ?? null);
600
- const confB = computeConfidence(relPath, b.file, importedFrom ?? null);
601
- return confB - confA;
602
- });
603
- }
604
-
605
- return { targets, importedFrom };
606
- }
607
-
608
- function resolveByMethodOrGlobal(
609
- ctx: PipelineContext,
610
- call: Call,
611
- relPath: string,
612
- typeMap: Map<string, TypeMapEntry | string>,
613
- ): NodeRow[] {
614
- // Type-aware resolution: translate variable receiver to its declared type
615
- if (call.receiver && typeMap) {
616
- const typeEntry = typeMap.get(call.receiver);
617
- const typeName = typeEntry
618
- ? typeof typeEntry === 'string'
619
- ? typeEntry
620
- : typeEntry.type
621
- : null;
622
- if (typeName) {
623
- const qualifiedName = `${typeName}.${call.name}`;
624
- const typed = (ctx.nodesByName.get(qualifiedName) || []).filter((n) => n.kind === 'method');
625
- if (typed.length > 0) return typed;
626
- }
627
- }
628
-
629
- if (
630
- !call.receiver ||
631
- call.receiver === 'this' ||
632
- call.receiver === 'self' ||
633
- call.receiver === 'super'
634
- ) {
635
- return (ctx.nodesByName.get(call.name) || []).filter(
636
- (n) => computeConfidence(relPath, n.file, null) >= 0.5,
637
- );
638
- }
639
- return [];
549
+ function makeContextLookup(ctx: PipelineContext, getNodeIdStmt: NodeIdStmt): CallNodeLookup {
550
+ return {
551
+ byNameAndFile: (name, file) => ctx.nodesByNameAndFile.get(`${name}|${file}`) ?? [],
552
+ byName: (name) => ctx.nodesByName.get(name) ?? [],
553
+ isBarrel: (file) => isBarrelFile(ctx, file),
554
+ resolveBarrel: (barrelFile, symbolName) => resolveBarrelExport(ctx, barrelFile, symbolName),
555
+ nodeId: (name, kind, file, line) => getNodeIdStmt.get(name, kind, file, line),
556
+ };
640
557
  }
641
558
 
642
559
  function buildFileCallEdges(
643
- ctx: PipelineContext,
644
560
  relPath: string,
645
561
  symbols: ExtractorOutput,
646
562
  fileNodeRow: { id: number },
647
563
  importedNames: Map<string, string>,
648
564
  seenCallEdges: Set<string>,
649
- getNodeIdStmt: NodeIdStmt,
565
+ lookup: CallNodeLookup,
650
566
  allEdgeRows: EdgeRowTuple[],
651
567
  typeMap: Map<string, TypeMapEntry | string>,
652
568
  ): void {
653
569
  for (const call of symbols.calls) {
654
570
  if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue;
655
571
 
656
- const caller = findCaller(call, symbols.definitions, relPath, getNodeIdStmt, fileNodeRow);
572
+ const caller = findCaller(lookup, call, symbols.definitions, relPath, fileNodeRow);
657
573
  const isDynamic: number = call.dynamic ? 1 : 0;
658
574
  const { targets, importedFrom } = resolveCallTargets(
659
- ctx,
575
+ lookup,
660
576
  call,
661
577
  relPath,
662
578
  importedNames,
663
- typeMap,
579
+ typeMap as Map<string, unknown>,
664
580
  );
665
581
 
666
582
  for (const t of targets) {
@@ -672,7 +588,6 @@ function buildFileCallEdges(
672
588
  }
673
589
  }
674
590
 
675
- // Receiver edge
676
591
  if (
677
592
  call.receiver &&
678
593
  !BUILTIN_RECEIVERS.has(call.receiver) &&
@@ -680,36 +595,17 @@ function buildFileCallEdges(
680
595
  call.receiver !== 'self' &&
681
596
  call.receiver !== 'super'
682
597
  ) {
683
- buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows, typeMap);
684
- }
685
- }
686
- }
687
-
688
- function buildReceiverEdge(
689
- ctx: PipelineContext,
690
- call: Call,
691
- caller: { id: number },
692
- relPath: string,
693
- seenCallEdges: Set<string>,
694
- allEdgeRows: EdgeRowTuple[],
695
- typeMap: Map<string, TypeMapEntry | string>,
696
- ): void {
697
- const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']);
698
- const typeEntry = typeMap?.get(call.receiver!);
699
- const typeName = typeEntry ? (typeof typeEntry === 'string' ? typeEntry : typeEntry.type) : null;
700
- const typeConfidence = typeEntry && typeof typeEntry === 'object' ? typeEntry.confidence : null;
701
- const effectiveReceiver = typeName || call.receiver!;
702
- const samefile = ctx.nodesByNameAndFile.get(`${effectiveReceiver}|${relPath}`) || [];
703
- const candidates = samefile.length > 0 ? samefile : ctx.nodesByName.get(effectiveReceiver) || [];
704
- const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind));
705
- if (receiverNodes.length > 0 && caller) {
706
- const recvTarget = receiverNodes[0]!;
707
- const recvKey = `recv|${caller.id}|${recvTarget.id}`;
708
- if (!seenCallEdges.has(recvKey)) {
709
- seenCallEdges.add(recvKey);
710
- // Use type source confidence when available, otherwise 0.7 for untyped receiver
711
- const confidence = typeConfidence ?? (typeName ? 0.9 : 0.7);
712
- allEdgeRows.push([caller.id, recvTarget.id, 'receiver', confidence, 0]);
598
+ const recv = resolveReceiverEdge(
599
+ lookup,
600
+ { name: call.name, receiver: call.receiver },
601
+ caller,
602
+ relPath,
603
+ typeMap as Map<string, unknown>,
604
+ seenCallEdges,
605
+ );
606
+ if (recv) {
607
+ allEdgeRows.push([recv.callerId, recv.receiverId, 'receiver', recv.confidence, 0]);
608
+ }
713
609
  }
714
610
  }
715
611
  }
@@ -35,10 +35,12 @@ function prepareWatcherStatements(db: ReturnType<typeof openDb>): IncrementalStm
35
35
  'SELECT COUNT(*) as c FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
36
36
  ),
37
37
  findNodeInFile: db.prepare(
38
- "SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') AND file = ?",
38
+ "SELECT id, kind, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') AND file = ?",
39
39
  ),
40
40
  findNodeByName: db.prepare(
41
- "SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant')",
41
+ // `kind` is included so resolveByMethodOrGlobal can filter to 'method' for
42
+ // type-aware receiver resolution (mirrors the full-build resolver).
43
+ "SELECT id, file, kind FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant')",
42
44
  ),
43
45
  listSymbols: db.prepare("SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file'"),
44
46
  };
@@ -1,5 +1,5 @@
1
1
  import path from 'node:path';
2
- import { getNodeId, testFilterSQL } from '../db/index.js';
2
+ import { getBuildMeta, getNodeId, setBuildMeta, testFilterSQL } from '../db/index.js';
3
3
  import { debug } from '../infrastructure/logger.js';
4
4
  import { normalizePath } from '../shared/constants.js';
5
5
  import type { BetterSqlite3Database } from '../types.js';
@@ -582,6 +582,111 @@ function buildClassifierInput(
582
582
  }));
583
583
  }
584
584
 
585
+ // ─── Median cache helpers ─────────────────────────────────────────────────────
586
+
587
+ const ROLES_MEDIANS_KEY = 'roles_medians';
588
+
589
+ // Invalidate cached medians when the edge count drifts past this threshold.
590
+ // A 1-file rebuild adds/removes < 100 edges — well within the margin.
591
+ const MEDIAN_INVALIDATION_DELTA = 500;
592
+
593
+ /**
594
+ * Full edge-table GROUP BY scan — O(M). Only runs on cache miss.
595
+ *
596
+ * Joins `nodes` to restrict to the same non-leaf kinds that
597
+ * `classifyNodeRolesFull` uses when computing medians from in-memory rows
598
+ * (excludes 'file', 'directory', 'parameter', 'property'). This keeps the
599
+ * two paths consistent so a cold-cache fallback produces the same distribution
600
+ * as the full-build cached value.
601
+ *
602
+ * Also returns the filtered edge count used for computing the medians so the
603
+ * caller can pass it directly to `writeMedianCache` without a second query.
604
+ */
605
+ function computeGlobalMediansFromEdges(db: BetterSqlite3Database): {
606
+ fanIn: number;
607
+ fanOut: number;
608
+ edgeCount: number;
609
+ } {
610
+ const excludedKinds = `('file', 'directory', 'parameter', 'property')`;
611
+ const fanInRows = db
612
+ .prepare(
613
+ `SELECT COUNT(*) AS cnt FROM edges e
614
+ JOIN nodes t ON e.target_id = t.id
615
+ WHERE e.kind IN ('calls', 'imports-type')
616
+ AND t.kind NOT IN ${excludedKinds}
617
+ GROUP BY e.target_id`,
618
+ )
619
+ .all() as { cnt: number }[];
620
+ const fanOutRows = db
621
+ .prepare(
622
+ `SELECT COUNT(*) AS cnt FROM edges e
623
+ JOIN nodes s ON e.source_id = s.id
624
+ WHERE e.kind = 'calls'
625
+ AND s.kind NOT IN ${excludedKinds}
626
+ GROUP BY e.source_id`,
627
+ )
628
+ .all() as { cnt: number }[];
629
+ const fanInDist = fanInRows.map((r) => r.cnt).sort((a, b) => a - b);
630
+ const fanOutDist = fanOutRows.map((r) => r.cnt).sort((a, b) => a - b);
631
+ // Sum of fanInRows[*].cnt equals the total edge count for the relevant
632
+ // edge subset — no extra COUNT query needed.
633
+ const edgeCount = fanInRows.reduce((acc, r) => acc + r.cnt, 0);
634
+ return { fanIn: median(fanInDist), fanOut: median(fanOutDist), edgeCount };
635
+ }
636
+
637
+ /**
638
+ * Read cached role medians from build_meta. Returns null when absent or stale
639
+ * (edge count moved beyond MEDIAN_INVALIDATION_DELTA from the cached value).
640
+ *
641
+ * The staleness check uses the same edge subset (calls + imports-type) that
642
+ * the medians are derived from, so only changes to the edges that actually
643
+ * influence fan-in/fan-out can evict the cache.
644
+ */
645
+ function readCachedMedians(db: BetterSqlite3Database): { fanIn: number; fanOut: number } | null {
646
+ const raw = getBuildMeta(db, ROLES_MEDIANS_KEY);
647
+ if (!raw) return null;
648
+ try {
649
+ const cached = JSON.parse(raw) as { fanIn: number; fanOut: number; edgeCount: number };
650
+ // Count only the edge kinds that drive median computation — same subset
651
+ // used by computeGlobalMediansFromEdges and classifyNodeRolesFull.
652
+ const currentCount = (
653
+ db
654
+ .prepare(`SELECT COUNT(*) AS cnt FROM edges WHERE kind IN ('calls', 'imports-type')`)
655
+ .get() as { cnt: number }
656
+ ).cnt;
657
+ if (
658
+ Math.abs(currentCount - cached.edgeCount) >
659
+ Math.max(MEDIAN_INVALIDATION_DELTA, cached.edgeCount * 0.1)
660
+ )
661
+ return null;
662
+ return { fanIn: cached.fanIn, fanOut: cached.fanOut };
663
+ } catch {
664
+ return null;
665
+ }
666
+ }
667
+
668
+ /**
669
+ * Persist global role medians + current edge count to build_meta.
670
+ *
671
+ * @param edgeCount - pre-computed calls+imports-type edge count. When provided,
672
+ * the function skips the COUNT query entirely. Pass when the count is already
673
+ * known at the call site (e.g. from `computeGlobalMediansFromEdges`).
674
+ */
675
+ function writeMedianCache(
676
+ db: BetterSqlite3Database,
677
+ medians: { fanIn: number; fanOut: number },
678
+ edgeCount?: number,
679
+ ): void {
680
+ const cnt =
681
+ edgeCount ??
682
+ (
683
+ db
684
+ .prepare(`SELECT COUNT(*) AS cnt FROM edges WHERE kind IN ('calls', 'imports-type')`)
685
+ .get() as { cnt: number }
686
+ ).cnt;
687
+ setBuildMeta(db, { [ROLES_MEDIANS_KEY]: JSON.stringify({ ...medians, edgeCount: cnt }) });
688
+ }
689
+
585
690
  function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSummary): RoleSummary {
586
691
  // Leaf kinds (parameter, property) can never have callers/callees.
587
692
  // Classify them directly as dead-leaf without the expensive fan-in/fan-out JOINs.
@@ -672,10 +777,28 @@ function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSumm
672
777
  prodFanInMap.set(r.target_id, r.cnt);
673
778
  }
674
779
 
675
- // Delegate classification to the pure-logic classifier
780
+ // Delegate classification to the pure-logic classifier.
781
+ // Compute medians from the already-loaded rows (no extra DB round-trip),
782
+ // pass them as overrides to avoid recomputing inside classifyRoles,
783
+ // and cache them for subsequent incremental builds.
676
784
  const activeFiles = buildActiveFilesSet(rows);
677
785
  const classifierInput = buildClassifierInput(rows, exportedIds, prodFanInMap, activeFiles);
678
- const roleMap = classifyRoles(classifierInput);
786
+ const nonZeroFanIn = classifierInput
787
+ .filter((n) => n.fanIn > 0)
788
+ .map((n) => n.fanIn)
789
+ .sort((a, b) => a - b);
790
+ const nonZeroFanOut = classifierInput
791
+ .filter((n) => n.fanOut > 0)
792
+ .map((n) => n.fanOut)
793
+ .sort((a, b) => a - b);
794
+ const globalMedians = { fanIn: median(nonZeroFanIn), fanOut: median(nonZeroFanOut) };
795
+ const roleMap = classifyRoles(classifierInput, globalMedians);
796
+ // Derive the edge count from already-loaded in-memory rows: summing fan_in
797
+ // across all nodes equals COUNT(*) FROM edges WHERE kind IN ('calls','imports-type'),
798
+ // since the full-build query left-joins every matching edge exactly once per target.
799
+ // Passing this avoids an extra COUNT query on the full-build path.
800
+ const inMemoryEdgeCount = rows.reduce((acc, r) => acc + r.fan_in, 0);
801
+ writeMedianCache(db, globalMedians, inMemoryEdgeCount);
679
802
 
680
803
  const { summary, idsByRole } = buildRoleSummary(rows, leafRows, roleMap, emptySummary);
681
804
 
@@ -691,7 +814,9 @@ function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSumm
691
814
  * plus their immediate edge neighbours (callers and callees in other files).
692
815
  *
693
816
  * Uses indexed point lookups for fan-in/fan-out instead of full table scans.
694
- * Global medians are computed from edge distribution (fast GROUP BY on index).
817
+ * Global medians are read from the build_meta cache written by the last full
818
+ * classification; the cache is only recomputed when the edge count drifts
819
+ * beyond MEDIAN_INVALIDATION_DELTA (i.e. large structural changes).
695
820
  * Unchanged files not connected to changed files keep their roles from the
696
821
  * previous build.
697
822
  */
@@ -718,25 +843,20 @@ function classifyNodeRolesIncremental(
718
843
  const allAffectedFiles = [...changedFiles, ...neighbourFiles.map((r) => r.file)];
719
844
  const placeholders = allAffectedFiles.map(() => '?').join(',');
720
845
 
721
- // 1. Compute global medians from edge distribution (fast: scans edge index, no node join)
722
- const fanInDist = (
723
- db
724
- .prepare(
725
- `SELECT COUNT(*) AS cnt FROM edges WHERE kind IN ('calls', 'imports-type') GROUP BY target_id`,
726
- )
727
- .all() as { cnt: number }[]
728
- )
729
- .map((r) => r.cnt)
730
- .sort((a, b) => a - b);
731
- const fanOutDist = (
732
- db
733
- .prepare(`SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id`)
734
- .all() as { cnt: number }[]
735
- )
736
- .map((r) => r.cnt)
737
- .sort((a, b) => a - b);
738
-
739
- const globalMedians = { fanIn: median(fanInDist), fanOut: median(fanOutDist) };
846
+ // 1. Read global medians from cache; fall back to full edge scan only on miss.
847
+ // The median barely moves for a 1-file change, so the cache is almost always
848
+ // valid, eliminating 2× full edge-table GROUP BY queries (~10-15 ms on large graphs).
849
+ const cachedMedians = readCachedMedians(db);
850
+ let globalMedians: { fanIn: number; fanOut: number };
851
+ if (cachedMedians) {
852
+ globalMedians = cachedMedians;
853
+ } else {
854
+ const computed = computeGlobalMediansFromEdges(db);
855
+ // Pass the edgeCount returned by computeGlobalMediansFromEdges so
856
+ // writeMedianCache does not issue a second COUNT query.
857
+ writeMedianCache(db, computed, computed.edgeCount);
858
+ globalMedians = computed;
859
+ }
740
860
 
741
861
  // 2a. Leaf kinds (parameter, property) in affected files — always dead-leaf
742
862
  const leafRows = db