@optave/codegraph 3.11.0 → 3.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. package/README.md +38 -31
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +91 -60
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/visitor-utils.d.ts +3 -0
  6. package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
  7. package/dist/ast-analysis/visitor-utils.js +83 -49
  8. package/dist/ast-analysis/visitor-utils.js.map +1 -1
  9. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  10. package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
  11. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  12. package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
  13. package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
  14. package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
  15. package/dist/cli/commands/embed.d.ts.map +1 -1
  16. package/dist/cli/commands/embed.js +49 -4
  17. package/dist/cli/commands/embed.js.map +1 -1
  18. package/dist/domain/analysis/dependencies.d.ts.map +1 -1
  19. package/dist/domain/analysis/dependencies.js +106 -80
  20. package/dist/domain/analysis/dependencies.js.map +1 -1
  21. package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
  22. package/dist/domain/analysis/fn-impact.js +77 -52
  23. package/dist/domain/analysis/fn-impact.js.map +1 -1
  24. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  25. package/dist/domain/analysis/module-map.js +132 -121
  26. package/dist/domain/analysis/module-map.js.map +1 -1
  27. package/dist/domain/graph/builder/call-resolver.d.ts +71 -0
  28. package/dist/domain/graph/builder/call-resolver.d.ts.map +1 -0
  29. package/dist/domain/graph/builder/call-resolver.js +130 -0
  30. package/dist/domain/graph/builder/call-resolver.js.map +1 -0
  31. package/dist/domain/graph/builder/helpers.d.ts +4 -4
  32. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  33. package/dist/domain/graph/builder/helpers.js +47 -33
  34. package/dist/domain/graph/builder/helpers.js.map +1 -1
  35. package/dist/domain/graph/builder/incremental.d.ts +6 -0
  36. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  37. package/dist/domain/graph/builder/incremental.js +214 -127
  38. package/dist/domain/graph/builder/incremental.js.map +1 -1
  39. package/dist/domain/graph/builder/pipeline.d.ts +1 -44
  40. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  41. package/dist/domain/graph/builder/pipeline.js +10 -766
  42. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  43. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  44. package/dist/domain/graph/builder/stages/build-edges.js +151 -192
  45. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  46. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  47. package/dist/domain/graph/builder/stages/build-structure.js +82 -65
  48. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  49. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  50. package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
  51. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  52. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  53. package/dist/domain/graph/builder/stages/finalize.js +60 -51
  54. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  55. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
  56. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  57. package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
  58. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  59. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
  60. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
  61. package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
  62. package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
  63. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
  64. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
  65. package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
  66. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
  67. package/dist/domain/graph/cycles.d.ts +6 -4
  68. package/dist/domain/graph/cycles.d.ts.map +1 -1
  69. package/dist/domain/graph/cycles.js +50 -55
  70. package/dist/domain/graph/cycles.js.map +1 -1
  71. package/dist/domain/graph/journal.d.ts.map +1 -1
  72. package/dist/domain/graph/journal.js +89 -70
  73. package/dist/domain/graph/journal.js.map +1 -1
  74. package/dist/domain/graph/watcher.d.ts.map +1 -1
  75. package/dist/domain/graph/watcher.js +10 -4
  76. package/dist/domain/graph/watcher.js.map +1 -1
  77. package/dist/domain/parser.d.ts +12 -23
  78. package/dist/domain/parser.d.ts.map +1 -1
  79. package/dist/domain/parser.js +126 -79
  80. package/dist/domain/parser.js.map +1 -1
  81. package/dist/domain/search/generator.d.ts +3 -1
  82. package/dist/domain/search/generator.d.ts.map +1 -1
  83. package/dist/domain/search/generator.js +68 -45
  84. package/dist/domain/search/generator.js.map +1 -1
  85. package/dist/domain/search/models.d.ts +2 -0
  86. package/dist/domain/search/models.d.ts.map +1 -1
  87. package/dist/domain/search/models.js +37 -3
  88. package/dist/domain/search/models.js.map +1 -1
  89. package/dist/domain/search/search/hybrid.d.ts.map +1 -1
  90. package/dist/domain/search/search/hybrid.js +49 -40
  91. package/dist/domain/search/search/hybrid.js.map +1 -1
  92. package/dist/domain/search/search/semantic.d.ts.map +1 -1
  93. package/dist/domain/search/search/semantic.js +69 -49
  94. package/dist/domain/search/search/semantic.js.map +1 -1
  95. package/dist/domain/wasm-worker-entry.js +201 -136
  96. package/dist/domain/wasm-worker-entry.js.map +1 -1
  97. package/dist/extractors/elixir.js +95 -71
  98. package/dist/extractors/elixir.js.map +1 -1
  99. package/dist/extractors/gleam.d.ts.map +1 -1
  100. package/dist/extractors/gleam.js +23 -31
  101. package/dist/extractors/gleam.js.map +1 -1
  102. package/dist/extractors/helpers.d.ts +79 -1
  103. package/dist/extractors/helpers.d.ts.map +1 -1
  104. package/dist/extractors/helpers.js +137 -0
  105. package/dist/extractors/helpers.js.map +1 -1
  106. package/dist/extractors/java.d.ts.map +1 -1
  107. package/dist/extractors/java.js +37 -49
  108. package/dist/extractors/java.js.map +1 -1
  109. package/dist/extractors/javascript.d.ts.map +1 -1
  110. package/dist/extractors/javascript.js +44 -44
  111. package/dist/extractors/javascript.js.map +1 -1
  112. package/dist/extractors/julia.js +27 -34
  113. package/dist/extractors/julia.js.map +1 -1
  114. package/dist/extractors/r.d.ts.map +1 -1
  115. package/dist/extractors/r.js +33 -58
  116. package/dist/extractors/r.js.map +1 -1
  117. package/dist/extractors/solidity.d.ts.map +1 -1
  118. package/dist/extractors/solidity.js +38 -61
  119. package/dist/extractors/solidity.js.map +1 -1
  120. package/dist/features/boundaries.d.ts.map +1 -1
  121. package/dist/features/boundaries.js +49 -39
  122. package/dist/features/boundaries.js.map +1 -1
  123. package/dist/features/cfg.d.ts.map +1 -1
  124. package/dist/features/cfg.js +90 -63
  125. package/dist/features/cfg.js.map +1 -1
  126. package/dist/features/check.d.ts.map +1 -1
  127. package/dist/features/check.js +43 -34
  128. package/dist/features/check.js.map +1 -1
  129. package/dist/features/cochange.d.ts.map +1 -1
  130. package/dist/features/cochange.js +68 -56
  131. package/dist/features/cochange.js.map +1 -1
  132. package/dist/features/complexity.d.ts.map +1 -1
  133. package/dist/features/complexity.js +105 -75
  134. package/dist/features/complexity.js.map +1 -1
  135. package/dist/features/dataflow.d.ts.map +1 -1
  136. package/dist/features/dataflow.js +37 -29
  137. package/dist/features/dataflow.js.map +1 -1
  138. package/dist/features/flow.d.ts.map +1 -1
  139. package/dist/features/flow.js +31 -22
  140. package/dist/features/flow.js.map +1 -1
  141. package/dist/features/graph-enrichment.d.ts.map +1 -1
  142. package/dist/features/graph-enrichment.js +77 -70
  143. package/dist/features/graph-enrichment.js.map +1 -1
  144. package/dist/features/owners.d.ts +17 -26
  145. package/dist/features/owners.d.ts.map +1 -1
  146. package/dist/features/owners.js +120 -109
  147. package/dist/features/owners.js.map +1 -1
  148. package/dist/features/sequence.d.ts.map +1 -1
  149. package/dist/features/sequence.js +59 -54
  150. package/dist/features/sequence.js.map +1 -1
  151. package/dist/features/structure-query.d.ts.map +1 -1
  152. package/dist/features/structure-query.js +60 -60
  153. package/dist/features/structure-query.js.map +1 -1
  154. package/dist/features/structure.d.ts.map +1 -1
  155. package/dist/features/structure.js +149 -52
  156. package/dist/features/structure.js.map +1 -1
  157. package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
  158. package/dist/graph/algorithms/leiden/optimiser.js +100 -69
  159. package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
  160. package/dist/graph/classifiers/roles.d.ts.map +1 -1
  161. package/dist/graph/classifiers/roles.js +63 -59
  162. package/dist/graph/classifiers/roles.js.map +1 -1
  163. package/dist/infrastructure/config.d.ts +1 -1
  164. package/dist/infrastructure/config.d.ts.map +1 -1
  165. package/dist/infrastructure/config.js +1 -1
  166. package/dist/infrastructure/config.js.map +1 -1
  167. package/dist/presentation/cfg.d.ts.map +1 -1
  168. package/dist/presentation/cfg.js +44 -29
  169. package/dist/presentation/cfg.js.map +1 -1
  170. package/dist/presentation/flow.d.ts.map +1 -1
  171. package/dist/presentation/flow.js +58 -38
  172. package/dist/presentation/flow.js.map +1 -1
  173. package/dist/types.d.ts +1 -1
  174. package/dist/types.d.ts.map +1 -1
  175. package/grammars/tree-sitter-erlang.wasm +0 -0
  176. package/package.json +9 -9
  177. package/src/ast-analysis/engine.ts +145 -61
  178. package/src/ast-analysis/visitor-utils.ts +86 -46
  179. package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
  180. package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
  181. package/src/cli/commands/embed.ts +54 -4
  182. package/src/domain/analysis/dependencies.ts +166 -85
  183. package/src/domain/analysis/fn-impact.ts +120 -50
  184. package/src/domain/analysis/module-map.ts +175 -140
  185. package/src/domain/graph/builder/call-resolver.ts +181 -0
  186. package/src/domain/graph/builder/helpers.ts +85 -76
  187. package/src/domain/graph/builder/incremental.ts +321 -152
  188. package/src/domain/graph/builder/pipeline.ts +19 -957
  189. package/src/domain/graph/builder/stages/build-edges.ts +229 -275
  190. package/src/domain/graph/builder/stages/build-structure.ts +115 -82
  191. package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
  192. package/src/domain/graph/builder/stages/finalize.ts +72 -70
  193. package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
  194. package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
  195. package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
  196. package/src/domain/graph/cycles.ts +51 -49
  197. package/src/domain/graph/journal.ts +84 -69
  198. package/src/domain/graph/watcher.ts +12 -4
  199. package/src/domain/parser.ts +143 -66
  200. package/src/domain/search/generator.ts +132 -74
  201. package/src/domain/search/models.ts +39 -3
  202. package/src/domain/search/search/hybrid.ts +53 -42
  203. package/src/domain/search/search/semantic.ts +105 -65
  204. package/src/domain/wasm-worker-entry.ts +235 -152
  205. package/src/extractors/elixir.ts +91 -64
  206. package/src/extractors/gleam.ts +33 -37
  207. package/src/extractors/helpers.ts +205 -1
  208. package/src/extractors/java.ts +42 -45
  209. package/src/extractors/javascript.ts +44 -43
  210. package/src/extractors/julia.ts +28 -35
  211. package/src/extractors/r.ts +38 -56
  212. package/src/extractors/solidity.ts +43 -71
  213. package/src/features/boundaries.ts +64 -46
  214. package/src/features/cfg.ts +145 -74
  215. package/src/features/check.ts +60 -43
  216. package/src/features/cochange.ts +95 -72
  217. package/src/features/complexity.ts +134 -79
  218. package/src/features/dataflow.ts +57 -34
  219. package/src/features/flow.ts +48 -24
  220. package/src/features/graph-enrichment.ts +105 -70
  221. package/src/features/owners.ts +186 -146
  222. package/src/features/sequence.ts +99 -69
  223. package/src/features/structure-query.ts +94 -79
  224. package/src/features/structure.ts +199 -79
  225. package/src/graph/algorithms/leiden/optimiser.ts +142 -87
  226. package/src/graph/classifiers/roles.ts +64 -54
  227. package/src/infrastructure/config.ts +1 -1
  228. package/src/presentation/cfg.ts +48 -32
  229. package/src/presentation/flow.ts +100 -52
  230. package/src/types.ts +1 -1
@@ -11,87 +11,104 @@ import type { ExtractorOutput } from '../../../../types.js';
11
11
  import type { PipelineContext } from '../context.js';
12
12
  import { readFileSafe } from '../helpers.js';
13
13
 
14
- export async function buildStructure(ctx: PipelineContext): Promise<void> {
15
- const { db, fileSymbols, rootDir, discoveredDirs, allSymbols, isFullBuild } = ctx;
16
-
17
- // Build line count map (prefer cached _lineCount from parser)
14
+ /** Populate `ctx.lineCountMap` from cached parser results, falling back to disk. */
15
+ function populateLineCountMap(ctx: PipelineContext): void {
16
+ const { fileSymbols, rootDir } = ctx;
18
17
  ctx.lineCountMap = new Map();
19
18
  for (const [relPath, symbols] of fileSymbols) {
20
19
  const lineCount =
21
20
  (symbols as ExtractorOutput & { lineCount?: number }).lineCount ?? symbols._lineCount;
22
21
  if (lineCount) {
23
22
  ctx.lineCountMap.set(relPath, lineCount);
24
- } else {
25
- const absPath = path.join(rootDir, relPath);
26
- try {
27
- const content = readFileSafe(absPath);
28
- ctx.lineCountMap.set(relPath, content.split('\n').length);
29
- } catch {
30
- ctx.lineCountMap.set(relPath, 0);
31
- }
23
+ continue;
24
+ }
25
+ const absPath = path.join(rootDir, relPath);
26
+ try {
27
+ const content = readFileSafe(absPath);
28
+ ctx.lineCountMap.set(relPath, content.split('\n').length);
29
+ } catch {
30
+ ctx.lineCountMap.set(relPath, 0);
32
31
  }
33
32
  }
33
+ }
34
34
 
35
- const changedFileList = isFullBuild ? null : [...allSymbols.keys()];
36
-
37
- // For small incremental builds on large codebases, use a fast path that
38
- // updates only the changed files' metrics via targeted SQL instead of
39
- // loading ALL definitions from DB (~8ms) and recomputing ALL metrics (~15ms).
40
- // Gate: ≤smallFilesThreshold changed files AND significantly more existing files (>20) to
41
- // avoid triggering on small test fixtures where directory metrics matter.
35
+ /** Count file-kind nodes already in the DB, preferring the native connection. */
36
+ function countExistingFiles(ctx: PipelineContext): number {
42
37
  const useNativeReads = ctx.engineName === 'native' && !!ctx.nativeDb;
43
- const existingFileCount = !isFullBuild
44
- ? (
45
- (useNativeReads
46
- ? ctx.nativeDb!.queryGet("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'", [])
47
- : db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get()) as {
48
- c: number;
49
- }
50
- ).c
51
- : 0;
52
- const useSmallIncrementalFastPath =
53
- !isFullBuild &&
54
- changedFileList != null &&
55
- changedFileList.length <= ctx.config.build.smallFilesThreshold &&
56
- existingFileCount > 20;
57
-
58
- if (!isFullBuild && !useSmallIncrementalFastPath) {
59
- // Medium/large incremental: load unchanged files from DB for complete structure
60
- loadUnchangedFilesFromDb(ctx);
61
- }
38
+ const row = (
39
+ useNativeReads
40
+ ? ctx.nativeDb!.queryGet("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'", [])
41
+ : ctx.db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get()
42
+ ) as { c: number };
43
+ return row.c;
44
+ }
62
45
 
63
- // Build directory structure
64
- const t0 = performance.now();
46
+ /**
47
+ * Build directory structure + metrics. Chooses between the fast incremental
48
+ * path (a handful of files changed on a large codebase) and the full path
49
+ * (delegated to `features/structure`).
50
+ */
51
+ async function buildDirectoryStructure(
52
+ ctx: PipelineContext,
53
+ changedFileList: string[] | null,
54
+ useSmallIncrementalFastPath: boolean,
55
+ ): Promise<void> {
65
56
  if (useSmallIncrementalFastPath) {
66
57
  updateChangedFileMetrics(ctx, changedFileList!);
67
- } else {
68
- const relDirs = new Set<string>();
69
- for (const absDir of discoveredDirs) {
70
- relDirs.add(normalizePath(path.relative(rootDir, absDir)));
71
- }
72
- try {
73
- const { buildStructure: buildStructureFn } = (await import(
74
- '../../../../features/structure.js'
75
- )) as {
76
- buildStructure: (
77
- db: PipelineContext['db'],
78
- fileSymbols: Map<string, ExtractorOutput>,
79
- rootDir: string,
80
- lineCountMap: Map<string, number>,
81
- directories: Set<string>,
82
- changedFiles: string[] | null,
83
- ) => void;
84
- };
85
- const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
86
- buildStructureFn(db, fileSymbols, rootDir, ctx.lineCountMap, relDirs, changedFilePaths);
87
- } catch (err) {
88
- debug(`Structure analysis failed: ${(err as Error).message}`);
89
- }
58
+ return;
90
59
  }
91
- ctx.timing.structureMs = performance.now() - t0;
92
60
 
93
- // Classify node roles (incremental: only reclassify changed files' nodes)
94
- const t1 = performance.now();
61
+ const { db, fileSymbols, rootDir, discoveredDirs, allSymbols, isFullBuild } = ctx;
62
+ const relDirs = new Set<string>();
63
+ for (const absDir of discoveredDirs) {
64
+ relDirs.add(normalizePath(path.relative(rootDir, absDir)));
65
+ }
66
+ try {
67
+ const { buildStructure: buildStructureFn } = (await import(
68
+ '../../../../features/structure.js'
69
+ )) as {
70
+ buildStructure: (
71
+ db: PipelineContext['db'],
72
+ fileSymbols: Map<string, ExtractorOutput>,
73
+ rootDir: string,
74
+ lineCountMap: Map<string, number>,
75
+ directories: Set<string>,
76
+ changedFiles: string[] | null,
77
+ ) => void;
78
+ };
79
+ const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
80
+ buildStructureFn(db, fileSymbols, rootDir, ctx.lineCountMap, relDirs, changedFilePaths);
81
+ } catch (err) {
82
+ debug(`Structure analysis failed: ${(err as Error).message}`);
83
+ }
84
+ }
85
+
86
+ /** Convert a `NativeDatabase.classifyRoles*` result into the JS summary shape. */
87
+ function nativeRoleSummaryToRecord(
88
+ nativeResult: NonNullable<
89
+ ReturnType<NonNullable<PipelineContext['nativeDb']>['classifyRolesFull']>
90
+ >,
91
+ ): Record<string, number> {
92
+ return {
93
+ entry: nativeResult.entry,
94
+ core: nativeResult.core,
95
+ utility: nativeResult.utility,
96
+ adapter: nativeResult.adapter,
97
+ dead: nativeResult.dead,
98
+ 'dead-leaf': nativeResult.deadLeaf,
99
+ 'dead-entry': nativeResult.deadEntry,
100
+ 'dead-ffi': nativeResult.deadFfi,
101
+ 'dead-unresolved': nativeResult.deadUnresolved,
102
+ 'test-only': nativeResult.testOnly,
103
+ leaf: nativeResult.leaf,
104
+ };
105
+ }
106
+
107
+ async function classifyRoles(
108
+ ctx: PipelineContext,
109
+ changedFileList: string[] | null,
110
+ ): Promise<void> {
111
+ const useNativeReads = ctx.engineName === 'native' && !!ctx.nativeDb;
95
112
  try {
96
113
  let roleSummary: Record<string, number> | null = null;
97
114
 
@@ -103,24 +120,9 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
103
120
  changedFileList && changedFileList.length > 0
104
121
  ? ctx.nativeDb.classifyRolesIncremental(changedFileList)
105
122
  : ctx.nativeDb.classifyRolesFull();
106
- if (nativeResult) {
107
- roleSummary = {
108
- entry: nativeResult.entry,
109
- core: nativeResult.core,
110
- utility: nativeResult.utility,
111
- adapter: nativeResult.adapter,
112
- dead: nativeResult.dead,
113
- 'dead-leaf': nativeResult.deadLeaf,
114
- 'dead-entry': nativeResult.deadEntry,
115
- 'dead-ffi': nativeResult.deadFfi,
116
- 'dead-unresolved': nativeResult.deadUnresolved,
117
- 'test-only': nativeResult.testOnly,
118
- leaf: nativeResult.leaf,
119
- };
120
- }
123
+ if (nativeResult) roleSummary = nativeRoleSummaryToRecord(nativeResult);
121
124
  }
122
125
 
123
- // Fall back to JS path
124
126
  if (!roleSummary) {
125
127
  const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
126
128
  classifyNodeRoles: (
@@ -141,6 +143,37 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
141
143
  } catch (err) {
142
144
  debug(`Role classification failed: ${(err as Error).message}`);
143
145
  }
146
+ }
147
+
148
+ export async function buildStructure(ctx: PipelineContext): Promise<void> {
149
+ const { allSymbols, isFullBuild } = ctx;
150
+
151
+ populateLineCountMap(ctx);
152
+
153
+ const changedFileList = isFullBuild ? null : [...allSymbols.keys()];
154
+
155
+ // For small incremental builds on large codebases, use a fast path that
156
+ // updates only the changed files' metrics via targeted SQL instead of
157
+ // loading ALL definitions from DB (~8ms) and recomputing ALL metrics (~15ms).
158
+ // Gate: ≤smallFilesThreshold changed files AND significantly more existing files (>20) to
159
+ // avoid triggering on small test fixtures where directory metrics matter.
160
+ const existingFileCount = !isFullBuild ? countExistingFiles(ctx) : 0;
161
+ const useSmallIncrementalFastPath =
162
+ !isFullBuild &&
163
+ changedFileList != null &&
164
+ changedFileList.length <= ctx.config.build.smallFilesThreshold &&
165
+ existingFileCount > 20;
166
+
167
+ if (!isFullBuild && !useSmallIncrementalFastPath) {
168
+ loadUnchangedFilesFromDb(ctx);
169
+ }
170
+
171
+ const t0 = performance.now();
172
+ await buildDirectoryStructure(ctx, changedFileList, useSmallIncrementalFastPath);
173
+ ctx.timing.structureMs = performance.now() - t0;
174
+
175
+ const t1 = performance.now();
176
+ await classifyRoles(ctx, changedFileList);
144
177
  ctx.timing.rolesMs = performance.now() - t1;
145
178
  }
146
179
 
@@ -162,14 +162,14 @@ function tryJournalTier(
162
162
  return { changed, removed: [...removedSet], isFullBuild: false };
163
163
  }
164
164
 
165
- function mtimeAndHashTiers(
165
+ /** Tier 1: mtime+size triage. Returns the files that still need hashing. */
166
+ function tierMtimeSize(
166
167
  existing: Map<string, FileHashRow>,
167
168
  allFiles: string[],
168
169
  rootDir: string,
169
- removed: string[],
170
- ): ChangeResult {
170
+ ): { needsHash: NeedsHashItem[]; skipped: number } {
171
171
  const needsHash: NeedsHashItem[] = [];
172
- const skipped: string[] = [];
172
+ let skipped = 0;
173
173
 
174
174
  for (const file of allFiles) {
175
175
  const relPath = normalizePath(path.relative(rootDir, file));
@@ -183,16 +183,17 @@ function mtimeAndHashTiers(
183
183
  const storedMtime = record.mtime || 0;
184
184
  const storedSize = record.size || 0;
185
185
  if (storedSize > 0 && stat.mtime === storedMtime && stat.size === storedSize) {
186
- skipped.push(relPath);
186
+ skipped++;
187
187
  continue;
188
188
  }
189
189
  needsHash.push({ file, relPath, stat });
190
190
  }
191
191
 
192
- if (needsHash.length > 0) {
193
- debug(`Tier 1: ${skipped.length} skipped by mtime+size, ${needsHash.length} need hash check`);
194
- }
192
+ return { needsHash, skipped };
193
+ }
195
194
 
195
+ /** Tier 2: hash candidates from tier 1, classifying changed vs metadata-only. */
196
+ function tierHash(existing: Map<string, FileHashRow>, needsHash: NeedsHashItem[]): ChangedFile[] {
196
197
  const changed: ChangedFile[] = [];
197
198
  for (const item of needsHash) {
198
199
  let content: string | undefined;
@@ -217,11 +218,26 @@ function mtimeAndHashTiers(
217
218
  });
218
219
  }
219
220
  }
221
+ return changed;
222
+ }
223
+
224
+ function mtimeAndHashTiers(
225
+ existing: Map<string, FileHashRow>,
226
+ allFiles: string[],
227
+ rootDir: string,
228
+ removed: string[],
229
+ ): ChangeResult {
230
+ const { needsHash, skipped } = tierMtimeSize(existing, allFiles, rootDir);
231
+ if (needsHash.length > 0) {
232
+ debug(`Tier 1: ${skipped} skipped by mtime+size, ${needsHash.length} need hash check`);
233
+ }
234
+
235
+ const changed = tierHash(existing, needsHash);
220
236
 
221
- const parseChanged = changed.filter((c) => !c.metadataOnly);
222
237
  if (needsHash.length > 0) {
238
+ const parseChangedLen = changed.filter((c) => !c.metadataOnly).length;
223
239
  debug(
224
- `Tier 2: ${parseChanged.length} actually changed, ${changed.length - parseChanged.length} metadata-only`,
240
+ `Tier 2: ${parseChangedLen} actually changed, ${changed.length - parseChangedLen} metadata-only`,
225
241
  );
226
242
  }
227
243
 
@@ -512,61 +528,43 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
512
528
  purgeAndAddReverseDeps(ctx, changePaths, reverseDeps);
513
529
  }
514
530
 
515
- /**
516
- * Read-only pre-flight check for the native orchestrator.
517
- *
518
- * Returns true iff every collected source file has matching mtime+size in
519
- * `file_hashes` and no DB-tracked file has been removed. When true, the
520
- * caller can short-circuit before invoking the native orchestrator —
521
- * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat
522
- * per-call native rebuild overhead seen in CI (#1054).
523
- *
524
- * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content
525
- * hashing is left to the native side: when this returns false the caller
526
- * falls through to the orchestrator, which performs its own complete
527
- * detection and is the source of truth.
528
- *
529
- * Conservatively returns false when CFG or dataflow analysis is enabled
530
- * but the corresponding tables are empty — otherwise the fast-skip would
531
- * silently suppress the pending-analysis pass that the JS path runs via
532
- * `runPendingAnalysis`, and CFG/dataflow data would never populate on
533
- * repos where source files don't change between builds.
534
- *
535
- * Pure read of `db` and the filesystem — never mutates either.
536
- */
537
- export function detectNoChanges(
538
- db: BetterSqlite3Database,
539
- allFiles: string[],
540
- rootDir: string,
541
- opts?: Record<string, unknown>,
542
- ): boolean {
543
- // Diagnostic logging gated by env var — used by the bench gate to surface
544
- // why the fast-skip is not firing on CI runners (#1066). Off by default to
545
- // avoid noise on every regular incremental build.
531
+ /** Diagnostic logger gated by env var, used by both `detectNoChanges` branches. */
532
+ function makeFastSkipLogger(): (reason: string) => void {
546
533
  const diag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
547
- const log = (reason: string): void => {
534
+ return (reason: string): void => {
548
535
  if (diag) info(`[fast-skip] ${reason}`);
549
536
  };
537
+ }
550
538
 
551
- let hasTable = false;
539
+ /**
540
+ * Load the `file_hashes` table for the no-change pre-flight. Returns null
541
+ * if the table is missing or empty (both → caller must fall through).
542
+ */
543
+ function loadFileHashesForPreflight(
544
+ db: BetterSqlite3Database,
545
+ log: (reason: string) => void,
546
+ ): Map<string, FileHashRow> | null {
552
547
  try {
553
548
  db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
554
- hasTable = true;
555
549
  } catch {
556
- /* table missing — first build */
557
- }
558
- if (!hasTable) {
559
550
  log('false: file_hashes table missing');
560
- return false;
551
+ return null;
561
552
  }
562
-
563
553
  const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[];
564
554
  if (rows.length === 0) {
565
555
  log('false: file_hashes table empty');
566
- return false;
556
+ return null;
567
557
  }
568
- const existing = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
558
+ return new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
559
+ }
569
560
 
561
+ /** Returns true iff every file in `allFiles` matches a stored mtime+size record. */
562
+ function allFilesMatchStoredStat(
563
+ existing: Map<string, FileHashRow>,
564
+ allFiles: string[],
565
+ rootDir: string,
566
+ log: (reason: string) => void,
567
+ ): boolean {
570
568
  const currentFiles = new Set<string>();
571
569
  for (const file of allFiles) {
572
570
  currentFiles.add(normalizePath(path.relative(rootDir, file)));
@@ -603,21 +601,66 @@ export function detectNoChanges(
603
601
  return false;
604
602
  }
605
603
  }
604
+ return true;
605
+ }
606
606
 
607
- // Pending-analysis guard: if CFG/dataflow is enabled but the corresponding
608
- // table is empty (analysis newly enabled, or tables wiped between builds),
609
- // fall through so the orchestrator / JS pipeline can run runPendingAnalysis.
610
- // Mirrors the check at the top of runPendingAnalysis (see line ~244).
611
- if (opts) {
612
- if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
613
- log('false: pending-analysis guard — cfg_blocks is empty');
614
- return false;
615
- }
616
- if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
617
- log('false: pending-analysis guard — dataflow is empty');
618
- return false;
619
- }
607
+ /**
608
+ * Pending-analysis guard: if CFG/dataflow is enabled but the corresponding
609
+ * table is empty (analysis newly enabled, or tables wiped between builds),
610
+ * fall through so the orchestrator / JS pipeline can run runPendingAnalysis.
611
+ * Mirrors the check at the top of runPendingAnalysis.
612
+ */
613
+ function passesPendingAnalysisGuard(
614
+ db: BetterSqlite3Database,
615
+ opts: Record<string, unknown> | undefined,
616
+ log: (reason: string) => void,
617
+ ): boolean {
618
+ if (!opts) return true;
619
+ if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
620
+ log('false: pending-analysis guard — cfg_blocks is empty');
621
+ return false;
620
622
  }
623
+ if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
624
+ log('false: pending-analysis guard — dataflow is empty');
625
+ return false;
626
+ }
627
+ return true;
628
+ }
629
+
630
+ /**
631
+ * Read-only pre-flight check for the native orchestrator.
632
+ *
633
+ * Returns true iff every collected source file has matching mtime+size in
634
+ * `file_hashes` and no DB-tracked file has been removed. When true, the
635
+ * caller can short-circuit before invoking the native orchestrator —
636
+ * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat
637
+ * per-call native rebuild overhead seen in CI (#1054).
638
+ *
639
+ * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content
640
+ * hashing is left to the native side: when this returns false the caller
641
+ * falls through to the orchestrator, which performs its own complete
642
+ * detection and is the source of truth.
643
+ *
644
+ * Conservatively returns false when CFG or dataflow analysis is enabled
645
+ * but the corresponding tables are empty — otherwise the fast-skip would
646
+ * silently suppress the pending-analysis pass that the JS path runs via
647
+ * `runPendingAnalysis`, and CFG/dataflow data would never populate on
648
+ * repos where source files don't change between builds.
649
+ *
650
+ * Pure read of `db` and the filesystem — never mutates either.
651
+ */
652
+ export function detectNoChanges(
653
+ db: BetterSqlite3Database,
654
+ allFiles: string[],
655
+ rootDir: string,
656
+ opts?: Record<string, unknown>,
657
+ ): boolean {
658
+ const log = makeFastSkipLogger();
659
+ const existing = loadFileHashesForPreflight(db, log);
660
+ if (!existing) return false;
661
+
662
+ if (!allFilesMatchStoredStat(existing, allFiles, rootDir, log)) return false;
663
+ if (!passesPendingAnalysisGuard(db, opts, log)) return false;
621
664
 
622
665
  log(`true: all checks passed (${allFiles.length} files)`);
623
666
  return true;
@@ -136,82 +136,72 @@ function persistBuildMetadata(
136
136
  }
137
137
  }
138
138
 
139
- /**
140
- * Run advisory checks on full builds: orphaned embeddings, stale embeddings,
141
- * and unused exports. Informational only does not affect correctness.
142
- */
143
- function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNow: Date): void {
144
- // Batched native path: single napi call for all 3 advisory checks
145
- if (ctx.engineName === 'native' && ctx.nativeDb?.runAdvisoryChecks) {
146
- const result = ctx.nativeDb.runAdvisoryChecks(hasEmbeddings);
147
- if (result.orphanedEmbeddings > 0) {
148
- warn(
149
- `${result.orphanedEmbeddings} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
150
- );
151
- }
152
- if (result.embedBuiltAt) {
153
- const embedTime = new Date(result.embedBuiltAt).getTime();
154
- if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
155
- warn(
156
- 'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
157
- );
158
- }
159
- }
160
- if (result.unusedExports > 0) {
161
- warn(
162
- `${result.unusedExports} exported symbol${result.unusedExports > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`,
163
- );
139
+ /** Format the "X exports have zero consumers" warning, with correct plural agreement. */
140
+ function unusedExportsMessage(count: number): string {
141
+ return `${count} exported symbol${count > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`;
142
+ }
143
+
144
+ /** Run all three advisory checks via the batched native FFI. */
145
+ function runAdvisoryChecksNative(
146
+ ctx: PipelineContext,
147
+ hasEmbeddings: boolean,
148
+ buildNow: Date,
149
+ ): void {
150
+ const result = ctx.nativeDb!.runAdvisoryChecks!(hasEmbeddings);
151
+ if (result.orphanedEmbeddings > 0) {
152
+ warn(
153
+ `${result.orphanedEmbeddings} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
154
+ );
155
+ }
156
+ if (result.embedBuiltAt) {
157
+ const embedTime = new Date(result.embedBuiltAt).getTime();
158
+ if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
159
+ warn('Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.');
164
160
  }
165
- return;
166
161
  }
162
+ if (result.unusedExports > 0) {
163
+ warn(unusedExportsMessage(result.unusedExports));
164
+ }
165
+ }
167
166
 
168
- const { db } = ctx;
169
-
170
- // Orphaned embeddings warning
171
- if (hasEmbeddings) {
172
- try {
173
- const orphaned = (
174
- db
175
- .prepare(
176
- 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)',
177
- )
178
- .get() as { c: number }
179
- ).c;
180
- if (orphaned > 0) {
181
- warn(
182
- `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
183
- );
184
- }
185
- } catch {
186
- /* ignore - embeddings table may have been dropped */
167
+ function checkOrphanedEmbeddings(ctx: PipelineContext): void {
168
+ try {
169
+ const orphaned = (
170
+ ctx.db
171
+ .prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)')
172
+ .get() as { c: number }
173
+ ).c;
174
+ if (orphaned > 0) {
175
+ warn(
176
+ `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
177
+ );
187
178
  }
179
+ } catch {
180
+ /* ignore - embeddings table may have been dropped */
188
181
  }
182
+ }
189
183
 
190
- // Stale embeddings warning (built before current graph rebuild)
191
- if (hasEmbeddings) {
192
- try {
193
- const embedBuiltAt = (
194
- db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as
195
- | { value: string }
196
- | undefined
197
- )?.value;
198
- if (embedBuiltAt) {
199
- const embedTime = new Date(embedBuiltAt).getTime();
200
- if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
201
- warn(
202
- 'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
203
- );
204
- }
205
- }
206
- } catch {
207
- /* ignore - embedding_meta table may not exist */
184
+ function checkStaleEmbeddings(ctx: PipelineContext, buildNow: Date): void {
185
+ try {
186
+ const embedBuiltAt = (
187
+ ctx.db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as
188
+ | { value: string }
189
+ | undefined
190
+ )?.value;
191
+ if (!embedBuiltAt) return;
192
+ const embedTime = new Date(embedBuiltAt).getTime();
193
+ if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
194
+ warn('Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.');
208
195
  }
196
+ } catch {
197
+ /* ignore - embedding_meta table may not exist */
209
198
  }
199
+ }
210
200
 
211
- // Unused exports warning
201
+ function checkUnusedExports(ctx: PipelineContext): void {
212
202
  try {
213
203
  const unusedCount = (
214
- db
204
+ ctx.db
215
205
  .prepare(
216
206
  `SELECT COUNT(*) as c FROM nodes
217
207
  WHERE exported = 1 AND kind != 'file'
@@ -224,16 +214,28 @@ function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNo
224
214
  )
225
215
  .get() as { c: number }
226
216
  ).c;
227
- if (unusedCount > 0) {
228
- warn(
229
- `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`,
230
- );
231
- }
217
+ if (unusedCount > 0) warn(unusedExportsMessage(unusedCount));
232
218
  } catch {
233
219
  /* exported column may not exist on older DBs */
234
220
  }
235
221
  }
236
222
 
223
+ /**
224
+ * Run advisory checks on full builds: orphaned embeddings, stale embeddings,
225
+ * and unused exports. Informational only — does not affect correctness.
226
+ */
227
+ function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNow: Date): void {
228
+ if (ctx.engineName === 'native' && ctx.nativeDb?.runAdvisoryChecks) {
229
+ runAdvisoryChecksNative(ctx, hasEmbeddings, buildNow);
230
+ return;
231
+ }
232
+ if (hasEmbeddings) {
233
+ checkOrphanedEmbeddings(ctx);
234
+ checkStaleEmbeddings(ctx, buildNow);
235
+ }
236
+ checkUnusedExports(ctx);
237
+ }
238
+
237
239
  export async function finalize(ctx: PipelineContext): Promise<void> {
238
240
  const { allSymbols, rootDir, isFullBuild, hasEmbeddings, opts } = ctx;
239
241