@optave/codegraph 3.1.2 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. package/README.md +19 -21
  2. package/package.json +10 -7
  3. package/src/analysis/context.js +408 -0
  4. package/src/analysis/dependencies.js +341 -0
  5. package/src/analysis/exports.js +130 -0
  6. package/src/analysis/impact.js +463 -0
  7. package/src/analysis/module-map.js +322 -0
  8. package/src/analysis/roles.js +45 -0
  9. package/src/analysis/symbol-lookup.js +232 -0
  10. package/src/ast-analysis/shared.js +5 -4
  11. package/src/batch.js +2 -1
  12. package/src/builder/context.js +85 -0
  13. package/src/builder/helpers.js +218 -0
  14. package/src/builder/incremental.js +178 -0
  15. package/src/builder/pipeline.js +130 -0
  16. package/src/builder/stages/build-edges.js +297 -0
  17. package/src/builder/stages/build-structure.js +113 -0
  18. package/src/builder/stages/collect-files.js +44 -0
  19. package/src/builder/stages/detect-changes.js +413 -0
  20. package/src/builder/stages/finalize.js +139 -0
  21. package/src/builder/stages/insert-nodes.js +195 -0
  22. package/src/builder/stages/parse-files.js +28 -0
  23. package/src/builder/stages/resolve-imports.js +143 -0
  24. package/src/builder/stages/run-analyses.js +44 -0
  25. package/src/builder.js +10 -1472
  26. package/src/cfg.js +1 -2
  27. package/src/cli/commands/ast.js +26 -0
  28. package/src/cli/commands/audit.js +46 -0
  29. package/src/cli/commands/batch.js +68 -0
  30. package/src/cli/commands/branch-compare.js +21 -0
  31. package/src/cli/commands/build.js +26 -0
  32. package/src/cli/commands/cfg.js +30 -0
  33. package/src/cli/commands/check.js +79 -0
  34. package/src/cli/commands/children.js +31 -0
  35. package/src/cli/commands/co-change.js +65 -0
  36. package/src/cli/commands/communities.js +23 -0
  37. package/src/cli/commands/complexity.js +45 -0
  38. package/src/cli/commands/context.js +34 -0
  39. package/src/cli/commands/cycles.js +28 -0
  40. package/src/cli/commands/dataflow.js +32 -0
  41. package/src/cli/commands/deps.js +16 -0
  42. package/src/cli/commands/diff-impact.js +30 -0
  43. package/src/cli/commands/embed.js +30 -0
  44. package/src/cli/commands/export.js +75 -0
  45. package/src/cli/commands/exports.js +18 -0
  46. package/src/cli/commands/flow.js +36 -0
  47. package/src/cli/commands/fn-impact.js +30 -0
  48. package/src/cli/commands/impact.js +16 -0
  49. package/src/cli/commands/info.js +76 -0
  50. package/src/cli/commands/map.js +19 -0
  51. package/src/cli/commands/mcp.js +18 -0
  52. package/src/cli/commands/models.js +19 -0
  53. package/src/cli/commands/owners.js +25 -0
  54. package/src/cli/commands/path.js +36 -0
  55. package/src/cli/commands/plot.js +80 -0
  56. package/src/cli/commands/query.js +49 -0
  57. package/src/cli/commands/registry.js +100 -0
  58. package/src/cli/commands/roles.js +34 -0
  59. package/src/cli/commands/search.js +42 -0
  60. package/src/cli/commands/sequence.js +32 -0
  61. package/src/cli/commands/snapshot.js +61 -0
  62. package/src/cli/commands/stats.js +15 -0
  63. package/src/cli/commands/structure.js +32 -0
  64. package/src/cli/commands/triage.js +78 -0
  65. package/src/cli/commands/watch.js +12 -0
  66. package/src/cli/commands/where.js +24 -0
  67. package/src/cli/index.js +118 -0
  68. package/src/cli/shared/options.js +39 -0
  69. package/src/cli/shared/output.js +1 -0
  70. package/src/cli.js +11 -1514
  71. package/src/commands/check.js +5 -5
  72. package/src/commands/manifesto.js +3 -3
  73. package/src/commands/structure.js +1 -1
  74. package/src/communities.js +15 -87
  75. package/src/complexity.js +1 -1
  76. package/src/cycles.js +30 -85
  77. package/src/dataflow.js +1 -2
  78. package/src/db/connection.js +4 -4
  79. package/src/db/migrations.js +41 -0
  80. package/src/db/query-builder.js +6 -5
  81. package/src/db/repository/base.js +201 -0
  82. package/src/db/repository/cached-stmt.js +19 -0
  83. package/src/db/repository/cfg.js +27 -38
  84. package/src/db/repository/cochange.js +16 -3
  85. package/src/db/repository/complexity.js +11 -6
  86. package/src/db/repository/dataflow.js +6 -1
  87. package/src/db/repository/edges.js +120 -98
  88. package/src/db/repository/embeddings.js +14 -3
  89. package/src/db/repository/graph-read.js +32 -9
  90. package/src/db/repository/in-memory-repository.js +584 -0
  91. package/src/db/repository/index.js +6 -1
  92. package/src/db/repository/nodes.js +110 -40
  93. package/src/db/repository/sqlite-repository.js +219 -0
  94. package/src/db.js +5 -0
  95. package/src/embeddings/generator.js +163 -0
  96. package/src/embeddings/index.js +13 -0
  97. package/src/embeddings/models.js +218 -0
  98. package/src/embeddings/search/cli-formatter.js +151 -0
  99. package/src/embeddings/search/filters.js +46 -0
  100. package/src/embeddings/search/hybrid.js +121 -0
  101. package/src/embeddings/search/keyword.js +68 -0
  102. package/src/embeddings/search/prepare.js +66 -0
  103. package/src/embeddings/search/semantic.js +145 -0
  104. package/src/embeddings/stores/fts5.js +27 -0
  105. package/src/embeddings/stores/sqlite-blob.js +24 -0
  106. package/src/embeddings/strategies/source.js +14 -0
  107. package/src/embeddings/strategies/structured.js +43 -0
  108. package/src/embeddings/strategies/text-utils.js +43 -0
  109. package/src/errors.js +78 -0
  110. package/src/export.js +217 -520
  111. package/src/extractors/csharp.js +10 -2
  112. package/src/extractors/go.js +3 -1
  113. package/src/extractors/helpers.js +71 -0
  114. package/src/extractors/java.js +9 -2
  115. package/src/extractors/javascript.js +38 -1
  116. package/src/extractors/php.js +3 -1
  117. package/src/extractors/python.js +14 -3
  118. package/src/extractors/rust.js +3 -1
  119. package/src/graph/algorithms/bfs.js +49 -0
  120. package/src/graph/algorithms/centrality.js +16 -0
  121. package/src/graph/algorithms/index.js +5 -0
  122. package/src/graph/algorithms/louvain.js +26 -0
  123. package/src/graph/algorithms/shortest-path.js +41 -0
  124. package/src/graph/algorithms/tarjan.js +49 -0
  125. package/src/graph/builders/dependency.js +91 -0
  126. package/src/graph/builders/index.js +3 -0
  127. package/src/graph/builders/structure.js +40 -0
  128. package/src/graph/builders/temporal.js +33 -0
  129. package/src/graph/classifiers/index.js +2 -0
  130. package/src/graph/classifiers/risk.js +85 -0
  131. package/src/graph/classifiers/roles.js +64 -0
  132. package/src/graph/index.js +13 -0
  133. package/src/graph/model.js +230 -0
  134. package/src/index.js +33 -204
  135. package/src/infrastructure/result-formatter.js +2 -21
  136. package/src/mcp/index.js +2 -0
  137. package/src/mcp/middleware.js +26 -0
  138. package/src/mcp/server.js +128 -0
  139. package/src/mcp/tool-registry.js +801 -0
  140. package/src/mcp/tools/ast-query.js +14 -0
  141. package/src/mcp/tools/audit.js +21 -0
  142. package/src/mcp/tools/batch-query.js +11 -0
  143. package/src/mcp/tools/branch-compare.js +10 -0
  144. package/src/mcp/tools/cfg.js +21 -0
  145. package/src/mcp/tools/check.js +43 -0
  146. package/src/mcp/tools/co-changes.js +20 -0
  147. package/src/mcp/tools/code-owners.js +12 -0
  148. package/src/mcp/tools/communities.js +15 -0
  149. package/src/mcp/tools/complexity.js +18 -0
  150. package/src/mcp/tools/context.js +17 -0
  151. package/src/mcp/tools/dataflow.js +26 -0
  152. package/src/mcp/tools/diff-impact.js +24 -0
  153. package/src/mcp/tools/execution-flow.js +26 -0
  154. package/src/mcp/tools/export-graph.js +57 -0
  155. package/src/mcp/tools/file-deps.js +12 -0
  156. package/src/mcp/tools/file-exports.js +13 -0
  157. package/src/mcp/tools/find-cycles.js +15 -0
  158. package/src/mcp/tools/fn-impact.js +15 -0
  159. package/src/mcp/tools/impact-analysis.js +12 -0
  160. package/src/mcp/tools/index.js +71 -0
  161. package/src/mcp/tools/list-functions.js +14 -0
  162. package/src/mcp/tools/list-repos.js +11 -0
  163. package/src/mcp/tools/module-map.js +6 -0
  164. package/src/mcp/tools/node-roles.js +14 -0
  165. package/src/mcp/tools/path.js +12 -0
  166. package/src/mcp/tools/query.js +30 -0
  167. package/src/mcp/tools/semantic-search.js +65 -0
  168. package/src/mcp/tools/sequence.js +17 -0
  169. package/src/mcp/tools/structure.js +15 -0
  170. package/src/mcp/tools/symbol-children.js +14 -0
  171. package/src/mcp/tools/triage.js +35 -0
  172. package/src/mcp/tools/where.js +13 -0
  173. package/src/mcp.js +2 -1470
  174. package/src/native.js +34 -10
  175. package/src/parser.js +53 -2
  176. package/src/presentation/colors.js +44 -0
  177. package/src/presentation/export.js +444 -0
  178. package/src/presentation/result-formatter.js +21 -0
  179. package/src/presentation/sequence-renderer.js +43 -0
  180. package/src/presentation/table.js +47 -0
  181. package/src/presentation/viewer.js +634 -0
  182. package/src/queries.js +35 -2276
  183. package/src/resolve.js +1 -1
  184. package/src/sequence.js +2 -38
  185. package/src/shared/file-utils.js +153 -0
  186. package/src/shared/generators.js +125 -0
  187. package/src/shared/hierarchy.js +27 -0
  188. package/src/shared/normalize.js +59 -0
  189. package/src/snapshot.js +6 -5
  190. package/src/structure.js +15 -40
  191. package/src/triage.js +20 -72
  192. package/src/viewer.js +35 -656
  193. package/src/watcher.js +8 -148
  194. package/src/embedder.js +0 -1097
package/src/builder.js CHANGED
@@ -1,1473 +1,11 @@
1
- import { createHash } from 'node:crypto';
2
- import fs from 'node:fs';
3
- import path from 'node:path';
4
- import { performance } from 'node:perf_hooks';
5
- import { loadConfig } from './config.js';
6
- import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
7
- import {
8
- bulkNodeIdsByFile,
9
- closeDb,
10
- getBuildMeta,
11
- getNodeId,
12
- initSchema,
13
- MIGRATIONS,
14
- openDb,
15
- purgeFilesData,
16
- setBuildMeta,
17
- } from './db.js';
18
- import { readJournal, writeJournalHeader } from './journal.js';
19
- import { debug, info, warn } from './logger.js';
20
- import { loadNative } from './native.js';
21
- import { getActiveEngine, parseFilesAuto } from './parser.js';
22
- import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
23
-
1
+ // Barrel re-export — keeps all existing `import { ... } from './builder.js'` working.
2
+ // See src/builder/ for the pipeline implementation (ROADMAP 3.9).
3
+
4
+ export {
5
+ collectFiles,
6
+ loadPathAliases,
7
+ purgeFilesFromGraph,
8
+ readFileSafe,
9
+ } from './builder/helpers.js';
10
+ export { buildGraph } from './builder/pipeline.js';
24
11
  export { resolveImportPath } from './resolve.js';
25
-
26
- const __builderDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1'));
27
- const CODEGRAPH_VERSION = JSON.parse(
28
- fs.readFileSync(path.join(__builderDir, '..', 'package.json'), 'utf-8'),
29
- ).version;
30
-
31
- const BUILTIN_RECEIVERS = new Set([
32
- 'console',
33
- 'Math',
34
- 'JSON',
35
- 'Object',
36
- 'Array',
37
- 'String',
38
- 'Number',
39
- 'Boolean',
40
- 'Date',
41
- 'RegExp',
42
- 'Map',
43
- 'Set',
44
- 'WeakMap',
45
- 'WeakSet',
46
- 'Promise',
47
- 'Symbol',
48
- 'Error',
49
- 'TypeError',
50
- 'RangeError',
51
- 'Proxy',
52
- 'Reflect',
53
- 'Intl',
54
- 'globalThis',
55
- 'window',
56
- 'document',
57
- 'process',
58
- 'Buffer',
59
- 'require',
60
- ]);
61
-
62
- export function collectFiles(
63
- dir,
64
- files = [],
65
- config = {},
66
- directories = null,
67
- _visited = new Set(),
68
- ) {
69
- const trackDirs = directories !== null;
70
-
71
- // Resolve real path to detect symlink loops
72
- let realDir;
73
- try {
74
- realDir = fs.realpathSync(dir);
75
- } catch {
76
- return trackDirs ? { files, directories } : files;
77
- }
78
- if (_visited.has(realDir)) {
79
- warn(`Symlink loop detected, skipping: ${dir}`);
80
- return trackDirs ? { files, directories } : files;
81
- }
82
- _visited.add(realDir);
83
-
84
- let entries;
85
- try {
86
- entries = fs.readdirSync(dir, { withFileTypes: true });
87
- } catch (err) {
88
- warn(`Cannot read directory ${dir}: ${err.message}`);
89
- return trackDirs ? { files, directories } : files;
90
- }
91
-
92
- // Merge config ignoreDirs with defaults
93
- const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
94
-
95
- let hasFiles = false;
96
- for (const entry of entries) {
97
- if (entry.name.startsWith('.') && entry.name !== '.') {
98
- if (IGNORE_DIRS.has(entry.name)) continue;
99
- if (entry.isDirectory()) continue;
100
- }
101
- if (IGNORE_DIRS.has(entry.name)) continue;
102
- if (extraIgnore?.has(entry.name)) continue;
103
-
104
- const full = path.join(dir, entry.name);
105
- if (entry.isDirectory()) {
106
- collectFiles(full, files, config, directories, _visited);
107
- } else if (EXTENSIONS.has(path.extname(entry.name))) {
108
- files.push(full);
109
- hasFiles = true;
110
- }
111
- }
112
- if (trackDirs && hasFiles) {
113
- directories.add(dir);
114
- }
115
- return trackDirs ? { files, directories } : files;
116
- }
117
-
118
- export function loadPathAliases(rootDir) {
119
- const aliases = { baseUrl: null, paths: {} };
120
- for (const configName of ['tsconfig.json', 'jsconfig.json']) {
121
- const configPath = path.join(rootDir, configName);
122
- if (!fs.existsSync(configPath)) continue;
123
- try {
124
- const raw = fs
125
- .readFileSync(configPath, 'utf-8')
126
- .replace(/\/\/.*$/gm, '')
127
- .replace(/\/\*[\s\S]*?\*\//g, '')
128
- .replace(/,\s*([\]}])/g, '$1');
129
- const config = JSON.parse(raw);
130
- const opts = config.compilerOptions || {};
131
- if (opts.baseUrl) aliases.baseUrl = path.resolve(rootDir, opts.baseUrl);
132
- if (opts.paths) {
133
- for (const [pattern, targets] of Object.entries(opts.paths)) {
134
- aliases.paths[pattern] = targets.map((t) => path.resolve(aliases.baseUrl || rootDir, t));
135
- }
136
- }
137
- break;
138
- } catch (err) {
139
- warn(`Failed to parse ${configName}: ${err.message}`);
140
- }
141
- }
142
- return aliases;
143
- }
144
-
145
- /**
146
- * Compute MD5 hash of file contents for incremental builds.
147
- */
148
- function fileHash(content) {
149
- return createHash('md5').update(content).digest('hex');
150
- }
151
-
152
- /**
153
- * Stat a file, returning { mtimeMs, size } or null on error.
154
- */
155
- function fileStat(filePath) {
156
- try {
157
- const s = fs.statSync(filePath);
158
- return { mtimeMs: s.mtimeMs, size: s.size };
159
- } catch {
160
- return null;
161
- }
162
- }
163
-
164
- /**
165
- * Read a file with retry on transient errors (EBUSY/EACCES/EPERM).
166
- * Editors performing non-atomic saves can cause these during mid-write.
167
- */
168
- const TRANSIENT_CODES = new Set(['EBUSY', 'EACCES', 'EPERM']);
169
- const RETRY_DELAY_MS = 50;
170
-
171
- export function readFileSafe(filePath, retries = 2) {
172
- for (let attempt = 0; ; attempt++) {
173
- try {
174
- return fs.readFileSync(filePath, 'utf-8');
175
- } catch (err) {
176
- if (attempt < retries && TRANSIENT_CODES.has(err.code)) {
177
- const end = Date.now() + RETRY_DELAY_MS;
178
- while (Date.now() < end) {}
179
- continue;
180
- }
181
- throw err;
182
- }
183
- }
184
- }
185
-
186
- /**
187
- * Determine which files have changed since last build.
188
- * Three-tier cascade:
189
- * Tier 0 — Journal: O(changed) when watcher was running
190
- * Tier 1 — mtime+size: O(n) stats, O(changed) reads
191
- * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1)
192
- */
193
- function getChangedFiles(db, allFiles, rootDir) {
194
- // Check if file_hashes table exists
195
- let hasTable = false;
196
- try {
197
- db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
198
- hasTable = true;
199
- } catch {
200
- /* table doesn't exist */
201
- }
202
-
203
- if (!hasTable) {
204
- return {
205
- changed: allFiles.map((f) => ({ file: f })),
206
- removed: [],
207
- isFullBuild: true,
208
- };
209
- }
210
-
211
- const existing = new Map(
212
- db
213
- .prepare('SELECT file, hash, mtime, size FROM file_hashes')
214
- .all()
215
- .map((r) => [r.file, r]),
216
- );
217
-
218
- // Build set of current files for removal detection
219
- const currentFiles = new Set();
220
- for (const file of allFiles) {
221
- currentFiles.add(normalizePath(path.relative(rootDir, file)));
222
- }
223
-
224
- const removed = [];
225
- for (const existingFile of existing.keys()) {
226
- if (!currentFiles.has(existingFile)) {
227
- removed.push(existingFile);
228
- }
229
- }
230
-
231
- // ── Tier 0: Journal ──────────────────────────────────────────────
232
- const journal = readJournal(rootDir);
233
- if (journal.valid) {
234
- // Validate journal timestamp against DB — journal should be from after the last build
235
- const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get();
236
- const latestDbMtime = dbMtimes?.latest || 0;
237
-
238
- // Empty journal = no watcher was running, fall to Tier 1 for safety
239
- const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0;
240
-
241
- if (hasJournalEntries && journal.timestamp >= latestDbMtime) {
242
- debug(
243
- `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`,
244
- );
245
- const changed = [];
246
-
247
- for (const relPath of journal.changed) {
248
- const absPath = path.join(rootDir, relPath);
249
- const stat = fileStat(absPath);
250
- if (!stat) continue;
251
-
252
- let content;
253
- try {
254
- content = readFileSafe(absPath);
255
- } catch {
256
- continue;
257
- }
258
- const hash = fileHash(content);
259
- const record = existing.get(relPath);
260
- if (!record || record.hash !== hash) {
261
- changed.push({ file: absPath, content, hash, relPath, stat });
262
- }
263
- }
264
-
265
- // Merge journal removals with filesystem removals (dedup)
266
- const removedSet = new Set(removed);
267
- for (const relPath of journal.removed) {
268
- if (existing.has(relPath)) removedSet.add(relPath);
269
- }
270
-
271
- return { changed, removed: [...removedSet], isFullBuild: false };
272
- }
273
- debug(
274
- `Tier 0: skipped (${hasJournalEntries ? 'timestamp stale' : 'no entries'}), falling to Tier 1`,
275
- );
276
- }
277
-
278
- // ── Tier 1: mtime+size fast-path ─────────────────────────────────
279
- const needsHash = []; // Files that failed mtime+size check
280
- const skipped = []; // Files that passed mtime+size check
281
-
282
- for (const file of allFiles) {
283
- const relPath = normalizePath(path.relative(rootDir, file));
284
- const record = existing.get(relPath);
285
-
286
- if (!record) {
287
- // New file — needs full read+hash
288
- needsHash.push({ file, relPath });
289
- continue;
290
- }
291
-
292
- const stat = fileStat(file);
293
- if (!stat) continue;
294
-
295
- const storedMtime = record.mtime || 0;
296
- const storedSize = record.size || 0;
297
-
298
- // size > 0 guard: pre-v4 rows have size=0, always fall through to hash
299
- if (storedSize > 0 && Math.floor(stat.mtimeMs) === storedMtime && stat.size === storedSize) {
300
- skipped.push(relPath);
301
- continue;
302
- }
303
-
304
- needsHash.push({ file, relPath, stat });
305
- }
306
-
307
- if (needsHash.length > 0) {
308
- debug(`Tier 1: ${skipped.length} skipped by mtime+size, ${needsHash.length} need hash check`);
309
- }
310
-
311
- // ── Tier 2: Hash comparison ──────────────────────────────────────
312
- const changed = [];
313
-
314
- for (const item of needsHash) {
315
- let content;
316
- try {
317
- content = readFileSafe(item.file);
318
- } catch {
319
- continue;
320
- }
321
- const hash = fileHash(content);
322
- const stat = item.stat || fileStat(item.file);
323
- const record = existing.get(item.relPath);
324
-
325
- if (!record || record.hash !== hash) {
326
- changed.push({ file: item.file, content, hash, relPath: item.relPath, stat });
327
- } else if (stat) {
328
- // Hash matches but mtime/size was stale — self-heal by updating stored metadata
329
- changed.push({
330
- file: item.file,
331
- content,
332
- hash,
333
- relPath: item.relPath,
334
- stat,
335
- metadataOnly: true,
336
- });
337
- }
338
- }
339
-
340
- // Filter out metadata-only updates from the "changed" list for parsing,
341
- // but keep them so the caller can update file_hashes
342
- const parseChanged = changed.filter((c) => !c.metadataOnly);
343
- if (needsHash.length > 0) {
344
- debug(
345
- `Tier 2: ${parseChanged.length} actually changed, ${changed.length - parseChanged.length} metadata-only`,
346
- );
347
- }
348
-
349
- return { changed, removed, isFullBuild: false };
350
- }
351
-
352
- /**
353
- * Purge all graph data for the specified files.
354
- * Deletes: embeddings → edges (in+out) → node_metrics → function_complexity → dataflow → nodes.
355
- * Handles missing tables gracefully (embeddings, complexity, dataflow may not exist in older DBs).
356
- *
357
- * @param {import('better-sqlite3').Database} db - Open writable database
358
- * @param {string[]} files - Relative file paths to purge
359
- * @param {object} [options]
360
- * @param {boolean} [options.purgeHashes=true] - Also delete file_hashes entries
361
- */
362
- export function purgeFilesFromGraph(db, files, options = {}) {
363
- purgeFilesData(db, files, options);
364
- }
365
-
366
- export async function buildGraph(rootDir, opts = {}) {
367
- rootDir = path.resolve(rootDir);
368
- const dbPath = path.join(rootDir, '.codegraph', 'graph.db');
369
- const db = openDb(dbPath);
370
- initSchema(db);
371
-
372
- const config = loadConfig(rootDir);
373
- const incremental =
374
- opts.incremental !== false && config.build && config.build.incremental !== false;
375
-
376
- // Engine selection: 'native', 'wasm', or 'auto' (default)
377
- const engineOpts = {
378
- engine: opts.engine || 'auto',
379
- dataflow: opts.dataflow !== false,
380
- ast: opts.ast !== false,
381
- };
382
- const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts);
383
- info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
384
-
385
- // Check for engine/schema mismatch — auto-promote to full rebuild
386
- // Only trigger on engine change or schema version change (not every patch/minor bump)
387
- const CURRENT_SCHEMA_VERSION = MIGRATIONS[MIGRATIONS.length - 1].version;
388
- let forceFullRebuild = false;
389
- if (incremental) {
390
- const prevEngine = getBuildMeta(db, 'engine');
391
- if (prevEngine && prevEngine !== engineName) {
392
- info(`Engine changed (${prevEngine} → ${engineName}), promoting to full rebuild.`);
393
- forceFullRebuild = true;
394
- }
395
- const prevSchema = getBuildMeta(db, 'schema_version');
396
- if (prevSchema && Number(prevSchema) !== CURRENT_SCHEMA_VERSION) {
397
- info(
398
- `Schema version changed (${prevSchema} → ${CURRENT_SCHEMA_VERSION}), promoting to full rebuild.`,
399
- );
400
- forceFullRebuild = true;
401
- }
402
- }
403
-
404
- const aliases = loadPathAliases(rootDir);
405
- // Merge config aliases
406
- if (config.aliases) {
407
- for (const [key, value] of Object.entries(config.aliases)) {
408
- const pattern = key.endsWith('/') ? `${key}*` : key;
409
- const target = path.resolve(rootDir, value);
410
- aliases.paths[pattern] = [target.endsWith('/') ? `${target}*` : `${target}/*`];
411
- }
412
- }
413
-
414
- if (aliases.baseUrl || Object.keys(aliases.paths).length > 0) {
415
- info(
416
- `Loaded path aliases: baseUrl=${aliases.baseUrl || 'none'}, ${Object.keys(aliases.paths).length} path mappings`,
417
- );
418
- }
419
-
420
- // ── Scoped rebuild: rebuild only specified files ──────────────────
421
- let files, discoveredDirs, parseChanges, metadataUpdates, removed, isFullBuild;
422
-
423
- if (opts.scope) {
424
- const scopedFiles = opts.scope.map((f) => normalizePath(f));
425
- const existing = [];
426
- const missing = [];
427
- for (const rel of scopedFiles) {
428
- const abs = path.join(rootDir, rel);
429
- if (fs.existsSync(abs)) {
430
- existing.push({ file: abs, relPath: rel });
431
- } else {
432
- missing.push(rel);
433
- }
434
- }
435
- files = existing.map((e) => e.file);
436
- // Derive discoveredDirs from scoped files' parent directories
437
- discoveredDirs = new Set(existing.map((e) => path.dirname(e.file)));
438
- parseChanges = existing;
439
- metadataUpdates = [];
440
- removed = missing;
441
- isFullBuild = false;
442
- info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`);
443
- } else {
444
- const collected = collectFiles(rootDir, [], config, new Set());
445
- files = collected.files;
446
- discoveredDirs = collected.directories;
447
- info(`Found ${files.length} files to parse`);
448
-
449
- // Check for incremental build
450
- const increResult =
451
- incremental && !forceFullRebuild
452
- ? getChangedFiles(db, files, rootDir)
453
- : { changed: files.map((f) => ({ file: f })), removed: [], isFullBuild: true };
454
- removed = increResult.removed;
455
- isFullBuild = increResult.isFullBuild;
456
-
457
- // Separate metadata-only updates (mtime/size self-heal) from real changes
458
- parseChanges = increResult.changed.filter((c) => !c.metadataOnly);
459
- metadataUpdates = increResult.changed.filter((c) => c.metadataOnly);
460
- }
461
-
462
- if (!isFullBuild && parseChanges.length === 0 && removed.length === 0) {
463
- // Check if default analyses were never computed (e.g. legacy DB)
464
- const needsCfg =
465
- opts.cfg !== false &&
466
- (() => {
467
- try {
468
- return db.prepare('SELECT COUNT(*) as c FROM cfg_blocks').get().c === 0;
469
- } catch {
470
- return true;
471
- }
472
- })();
473
- const needsDataflow =
474
- opts.dataflow !== false &&
475
- (() => {
476
- try {
477
- return db.prepare('SELECT COUNT(*) as c FROM dataflow').get().c === 0;
478
- } catch {
479
- return true;
480
- }
481
- })();
482
-
483
- if (needsCfg || needsDataflow) {
484
- info('No file changes. Running pending analysis pass...');
485
- const analysisOpts = {
486
- ...engineOpts,
487
- dataflow: needsDataflow && opts.dataflow !== false,
488
- };
489
- const analysisSymbols = await parseFilesAuto(files, rootDir, analysisOpts);
490
- if (needsCfg) {
491
- const { buildCFGData } = await import('./cfg.js');
492
- await buildCFGData(db, analysisSymbols, rootDir, engineOpts);
493
- }
494
- if (needsDataflow) {
495
- const { buildDataflowEdges } = await import('./dataflow.js');
496
- await buildDataflowEdges(db, analysisSymbols, rootDir, engineOpts);
497
- }
498
- closeDb(db);
499
- writeJournalHeader(rootDir, Date.now());
500
- return;
501
- }
502
-
503
- // Still update metadata for self-healing even when no real changes
504
- if (metadataUpdates.length > 0) {
505
- try {
506
- const healHash = db.prepare(
507
- 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
508
- );
509
- const healTx = db.transaction(() => {
510
- for (const item of metadataUpdates) {
511
- const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0;
512
- const size = item.stat ? item.stat.size : 0;
513
- healHash.run(item.relPath, item.hash, mtime, size);
514
- }
515
- });
516
- healTx();
517
- debug(`Self-healed mtime/size for ${metadataUpdates.length} files`);
518
- } catch {
519
- /* ignore heal errors */
520
- }
521
- }
522
- info('No changes detected. Graph is up to date.');
523
- closeDb(db);
524
- writeJournalHeader(rootDir, Date.now());
525
- return;
526
- }
527
-
528
- // Check if embeddings table exists (created by `embed`, not by initSchema)
529
- let hasEmbeddings = false;
530
- try {
531
- db.prepare('SELECT 1 FROM embeddings LIMIT 1').get();
532
- hasEmbeddings = true;
533
- } catch {
534
- /* table doesn't exist */
535
- }
536
-
537
- if (isFullBuild) {
538
- const deletions =
539
- 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;';
540
- db.exec(
541
- hasEmbeddings
542
- ? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;`
543
- : deletions,
544
- );
545
- } else {
546
- // ── Reverse-dependency cascade (issue #116) ─────────────────────
547
- // Find files with edges pointing TO changed/removed files.
548
- // Their nodes stay intact (preserving IDs), but outgoing edges are
549
- // deleted so they can be rebuilt during the edge-building pass.
550
- // When opts.noReverseDeps is true (e.g. agent rollback to same version),
551
- // skip this cascade — the agent knows exports didn't change.
552
- const reverseDeps = new Set();
553
- if (!opts.noReverseDeps) {
554
- const changedRelPaths = new Set();
555
- for (const item of parseChanges) {
556
- changedRelPaths.add(item.relPath || normalizePath(path.relative(rootDir, item.file)));
557
- }
558
- for (const relPath of removed) {
559
- changedRelPaths.add(relPath);
560
- }
561
-
562
- if (changedRelPaths.size > 0) {
563
- const findReverseDeps = db.prepare(`
564
- SELECT DISTINCT n_src.file FROM edges e
565
- JOIN nodes n_src ON e.source_id = n_src.id
566
- JOIN nodes n_tgt ON e.target_id = n_tgt.id
567
- WHERE n_tgt.file = ? AND n_src.file != n_tgt.file AND n_src.kind != 'directory'
568
- `);
569
- for (const relPath of changedRelPaths) {
570
- for (const row of findReverseDeps.all(relPath)) {
571
- if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) {
572
- // Verify the file still exists on disk
573
- const absPath = path.join(rootDir, row.file);
574
- if (fs.existsSync(absPath)) {
575
- reverseDeps.add(row.file);
576
- }
577
- }
578
- }
579
- }
580
- }
581
- }
582
-
583
- info(
584
- `Incremental: ${parseChanges.length} changed, ${removed.length} removed${reverseDeps.size > 0 ? `, ${reverseDeps.size} reverse-deps` : ''}`,
585
- );
586
- if (parseChanges.length > 0)
587
- debug(`Changed files: ${parseChanges.map((c) => c.relPath).join(', ')}`);
588
- if (removed.length > 0) debug(`Removed files: ${removed.join(', ')}`);
589
- // Remove embeddings/metrics/edges/nodes for changed and removed files
590
- const changePaths = parseChanges.map(
591
- (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)),
592
- );
593
- purgeFilesFromGraph(db, [...removed, ...changePaths], { purgeHashes: false });
594
-
595
- // Process reverse deps: delete only outgoing edges (nodes/IDs preserved)
596
- // then add them to the parse list so they participate in edge building
597
- const deleteOutgoingEdgesForFile = db.prepare(
598
- 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
599
- );
600
- for (const relPath of reverseDeps) {
601
- deleteOutgoingEdgesForFile.run(relPath);
602
- }
603
- for (const relPath of reverseDeps) {
604
- const absPath = path.join(rootDir, relPath);
605
- parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true });
606
- }
607
- }
608
-
609
- const getNodeIdStmt = {
610
- get: (name, kind, file, line) => {
611
- const id = getNodeId(db, name, kind, file, line);
612
- return id != null ? { id } : undefined;
613
- },
614
- };
615
-
616
- // Batch INSERT helpers — multi-value INSERTs reduce SQLite round-trips
617
- const BATCH_CHUNK = 200;
618
- function batchInsertNodes(rows) {
619
- if (!rows.length) return;
620
- const ph = '(?,?,?,?,?,?)';
621
- for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
622
- const chunk = rows.slice(i, i + BATCH_CHUNK);
623
- const vals = [];
624
- for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4], r[5]);
625
- db.prepare(
626
- 'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id) VALUES ' +
627
- chunk.map(() => ph).join(','),
628
- ).run(...vals);
629
- }
630
- }
631
- function batchInsertEdges(rows) {
632
- if (!rows.length) return;
633
- const ph = '(?,?,?,?,?)';
634
- for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
635
- const chunk = rows.slice(i, i + BATCH_CHUNK);
636
- const vals = [];
637
- for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4]);
638
- db.prepare(
639
- 'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ' +
640
- chunk.map(() => ph).join(','),
641
- ).run(...vals);
642
- }
643
- }
644
-
645
- // Prepare hash upsert (with size column from migration v4)
646
- let upsertHash;
647
- try {
648
- upsertHash = db.prepare(
649
- 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
650
- );
651
- } catch {
652
- upsertHash = null;
653
- }
654
-
655
- // First pass: parse files and insert nodes
656
- const fileSymbols = new Map();
657
-
658
- // For incremental builds, also load existing symbols that aren't changing
659
- if (!isFullBuild) {
660
- // We need to reload ALL file symbols for edge building
661
- const _allExistingFiles = db
662
- .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
663
- .all();
664
- // We'll fill these in during the parse pass + edge pass
665
- }
666
-
667
- const filesToParse = isFullBuild ? files.map((f) => ({ file: f })) : parseChanges;
668
-
669
- // ── Phase timing ────────────────────────────────────────────────────
670
- const _t = {};
671
-
672
- // ── Unified parse via parseFilesAuto ───────────────────────────────
673
- const filePaths = filesToParse.map((item) => item.file);
674
- _t.parse0 = performance.now();
675
- const allSymbols = await parseFilesAuto(filePaths, rootDir, engineOpts);
676
- _t.parseMs = performance.now() - _t.parse0;
677
-
678
- // Build a lookup from incremental data (changed items may carry pre-computed hashes + stats)
679
- const precomputedData = new Map();
680
- for (const item of filesToParse) {
681
- if (item.relPath) {
682
- precomputedData.set(item.relPath, item);
683
- }
684
- }
685
-
686
- // Bulk-fetch all node IDs for a file in one query (replaces per-node getNodeId calls)
687
- const bulkGetNodeIds = { all: (file) => bulkNodeIdsByFile(db, file) };
688
-
689
- const insertAll = db.transaction(() => {
690
- // Phase 1: Batch insert all file nodes + definitions + exports
691
- const phase1Rows = [];
692
- for (const [relPath, symbols] of allSymbols) {
693
- fileSymbols.set(relPath, symbols);
694
- phase1Rows.push([relPath, 'file', relPath, 0, null, null]);
695
- for (const def of symbols.definitions) {
696
- phase1Rows.push([def.name, def.kind, relPath, def.line, def.endLine || null, null]);
697
- }
698
- for (const exp of symbols.exports) {
699
- phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null]);
700
- }
701
- }
702
- batchInsertNodes(phase1Rows);
703
-
704
- // Phase 1b: Mark exported symbols
705
- const markExported = db.prepare(
706
- 'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?',
707
- );
708
- for (const [relPath, symbols] of allSymbols) {
709
- for (const exp of symbols.exports) {
710
- markExported.run(exp.name, exp.kind, relPath, exp.line);
711
- }
712
- }
713
-
714
- // Phase 3: Batch insert children (needs parent IDs from Phase 2)
715
- const childRows = [];
716
- for (const [relPath, symbols] of allSymbols) {
717
- const nodeIdMap = new Map();
718
- for (const row of bulkGetNodeIds.all(relPath)) {
719
- nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
720
- }
721
- for (const def of symbols.definitions) {
722
- if (!def.children?.length) continue;
723
- const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
724
- if (!defId) continue;
725
- for (const child of def.children) {
726
- childRows.push([
727
- child.name,
728
- child.kind,
729
- relPath,
730
- child.line,
731
- child.endLine || null,
732
- defId,
733
- ]);
734
- }
735
- }
736
- }
737
- batchInsertNodes(childRows);
738
-
739
- // Phase 5: Batch insert contains/parameter_of edges
740
- const edgeRows = [];
741
- for (const [relPath, symbols] of allSymbols) {
742
- // Re-fetch to include children IDs
743
- const nodeIdMap = new Map();
744
- for (const row of bulkGetNodeIds.all(relPath)) {
745
- nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
746
- }
747
- const fileId = nodeIdMap.get(`${relPath}|file|0`);
748
- for (const def of symbols.definitions) {
749
- const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
750
- if (fileId && defId) {
751
- edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
752
- }
753
- if (def.children?.length && defId) {
754
- for (const child of def.children) {
755
- const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
756
- if (childId) {
757
- edgeRows.push([defId, childId, 'contains', 1.0, 0]);
758
- if (child.kind === 'parameter') {
759
- edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
760
- }
761
- }
762
- }
763
- }
764
- }
765
-
766
- // Update file hash with real mtime+size for incremental builds
767
- // Skip for reverse-dep files — they didn't actually change
768
- if (upsertHash) {
769
- const precomputed = precomputedData.get(relPath);
770
- if (precomputed?._reverseDepOnly) {
771
- // no-op: file unchanged, hash already correct
772
- } else if (precomputed?.hash) {
773
- const stat = precomputed.stat || fileStat(path.join(rootDir, relPath));
774
- const mtime = stat ? Math.floor(stat.mtimeMs) : 0;
775
- const size = stat ? stat.size : 0;
776
- upsertHash.run(relPath, precomputed.hash, mtime, size);
777
- } else {
778
- const absPath = path.join(rootDir, relPath);
779
- let code;
780
- try {
781
- code = readFileSafe(absPath);
782
- } catch {
783
- code = null;
784
- }
785
- if (code !== null) {
786
- const stat = fileStat(absPath);
787
- const mtime = stat ? Math.floor(stat.mtimeMs) : 0;
788
- const size = stat ? stat.size : 0;
789
- upsertHash.run(relPath, fileHash(code), mtime, size);
790
- }
791
- }
792
- }
793
- }
794
- batchInsertEdges(edgeRows);
795
-
796
- // Also update metadata-only entries (self-heal mtime/size without re-parse)
797
- if (upsertHash) {
798
- for (const item of metadataUpdates) {
799
- const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0;
800
- const size = item.stat ? item.stat.size : 0;
801
- upsertHash.run(item.relPath, item.hash, mtime, size);
802
- }
803
- }
804
- });
805
- _t.insert0 = performance.now();
806
- insertAll();
807
- _t.insertMs = performance.now() - _t.insert0;
808
-
809
- const parsed = allSymbols.size;
810
- const skipped = filesToParse.length - parsed;
811
- info(`Parsed ${parsed} files (${skipped} skipped)`);
812
-
813
- // Clean up removed file hashes
814
- if (upsertHash && removed.length > 0) {
815
- const deleteHash = db.prepare('DELETE FROM file_hashes WHERE file = ?');
816
- for (const relPath of removed) {
817
- deleteHash.run(relPath);
818
- }
819
- }
820
-
821
- // ── Batch import resolution ────────────────────────────────────────
822
- // Collect all (fromFile, importSource) pairs and resolve in one native call
823
- _t.resolve0 = performance.now();
824
- const batchInputs = [];
825
- for (const [relPath, symbols] of fileSymbols) {
826
- const absFile = path.join(rootDir, relPath);
827
- for (const imp of symbols.imports) {
828
- batchInputs.push({ fromFile: absFile, importSource: imp.source });
829
- }
830
- }
831
- const batchResolved = resolveImportsBatch(batchInputs, rootDir, aliases, files);
832
- _t.resolveMs = performance.now() - _t.resolve0;
833
-
834
- function getResolved(absFile, importSource) {
835
- if (batchResolved) {
836
- const key = `${absFile}|${importSource}`;
837
- const hit = batchResolved.get(key);
838
- if (hit !== undefined) return hit;
839
- }
840
- return resolveImportPath(absFile, importSource, rootDir, aliases);
841
- }
842
-
843
- // Build re-export map for barrel resolution
844
- const reexportMap = new Map();
845
- for (const [relPath, symbols] of fileSymbols) {
846
- const reexports = symbols.imports.filter((imp) => imp.reexport);
847
- if (reexports.length > 0) {
848
- reexportMap.set(
849
- relPath,
850
- reexports.map((imp) => ({
851
- source: getResolved(path.join(rootDir, relPath), imp.source),
852
- names: imp.names,
853
- wildcardReexport: imp.wildcardReexport || false,
854
- })),
855
- );
856
- }
857
- }
858
-
859
- // For incremental builds, load unchanged barrel files into reexportMap
860
- // so barrel-resolved import/call edges aren't dropped for reverse-dep files.
861
- // These files are loaded only for resolution — they must NOT be iterated
862
- // in the edge-building loop (their existing edges are still in the DB).
863
- const barrelOnlyFiles = new Set();
864
- if (!isFullBuild) {
865
- const barrelCandidates = db
866
- .prepare(
867
- `SELECT DISTINCT n1.file FROM edges e
868
- JOIN nodes n1 ON e.source_id = n1.id
869
- WHERE e.kind = 'reexports' AND n1.kind = 'file'`,
870
- )
871
- .all();
872
- for (const { file: relPath } of barrelCandidates) {
873
- if (fileSymbols.has(relPath)) continue;
874
- const absPath = path.join(rootDir, relPath);
875
- try {
876
- const symbols = await parseFilesAuto([absPath], rootDir, engineOpts);
877
- const fileSym = symbols.get(relPath);
878
- if (fileSym) {
879
- fileSymbols.set(relPath, fileSym);
880
- barrelOnlyFiles.add(relPath);
881
- const reexports = fileSym.imports.filter((imp) => imp.reexport);
882
- if (reexports.length > 0) {
883
- reexportMap.set(
884
- relPath,
885
- reexports.map((imp) => ({
886
- source: getResolved(absPath, imp.source),
887
- names: imp.names,
888
- wildcardReexport: imp.wildcardReexport || false,
889
- })),
890
- );
891
- }
892
- }
893
- } catch {
894
- /* skip if unreadable */
895
- }
896
- }
897
- }
898
-
899
- function isBarrelFile(relPath) {
900
- const symbols = fileSymbols.get(relPath);
901
- if (!symbols) return false;
902
- const reexports = symbols.imports.filter((imp) => imp.reexport);
903
- if (reexports.length === 0) return false;
904
- const ownDefs = symbols.definitions.length;
905
- return reexports.length >= ownDefs;
906
- }
907
-
908
- function resolveBarrelExport(barrelPath, symbolName, visited = new Set()) {
909
- if (visited.has(barrelPath)) return null;
910
- visited.add(barrelPath);
911
- const reexports = reexportMap.get(barrelPath);
912
- if (!reexports) return null;
913
-
914
- for (const re of reexports) {
915
- if (re.names.length > 0 && !re.wildcardReexport) {
916
- if (re.names.includes(symbolName)) {
917
- const targetSymbols = fileSymbols.get(re.source);
918
- if (targetSymbols) {
919
- const hasDef = targetSymbols.definitions.some((d) => d.name === symbolName);
920
- if (hasDef) return re.source;
921
- const deeper = resolveBarrelExport(re.source, symbolName, visited);
922
- if (deeper) return deeper;
923
- }
924
- return re.source;
925
- }
926
- continue;
927
- }
928
- if (re.wildcardReexport || re.names.length === 0) {
929
- const targetSymbols = fileSymbols.get(re.source);
930
- if (targetSymbols) {
931
- const hasDef = targetSymbols.definitions.some((d) => d.name === symbolName);
932
- if (hasDef) return re.source;
933
- const deeper = resolveBarrelExport(re.source, symbolName, visited);
934
- if (deeper) return deeper;
935
- }
936
- }
937
- }
938
- return null;
939
- }
940
-
941
- // N+1 optimization: pre-load all nodes into a lookup map for edge building
942
- const allNodes = db
943
- .prepare(
944
- `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`,
945
- )
946
- .all();
947
- const nodesByName = new Map();
948
- for (const node of allNodes) {
949
- if (!nodesByName.has(node.name)) nodesByName.set(node.name, []);
950
- nodesByName.get(node.name).push(node);
951
- }
952
- const nodesByNameAndFile = new Map();
953
- for (const node of allNodes) {
954
- const key = `${node.name}|${node.file}`;
955
- if (!nodesByNameAndFile.has(key)) nodesByNameAndFile.set(key, []);
956
- nodesByNameAndFile.get(key).push(node);
957
- }
958
-
959
- // Second pass: build edges (accumulated and batch-inserted)
960
- _t.edges0 = performance.now();
961
- const buildEdges = db.transaction(() => {
962
- const allEdgeRows = [];
963
-
964
- for (const [relPath, symbols] of fileSymbols) {
965
- // Skip barrel-only files — loaded for resolution, edges already in DB
966
- if (barrelOnlyFiles.has(relPath)) continue;
967
- const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0);
968
- if (!fileNodeRow) continue;
969
- const fileNodeId = fileNodeRow.id;
970
-
971
- // Import edges
972
- for (const imp of symbols.imports) {
973
- const resolvedPath = getResolved(path.join(rootDir, relPath), imp.source);
974
- const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0);
975
- if (targetRow) {
976
- const edgeKind = imp.reexport
977
- ? 'reexports'
978
- : imp.typeOnly
979
- ? 'imports-type'
980
- : imp.dynamicImport
981
- ? 'dynamic-imports'
982
- : 'imports';
983
- allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]);
984
-
985
- if (!imp.reexport && isBarrelFile(resolvedPath)) {
986
- const resolvedSources = new Set();
987
- for (const name of imp.names) {
988
- const cleanName = name.replace(/^\*\s+as\s+/, '');
989
- const actualSource = resolveBarrelExport(resolvedPath, cleanName);
990
- if (
991
- actualSource &&
992
- actualSource !== resolvedPath &&
993
- !resolvedSources.has(actualSource)
994
- ) {
995
- resolvedSources.add(actualSource);
996
- const actualRow = getNodeIdStmt.get(actualSource, 'file', actualSource, 0);
997
- if (actualRow) {
998
- allEdgeRows.push([
999
- fileNodeId,
1000
- actualRow.id,
1001
- edgeKind === 'imports-type'
1002
- ? 'imports-type'
1003
- : edgeKind === 'dynamic-imports'
1004
- ? 'dynamic-imports'
1005
- : 'imports',
1006
- 0.9,
1007
- 0,
1008
- ]);
1009
- }
1010
- }
1011
- }
1012
- }
1013
- }
1014
- }
1015
- }
1016
-
1017
- // Call/receiver/extends/implements edges — native when available
1018
- const native = engineName === 'native' ? loadNative() : null;
1019
- if (native?.buildCallEdges) {
1020
- const nativeFiles = [];
1021
- for (const [relPath, symbols] of fileSymbols) {
1022
- if (barrelOnlyFiles.has(relPath)) continue;
1023
- const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0);
1024
- if (!fileNodeRow) continue;
1025
-
1026
- // Pre-resolve imported names (including barrel resolution)
1027
- const importedNames = [];
1028
- for (const imp of symbols.imports) {
1029
- const resolvedPath = getResolved(path.join(rootDir, relPath), imp.source);
1030
- for (const name of imp.names) {
1031
- const cleanName = name.replace(/^\*\s+as\s+/, '');
1032
- let targetFile = resolvedPath;
1033
- if (isBarrelFile(resolvedPath)) {
1034
- const actual = resolveBarrelExport(resolvedPath, cleanName);
1035
- if (actual) targetFile = actual;
1036
- }
1037
- importedNames.push({ name: cleanName, file: targetFile });
1038
- }
1039
- }
1040
-
1041
- nativeFiles.push({
1042
- file: relPath,
1043
- fileNodeId: fileNodeRow.id,
1044
- definitions: symbols.definitions.map((d) => ({
1045
- name: d.name,
1046
- kind: d.kind,
1047
- line: d.line,
1048
- endLine: d.endLine ?? null,
1049
- })),
1050
- calls: symbols.calls,
1051
- importedNames,
1052
- classes: symbols.classes,
1053
- });
1054
- }
1055
-
1056
- const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]);
1057
-
1058
- for (const e of nativeEdges) {
1059
- allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]);
1060
- }
1061
- } else {
1062
- // JS fallback — call/receiver/extends/implements edges
1063
- for (const [relPath, symbols] of fileSymbols) {
1064
- if (barrelOnlyFiles.has(relPath)) continue;
1065
- const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0);
1066
- if (!fileNodeRow) continue;
1067
-
1068
- // Build import name -> target file mapping
1069
- const importedNames = new Map();
1070
- for (const imp of symbols.imports) {
1071
- const resolvedPath = getResolved(path.join(rootDir, relPath), imp.source);
1072
- for (const name of imp.names) {
1073
- const cleanName = name.replace(/^\*\s+as\s+/, '');
1074
- importedNames.set(cleanName, resolvedPath);
1075
- }
1076
- }
1077
-
1078
- // Call edges with confidence scoring — using pre-loaded lookup maps (N+1 fix)
1079
- const seenCallEdges = new Set();
1080
- for (const call of symbols.calls) {
1081
- if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue;
1082
- let caller = null;
1083
- let callerSpan = Infinity;
1084
- for (const def of symbols.definitions) {
1085
- if (def.line <= call.line) {
1086
- const end = def.endLine || Infinity;
1087
- if (call.line <= end) {
1088
- const span = end - def.line;
1089
- if (span < callerSpan) {
1090
- const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line);
1091
- if (row) {
1092
- caller = row;
1093
- callerSpan = span;
1094
- }
1095
- }
1096
- } else if (!caller) {
1097
- const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line);
1098
- if (row) caller = row;
1099
- }
1100
- }
1101
- }
1102
- if (!caller) caller = fileNodeRow;
1103
-
1104
- const isDynamic = call.dynamic ? 1 : 0;
1105
- let targets;
1106
- const importedFrom = importedNames.get(call.name);
1107
-
1108
- if (importedFrom) {
1109
- targets = nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || [];
1110
-
1111
- if (targets.length === 0 && isBarrelFile(importedFrom)) {
1112
- const actualSource = resolveBarrelExport(importedFrom, call.name);
1113
- if (actualSource) {
1114
- targets = nodesByNameAndFile.get(`${call.name}|${actualSource}`) || [];
1115
- }
1116
- }
1117
- }
1118
- if (!targets || targets.length === 0) {
1119
- targets = nodesByNameAndFile.get(`${call.name}|${relPath}`) || [];
1120
- if (targets.length === 0) {
1121
- const methodCandidates = (nodesByName.get(call.name) || []).filter(
1122
- (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method',
1123
- );
1124
- if (methodCandidates.length > 0) {
1125
- targets = methodCandidates;
1126
- } else if (
1127
- !call.receiver ||
1128
- call.receiver === 'this' ||
1129
- call.receiver === 'self' ||
1130
- call.receiver === 'super'
1131
- ) {
1132
- targets = (nodesByName.get(call.name) || []).filter(
1133
- (n) => computeConfidence(relPath, n.file, null) >= 0.5,
1134
- );
1135
- }
1136
- }
1137
- }
1138
-
1139
- if (targets.length > 1) {
1140
- targets.sort((a, b) => {
1141
- const confA = computeConfidence(relPath, a.file, importedFrom);
1142
- const confB = computeConfidence(relPath, b.file, importedFrom);
1143
- return confB - confA;
1144
- });
1145
- }
1146
-
1147
- for (const t of targets) {
1148
- const edgeKey = `${caller.id}|${t.id}`;
1149
- if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) {
1150
- seenCallEdges.add(edgeKey);
1151
- const confidence = computeConfidence(relPath, t.file, importedFrom);
1152
- allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]);
1153
- }
1154
- }
1155
-
1156
- // Receiver edge: caller → receiver type node
1157
- if (
1158
- call.receiver &&
1159
- !BUILTIN_RECEIVERS.has(call.receiver) &&
1160
- call.receiver !== 'this' &&
1161
- call.receiver !== 'self' &&
1162
- call.receiver !== 'super'
1163
- ) {
1164
- const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']);
1165
- const samefile = nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || [];
1166
- const candidates =
1167
- samefile.length > 0 ? samefile : nodesByName.get(call.receiver) || [];
1168
- const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind));
1169
- if (receiverNodes.length > 0 && caller) {
1170
- const recvTarget = receiverNodes[0];
1171
- const recvKey = `recv|${caller.id}|${recvTarget.id}`;
1172
- if (!seenCallEdges.has(recvKey)) {
1173
- seenCallEdges.add(recvKey);
1174
- allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]);
1175
- }
1176
- }
1177
- }
1178
- }
1179
-
1180
- // Class extends edges
1181
- for (const cls of symbols.classes) {
1182
- if (cls.extends) {
1183
- const sourceRow = (nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find(
1184
- (n) => n.kind === 'class',
1185
- );
1186
- const targetCandidates = nodesByName.get(cls.extends) || [];
1187
- const targetRows = targetCandidates.filter((n) => n.kind === 'class');
1188
- if (sourceRow) {
1189
- for (const t of targetRows) {
1190
- allEdgeRows.push([sourceRow.id, t.id, 'extends', 1.0, 0]);
1191
- }
1192
- }
1193
- }
1194
-
1195
- if (cls.implements) {
1196
- const sourceRow = (nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find(
1197
- (n) => n.kind === 'class',
1198
- );
1199
- const targetCandidates = nodesByName.get(cls.implements) || [];
1200
- const targetRows = targetCandidates.filter(
1201
- (n) => n.kind === 'interface' || n.kind === 'class',
1202
- );
1203
- if (sourceRow) {
1204
- for (const t of targetRows) {
1205
- allEdgeRows.push([sourceRow.id, t.id, 'implements', 1.0, 0]);
1206
- }
1207
- }
1208
- }
1209
- }
1210
- }
1211
- }
1212
-
1213
- batchInsertEdges(allEdgeRows);
1214
- });
1215
- buildEdges();
1216
- _t.edgesMs = performance.now() - _t.edges0;
1217
-
1218
- // Build line count map for structure metrics (prefer cached _lineCount from parser)
1219
- const lineCountMap = new Map();
1220
- for (const [relPath, symbols] of fileSymbols) {
1221
- if (symbols.lineCount ?? symbols._lineCount) {
1222
- lineCountMap.set(relPath, symbols.lineCount ?? symbols._lineCount);
1223
- } else {
1224
- const absPath = path.join(rootDir, relPath);
1225
- try {
1226
- const content = fs.readFileSync(absPath, 'utf-8');
1227
- lineCountMap.set(relPath, content.split('\n').length);
1228
- } catch {
1229
- lineCountMap.set(relPath, 0);
1230
- }
1231
- }
1232
- }
1233
-
1234
- // For incremental builds, buildStructure needs ALL files (not just changed ones)
1235
- // because it clears and rebuilds all contains edges and directory metrics.
1236
- // Load unchanged files from the DB so structure data stays complete.
1237
- if (!isFullBuild) {
1238
- const existingFiles = db.prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'").all();
1239
- const defsByFile = db.prepare(
1240
- "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file' AND kind != 'directory'",
1241
- );
1242
- // Count imports per file — buildStructure only uses imports.length for metrics
1243
- const importCountByFile = db.prepare(
1244
- `SELECT COUNT(DISTINCT n2.file) AS cnt FROM edges e
1245
- JOIN nodes n1 ON e.source_id = n1.id
1246
- JOIN nodes n2 ON e.target_id = n2.id
1247
- WHERE n1.file = ? AND e.kind = 'imports'`,
1248
- );
1249
- const lineCountByFile = db.prepare(
1250
- `SELECT n.name AS file, m.line_count
1251
- FROM node_metrics m JOIN nodes n ON m.node_id = n.id
1252
- WHERE n.kind = 'file'`,
1253
- );
1254
- const cachedLineCounts = new Map();
1255
- for (const row of lineCountByFile.all()) {
1256
- cachedLineCounts.set(row.file, row.line_count);
1257
- }
1258
- let loadedFromDb = 0;
1259
- for (const { file: relPath } of existingFiles) {
1260
- if (!fileSymbols.has(relPath)) {
1261
- const importCount = importCountByFile.get(relPath)?.cnt || 0;
1262
- fileSymbols.set(relPath, {
1263
- definitions: defsByFile.all(relPath),
1264
- imports: new Array(importCount),
1265
- exports: [],
1266
- });
1267
- loadedFromDb++;
1268
- }
1269
- if (!lineCountMap.has(relPath)) {
1270
- const cached = cachedLineCounts.get(relPath);
1271
- if (cached != null) {
1272
- lineCountMap.set(relPath, cached);
1273
- } else {
1274
- const absPath = path.join(rootDir, relPath);
1275
- try {
1276
- const content = fs.readFileSync(absPath, 'utf-8');
1277
- lineCountMap.set(relPath, content.split('\n').length);
1278
- } catch {
1279
- lineCountMap.set(relPath, 0);
1280
- }
1281
- }
1282
- }
1283
- }
1284
- debug(`Structure: ${fileSymbols.size} files (${loadedFromDb} loaded from DB)`);
1285
- }
1286
-
1287
- // Build directory structure, containment edges, and metrics
1288
- _t.structure0 = performance.now();
1289
- const relDirs = new Set();
1290
- for (const absDir of discoveredDirs) {
1291
- relDirs.add(normalizePath(path.relative(rootDir, absDir)));
1292
- }
1293
- try {
1294
- const { buildStructure } = await import('./structure.js');
1295
- // Pass changed file paths so incremental builds can scope the rebuild
1296
- const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
1297
- buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs, changedFilePaths);
1298
- } catch (err) {
1299
- debug(`Structure analysis failed: ${err.message}`);
1300
- }
1301
- _t.structureMs = performance.now() - _t.structure0;
1302
-
1303
- // Classify node roles (entry, core, utility, adapter, dead, leaf)
1304
- _t.roles0 = performance.now();
1305
- try {
1306
- const { classifyNodeRoles } = await import('./structure.js');
1307
- const roleSummary = classifyNodeRoles(db);
1308
- debug(
1309
- `Roles: ${Object.entries(roleSummary)
1310
- .map(([r, c]) => `${r}=${c}`)
1311
- .join(', ')}`,
1312
- );
1313
- } catch (err) {
1314
- debug(`Role classification failed: ${err.message}`);
1315
- }
1316
- _t.rolesMs = performance.now() - _t.roles0;
1317
-
1318
- // For incremental builds, filter out reverse-dep-only files from AST/complexity/CFG/dataflow
1319
- // — their content didn't change, so existing ast_nodes/function_complexity rows are valid.
1320
- let astComplexitySymbols = allSymbols;
1321
- if (!isFullBuild) {
1322
- const reverseDepFiles = new Set(
1323
- filesToParse.filter((item) => item._reverseDepOnly).map((item) => item.relPath),
1324
- );
1325
- if (reverseDepFiles.size > 0) {
1326
- astComplexitySymbols = new Map();
1327
- for (const [relPath, symbols] of allSymbols) {
1328
- if (!reverseDepFiles.has(relPath)) {
1329
- astComplexitySymbols.set(relPath, symbols);
1330
- }
1331
- }
1332
- debug(
1333
- `AST/complexity/CFG/dataflow: processing ${astComplexitySymbols.size} changed files (skipping ${reverseDepFiles.size} reverse-deps)`,
1334
- );
1335
- }
1336
- }
1337
-
1338
- // ── Unified AST analysis engine ──────────────────────────────────────
1339
- // Replaces 4 sequential buildXxx calls with one coordinated pass.
1340
- {
1341
- const { runAnalyses } = await import('./ast-analysis/engine.js');
1342
- try {
1343
- const analysisTiming = await runAnalyses(db, astComplexitySymbols, rootDir, opts, engineOpts);
1344
- _t.astMs = analysisTiming.astMs;
1345
- _t.complexityMs = analysisTiming.complexityMs;
1346
- _t.cfgMs = analysisTiming.cfgMs;
1347
- _t.dataflowMs = analysisTiming.dataflowMs;
1348
- } catch (err) {
1349
- debug(`Unified analysis engine failed: ${err.message}`);
1350
- }
1351
- }
1352
-
1353
- // Release any remaining cached WASM trees for GC
1354
- for (const [, symbols] of allSymbols) {
1355
- symbols._tree = null;
1356
- symbols._langId = null;
1357
- }
1358
-
1359
- const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
1360
- const actualEdgeCount = db.prepare('SELECT COUNT(*) as c FROM edges').get().c;
1361
- info(`Graph built: ${nodeCount} nodes, ${actualEdgeCount} edges`);
1362
- info(`Stored in ${dbPath}`);
1363
-
1364
- // Verify incremental build didn't diverge significantly from previous counts
1365
- if (!isFullBuild) {
1366
- const prevNodes = getBuildMeta(db, 'node_count');
1367
- const prevEdges = getBuildMeta(db, 'edge_count');
1368
- if (prevNodes && prevEdges) {
1369
- const prevN = Number(prevNodes);
1370
- const prevE = Number(prevEdges);
1371
- if (prevN > 0) {
1372
- const nodeDrift = Math.abs(nodeCount - prevN) / prevN;
1373
- const edgeDrift = prevE > 0 ? Math.abs(actualEdgeCount - prevE) / prevE : 0;
1374
- const driftThreshold = config.build?.driftThreshold ?? 0.2;
1375
- if (nodeDrift > driftThreshold || edgeDrift > driftThreshold) {
1376
- warn(
1377
- `Incremental build diverged significantly from previous counts (nodes: ${prevN}→${nodeCount} [${(nodeDrift * 100).toFixed(1)}%], edges: ${prevE}→${actualEdgeCount} [${(edgeDrift * 100).toFixed(1)}%], threshold: ${(driftThreshold * 100).toFixed(0)}%). Consider rebuilding with --no-incremental.`,
1378
- );
1379
- }
1380
- }
1381
- }
1382
- }
1383
-
1384
- // Warn about orphaned embeddings that no longer match any node
1385
- if (hasEmbeddings) {
1386
- try {
1387
- const orphaned = db
1388
- .prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)')
1389
- .get().c;
1390
- if (orphaned > 0) {
1391
- warn(
1392
- `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
1393
- );
1394
- }
1395
- } catch {
1396
- /* ignore — embeddings table may have been dropped */
1397
- }
1398
- }
1399
-
1400
- // Warn about unused exports (exported but zero cross-file consumers)
1401
- try {
1402
- const unusedCount = db
1403
- .prepare(
1404
- `SELECT COUNT(*) as c FROM nodes
1405
- WHERE exported = 1 AND kind != 'file'
1406
- AND id NOT IN (
1407
- SELECT DISTINCT e.target_id FROM edges e
1408
- JOIN nodes caller ON e.source_id = caller.id
1409
- JOIN nodes target ON e.target_id = target.id
1410
- WHERE e.kind = 'calls' AND caller.file != target.file
1411
- )`,
1412
- )
1413
- .get().c;
1414
- if (unusedCount > 0) {
1415
- warn(
1416
- `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`,
1417
- );
1418
- }
1419
- } catch {
1420
- /* exported column may not exist on older DBs */
1421
- }
1422
-
1423
- // Persist build metadata for mismatch detection
1424
- try {
1425
- setBuildMeta(db, {
1426
- engine: engineName,
1427
- engine_version: engineVersion || '',
1428
- codegraph_version: CODEGRAPH_VERSION,
1429
- schema_version: String(CURRENT_SCHEMA_VERSION),
1430
- built_at: new Date().toISOString(),
1431
- node_count: nodeCount,
1432
- edge_count: actualEdgeCount,
1433
- });
1434
- } catch (err) {
1435
- warn(`Failed to write build metadata: ${err.message}`);
1436
- }
1437
-
1438
- closeDb(db);
1439
-
1440
- // Write journal header after successful build
1441
- writeJournalHeader(rootDir, Date.now());
1442
-
1443
- if (!opts.skipRegistry) {
1444
- const { tmpdir } = await import('node:os');
1445
- const tmpDir = path.resolve(tmpdir());
1446
- const resolvedRoot = path.resolve(rootDir);
1447
- if (resolvedRoot.startsWith(tmpDir)) {
1448
- debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`);
1449
- } else {
1450
- try {
1451
- const { registerRepo } = await import('./registry.js');
1452
- registerRepo(rootDir);
1453
- } catch (err) {
1454
- debug(`Auto-registration failed: ${err.message}`);
1455
- }
1456
- }
1457
- }
1458
-
1459
- return {
1460
- phases: {
1461
- parseMs: +_t.parseMs.toFixed(1),
1462
- insertMs: +_t.insertMs.toFixed(1),
1463
- resolveMs: +_t.resolveMs.toFixed(1),
1464
- edgesMs: +_t.edgesMs.toFixed(1),
1465
- structureMs: +_t.structureMs.toFixed(1),
1466
- rolesMs: +_t.rolesMs.toFixed(1),
1467
- astMs: +_t.astMs.toFixed(1),
1468
- complexityMs: +_t.complexityMs.toFixed(1),
1469
- ...(_t.cfgMs != null && { cfgMs: +_t.cfgMs.toFixed(1) }),
1470
- ...(_t.dataflowMs != null && { dataflowMs: +_t.dataflowMs.toFixed(1) }),
1471
- },
1472
- };
1473
- }