@optave/codegraph 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/builder.js ADDED
@@ -0,0 +1,547 @@
1
+
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+ import { createHash } from 'crypto';
5
+ import { openDb, initSchema } from './db.js';
6
+ import { createParsers, getParser, extractSymbols, extractHCLSymbols, extractPythonSymbols } from './parser.js';
7
+ import { IGNORE_DIRS, EXTENSIONS, normalizePath } from './constants.js';
8
+ import { loadConfig } from './config.js';
9
+ import { warn, debug, info } from './logger.js';
10
+
11
/**
 * Recursively gather source files under `dir` whose extension is in EXTENSIONS.
 * Directories named in IGNORE_DIRS or config.ignoreDirs are pruned; hidden
 * directories are skipped, while hidden *files* may still be collected if
 * their extension matches.
 *
 * @param {string} dir - directory to walk
 * @param {string[]} [files] - accumulator, also the return value
 * @param {object} [config] - optional project config (reads `ignoreDirs`)
 * @returns {string[]} absolute paths of collected files
 */
export function collectFiles(dir, files = [], config = {}) {
  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (err) {
    warn(`Cannot read directory ${dir}: ${err.message}`);
    return files;
  }

  // User-supplied ignore list layered on top of the built-in IGNORE_DIRS.
  const userIgnored = config.ignoreDirs ? new Set(config.ignoreDirs) : null;

  for (const entry of entries) {
    const { name } = entry;
    const isDir = entry.isDirectory();

    if (IGNORE_DIRS.has(name)) continue;
    // Prune hidden directories; hidden files fall through to the
    // extension check below.
    if (name.startsWith('.') && name !== '.' && isDir) continue;
    if (userIgnored?.has(name)) continue;

    const fullPath = path.join(dir, name);
    if (isDir) {
      collectFiles(fullPath, files, config);
    } else if (EXTENSIONS.has(path.extname(name))) {
      files.push(fullPath);
    }
  }

  return files;
}
39
+
40
/**
 * Load TypeScript/JavaScript path-alias configuration from tsconfig.json or
 * jsconfig.json in `rootDir` (first one found wins).
 *
 * The config files are JSONC-flavored, so line comments, block comments, and
 * trailing commas are stripped before JSON.parse.
 *
 * @param {string} rootDir - directory containing the config file
 * @returns {{baseUrl: string|null, paths: Object<string, string[]>}}
 *   resolved absolute baseUrl and pattern -> absolute target list
 */
export function loadPathAliases(rootDir) {
  const aliases = { baseUrl: null, paths: {} };

  for (const configName of ['tsconfig.json', 'jsconfig.json']) {
    const configPath = path.join(rootDir, configName);
    if (!fs.existsSync(configPath)) continue;

    try {
      // NOTE(review): regex comment-stripping can corrupt string values that
      // contain "//" (e.g. URLs) — acceptable for typical compilerOptions.
      const stripped = fs.readFileSync(configPath, 'utf-8')
        .replace(/\/\/.*$/gm, '')
        .replace(/\/\*[\s\S]*?\*\//g, '')
        .replace(/,\s*([\]}])/g, '$1');
      const parsed = JSON.parse(stripped);
      const opts = parsed.compilerOptions || {};

      if (opts.baseUrl) {
        aliases.baseUrl = path.resolve(rootDir, opts.baseUrl);
      }
      if (opts.paths) {
        // Targets are relative to baseUrl when present, else to rootDir.
        const base = aliases.baseUrl || rootDir;
        for (const [pattern, targets] of Object.entries(opts.paths)) {
          aliases.paths[pattern] = targets.map((t) => path.resolve(base, t));
        }
      }
      break;
    } catch (err) {
      warn(`Failed to parse ${configName}: ${err.message}`);
    }
  }

  return aliases;
}
65
+
66
/**
 * Try to resolve a non-relative import specifier through path aliases.
 * Checks baseUrl resolution first, then each `paths` pattern whose prefix
 * matches, probing common TS/JS file and index suffixes on disk.
 *
 * @param {string} importSource - the import specifier as written
 * @param {{baseUrl: string|null, paths: object}} aliases
 * @param {string} rootDir - unused here, kept for call-site symmetry
 * @returns {string|null} absolute path of an existing file, or null
 */
function resolveViaAlias(importSource, aliases, rootDir) {
  const suffixes = ['', '.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js'];

  // Return the first candidate (base + suffix) that exists on disk.
  const firstExisting = (base) => {
    for (const suffix of suffixes) {
      const candidate = base + suffix;
      if (fs.existsSync(candidate)) return candidate;
    }
    return null;
  };

  // Bare specifiers may resolve directly against baseUrl.
  const isBare = !importSource.startsWith('.') && !importSource.startsWith('/');
  if (aliases.baseUrl && isBare) {
    const hit = firstExisting(path.resolve(aliases.baseUrl, importSource));
    if (hit) return hit;
  }

  // Otherwise try each "paths" mapping whose (wildcard-stripped) prefix matches.
  for (const [pattern, targets] of Object.entries(aliases.paths)) {
    const prefix = pattern.replace(/\*$/, '');
    if (!importSource.startsWith(prefix)) continue;

    const remainder = importSource.slice(prefix.length);
    for (const target of targets) {
      const hit = firstExisting(target.replace(/\*$/, remainder));
      if (hit) return hit;
    }
  }

  return null;
}
89
+
90
/**
 * Resolve an import specifier to a root-relative, normalized path.
 *
 * Non-relative specifiers are first attempted through path aliases; if that
 * fails they are returned verbatim (treated as external package names).
 * Relative specifiers are resolved against the importing file's directory,
 * probing TS/JS/Python file and index conventions on disk.
 *
 * Fix: the original ended with `if (fs.existsSync(resolved)) return X; return X;`
 * — both branches returned the identical value, so the stat was pure waste.
 *
 * @param {string} fromFile - absolute path of the importing file
 * @param {string} importSource - specifier as written in the import
 * @param {string} rootDir - project root for relativizing results
 * @param {object|null} aliases - result of loadPathAliases (or null)
 * @returns {string} root-relative normalized path, or the bare specifier
 */
export function resolveImportPath(fromFile, importSource, rootDir, aliases) {
  if (!importSource.startsWith('.')) {
    if (aliases) {
      const aliasResolved = resolveViaAlias(importSource, aliases, rootDir);
      if (aliasResolved) return normalizePath(path.relative(rootDir, aliasResolved));
    }
    // No alias hit: assume an external package and pass it through.
    return importSource;
  }

  const resolved = path.resolve(path.dirname(fromFile), importSource);

  // TS convention: source may import './x.js' while the file on disk is
  // actually './x.ts' or './x.tsx'.
  if (resolved.endsWith('.js')) {
    for (const tsExt of ['.ts', '.tsx']) {
      const candidate = resolved.replace(/\.js$/, tsExt);
      if (fs.existsSync(candidate)) return normalizePath(path.relative(rootDir, candidate));
    }
  }

  for (const ext of ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.py', '/index.ts', '/index.tsx', '/index.js', '/__init__.py']) {
    const candidate = resolved + ext;
    if (fs.existsSync(candidate)) {
      return normalizePath(path.relative(rootDir, candidate));
    }
  }

  // Fall back to the resolved path whether or not it exists on disk.
  return normalizePath(path.relative(rootDir, resolved));
}
115
+
116
/**
 * Score how plausible a resolved call target is, by file proximity.
 *
 * Tiers: 1.0 same file or explicitly imported from the target file,
 * 0.7 same directory, 0.5 same grandparent directory, 0.3 otherwise
 * (including when either file path is missing).
 *
 * @param {string} callerFile - root-relative path of the calling file
 * @param {string} targetFile - root-relative path of the candidate target
 * @param {string|undefined} importedFrom - file the called name was imported from
 * @returns {number} confidence in {0.3, 0.5, 0.7, 1.0}
 */
function computeConfidence(callerFile, targetFile, importedFrom) {
  if (!targetFile || !callerFile) return 0.3;

  const sameFile = callerFile === targetFile;
  const explicitImport = importedFrom === targetFile;
  if (sameFile || explicitImport) return 1.0;

  if (path.dirname(callerFile) === path.dirname(targetFile)) return 0.7;

  const grandparent = (p) => path.dirname(path.dirname(p));
  if (grandparent(callerFile) === grandparent(targetFile)) return 0.5;

  return 0.3;
}
129
+
130
/**
 * Hex MD5 digest of file contents — the change-detection key used for
 * incremental builds (not security-sensitive).
 *
 * @param {string|Buffer} content - file contents
 * @returns {string} 32-char lowercase hex digest
 */
function fileHash(content) {
  const hasher = createHash('md5');
  hasher.update(content);
  return hasher.digest('hex');
}
136
+
137
/**
 * Diff the current file set against the content hashes stored in the
 * file_hashes table.
 *
 * @param {object} db - better-sqlite3 style handle (prepare/.get/.all)
 * @param {string[]} allFiles - absolute paths of all current source files
 * @param {string} rootDir - project root for relativizing stored keys
 * @returns {{changed: Array, removed: string[], isFullBuild: boolean}}
 *   `changed` entries carry {file} on a full build, and
 *   {file, content, hash, relPath} when incrementally detected.
 */
function getChangedFiles(db, allFiles, rootDir) {
  // Probe for the file_hashes table; its absence means a first build.
  let hasHashTable = true;
  try {
    db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
  } catch {
    hasHashTable = false;
  }

  if (!hasHashTable) {
    // First build: every file is "changed", nothing is removed.
    return {
      changed: allFiles.map((file) => ({ file })),
      removed: [],
      isFullBuild: true
    };
  }

  const previousHashes = new Map(
    db.prepare('SELECT file, hash FROM file_hashes').all()
      .map((row) => [row.file, row.hash])
  );

  const seen = new Set();
  const changed = [];

  for (const file of allFiles) {
    const relPath = normalizePath(path.relative(rootDir, file));
    seen.add(relPath);

    let content;
    try {
      content = fs.readFileSync(file, 'utf-8');
    } catch {
      continue; // unreadable right now — skip, treat as unchanged
    }

    const hash = fileHash(content);
    if (previousHashes.get(relPath) !== hash) {
      changed.push({ file, content, hash, relPath });
    }
  }

  // Anything hashed previously but absent from the current walk was removed.
  const removed = [...previousHashes.keys()].filter((file) => !seen.has(file));

  return { changed, removed, isFullBuild: false };
}
187
+
188
/**
 * Build (or incrementally refresh) the code graph database for a project.
 *
 * Walks rootDir for supported source files, parses each into symbols
 * (definitions, exports, imports, calls, classes), stores them as nodes in
 * <root>/.codegraph/graph.db, then wires edges: imports/reexports (including
 * resolution through barrel files), calls (with proximity-based confidence),
 * extends and implements.
 *
 * Fixes vs. the original:
 *  - removed a dead block that queried all existing file nodes and discarded
 *    the result;
 *  - `changed` already contains every file on a full build, so it is used as
 *    the parse worklist directly instead of rebuilding the identical array;
 *  - the bare method-name match could never fire (see methodsByBareName
 *    below) and is now backed by a dedicated index.
 *
 * @param {string} rootDir - project root
 * @param {object} [opts] - { incremental: false } forces a full rebuild
 */
export async function buildGraph(rootDir, opts = {}) {
  const dbPath = path.join(rootDir, '.codegraph', 'graph.db');
  const db = openDb(dbPath);
  initSchema(db);

  const config = loadConfig(rootDir);
  // Incremental unless disabled by either the caller or the project config.
  const incremental = opts.incremental !== false && config.build && config.build.incremental !== false;

  const parsers = await createParsers();
  const aliases = loadPathAliases(rootDir);

  // Config-declared aliases are layered on top of tsconfig/jsconfig paths.
  if (config.aliases) {
    for (const [key, value] of Object.entries(config.aliases)) {
      const pattern = key.endsWith('/') ? key + '*' : key;
      const target = path.resolve(rootDir, value);
      aliases.paths[pattern] = [target.endsWith('/') ? target + '*' : target + '/*'];
    }
  }

  if (aliases.baseUrl || Object.keys(aliases.paths).length > 0) {
    console.log(`Loaded path aliases: baseUrl=${aliases.baseUrl || 'none'}, ${Object.keys(aliases.paths).length} path mappings`);
  }

  const files = collectFiles(rootDir, [], config);
  console.log(`Found ${files.length} files to parse`);

  // On a full build every file counts as "changed".
  const { changed, removed, isFullBuild } = incremental
    ? getChangedFiles(db, files, rootDir)
    : { changed: files.map(f => ({ file: f })), removed: [], isFullBuild: true };

  if (!isFullBuild && changed.length === 0 && removed.length === 0) {
    console.log('No changes detected. Graph is up to date.');
    db.close();
    return;
  }

  if (isFullBuild) {
    db.exec('PRAGMA foreign_keys = OFF; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;');
  } else {
    console.log(`Incremental: ${changed.length} changed, ${removed.length} removed`);
    // Drop nodes and every edge touching a changed or removed file.
    // FIXME(review): edges whose source is an UNCHANGED file and whose target
    // is a changed file are deleted here but never rebuilt, because only
    // changed files are re-parsed below. A fully correct incremental build
    // must also re-link (or re-parse) the unchanged dependents of changed
    // files.
    const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
    const deleteEdgesForFile = db.prepare(`
      DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
        OR target_id IN (SELECT id FROM nodes WHERE file = @f)
    `);
    for (const relPath of removed) {
      deleteEdgesForFile.run({ f: relPath });
      deleteNodesForFile.run(relPath);
    }
    for (const item of changed) {
      const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
      deleteEdgesForFile.run({ f: relPath });
      deleteNodesForFile.run(relPath);
    }
  }

  const insertNode = db.prepare('INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)');
  const getNodeId = db.prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?');
  const insertEdge = db.prepare('INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)');

  // file_hashes may not exist on older schemas; hash bookkeeping is best-effort.
  let upsertHash;
  try {
    upsertHash = db.prepare('INSERT OR REPLACE INTO file_hashes (file, hash, mtime) VALUES (?, ?, ?)');
  } catch { upsertHash = null; }

  // First pass: parse files and insert nodes. `changed` already holds every
  // file on a full build, so it doubles as the parse worklist.
  const fileSymbols = new Map();
  let parsed = 0, skipped = 0;
  const filesToParse = changed;

  const insertMany = db.transaction(() => {
    for (const item of filesToParse) {
      const filePath = item.file;
      const parser = getParser(parsers, filePath);
      if (!parser) { skipped++; continue; }

      // Reuse content already read during change detection when available.
      let code;
      if (item.content) {
        code = item.content;
      } else {
        try { code = fs.readFileSync(filePath, 'utf-8'); }
        catch (err) {
          warn(`Skipping ${path.relative(rootDir, filePath)}: ${err.message}`);
          skipped++;
          continue;
        }
      }

      let tree;
      try { tree = parser.parse(code); }
      catch (e) {
        warn(`Parse error in ${path.relative(rootDir, filePath)}: ${e.message}`);
        skipped++;
        continue;
      }

      const relPath = normalizePath(path.relative(rootDir, filePath));
      const isHCL = filePath.endsWith('.tf') || filePath.endsWith('.hcl');
      const isPython = filePath.endsWith('.py');
      const symbols = isHCL ? extractHCLSymbols(tree, filePath)
        : isPython ? extractPythonSymbols(tree, filePath)
        : extractSymbols(tree, filePath);
      fileSymbols.set(relPath, symbols);

      // One synthetic node per file anchors import edges.
      insertNode.run(relPath, 'file', relPath, 0, null);

      for (const def of symbols.definitions) {
        insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null);
      }

      for (const exp of symbols.exports) {
        insertNode.run(exp.name, exp.kind, relPath, exp.line, null);
      }

      // Record the content hash so the next build can skip unchanged files.
      if (upsertHash) {
        const hash = item.hash || fileHash(code);
        upsertHash.run(relPath, hash, Date.now());
      }

      parsed++;
      if (parsed % 100 === 0) process.stdout.write(` Parsed ${parsed}/${filesToParse.length} files\r`);
    }
  });
  insertMany();
  console.log(`Parsed ${parsed} files (${skipped} skipped)`);

  // Forget hashes for files that disappeared.
  if (upsertHash && removed.length > 0) {
    const deleteHash = db.prepare('DELETE FROM file_hashes WHERE file = ?');
    for (const relPath of removed) {
      deleteHash.run(relPath);
    }
  }

  // Map each re-exporting file to its resolved re-export targets so imports
  // through a barrel (index) file can be attributed to the defining module.
  const reexportMap = new Map();
  for (const [relPath, symbols] of fileSymbols) {
    const reexports = symbols.imports.filter(imp => imp.reexport);
    if (reexports.length > 0) {
      reexportMap.set(relPath, reexports.map(imp => ({
        source: resolveImportPath(path.join(rootDir, relPath), imp.source, rootDir, aliases),
        names: imp.names,
        wildcardReexport: imp.wildcardReexport || false
      })));
    }
  }

  // A "barrel" is a file whose re-exports outnumber its own definitions.
  function isBarrelFile(relPath) {
    const symbols = fileSymbols.get(relPath);
    if (!symbols) return false;
    const reexports = symbols.imports.filter(imp => imp.reexport);
    if (reexports.length === 0) return false;
    const ownDefs = symbols.definitions.length;
    return reexports.length >= ownDefs;
  }

  // Follow re-export chains to the file actually defining symbolName.
  // `visited` guards against re-export cycles.
  function resolveBarrelExport(barrelPath, symbolName, visited = new Set()) {
    if (visited.has(barrelPath)) return null;
    visited.add(barrelPath);
    const reexports = reexportMap.get(barrelPath);
    if (!reexports) return null;

    for (const re of reexports) {
      // Named re-export: only follow when the symbol is listed.
      if (re.names.length > 0 && !re.wildcardReexport) {
        if (re.names.includes(symbolName)) {
          const targetSymbols = fileSymbols.get(re.source);
          if (targetSymbols) {
            const hasDef = targetSymbols.definitions.some(d => d.name === symbolName);
            if (hasDef) return re.source;
            const deeper = resolveBarrelExport(re.source, symbolName, visited);
            if (deeper) return deeper;
          }
          return re.source;
        }
        continue;
      }
      // Wildcard (or nameless) re-export: probe the target blindly.
      if (re.wildcardReexport || re.names.length === 0) {
        const targetSymbols = fileSymbols.get(re.source);
        if (targetSymbols) {
          const hasDef = targetSymbols.definitions.some(d => d.name === symbolName);
          if (hasDef) return re.source;
          const deeper = resolveBarrelExport(re.source, symbolName, visited);
          if (deeper) return deeper;
        }
      }
    }
    return null;
  }

  // Pre-load candidate target nodes once so the edge pass does map lookups
  // instead of one query per call (N+1 fix).
  const allNodes = db.prepare(
    `SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class','interface')`
  ).all();
  const nodesByName = new Map();
  const nodesByNameAndFile = new Map();
  const methodsByBareName = new Map();
  for (const node of allNodes) {
    if (!nodesByName.has(node.name)) nodesByName.set(node.name, []);
    nodesByName.get(node.name).push(node);

    const key = `${node.name}|${node.file}`;
    if (!nodesByNameAndFile.has(key)) nodesByNameAndFile.set(key, []);
    nodesByNameAndFile.get(key).push(node);

    // Index "Class.method" under the bare "method" name. Bug fix: the
    // original looked up nodesByName.get(call.name) — nodes whose FULL name
    // equals the bare call name — and then filtered for names ending in
    // `.${call.name}`, a condition that can never hold, so method matching
    // never fired.
    if (node.kind === 'method') {
      const dot = node.name.lastIndexOf('.');
      if (dot > 0) {
        const bare = node.name.slice(dot + 1);
        if (!methodsByBareName.has(bare)) methodsByBareName.set(bare, []);
        methodsByBareName.get(bare).push(node);
      }
    }
  }

  // Second pass: build edges.
  let edgeCount = 0;
  const buildEdges = db.transaction(() => {
    for (const [relPath, symbols] of fileSymbols) {
      const fileNodeRow = getNodeId.get(relPath, 'file', relPath, 0);
      if (!fileNodeRow) continue;
      const fileNodeId = fileNodeRow.id;

      // Import / re-export edges between file nodes.
      for (const imp of symbols.imports) {
        const resolvedPath = resolveImportPath(path.join(rootDir, relPath), imp.source, rootDir, aliases);
        const targetRow = getNodeId.get(resolvedPath, 'file', resolvedPath, 0);
        if (targetRow) {
          const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports';
          insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0);
          edgeCount++;

          // Importing through a barrel: also link to the defining file(s),
          // at slightly lower confidence.
          if (!imp.reexport && isBarrelFile(resolvedPath)) {
            const resolvedSources = new Set();
            for (const name of imp.names) {
              const cleanName = name.replace(/^\*\s+as\s+/, '');
              const actualSource = resolveBarrelExport(resolvedPath, cleanName);
              if (actualSource && actualSource !== resolvedPath && !resolvedSources.has(actualSource)) {
                resolvedSources.add(actualSource);
                const actualRow = getNodeId.get(actualSource, 'file', actualSource, 0);
                if (actualRow) {
                  insertEdge.run(fileNodeId, actualRow.id, edgeKind === 'imports-type' ? 'imports-type' : 'imports', 0.9, 0);
                  edgeCount++;
                }
              }
            }
          }
        }
      }

      // Imported name -> resolved source file, for call attribution.
      const importedNames = new Map();
      for (const imp of symbols.imports) {
        const resolvedPath = resolveImportPath(path.join(rootDir, relPath), imp.source, rootDir, aliases);
        for (const name of imp.names) {
          const cleanName = name.replace(/^\*\s+as\s+/, '');
          importedNames.set(cleanName, resolvedPath);
        }
      }

      // Call edges with confidence scoring.
      for (const call of symbols.calls) {
        // Attribute the call to the last definition starting at or before
        // the call line; fall back to the file node itself.
        let caller = null;
        for (const def of symbols.definitions) {
          if (def.line <= call.line) {
            const row = getNodeId.get(def.name, def.kind, relPath, def.line);
            if (row) caller = row;
          }
        }
        if (!caller) caller = fileNodeRow;

        const isDynamic = call.dynamic ? 1 : 0;
        const importedFrom = importedNames.get(call.name);
        let targets;

        if (importedFrom) {
          targets = nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || [];

          // The import may land on a barrel; chase it to the real source.
          if (targets.length === 0 && isBarrelFile(importedFrom)) {
            const actualSource = resolveBarrelExport(importedFrom, call.name);
            if (actualSource) {
              targets = nodesByNameAndFile.get(`${call.name}|${actualSource}`) || [];
            }
          }
        }
        if (!targets || targets.length === 0) {
          // Same-file definitions first.
          targets = nodesByNameAndFile.get(`${call.name}|${relPath}`) || [];
          if (targets.length === 0) {
            // Bare method-name match (call `save` -> node `User.save`),
            // then a global name match as the last resort.
            const methodCandidates = methodsByBareName.get(call.name) || [];
            targets = methodCandidates.length > 0
              ? methodCandidates
              : (nodesByName.get(call.name) || []);
          }
        }

        // Most plausible targets first.
        if (targets.length > 1) {
          targets.sort((a, b) =>
            computeConfidence(relPath, b.file, importedFrom) -
            computeConfidence(relPath, a.file, importedFrom)
          );
        }

        for (const t of targets) {
          if (t.id !== caller.id) {
            const confidence = computeConfidence(relPath, t.file, importedFrom);
            insertEdge.run(caller.id, t.id, 'calls', confidence, isDynamic);
            edgeCount++;
          }
        }
      }

      // Inheritance edges.
      for (const cls of symbols.classes) {
        if (cls.extends) {
          const sourceRow = db.prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ?').get(cls.name, 'class', relPath);
          const targetRows = (nodesByName.get(cls.extends) || []).filter(n => n.kind === 'class');
          if (sourceRow) {
            for (const t of targetRows) {
              insertEdge.run(sourceRow.id, t.id, 'extends', 1.0, 0);
              edgeCount++;
            }
          }
        }

        if (cls.implements) {
          const sourceRow = db.prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ?').get(cls.name, 'class', relPath);
          const targetRows = (nodesByName.get(cls.implements) || []).filter(n => n.kind === 'interface' || n.kind === 'class');
          if (sourceRow) {
            for (const t of targetRows) {
              insertEdge.run(sourceRow.id, t.id, 'implements', 1.0, 0);
              edgeCount++;
            }
          }
        }
      }
    }
  });
  buildEdges();

  const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
  console.log(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
  console.log(`Stored in ${dbPath}`);
  db.close();
}
547
+