@colbymchenry/codegraph 0.6.8 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +179 -476
- package/dist/bin/codegraph.d.ts +0 -5
- package/dist/bin/codegraph.d.ts.map +1 -1
- package/dist/bin/codegraph.js +217 -237
- package/dist/bin/codegraph.js.map +1 -1
- package/dist/bin/uninstall.d.ts +0 -1
- package/dist/bin/uninstall.d.ts.map +1 -1
- package/dist/bin/uninstall.js +3 -29
- package/dist/bin/uninstall.js.map +1 -1
- package/dist/context/index.d.ts +3 -5
- package/dist/context/index.d.ts.map +1 -1
- package/dist/context/index.js +497 -46
- package/dist/context/index.js.map +1 -1
- package/dist/db/migrations.d.ts +1 -1
- package/dist/db/migrations.d.ts.map +1 -1
- package/dist/db/migrations.js +10 -1
- package/dist/db/migrations.js.map +1 -1
- package/dist/db/queries.d.ts +53 -0
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +244 -14
- package/dist/db/queries.js.map +1 -1
- package/dist/db/schema.sql +1 -16
- package/dist/extraction/dfm-extractor.d.ts +31 -0
- package/dist/extraction/dfm-extractor.d.ts.map +1 -0
- package/dist/extraction/dfm-extractor.js +151 -0
- package/dist/extraction/dfm-extractor.js.map +1 -0
- package/dist/extraction/grammars.d.ts +9 -1
- package/dist/extraction/grammars.d.ts.map +1 -1
- package/dist/extraction/grammars.js +34 -2
- package/dist/extraction/grammars.js.map +1 -1
- package/dist/extraction/index.d.ts +7 -1
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +373 -22
- package/dist/extraction/index.js.map +1 -1
- package/dist/extraction/languages/c-cpp.d.ts +4 -0
- package/dist/extraction/languages/c-cpp.d.ts.map +1 -0
- package/dist/extraction/languages/c-cpp.js +126 -0
- package/dist/extraction/languages/c-cpp.js.map +1 -0
- package/dist/extraction/languages/csharp.d.ts +3 -0
- package/dist/extraction/languages/csharp.d.ts.map +1 -0
- package/dist/extraction/languages/csharp.js +72 -0
- package/dist/extraction/languages/csharp.js.map +1 -0
- package/dist/extraction/languages/dart.d.ts +3 -0
- package/dist/extraction/languages/dart.d.ts.map +1 -0
- package/dist/extraction/languages/dart.js +192 -0
- package/dist/extraction/languages/dart.js.map +1 -0
- package/dist/extraction/languages/go.d.ts +3 -0
- package/dist/extraction/languages/go.d.ts.map +1 -0
- package/dist/extraction/languages/go.js +58 -0
- package/dist/extraction/languages/go.js.map +1 -0
- package/dist/extraction/languages/index.d.ts +10 -0
- package/dist/extraction/languages/index.d.ts.map +1 -0
- package/dist/extraction/languages/index.js +43 -0
- package/dist/extraction/languages/index.js.map +1 -0
- package/dist/extraction/languages/java.d.ts +3 -0
- package/dist/extraction/languages/java.d.ts.map +1 -0
- package/dist/extraction/languages/java.js +64 -0
- package/dist/extraction/languages/java.js.map +1 -0
- package/dist/extraction/languages/javascript.d.ts +3 -0
- package/dist/extraction/languages/javascript.d.ts.map +1 -0
- package/dist/extraction/languages/javascript.js +90 -0
- package/dist/extraction/languages/javascript.js.map +1 -0
- package/dist/extraction/languages/kotlin.d.ts +3 -0
- package/dist/extraction/languages/kotlin.d.ts.map +1 -0
- package/dist/extraction/languages/kotlin.js +253 -0
- package/dist/extraction/languages/kotlin.js.map +1 -0
- package/dist/extraction/languages/pascal.d.ts +3 -0
- package/dist/extraction/languages/pascal.d.ts.map +1 -0
- package/dist/extraction/languages/pascal.js +66 -0
- package/dist/extraction/languages/pascal.js.map +1 -0
- package/dist/extraction/languages/php.d.ts +3 -0
- package/dist/extraction/languages/php.d.ts.map +1 -0
- package/dist/extraction/languages/php.js +107 -0
- package/dist/extraction/languages/php.js.map +1 -0
- package/dist/extraction/languages/python.d.ts +3 -0
- package/dist/extraction/languages/python.d.ts.map +1 -0
- package/dist/extraction/languages/python.js +56 -0
- package/dist/extraction/languages/python.js.map +1 -0
- package/dist/extraction/languages/ruby.d.ts +3 -0
- package/dist/extraction/languages/ruby.d.ts.map +1 -0
- package/dist/extraction/languages/ruby.js +114 -0
- package/dist/extraction/languages/ruby.js.map +1 -0
- package/dist/extraction/languages/rust.d.ts +3 -0
- package/dist/extraction/languages/rust.d.ts.map +1 -0
- package/dist/extraction/languages/rust.js +109 -0
- package/dist/extraction/languages/rust.js.map +1 -0
- package/dist/extraction/languages/swift.d.ts +3 -0
- package/dist/extraction/languages/swift.d.ts.map +1 -0
- package/dist/extraction/languages/swift.js +91 -0
- package/dist/extraction/languages/swift.js.map +1 -0
- package/dist/extraction/languages/typescript.d.ts +3 -0
- package/dist/extraction/languages/typescript.d.ts.map +1 -0
- package/dist/extraction/languages/typescript.js +129 -0
- package/dist/extraction/languages/typescript.js.map +1 -0
- package/dist/extraction/liquid-extractor.d.ts +52 -0
- package/dist/extraction/liquid-extractor.d.ts.map +1 -0
- package/dist/extraction/liquid-extractor.js +313 -0
- package/dist/extraction/liquid-extractor.js.map +1 -0
- package/dist/extraction/parse-worker.d.ts +8 -0
- package/dist/extraction/parse-worker.d.ts.map +1 -0
- package/dist/extraction/parse-worker.js +57 -0
- package/dist/extraction/parse-worker.js.map +1 -0
- package/dist/extraction/svelte-extractor.d.ts +47 -0
- package/dist/extraction/svelte-extractor.d.ts.map +1 -0
- package/dist/extraction/svelte-extractor.js +230 -0
- package/dist/extraction/svelte-extractor.js.map +1 -0
- package/dist/extraction/tree-sitter-helpers.d.ts +28 -0
- package/dist/extraction/tree-sitter-helpers.d.ts.map +1 -0
- package/dist/extraction/tree-sitter-helpers.js +103 -0
- package/dist/extraction/tree-sitter-helpers.js.map +1 -0
- package/dist/extraction/tree-sitter-types.d.ts +179 -0
- package/dist/extraction/tree-sitter-types.d.ts.map +1 -0
- package/dist/extraction/tree-sitter-types.js +10 -0
- package/dist/extraction/tree-sitter-types.js.map +1 -0
- package/dist/extraction/tree-sitter.d.ts +67 -125
- package/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.js +1052 -1855
- package/dist/extraction/tree-sitter.js.map +1 -1
- package/dist/graph/traversal.d.ts.map +1 -1
- package/dist/graph/traversal.js +20 -2
- package/dist/graph/traversal.js.map +1 -1
- package/dist/index.d.ts +29 -53
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +88 -114
- package/dist/index.js.map +1 -1
- package/dist/installer/claude-md-template.d.ts +1 -1
- package/dist/installer/claude-md-template.d.ts.map +1 -1
- package/dist/installer/claude-md-template.js +15 -15
- package/dist/installer/config-writer.d.ts +1 -10
- package/dist/installer/config-writer.d.ts.map +1 -1
- package/dist/installer/config-writer.js +0 -79
- package/dist/installer/config-writer.js.map +1 -1
- package/dist/installer/index.d.ts +3 -4
- package/dist/installer/index.d.ts.map +1 -1
- package/dist/installer/index.js +118 -116
- package/dist/installer/index.js.map +1 -1
- package/dist/mcp/index.d.ts +5 -0
- package/dist/mcp/index.d.ts.map +1 -1
- package/dist/mcp/index.js +25 -1
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/tools.d.ts +33 -0
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +405 -21
- package/dist/mcp/tools.js.map +1 -1
- package/dist/resolution/frameworks/csharp.js +29 -84
- package/dist/resolution/frameworks/csharp.js.map +1 -1
- package/dist/resolution/frameworks/express.js +44 -48
- package/dist/resolution/frameworks/express.js.map +1 -1
- package/dist/resolution/frameworks/go.js +34 -70
- package/dist/resolution/frameworks/go.js.map +1 -1
- package/dist/resolution/frameworks/java.js +29 -87
- package/dist/resolution/frameworks/java.js.map +1 -1
- package/dist/resolution/frameworks/laravel.js +6 -6
- package/dist/resolution/frameworks/laravel.js.map +1 -1
- package/dist/resolution/frameworks/python.js +33 -98
- package/dist/resolution/frameworks/python.js.map +1 -1
- package/dist/resolution/frameworks/react.js +53 -76
- package/dist/resolution/frameworks/react.js.map +1 -1
- package/dist/resolution/frameworks/ruby.js +12 -24
- package/dist/resolution/frameworks/ruby.js.map +1 -1
- package/dist/resolution/frameworks/rust.js +26 -66
- package/dist/resolution/frameworks/rust.js.map +1 -1
- package/dist/resolution/frameworks/svelte.js +11 -31
- package/dist/resolution/frameworks/svelte.js.map +1 -1
- package/dist/resolution/frameworks/swift.js +42 -160
- package/dist/resolution/frameworks/swift.js.map +1 -1
- package/dist/resolution/index.d.ts +19 -6
- package/dist/resolution/index.d.ts.map +1 -1
- package/dist/resolution/index.js +300 -141
- package/dist/resolution/index.js.map +1 -1
- package/dist/resolution/name-matcher.d.ts +5 -0
- package/dist/resolution/name-matcher.d.ts.map +1 -1
- package/dist/resolution/name-matcher.js +148 -8
- package/dist/resolution/name-matcher.js.map +1 -1
- package/dist/resolution/types.d.ts +1 -1
- package/dist/resolution/types.d.ts.map +1 -1
- package/dist/search/query-utils.d.ts +26 -1
- package/dist/search/query-utils.d.ts.map +1 -1
- package/dist/search/query-utils.js +209 -9
- package/dist/search/query-utils.js.map +1 -1
- package/dist/sync/index.d.ts +2 -4
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +4 -3
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/watcher.d.ts +81 -0
- package/dist/sync/watcher.d.ts.map +1 -0
- package/dist/sync/watcher.js +184 -0
- package/dist/sync/watcher.js.map +1 -0
- package/dist/types.d.ts +2 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/ui/shimmer-progress.d.ts +11 -0
- package/dist/ui/shimmer-progress.d.ts.map +1 -0
- package/dist/ui/shimmer-progress.js +90 -0
- package/dist/ui/shimmer-progress.js.map +1 -0
- package/dist/ui/shimmer-worker.d.ts +2 -0
- package/dist/ui/shimmer-worker.d.ts.map +1 -0
- package/dist/ui/shimmer-worker.js +112 -0
- package/dist/ui/shimmer-worker.js.map +1 -0
- package/dist/ui/types.d.ts +17 -0
- package/dist/ui/types.d.ts.map +1 -0
- package/dist/ui/types.js +3 -0
- package/dist/ui/types.js.map +1 -0
- package/dist/vectors/embedder.js +1 -1
- package/dist/vectors/embedder.js.map +1 -1
- package/package.json +7 -12
- package/scripts/postinstall.js +0 -68
package/dist/extraction/index.js
CHANGED
|
@@ -45,6 +45,7 @@ exports.loadAllGrammars = exports.loadGrammarsForLanguages = exports.initGrammar
|
|
|
45
45
|
exports.hashContent = hashContent;
|
|
46
46
|
exports.shouldIncludeFile = shouldIncludeFile;
|
|
47
47
|
exports.scanDirectory = scanDirectory;
|
|
48
|
+
exports.scanDirectoryAsync = scanDirectoryAsync;
|
|
48
49
|
const fs = __importStar(require("fs"));
|
|
49
50
|
const fsp = __importStar(require("fs/promises"));
|
|
50
51
|
const path = __importStar(require("path"));
|
|
@@ -60,6 +61,21 @@ const picomatch_1 = __importDefault(require("picomatch"));
|
|
|
60
61
|
* File reads are I/O-bound; batching overlaps I/O wait with CPU parse work.
|
|
61
62
|
*/
|
|
62
63
|
const FILE_IO_BATCH_SIZE = 10;
|
|
64
|
+
// PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
|
|
65
|
+
/**
|
|
66
|
+
* Maximum time (ms) to wait for a single file to parse in the worker thread.
|
|
67
|
+
* If tree-sitter hangs or WASM runs out of memory, this prevents the entire
|
|
68
|
+
* indexing run from freezing. The worker is restarted after a timeout.
|
|
69
|
+
*/
|
|
70
|
+
const PARSE_TIMEOUT_MS = 10_000;
|
|
71
|
+
/**
|
|
72
|
+
* Number of files to parse before recycling the worker thread.
|
|
73
|
+
* WASM linear memory can grow but NEVER shrink (WebAssembly spec limitation).
|
|
74
|
+
* The only way to reclaim tree-sitter's WASM heap is to destroy the entire
|
|
75
|
+
* V8 isolate by terminating the worker thread and spawning a fresh one.
|
|
76
|
+
* This interval balances memory usage against the cost of reloading grammars.
|
|
77
|
+
*/
|
|
78
|
+
const WORKER_RECYCLE_INTERVAL = 250;
|
|
63
79
|
/**
|
|
64
80
|
* Calculate SHA256 hash of file contents
|
|
65
81
|
*/
|
|
@@ -98,6 +114,21 @@ function shouldIncludeFile(filePath, config) {
|
|
|
98
114
|
*/
|
|
99
115
|
function getGitVisibleFiles(rootDir) {
|
|
100
116
|
try {
|
|
117
|
+
// Check if the project directory is gitignored by a parent repo.
|
|
118
|
+
// When rootDir lives inside a parent git repo that ignores it,
|
|
119
|
+
// `git ls-files` returns nothing — fall back to filesystem walk.
|
|
120
|
+
const gitRoot = (0, child_process_1.execFileSync)('git', ['rev-parse', '--show-toplevel'], { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] }).trim();
|
|
121
|
+
if (path.resolve(gitRoot) !== path.resolve(rootDir)) {
|
|
122
|
+
try {
|
|
123
|
+
// git check-ignore exits 0 if the path IS ignored, 1 if not
|
|
124
|
+
(0, child_process_1.execFileSync)('git', ['check-ignore', '-q', path.resolve(rootDir)], { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] });
|
|
125
|
+
// Directory is gitignored by parent repo — fall back to filesystem walk
|
|
126
|
+
return null;
|
|
127
|
+
}
|
|
128
|
+
catch {
|
|
129
|
+
// Not ignored — safe to use git ls-files
|
|
130
|
+
}
|
|
131
|
+
}
|
|
101
132
|
// -c = cached (tracked), -o = others (untracked), --exclude-standard = respect .gitignore
|
|
102
133
|
const output = (0, child_process_1.execFileSync)('git', ['ls-files', '-co', '--exclude-standard'], { cwd: rootDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] });
|
|
103
134
|
const files = new Set();
|
|
@@ -177,6 +208,30 @@ function scanDirectory(rootDir, config, onProgress) {
|
|
|
177
208
|
// Fallback: walk filesystem for non-git projects
|
|
178
209
|
return scanDirectoryWalk(rootDir, config, onProgress);
|
|
179
210
|
}
|
|
211
|
+
/**
|
|
212
|
+
* Async variant of scanDirectory that yields to the event loop periodically,
|
|
213
|
+
* allowing worker threads to receive and render progress messages.
|
|
214
|
+
*/
|
|
215
|
+
async function scanDirectoryAsync(rootDir, config, onProgress) {
|
|
216
|
+
const gitFiles = getGitVisibleFiles(rootDir);
|
|
217
|
+
if (gitFiles) {
|
|
218
|
+
const files = [];
|
|
219
|
+
let count = 0;
|
|
220
|
+
for (const filePath of gitFiles) {
|
|
221
|
+
if (shouldIncludeFile(filePath, config)) {
|
|
222
|
+
files.push(filePath);
|
|
223
|
+
count++;
|
|
224
|
+
onProgress?.(count, filePath);
|
|
225
|
+
// Yield every 100 files so worker threads can render progress
|
|
226
|
+
if (count % 100 === 0) {
|
|
227
|
+
await new Promise(r => setImmediate(r));
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
return files;
|
|
232
|
+
}
|
|
233
|
+
return scanDirectoryWalk(rootDir, config, onProgress);
|
|
234
|
+
}
|
|
180
235
|
/**
|
|
181
236
|
* Filesystem walk fallback for non-git projects.
|
|
182
237
|
*/
|
|
@@ -285,21 +340,25 @@ class ExtractionOrchestrator {
|
|
|
285
340
|
/**
|
|
286
341
|
* Index all files in the project
|
|
287
342
|
*/
|
|
288
|
-
async indexAll(onProgress, signal) {
|
|
343
|
+
async indexAll(onProgress, signal, verbose) {
|
|
289
344
|
await (0, grammars_1.initGrammars)();
|
|
290
345
|
const startTime = Date.now();
|
|
291
346
|
const errors = [];
|
|
292
347
|
let filesIndexed = 0;
|
|
293
348
|
let filesSkipped = 0;
|
|
349
|
+
let filesErrored = 0;
|
|
294
350
|
let totalNodes = 0;
|
|
295
351
|
let totalEdges = 0;
|
|
352
|
+
const log = verbose
|
|
353
|
+
? (msg) => { console.log(`[worker] ${msg}`); }
|
|
354
|
+
: (_msg) => { };
|
|
296
355
|
// Phase 1: Scan for files
|
|
297
356
|
onProgress?.({
|
|
298
357
|
phase: 'scanning',
|
|
299
358
|
current: 0,
|
|
300
359
|
total: 0,
|
|
301
360
|
});
|
|
302
|
-
const files =
|
|
361
|
+
const files = await scanDirectoryAsync(this.rootDir, this.config, (current, file) => {
|
|
303
362
|
onProgress?.({
|
|
304
363
|
phase: 'scanning',
|
|
305
364
|
current,
|
|
@@ -312,26 +371,164 @@ class ExtractionOrchestrator {
|
|
|
312
371
|
success: false,
|
|
313
372
|
filesIndexed: 0,
|
|
314
373
|
filesSkipped: 0,
|
|
374
|
+
filesErrored: 0,
|
|
315
375
|
nodesCreated: 0,
|
|
316
376
|
edgesCreated: 0,
|
|
317
377
|
errors: [{ message: 'Aborted', severity: 'error' }],
|
|
318
378
|
durationMs: Date.now() - startTime,
|
|
319
379
|
};
|
|
320
380
|
}
|
|
321
|
-
//
|
|
322
|
-
// This avoids compiling all 16+ WASM grammar modules upfront, which can cause
|
|
323
|
-
// V8 WASM Zone OOM on large codebases (see issue #54).
|
|
324
|
-
const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
|
|
325
|
-
await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
|
|
326
|
-
// Phase 2: Parse files (read in parallel batches, parse/store sequentially)
|
|
381
|
+
// Phase 2: Parse files in a worker thread (keeps main thread unblocked for UI)
|
|
327
382
|
const total = files.length;
|
|
328
383
|
let processed = 0;
|
|
384
|
+
// Emit parsing phase immediately so the progress bar appears during worker setup.
|
|
385
|
+
// The yield lets the shimmer worker flush the phase transition to stdout before
|
|
386
|
+
// the main thread starts synchronous grammar detection work.
|
|
387
|
+
onProgress?.({
|
|
388
|
+
phase: 'parsing',
|
|
389
|
+
current: 0,
|
|
390
|
+
total,
|
|
391
|
+
});
|
|
392
|
+
await new Promise(resolve => setImmediate(resolve));
|
|
393
|
+
// Detect needed languages and load grammars in the parse worker
|
|
394
|
+
const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
|
|
395
|
+
// .h files default to 'c' but may be C++ — ensure cpp grammar is loaded when c is needed
|
|
396
|
+
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
397
|
+
neededLanguages.push('cpp');
|
|
398
|
+
}
|
|
399
|
+
// Try to use a worker thread for parsing (keeps main thread unblocked for UI).
|
|
400
|
+
// Falls back to in-process parsing if the compiled worker is unavailable (e.g. tests).
|
|
401
|
+
const parseWorkerPath = path.join(__dirname, 'parse-worker.js');
|
|
402
|
+
const useWorker = fs.existsSync(parseWorkerPath);
|
|
403
|
+
let WorkerClass = null;
|
|
404
|
+
if (useWorker) {
|
|
405
|
+
const { Worker } = await Promise.resolve().then(() => __importStar(require('worker_threads')));
|
|
406
|
+
WorkerClass = Worker;
|
|
407
|
+
}
|
|
408
|
+
else {
|
|
409
|
+
// In-process fallback: load grammars locally
|
|
410
|
+
await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
|
|
411
|
+
}
|
|
412
|
+
// --- Worker lifecycle management ---
|
|
413
|
+
// The worker can crash (OOM in WASM) or hang on pathological files.
|
|
414
|
+
// We track pending parse promises and handle both cases:
|
|
415
|
+
// - Timeout: terminate + restart the worker, reject the timed-out request
|
|
416
|
+
// - Crash: reject all pending promises, restart for remaining files
|
|
417
|
+
let parseWorker = null;
|
|
418
|
+
let nextId = 0;
|
|
419
|
+
let workerParseCount = 0;
|
|
420
|
+
const pendingParses = new Map();
|
|
421
|
+
function rejectAllPending(reason) {
|
|
422
|
+
for (const [id, pending] of pendingParses) {
|
|
423
|
+
clearTimeout(pending.timer);
|
|
424
|
+
pendingParses.delete(id);
|
|
425
|
+
pending.reject(new Error(reason));
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
function attachWorkerHandlers(w) {
|
|
429
|
+
w.on('message', (msg) => {
|
|
430
|
+
if (msg.type === 'parse-result' && msg.id !== undefined) {
|
|
431
|
+
const pending = pendingParses.get(msg.id);
|
|
432
|
+
if (pending) {
|
|
433
|
+
clearTimeout(pending.timer);
|
|
434
|
+
pendingParses.delete(msg.id);
|
|
435
|
+
pending.resolve(msg.result);
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
});
|
|
439
|
+
w.on('error', (err) => {
|
|
440
|
+
(0, errors_1.logWarn)('Parse worker error', { error: err.message });
|
|
441
|
+
rejectAllPending(`Worker error: ${err.message}`);
|
|
442
|
+
});
|
|
443
|
+
w.on('exit', (code) => {
|
|
444
|
+
if (code !== 0 && pendingParses.size > 0) {
|
|
445
|
+
(0, errors_1.logWarn)('Parse worker exited unexpectedly', { code });
|
|
446
|
+
rejectAllPending(`Worker exited with code ${code}`);
|
|
447
|
+
}
|
|
448
|
+
// Clear reference so we know to respawn, reset count so
|
|
449
|
+
// the fresh worker gets a full cycle before recycling.
|
|
450
|
+
if (parseWorker === w) {
|
|
451
|
+
parseWorker = null;
|
|
452
|
+
workerParseCount = 0;
|
|
453
|
+
}
|
|
454
|
+
});
|
|
455
|
+
}
|
|
456
|
+
async function ensureWorker() {
|
|
457
|
+
if (parseWorker)
|
|
458
|
+
return parseWorker;
|
|
459
|
+
log('Spawning new parse worker...');
|
|
460
|
+
parseWorker = new WorkerClass(parseWorkerPath);
|
|
461
|
+
attachWorkerHandlers(parseWorker);
|
|
462
|
+
// Load grammars in the new worker
|
|
463
|
+
await new Promise((resolve, reject) => {
|
|
464
|
+
parseWorker.once('message', (msg) => {
|
|
465
|
+
if (msg.type === 'grammars-loaded')
|
|
466
|
+
resolve();
|
|
467
|
+
else
|
|
468
|
+
reject(new Error(`Unexpected message: ${msg.type}`));
|
|
469
|
+
});
|
|
470
|
+
parseWorker.postMessage({ type: 'load-grammars', languages: neededLanguages });
|
|
471
|
+
});
|
|
472
|
+
return parseWorker;
|
|
473
|
+
}
|
|
474
|
+
if (WorkerClass) {
|
|
475
|
+
await ensureWorker();
|
|
476
|
+
}
|
|
477
|
+
/**
|
|
478
|
+
* Recycle the worker thread to reclaim WASM memory.
|
|
479
|
+
* Terminates the current worker and clears the reference so
|
|
480
|
+
* ensureWorker() will spawn a fresh one on the next call.
|
|
481
|
+
*/
|
|
482
|
+
function recycleWorker() {
|
|
483
|
+
if (!parseWorker)
|
|
484
|
+
return;
|
|
485
|
+
log(`Recycling worker after ${workerParseCount} parses (heap: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB RSS)`);
|
|
486
|
+
const w = parseWorker;
|
|
487
|
+
parseWorker = null;
|
|
488
|
+
workerParseCount = 0;
|
|
489
|
+
// Fire-and-forget: worker.terminate() can hang if WASM is stuck
|
|
490
|
+
w.terminate().catch(() => { });
|
|
491
|
+
}
|
|
492
|
+
async function requestParse(filePath, content) {
|
|
493
|
+
if (!WorkerClass) {
|
|
494
|
+
// In-process fallback
|
|
495
|
+
return (0, tree_sitter_1.extractFromSource)(filePath, content, (0, grammars_1.detectLanguage)(filePath, content));
|
|
496
|
+
}
|
|
497
|
+
// Recycle the worker before the next parse if we've hit the threshold.
|
|
498
|
+
// This destroys the WASM linear memory (which can grow but never shrink)
|
|
499
|
+
// and starts a fresh worker with a clean heap.
|
|
500
|
+
if (workerParseCount >= WORKER_RECYCLE_INTERVAL) {
|
|
501
|
+
await recycleWorker();
|
|
502
|
+
}
|
|
503
|
+
const worker = await ensureWorker();
|
|
504
|
+
const id = nextId++;
|
|
505
|
+
workerParseCount++;
|
|
506
|
+
// Scale timeout for large files: base 10s + 10s per 100KB
|
|
507
|
+
const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
|
|
508
|
+
return new Promise((resolve, reject) => {
|
|
509
|
+
const timer = setTimeout(() => {
|
|
510
|
+
pendingParses.delete(id);
|
|
511
|
+
log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
|
|
512
|
+
// Reject FIRST — worker.terminate() can hang if WASM is stuck
|
|
513
|
+
parseWorker = null;
|
|
514
|
+
workerParseCount = 0;
|
|
515
|
+
reject(new Error(`Parse timed out after ${timeoutMs}ms`));
|
|
516
|
+
// Fire-and-forget: kill the stuck worker in the background
|
|
517
|
+
worker.terminate().catch(() => { });
|
|
518
|
+
}, timeoutMs);
|
|
519
|
+
pendingParses.set(id, { resolve, reject, timer });
|
|
520
|
+
worker.postMessage({ type: 'parse', id, filePath, content });
|
|
521
|
+
});
|
|
522
|
+
}
|
|
329
523
|
for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
|
|
330
524
|
if (signal?.aborted) {
|
|
525
|
+
if (parseWorker)
|
|
526
|
+
parseWorker.terminate().catch(() => { });
|
|
331
527
|
return {
|
|
332
528
|
success: false,
|
|
333
529
|
filesIndexed,
|
|
334
530
|
filesSkipped,
|
|
531
|
+
filesErrored,
|
|
335
532
|
nodesCreated: totalNodes,
|
|
336
533
|
edgesCreated: totalEdges,
|
|
337
534
|
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
@@ -355,20 +552,23 @@ class ExtractionOrchestrator {
|
|
|
355
552
|
return { filePath: fp, content: null, stats: null, error: err };
|
|
356
553
|
}
|
|
357
554
|
}));
|
|
358
|
-
//
|
|
555
|
+
// Send to worker for parsing, store results on main thread
|
|
359
556
|
for (const { filePath, content, stats, error } of fileContents) {
|
|
360
557
|
if (signal?.aborted) {
|
|
558
|
+
if (parseWorker)
|
|
559
|
+
parseWorker.terminate().catch(() => { });
|
|
361
560
|
return {
|
|
362
561
|
success: false,
|
|
363
562
|
filesIndexed,
|
|
364
563
|
filesSkipped,
|
|
564
|
+
filesErrored,
|
|
365
565
|
nodesCreated: totalNodes,
|
|
366
566
|
edgesCreated: totalEdges,
|
|
367
567
|
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
368
568
|
durationMs: Date.now() - startTime,
|
|
369
569
|
};
|
|
370
570
|
}
|
|
371
|
-
|
|
571
|
+
// Report progress before parsing (show current file being worked on)
|
|
372
572
|
onProgress?.({
|
|
373
573
|
phase: 'parsing',
|
|
374
574
|
current: processed,
|
|
@@ -376,14 +576,44 @@ class ExtractionOrchestrator {
|
|
|
376
576
|
currentFile: filePath,
|
|
377
577
|
});
|
|
378
578
|
if (error || content === null || stats === null) {
|
|
579
|
+
processed++;
|
|
580
|
+
filesErrored++;
|
|
379
581
|
errors.push({
|
|
380
582
|
message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
|
|
583
|
+
filePath,
|
|
584
|
+
severity: 'error',
|
|
585
|
+
code: 'read_error',
|
|
586
|
+
});
|
|
587
|
+
continue;
|
|
588
|
+
}
|
|
589
|
+
// Parse in worker thread (main thread stays unblocked).
|
|
590
|
+
// Wrapped in try/catch to handle worker timeouts and crashes gracefully.
|
|
591
|
+
let result;
|
|
592
|
+
try {
|
|
593
|
+
result = await requestParse(filePath, content);
|
|
594
|
+
}
|
|
595
|
+
catch (parseErr) {
|
|
596
|
+
processed++;
|
|
597
|
+
filesErrored++;
|
|
598
|
+
errors.push({
|
|
599
|
+
message: parseErr instanceof Error ? parseErr.message : String(parseErr),
|
|
600
|
+
filePath,
|
|
381
601
|
severity: 'error',
|
|
602
|
+
code: 'parse_error',
|
|
382
603
|
});
|
|
383
604
|
continue;
|
|
384
605
|
}
|
|
385
|
-
|
|
606
|
+
processed++;
|
|
607
|
+
// Store in database on main thread (SQLite is not thread-safe)
|
|
608
|
+
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
609
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content);
|
|
610
|
+
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
611
|
+
}
|
|
386
612
|
if (result.errors.length > 0) {
|
|
613
|
+
for (const err of result.errors) {
|
|
614
|
+
if (!err.filePath)
|
|
615
|
+
err.filePath = filePath;
|
|
616
|
+
}
|
|
387
617
|
errors.push(...result.errors);
|
|
388
618
|
}
|
|
389
619
|
if (result.nodes.length > 0) {
|
|
@@ -391,22 +621,130 @@ class ExtractionOrchestrator {
|
|
|
391
621
|
totalNodes += result.nodes.length;
|
|
392
622
|
totalEdges += result.edges.length;
|
|
393
623
|
}
|
|
394
|
-
else if (result.errors.
|
|
624
|
+
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
625
|
+
filesErrored++;
|
|
626
|
+
}
|
|
627
|
+
else {
|
|
395
628
|
filesSkipped++;
|
|
396
629
|
}
|
|
397
630
|
}
|
|
398
631
|
}
|
|
399
|
-
//
|
|
632
|
+
// Report 100% so the progress bar doesn't hang at 99%
|
|
400
633
|
onProgress?.({
|
|
401
|
-
phase: '
|
|
402
|
-
current:
|
|
403
|
-
total
|
|
634
|
+
phase: 'parsing',
|
|
635
|
+
current: total,
|
|
636
|
+
total,
|
|
404
637
|
});
|
|
405
|
-
//
|
|
638
|
+
// Yield so the shimmer worker's buffered stdout writes can flush.
|
|
639
|
+
// Worker thread stdout is proxied through the main thread's event loop,
|
|
640
|
+
// so synchronous work here blocks the animation from rendering.
|
|
641
|
+
await new Promise(resolve => setImmediate(resolve));
|
|
642
|
+
// Retry pass: files that failed due to WASM memory corruption may succeed
|
|
643
|
+
// on a fresh worker with a clean heap. Recycle before each attempt so
|
|
644
|
+
// every file gets the absolute cleanest WASM state possible.
|
|
645
|
+
const retryableErrors = errors.filter((e) => e.code === 'parse_error' && e.filePath &&
|
|
646
|
+
(e.message.includes('Worker exited') || e.message.includes('memory access out of bounds')));
|
|
647
|
+
if (retryableErrors.length > 0 && WorkerClass) {
|
|
648
|
+
log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
|
|
649
|
+
const stillFailing = [];
|
|
650
|
+
for (const errEntry of retryableErrors) {
|
|
651
|
+
const filePath = errEntry.filePath;
|
|
652
|
+
if (signal?.aborted)
|
|
653
|
+
break;
|
|
654
|
+
// Fresh worker for every retry — maximum WASM headroom
|
|
655
|
+
recycleWorker();
|
|
656
|
+
let content;
|
|
657
|
+
try {
|
|
658
|
+
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
659
|
+
if (!fullPath)
|
|
660
|
+
continue;
|
|
661
|
+
content = await fsp.readFile(fullPath, 'utf-8');
|
|
662
|
+
}
|
|
663
|
+
catch {
|
|
664
|
+
continue;
|
|
665
|
+
}
|
|
666
|
+
let result;
|
|
667
|
+
try {
|
|
668
|
+
result = await requestParse(filePath, content);
|
|
669
|
+
}
|
|
670
|
+
catch {
|
|
671
|
+
stillFailing.push(errEntry);
|
|
672
|
+
continue;
|
|
673
|
+
}
|
|
674
|
+
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
675
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content);
|
|
676
|
+
const stats = await fsp.stat(path.join(this.rootDir, filePath));
|
|
677
|
+
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
678
|
+
const idx = errors.indexOf(errEntry);
|
|
679
|
+
if (idx >= 0)
|
|
680
|
+
errors.splice(idx, 1);
|
|
681
|
+
filesErrored--;
|
|
682
|
+
filesIndexed++;
|
|
683
|
+
totalNodes += result.nodes.length;
|
|
684
|
+
totalEdges += result.edges.length;
|
|
685
|
+
log(`Retry OK: ${filePath} (${result.nodes.length} nodes)`);
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
// Last resort: for files that still crash on a clean worker, strip
|
|
689
|
+
// comment-only lines to reduce WASM memory pressure. Many compiler
|
|
690
|
+
// test files are 90%+ comments (CHECK directives) that don't contribute
|
|
691
|
+
// code nodes but consume parser memory.
|
|
692
|
+
if (stillFailing.length > 0) {
|
|
693
|
+
log(`${stillFailing.length} files still failing — retrying with comments stripped...`);
|
|
694
|
+
for (const errEntry of stillFailing) {
|
|
695
|
+
const filePath = errEntry.filePath;
|
|
696
|
+
if (signal?.aborted)
|
|
697
|
+
break;
|
|
698
|
+
recycleWorker();
|
|
699
|
+
let fullContent;
|
|
700
|
+
try {
|
|
701
|
+
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
702
|
+
if (!fullPath)
|
|
703
|
+
continue;
|
|
704
|
+
fullContent = await fsp.readFile(fullPath, 'utf-8');
|
|
705
|
+
}
|
|
706
|
+
catch {
|
|
707
|
+
continue;
|
|
708
|
+
}
|
|
709
|
+
// Strip lines that are entirely comments (preserving line numbers
|
|
710
|
+
// by replacing with empty lines so node positions stay correct)
|
|
711
|
+
const stripped = fullContent
|
|
712
|
+
.split('\n')
|
|
713
|
+
.map(line => /^\s*\/\//.test(line) ? '' : line)
|
|
714
|
+
.join('\n');
|
|
715
|
+
let result;
|
|
716
|
+
try {
|
|
717
|
+
result = await requestParse(filePath, stripped);
|
|
718
|
+
}
|
|
719
|
+
catch {
|
|
720
|
+
continue;
|
|
721
|
+
}
|
|
722
|
+
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
723
|
+
const language = (0, grammars_1.detectLanguage)(filePath, fullContent);
|
|
724
|
+
const stats = await fsp.stat(path.join(this.rootDir, filePath));
|
|
725
|
+
this.storeExtractionResult(filePath, fullContent, language, stats, result);
|
|
726
|
+
const idx = errors.indexOf(errEntry);
|
|
727
|
+
if (idx >= 0)
|
|
728
|
+
errors.splice(idx, 1);
|
|
729
|
+
filesErrored--;
|
|
730
|
+
filesIndexed++;
|
|
731
|
+
totalNodes += result.nodes.length;
|
|
732
|
+
totalEdges += result.edges.length;
|
|
733
|
+
log(`Retry (stripped) OK: ${filePath} (${result.nodes.length} nodes)`);
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
// Shut down parse worker and clear any pending timers
|
|
739
|
+
rejectAllPending('Indexing complete');
|
|
740
|
+
if (parseWorker) {
|
|
741
|
+
parseWorker.terminate().catch(() => { });
|
|
742
|
+
}
|
|
406
743
|
return {
|
|
407
|
-
success: errors.filter((e) => e.severity === 'error').length === 0,
|
|
744
|
+
success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
|
|
408
745
|
filesIndexed,
|
|
409
746
|
filesSkipped,
|
|
747
|
+
filesErrored,
|
|
410
748
|
nodesCreated: totalNodes,
|
|
411
749
|
edgesCreated: totalEdges,
|
|
412
750
|
errors,
|
|
@@ -421,6 +759,7 @@ class ExtractionOrchestrator {
|
|
|
421
759
|
const errors = [];
|
|
422
760
|
let filesIndexed = 0;
|
|
423
761
|
let filesSkipped = 0;
|
|
762
|
+
let filesErrored = 0;
|
|
424
763
|
let totalNodes = 0;
|
|
425
764
|
let totalEdges = 0;
|
|
426
765
|
for (const filePath of filePaths) {
|
|
@@ -433,14 +772,18 @@ class ExtractionOrchestrator {
|
|
|
433
772
|
totalNodes += result.nodes.length;
|
|
434
773
|
totalEdges += result.edges.length;
|
|
435
774
|
}
|
|
775
|
+
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
776
|
+
filesErrored++;
|
|
777
|
+
}
|
|
436
778
|
else {
|
|
437
779
|
filesSkipped++;
|
|
438
780
|
}
|
|
439
781
|
}
|
|
440
782
|
return {
|
|
441
|
-
success: errors.filter((e) => e.severity === 'error').length === 0,
|
|
783
|
+
success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
|
|
442
784
|
filesIndexed,
|
|
443
785
|
filesSkipped,
|
|
786
|
+
filesErrored,
|
|
444
787
|
nodesCreated: totalNodes,
|
|
445
788
|
edgesCreated: totalEdges,
|
|
446
789
|
errors,
|
|
@@ -457,7 +800,7 @@ class ExtractionOrchestrator {
|
|
|
457
800
|
nodes: [],
|
|
458
801
|
edges: [],
|
|
459
802
|
unresolvedReferences: [],
|
|
460
|
-
errors: [{ message: `Path traversal blocked: ${relativePath}`, severity: 'error' }],
|
|
803
|
+
errors: [{ message: `Path traversal blocked: ${relativePath}`, filePath: relativePath, severity: 'error', code: 'path_traversal' }],
|
|
461
804
|
durationMs: 0,
|
|
462
805
|
};
|
|
463
806
|
}
|
|
@@ -476,7 +819,9 @@ class ExtractionOrchestrator {
|
|
|
476
819
|
errors: [
|
|
477
820
|
{
|
|
478
821
|
message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
|
|
822
|
+
filePath: relativePath,
|
|
479
823
|
severity: 'error',
|
|
824
|
+
code: 'read_error',
|
|
480
825
|
},
|
|
481
826
|
],
|
|
482
827
|
durationMs: 0,
|
|
@@ -497,7 +842,7 @@ class ExtractionOrchestrator {
|
|
|
497
842
|
nodes: [],
|
|
498
843
|
edges: [],
|
|
499
844
|
unresolvedReferences: [],
|
|
500
|
-
errors: [{ message: 'Path traversal blocked', severity: 'error' }],
|
|
845
|
+
errors: [{ message: 'Path traversal blocked', filePath: relativePath, severity: 'error', code: 'path_traversal' }],
|
|
501
846
|
durationMs: 0,
|
|
502
847
|
};
|
|
503
848
|
}
|
|
@@ -510,14 +855,16 @@ class ExtractionOrchestrator {
|
|
|
510
855
|
errors: [
|
|
511
856
|
{
|
|
512
857
|
message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
|
|
858
|
+
filePath: relativePath,
|
|
513
859
|
severity: 'warning',
|
|
860
|
+
code: 'size_exceeded',
|
|
514
861
|
},
|
|
515
862
|
],
|
|
516
863
|
durationMs: 0,
|
|
517
864
|
};
|
|
518
865
|
}
|
|
519
866
|
// Detect language
|
|
520
|
-
const language = (0, grammars_1.detectLanguage)(relativePath);
|
|
867
|
+
const language = (0, grammars_1.detectLanguage)(relativePath, content);
|
|
521
868
|
if (!(0, grammars_1.isLanguageSupported)(language)) {
|
|
522
869
|
return {
|
|
523
870
|
nodes: [],
|
|
@@ -700,6 +1047,10 @@ class ExtractionOrchestrator {
|
|
|
700
1047
|
// Load only grammars needed for changed files
|
|
701
1048
|
if (filesToIndex.length > 0) {
|
|
702
1049
|
const neededLanguages = [...new Set(filesToIndex.map((f) => (0, grammars_1.detectLanguage)(f)))];
|
|
1050
|
+
// .h files default to 'c' but may be C++ — ensure cpp grammar is loaded
|
|
1051
|
+
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
1052
|
+
neededLanguages.push('cpp');
|
|
1053
|
+
}
|
|
703
1054
|
await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
|
|
704
1055
|
}
|
|
705
1056
|
// Index changed files
|