@colbymchenry/codegraph-darwin-x64 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/dist/bin/codegraph.js +79 -52
- package/lib/dist/bin/codegraph.js.map +1 -1
- package/lib/dist/bin/command-supervision.d.ts +12 -0
- package/lib/dist/bin/command-supervision.d.ts.map +1 -0
- package/lib/dist/bin/command-supervision.js +76 -0
- package/lib/dist/bin/command-supervision.js.map +1 -0
- package/lib/dist/db/queries.d.ts.map +1 -1
- package/lib/dist/db/queries.js +10 -2
- package/lib/dist/db/queries.js.map +1 -1
- package/lib/dist/directory.d.ts +32 -0
- package/lib/dist/directory.d.ts.map +1 -1
- package/lib/dist/directory.js +83 -0
- package/lib/dist/directory.js.map +1 -1
- package/lib/dist/extraction/index.d.ts +13 -1
- package/lib/dist/extraction/index.d.ts.map +1 -1
- package/lib/dist/extraction/index.js +219 -213
- package/lib/dist/extraction/index.js.map +1 -1
- package/lib/dist/extraction/parse-pool.d.ts +126 -0
- package/lib/dist/extraction/parse-pool.d.ts.map +1 -0
- package/lib/dist/extraction/parse-pool.js +319 -0
- package/lib/dist/extraction/parse-pool.js.map +1 -0
- package/lib/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/lib/dist/extraction/tree-sitter.js +48 -19
- package/lib/dist/extraction/tree-sitter.js.map +1 -1
- package/lib/dist/mcp/daemon-paths.d.ts +30 -3
- package/lib/dist/mcp/daemon-paths.d.ts.map +1 -1
- package/lib/dist/mcp/daemon-paths.js +50 -10
- package/lib/dist/mcp/daemon-paths.js.map +1 -1
- package/lib/dist/mcp/daemon-registry.d.ts.map +1 -1
- package/lib/dist/mcp/daemon-registry.js +7 -3
- package/lib/dist/mcp/daemon-registry.js.map +1 -1
- package/lib/dist/mcp/daemon.d.ts +38 -0
- package/lib/dist/mcp/daemon.d.ts.map +1 -1
- package/lib/dist/mcp/daemon.js +164 -31
- package/lib/dist/mcp/daemon.js.map +1 -1
- package/lib/dist/mcp/engine.d.ts +17 -0
- package/lib/dist/mcp/engine.d.ts.map +1 -1
- package/lib/dist/mcp/engine.js +73 -1
- package/lib/dist/mcp/engine.js.map +1 -1
- package/lib/dist/mcp/index.d.ts.map +1 -1
- package/lib/dist/mcp/index.js +25 -43
- package/lib/dist/mcp/index.js.map +1 -1
- package/lib/dist/mcp/ppid-watchdog.d.ts +18 -0
- package/lib/dist/mcp/ppid-watchdog.d.ts.map +1 -1
- package/lib/dist/mcp/ppid-watchdog.js +37 -0
- package/lib/dist/mcp/ppid-watchdog.js.map +1 -1
- package/lib/dist/mcp/query-pool.d.ts +94 -0
- package/lib/dist/mcp/query-pool.d.ts.map +1 -0
- package/lib/dist/mcp/query-pool.js +297 -0
- package/lib/dist/mcp/query-pool.js.map +1 -0
- package/lib/dist/mcp/query-worker.d.ts +24 -0
- package/lib/dist/mcp/query-worker.d.ts.map +1 -0
- package/lib/dist/mcp/query-worker.js +87 -0
- package/lib/dist/mcp/query-worker.js.map +1 -0
- package/lib/dist/mcp/tools.d.ts +57 -0
- package/lib/dist/mcp/tools.d.ts.map +1 -1
- package/lib/dist/mcp/tools.js +147 -37
- package/lib/dist/mcp/tools.js.map +1 -1
- package/lib/dist/project-config.d.ts +20 -0
- package/lib/dist/project-config.d.ts.map +1 -1
- package/lib/dist/project-config.js +42 -2
- package/lib/dist/project-config.js.map +1 -1
- package/lib/dist/resolution/c-fnptr-synthesizer.d.ts +0 -28
- package/lib/dist/resolution/c-fnptr-synthesizer.d.ts.map +1 -1
- package/lib/dist/resolution/c-fnptr-synthesizer.js +765 -79
- package/lib/dist/resolution/c-fnptr-synthesizer.js.map +1 -1
- package/lib/dist/resolution/name-matcher.d.ts.map +1 -1
- package/lib/dist/resolution/name-matcher.js +44 -0
- package/lib/dist/resolution/name-matcher.js.map +1 -1
- package/lib/node_modules/.package-lock.json +1 -1
- package/lib/package.json +1 -1
- package/package.json +1 -1
|
@@ -51,9 +51,11 @@ exports.scanDirectoryAsync = scanDirectoryAsync;
|
|
|
51
51
|
const fs = __importStar(require("fs"));
|
|
52
52
|
const fsp = __importStar(require("fs/promises"));
|
|
53
53
|
const path = __importStar(require("path"));
|
|
54
|
+
const os = __importStar(require("os"));
|
|
54
55
|
const crypto = __importStar(require("crypto"));
|
|
55
56
|
const child_process_1 = require("child_process");
|
|
56
57
|
const tree_sitter_1 = require("./tree-sitter");
|
|
58
|
+
const parse_pool_1 = require("./parse-pool");
|
|
57
59
|
const grammars_1 = require("./grammars");
|
|
58
60
|
const project_config_1 = require("../project-config");
|
|
59
61
|
const directory_1 = require("../directory");
|
|
@@ -265,6 +267,19 @@ function loadIncludeIgnoredMatcher(rootDir) {
|
|
|
265
267
|
const patterns = (0, project_config_1.loadIncludeIgnoredPatterns)(rootDir);
|
|
266
268
|
return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
|
|
267
269
|
}
|
|
270
|
+
/**
|
|
271
|
+
* Matcher for the project's `codegraph.json` `exclude` patterns — paths to keep
|
|
272
|
+
* OUT of the index even when git-tracked, which `.gitignore` cannot do (#999).
|
|
273
|
+
* The escape hatch for a committed vendor/theme/SDK directory. Returns `null`
|
|
274
|
+
* when nothing is excluded (the zero-config default → no overhead). Matched
|
|
275
|
+
* against project-root-relative paths, so it applies uniformly across the whole
|
|
276
|
+
* workspace, including inside embedded repos (excluding `static/` means gone
|
|
277
|
+
* everywhere). Built once per scan/sync/scope operation from the scan root.
|
|
278
|
+
*/
|
|
279
|
+
function loadExcludeMatcher(rootDir) {
|
|
280
|
+
const patterns = (0, project_config_1.loadExcludePatterns)(rootDir);
|
|
281
|
+
return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
|
|
282
|
+
}
|
|
268
283
|
/**
|
|
269
284
|
* `git ls-files --directory` collapses a wholly-untracked/ignored directory into
|
|
270
285
|
* one entry — and when the command's own cwd is such a directory (the indexed
|
|
@@ -404,14 +419,27 @@ function findNestedGitRepos(absDir, relPrefix) {
|
|
|
404
419
|
*/
|
|
405
420
|
class ScopeIgnore {
|
|
406
421
|
rootMatcher;
|
|
422
|
+
exclude;
|
|
407
423
|
embedded;
|
|
408
424
|
defaults = defaultsOnlyIgnore();
|
|
409
|
-
constructor(rootMatcher, embedded
|
|
425
|
+
constructor(rootMatcher, embedded,
|
|
426
|
+
/**
|
|
427
|
+
* Project `codegraph.json` `exclude` patterns (#999), matched against the
|
|
428
|
+
* full root-relative path. Wins over everything else — an explicit user
|
|
429
|
+
* exclude applies even to tracked files and even inside embedded repos.
|
|
430
|
+
*/
|
|
431
|
+
exclude = null) {
|
|
410
432
|
this.rootMatcher = rootMatcher;
|
|
433
|
+
this.exclude = exclude;
|
|
411
434
|
// Longest root first so paths in nested embedded repos hit the innermost matcher.
|
|
412
435
|
this.embedded = [...embedded].sort((a, b) => b.root.length - a.root.length);
|
|
413
436
|
}
|
|
414
437
|
ignores(rel) {
|
|
438
|
+
// User `exclude` (#999) is checked first and against the full root-relative
|
|
439
|
+
// path: it must drop git-TRACKED paths (which `.gitignore` can't) and apply
|
|
440
|
+
// everywhere, including ancestors of embedded repos.
|
|
441
|
+
if (this.exclude && this.exclude.ignores(rel))
|
|
442
|
+
return true;
|
|
415
443
|
for (const { root, matcher } of this.embedded) {
|
|
416
444
|
if (rel.startsWith(root)) {
|
|
417
445
|
const inner = rel.slice(root.length);
|
|
@@ -438,7 +466,7 @@ exports.ScopeIgnore = ScopeIgnore;
|
|
|
438
466
|
*/
|
|
439
467
|
function buildScopeIgnore(rootDir, embeddedRoots) {
|
|
440
468
|
const roots = embeddedRoots ? [...embeddedRoots] : discoverEmbeddedRepoRoots(rootDir);
|
|
441
|
-
return new ScopeIgnore(buildDefaultIgnore(rootDir), roots.map((root) => ({ root, matcher: buildDefaultIgnore(path.join(rootDir, root)) })));
|
|
469
|
+
return new ScopeIgnore(buildDefaultIgnore(rootDir), roots.map((root) => ({ root, matcher: buildDefaultIgnore(path.join(rootDir, root)) })), loadExcludeMatcher(rootDir));
|
|
442
470
|
}
|
|
443
471
|
/**
|
|
444
472
|
* Standalone discovery of every embedded repo root under `rootDir` (relative,
|
|
@@ -637,14 +665,14 @@ function getGitChangedFiles(rootDir) {
|
|
|
637
665
|
// Custom extension → language overrides from the project's codegraph.json,
|
|
638
666
|
// so change detection sees the same custom-extension files the full index does.
|
|
639
667
|
const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
|
|
640
|
-
collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir));
|
|
668
|
+
collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir), loadExcludeMatcher(rootDir));
|
|
641
669
|
return changes;
|
|
642
670
|
}
|
|
643
671
|
catch {
|
|
644
672
|
return null;
|
|
645
673
|
}
|
|
646
674
|
}
|
|
647
|
-
function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null) {
|
|
675
|
+
function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null, exclude = null) {
|
|
648
676
|
const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '--no-renames'], { cwd: repoDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
|
|
649
677
|
// This repo's own ignore rules — built-in defaults (#407) plus its .gitignore.
|
|
650
678
|
// Change detection must exclude the SAME files the full index does, but git
|
|
@@ -683,6 +711,12 @@ function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null
|
|
|
683
711
|
// index — match against the repo-relative path, same as the full scan. (#766)
|
|
684
712
|
if (ig.ignores(rel))
|
|
685
713
|
continue;
|
|
714
|
+
// User `codegraph.json` `exclude` (#999) is project-root-relative, so it's
|
|
715
|
+
// matched against the full path — sync must not re-add a tracked file the
|
|
716
|
+
// full index now keeps out. Deletions above stay unfiltered so a file that
|
|
717
|
+
// WAS indexed before an exclude was added still cleans itself out.
|
|
718
|
+
if (exclude && exclude.ignores(filePath))
|
|
719
|
+
continue;
|
|
686
720
|
if (statusCode === '??') {
|
|
687
721
|
out.added.push(filePath);
|
|
688
722
|
}
|
|
@@ -697,11 +731,11 @@ function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null
|
|
|
697
731
|
// and they are left alone (#970, #976), mirroring the full-index scan.
|
|
698
732
|
for (const rel of untrackedDirs) {
|
|
699
733
|
for (const repoRel of findNestedGitRepos(path.join(repoDir, rel), rel)) {
|
|
700
|
-
collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored);
|
|
734
|
+
collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored, exclude);
|
|
701
735
|
}
|
|
702
736
|
}
|
|
703
737
|
for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
|
|
704
|
-
collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored);
|
|
738
|
+
collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored, exclude);
|
|
705
739
|
}
|
|
706
740
|
}
|
|
707
741
|
/**
|
|
@@ -860,7 +894,14 @@ function scanDirectoryWalk(rootDir, onProgress) {
|
|
|
860
894
|
}
|
|
861
895
|
// Seed a base matcher with the built-in default ignores (merged with the root
|
|
862
896
|
// .gitignore so a negation can override). Nested .gitignores still layer per-dir.
|
|
863
|
-
|
|
897
|
+
const baseMatchers = [{ dir: rootDir, ig: buildDefaultIgnore(rootDir) }];
|
|
898
|
+
// Project `codegraph.json` `exclude` patterns (#999), rooted at the project so
|
|
899
|
+
// `isIgnored` matches them against root-relative paths — same coverage the
|
|
900
|
+
// git path gets via ScopeIgnore, for non-git projects.
|
|
901
|
+
const exclude = loadExcludeMatcher(rootDir);
|
|
902
|
+
if (exclude)
|
|
903
|
+
baseMatchers.push({ dir: rootDir, ig: exclude });
|
|
904
|
+
walk(rootDir, baseMatchers);
|
|
864
905
|
return files;
|
|
865
906
|
}
|
|
866
907
|
/**
|
|
@@ -1022,148 +1063,153 @@ class ExtractionOrchestrator {
|
|
|
1022
1063
|
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
1023
1064
|
neededLanguages.push('cpp');
|
|
1024
1065
|
}
|
|
1025
|
-
//
|
|
1026
|
-
// Falls back to in-process parsing
|
|
1066
|
+
// Parse files on a pool of worker threads (keeps the main thread free for UI
|
|
1067
|
+
// and uses every core). Falls back to in-process parsing when the compiled
|
|
1068
|
+
// worker is unavailable (e.g. running from source in tests).
|
|
1027
1069
|
const parseWorkerPath = path.join(__dirname, 'parse-worker.js');
|
|
1028
1070
|
const useWorker = fs.existsSync(parseWorkerPath);
|
|
1029
|
-
let
|
|
1071
|
+
let pool = null;
|
|
1030
1072
|
if (useWorker) {
|
|
1031
|
-
|
|
1032
|
-
|
|
1073
|
+
// CODEGRAPH_PARSE_WORKERS: explicit worker count; 1 = the old single-worker
|
|
1074
|
+
// behaviour (the conservative rollback). Unset → clamp(cores-1, 1, 8).
|
|
1075
|
+
const poolSize = (0, parse_pool_1.resolveParsePoolSize)(process.env.CODEGRAPH_PARSE_WORKERS, os.cpus().length);
|
|
1076
|
+
pool = new parse_pool_1.ParseWorkerPool({
|
|
1077
|
+
languages: neededLanguages,
|
|
1078
|
+
size: poolSize,
|
|
1079
|
+
workerScriptPath: parseWorkerPath,
|
|
1080
|
+
recycleInterval: WORKER_RECYCLE_INTERVAL,
|
|
1081
|
+
parseTimeoutMs: PARSE_TIMEOUT_MS,
|
|
1082
|
+
log,
|
|
1083
|
+
});
|
|
1084
|
+
log(`Parse worker pool: ${poolSize} worker(s)`);
|
|
1033
1085
|
}
|
|
1034
1086
|
else {
|
|
1035
|
-
// In-process fallback: load grammars locally
|
|
1087
|
+
// In-process fallback: load grammars locally and parse on the main thread.
|
|
1036
1088
|
await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
|
|
1037
1089
|
}
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1090
|
+
/**
|
|
1091
|
+
* Parse one file: on the pool when available (the promise REJECTS on a worker
|
|
1092
|
+
* crash/timeout — the caller records it and the retry pass re-attempts), or
|
|
1093
|
+
* in-process synchronously as the no-worker fallback. The language is resolved
|
|
1094
|
+
* here on the main thread, where the codegraph.json overrides are loaded.
|
|
1095
|
+
*/
|
|
1096
|
+
const parseFile = (filePath, content) => {
|
|
1097
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1098
|
+
if (!pool)
|
|
1099
|
+
return Promise.resolve((0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames));
|
|
1100
|
+
return pool.requestParse({ filePath, content, language, frameworkNames });
|
|
1101
|
+
};
|
|
1102
|
+
// --- Bounded rolling-window dispatch, ordered commit ---
|
|
1103
|
+
// Reads stay batched/parallel; parses run concurrently across the pool; the
|
|
1104
|
+
// SQLite store stays on the main thread (it isn't thread-safe). Crucially we
|
|
1105
|
+
// COMMIT results in original file order, not parse-completion order: the
|
|
1106
|
+
// resolution phase (run after indexing) resolves an ambiguous reference to one
|
|
1107
|
+
// of several same-named candidates by the nodes' DB insertion order, so a
|
|
1108
|
+
// stable commit order keeps the resulting graph deterministic — byte-identical
|
|
1109
|
+
// to the single-worker path — instead of drifting with parse timing. The
|
|
1110
|
+
// `completed` buffer holds at most ~windowSize out-of-order results, so memory
|
|
1111
|
+
// stays bounded.
|
|
1112
|
+
const windowSize = pool ? Math.max(4, pool.size * 2) : 1;
|
|
1113
|
+
const inFlight = new Set();
|
|
1114
|
+
const completed = new Map();
|
|
1115
|
+
let nextSeq = 0; // file-order sequence assigned at dispatch
|
|
1116
|
+
let nextToStore = 0; // cursor: next sequence to commit
|
|
1117
|
+
let aborted = false;
|
|
1118
|
+
const storeResult = (filePath, content, stats, result) => {
|
|
1119
|
+
processed++;
|
|
1120
|
+
// Store in database on main thread (SQLite is not thread-safe)
|
|
1121
|
+
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1122
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1123
|
+
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
1052
1124
|
}
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
const pending = pendingParses.get(msg.id);
|
|
1058
|
-
if (pending) {
|
|
1059
|
-
clearTimeout(pending.timer);
|
|
1060
|
-
pendingParses.delete(msg.id);
|
|
1061
|
-
pending.resolve(msg.result);
|
|
1062
|
-
}
|
|
1125
|
+
if (result.errors.length > 0) {
|
|
1126
|
+
for (const err of result.errors) {
|
|
1127
|
+
if (!err.filePath)
|
|
1128
|
+
err.filePath = filePath;
|
|
1063
1129
|
}
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1130
|
+
errors.push(...result.errors);
|
|
1131
|
+
}
|
|
1132
|
+
if (result.nodes.length > 0) {
|
|
1133
|
+
filesIndexed++;
|
|
1134
|
+
totalNodes += result.nodes.length;
|
|
1135
|
+
totalEdges += result.edges.length;
|
|
1136
|
+
}
|
|
1137
|
+
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
1138
|
+
filesErrored++;
|
|
1139
|
+
}
|
|
1140
|
+
else {
|
|
1141
|
+
// Files with no symbols but no errors (yaml, twig, properties) are
|
|
1142
|
+
// tracked at the file level — count them as indexed so the CLI doesn't
|
|
1143
|
+
// misleadingly report "No files found to index".
|
|
1144
|
+
const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1145
|
+
if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
|
|
1146
|
+
filesIndexed++;
|
|
1073
1147
|
}
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
if (parseWorker === w) {
|
|
1077
|
-
parseWorker = null;
|
|
1078
|
-
workerParseCount = 0;
|
|
1148
|
+
else {
|
|
1149
|
+
filesSkipped++;
|
|
1079
1150
|
}
|
|
1151
|
+
}
|
|
1152
|
+
onProgress?.({ phase: 'parsing', current: processed, total, currentFile: filePath });
|
|
1153
|
+
};
|
|
1154
|
+
const recordParseFailure = (filePath, err) => {
|
|
1155
|
+
processed++;
|
|
1156
|
+
filesErrored++;
|
|
1157
|
+
errors.push({
|
|
1158
|
+
message: err instanceof Error ? err.message : String(err),
|
|
1159
|
+
filePath,
|
|
1160
|
+
severity: 'error',
|
|
1161
|
+
code: 'parse_error',
|
|
1080
1162
|
});
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
await new Promise((resolve, reject) => {
|
|
1090
|
-
parseWorker.once('message', (msg) => {
|
|
1091
|
-
if (msg.type === 'grammars-loaded')
|
|
1092
|
-
resolve();
|
|
1093
|
-
else
|
|
1094
|
-
reject(new Error(`Unexpected message: ${msg.type}`));
|
|
1095
|
-
});
|
|
1096
|
-
parseWorker.postMessage({ type: 'load-grammars', languages: neededLanguages });
|
|
1097
|
-
});
|
|
1098
|
-
return parseWorker;
|
|
1099
|
-
}
|
|
1100
|
-
if (WorkerClass) {
|
|
1101
|
-
await ensureWorker();
|
|
1102
|
-
}
|
|
1103
|
-
/**
|
|
1104
|
-
* Recycle the worker thread to reclaim WASM memory.
|
|
1105
|
-
* Terminates the current worker and clears the reference so
|
|
1106
|
-
* ensureWorker() will spawn a fresh one on the next call.
|
|
1107
|
-
*/
|
|
1108
|
-
function recycleWorker() {
|
|
1109
|
-
if (!parseWorker)
|
|
1163
|
+
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1164
|
+
};
|
|
1165
|
+
// Commit buffered parses to the DB in file order, advancing the cursor over
|
|
1166
|
+
// contiguous completed results. Runs after each parse settles (and once more
|
|
1167
|
+
// after the drain). storeResult / recordParseFailure run here single-threaded,
|
|
1168
|
+
// so shared counters and SQLite writes never race despite parallel parsing.
|
|
1169
|
+
const flushOrdered = () => {
|
|
1170
|
+
if (aborted)
|
|
1110
1171
|
return;
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
// Resolve the language on the main thread (where the project's
|
|
1120
|
-
// codegraph.json overrides are loaded) and hand it to the worker, so the
|
|
1121
|
-
// worker never needs the override map itself.
|
|
1122
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1123
|
-
if (!WorkerClass) {
|
|
1124
|
-
// In-process fallback
|
|
1125
|
-
return (0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames);
|
|
1172
|
+
while (completed.has(nextToStore)) {
|
|
1173
|
+
const item = completed.get(nextToStore);
|
|
1174
|
+
completed.delete(nextToStore);
|
|
1175
|
+
nextToStore++;
|
|
1176
|
+
if (item.ok)
|
|
1177
|
+
storeResult(item.filePath, item.content, item.stats, item.result);
|
|
1178
|
+
else
|
|
1179
|
+
recordParseFailure(item.filePath, item.err);
|
|
1126
1180
|
}
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1181
|
+
};
|
|
1182
|
+
// Dispatch one file's parse (parses run concurrently across the pool), tagged
|
|
1183
|
+
// with its file-order sequence so flushOrdered commits results in order. The
|
|
1184
|
+
// backpressure below bounds how far parsing runs ahead of the in-order commit.
|
|
1185
|
+
const feed = async (filePath, content, stats) => {
|
|
1186
|
+
const seq = nextSeq++;
|
|
1187
|
+
const p = (async () => {
|
|
1188
|
+
try {
|
|
1189
|
+
const result = await parseFile(filePath, content);
|
|
1190
|
+
completed.set(seq, { ok: true, filePath, content, stats, result });
|
|
1191
|
+
}
|
|
1192
|
+
catch (parseErr) {
|
|
1193
|
+
completed.set(seq, { ok: false, filePath, err: parseErr });
|
|
1194
|
+
}
|
|
1195
|
+
flushOrdered();
|
|
1196
|
+
})();
|
|
1197
|
+
const tracked = p.finally(() => { inFlight.delete(tracked); });
|
|
1198
|
+
inFlight.add(tracked);
|
|
1199
|
+
// Backpressure on the dispatched-but-not-yet-committed count (in-flight +
|
|
1200
|
+
// buffered), not just in-flight: a slow file sitting at the commit cursor
|
|
1201
|
+
// lets later parses finish and buffer, which would otherwise grow without
|
|
1202
|
+
// bound. Wait for parses to settle (each may advance the cursor) until the
|
|
1203
|
+
// window has room. `inFlight.size > 0` guards against an empty race — the
|
|
1204
|
+
// cursor file is always still in flight when the window is full.
|
|
1205
|
+
while (nextSeq - nextToStore >= windowSize && inFlight.size > 0) {
|
|
1206
|
+
await Promise.race(inFlight);
|
|
1132
1207
|
}
|
|
1133
|
-
|
|
1134
|
-
const id = nextId++;
|
|
1135
|
-
workerParseCount++;
|
|
1136
|
-
// Scale timeout for large files: base 10s + 10s per 100KB
|
|
1137
|
-
const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
|
|
1138
|
-
return new Promise((resolve, reject) => {
|
|
1139
|
-
const timer = setTimeout(() => {
|
|
1140
|
-
pendingParses.delete(id);
|
|
1141
|
-
log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
|
|
1142
|
-
// Reject FIRST — worker.terminate() can hang if WASM is stuck
|
|
1143
|
-
parseWorker = null;
|
|
1144
|
-
workerParseCount = 0;
|
|
1145
|
-
reject(new Error(`Parse timed out after ${timeoutMs}ms`));
|
|
1146
|
-
// Fire-and-forget: kill the stuck worker in the background
|
|
1147
|
-
worker.terminate().catch(() => { });
|
|
1148
|
-
}, timeoutMs);
|
|
1149
|
-
pendingParses.set(id, { resolve, reject, timer });
|
|
1150
|
-
worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames, language });
|
|
1151
|
-
});
|
|
1152
|
-
}
|
|
1208
|
+
};
|
|
1153
1209
|
for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
|
|
1154
1210
|
if (signal?.aborted) {
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
return {
|
|
1158
|
-
success: false,
|
|
1159
|
-
filesIndexed,
|
|
1160
|
-
filesSkipped,
|
|
1161
|
-
filesErrored,
|
|
1162
|
-
nodesCreated: totalNodes,
|
|
1163
|
-
edgesCreated: totalEdges,
|
|
1164
|
-
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1165
|
-
durationMs: Date.now() - startTime,
|
|
1166
|
-
};
|
|
1211
|
+
aborted = true;
|
|
1212
|
+
break;
|
|
1167
1213
|
}
|
|
1168
1214
|
const batch = files.slice(i, i + FILE_IO_BATCH_SIZE);
|
|
1169
1215
|
// Read files in parallel (with path validation before any I/O)
|
|
@@ -1185,29 +1231,13 @@ class ExtractionOrchestrator {
|
|
|
1185
1231
|
return { filePath: fp, content: null, stats: null, error: err };
|
|
1186
1232
|
}
|
|
1187
1233
|
}));
|
|
1188
|
-
//
|
|
1234
|
+
// Dispatch each readable file into the bounded parse window; the window
|
|
1235
|
+
// stores results on the main thread as they arrive.
|
|
1189
1236
|
for (const { filePath, content, stats, error } of fileContents) {
|
|
1190
1237
|
if (signal?.aborted) {
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
return {
|
|
1194
|
-
success: false,
|
|
1195
|
-
filesIndexed,
|
|
1196
|
-
filesSkipped,
|
|
1197
|
-
filesErrored,
|
|
1198
|
-
nodesCreated: totalNodes,
|
|
1199
|
-
edgesCreated: totalEdges,
|
|
1200
|
-
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1201
|
-
durationMs: Date.now() - startTime,
|
|
1202
|
-
};
|
|
1238
|
+
aborted = true;
|
|
1239
|
+
break;
|
|
1203
1240
|
}
|
|
1204
|
-
// Report progress before parsing (show current file being worked on)
|
|
1205
|
-
onProgress?.({
|
|
1206
|
-
phase: 'parsing',
|
|
1207
|
-
current: processed,
|
|
1208
|
-
total,
|
|
1209
|
-
currentFile: filePath,
|
|
1210
|
-
});
|
|
1211
1241
|
if (error || content === null || stats === null) {
|
|
1212
1242
|
processed++;
|
|
1213
1243
|
filesErrored++;
|
|
@@ -1217,6 +1247,7 @@ class ExtractionOrchestrator {
|
|
|
1217
1247
|
severity: 'error',
|
|
1218
1248
|
code: 'read_error',
|
|
1219
1249
|
});
|
|
1250
|
+
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1220
1251
|
continue;
|
|
1221
1252
|
}
|
|
1222
1253
|
// Honour MAX_FILE_SIZE. Without this check, vendored generated
|
|
@@ -1236,57 +1267,32 @@ class ExtractionOrchestrator {
|
|
|
1236
1267
|
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1237
1268
|
continue;
|
|
1238
1269
|
}
|
|
1239
|
-
// Parse
|
|
1240
|
-
//
|
|
1241
|
-
|
|
1242
|
-
try {
|
|
1243
|
-
result = await requestParse(filePath, content);
|
|
1244
|
-
}
|
|
1245
|
-
catch (parseErr) {
|
|
1246
|
-
processed++;
|
|
1247
|
-
filesErrored++;
|
|
1248
|
-
errors.push({
|
|
1249
|
-
message: parseErr instanceof Error ? parseErr.message : String(parseErr),
|
|
1250
|
-
filePath,
|
|
1251
|
-
severity: 'error',
|
|
1252
|
-
code: 'parse_error',
|
|
1253
|
-
});
|
|
1254
|
-
continue;
|
|
1255
|
-
}
|
|
1256
|
-
processed++;
|
|
1257
|
-
// Store in database on main thread (SQLite is not thread-safe)
|
|
1258
|
-
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1259
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1260
|
-
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
1261
|
-
}
|
|
1262
|
-
if (result.errors.length > 0) {
|
|
1263
|
-
for (const err of result.errors) {
|
|
1264
|
-
if (!err.filePath)
|
|
1265
|
-
err.filePath = filePath;
|
|
1266
|
-
}
|
|
1267
|
-
errors.push(...result.errors);
|
|
1268
|
-
}
|
|
1269
|
-
if (result.nodes.length > 0) {
|
|
1270
|
-
filesIndexed++;
|
|
1271
|
-
totalNodes += result.nodes.length;
|
|
1272
|
-
totalEdges += result.edges.length;
|
|
1273
|
-
}
|
|
1274
|
-
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
1275
|
-
filesErrored++;
|
|
1276
|
-
}
|
|
1277
|
-
else {
|
|
1278
|
-
// Files with no symbols but no errors (yaml, twig, properties) are
|
|
1279
|
-
// tracked at the file level — count them as indexed so the CLI
|
|
1280
|
-
// doesn't misleadingly report "No files found to index".
|
|
1281
|
-
const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1282
|
-
if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
|
|
1283
|
-
filesIndexed++;
|
|
1284
|
-
}
|
|
1285
|
-
else {
|
|
1286
|
-
filesSkipped++;
|
|
1287
|
-
}
|
|
1288
|
-
}
|
|
1270
|
+
// Parse on the pool (main thread stays unblocked). Errors/timeouts are
|
|
1271
|
+
// handled inside feed() → recordParseFailure, feeding the retry pass.
|
|
1272
|
+
await feed(filePath, content, stats);
|
|
1289
1273
|
}
|
|
1274
|
+
if (aborted)
|
|
1275
|
+
break;
|
|
1276
|
+
}
|
|
1277
|
+
// Drain parses still in flight (skip on abort — we tear down below instead),
|
|
1278
|
+
// then commit any results the cursor hasn't reached yet.
|
|
1279
|
+
if (!aborted) {
|
|
1280
|
+
await Promise.all(inFlight);
|
|
1281
|
+
flushOrdered();
|
|
1282
|
+
}
|
|
1283
|
+
if (signal?.aborted || aborted) {
|
|
1284
|
+
if (pool)
|
|
1285
|
+
await pool.destroy();
|
|
1286
|
+
return {
|
|
1287
|
+
success: false,
|
|
1288
|
+
filesIndexed,
|
|
1289
|
+
filesSkipped,
|
|
1290
|
+
filesErrored,
|
|
1291
|
+
nodesCreated: totalNodes,
|
|
1292
|
+
edgesCreated: totalEdges,
|
|
1293
|
+
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1294
|
+
durationMs: Date.now() - startTime,
|
|
1295
|
+
};
|
|
1290
1296
|
}
|
|
1291
1297
|
// Report 100% so the progress bar doesn't hang at 99%
|
|
1292
1298
|
onProgress?.({
|
|
@@ -1303,15 +1309,17 @@ class ExtractionOrchestrator {
|
|
|
1303
1309
|
// every file gets the absolute cleanest WASM state possible.
|
|
1304
1310
|
const retryableErrors = errors.filter((e) => e.code === 'parse_error' && e.filePath &&
|
|
1305
1311
|
(e.message.includes('Worker exited') || e.message.includes('memory access out of bounds')));
|
|
1306
|
-
if (retryableErrors.length > 0 &&
|
|
1312
|
+
if (retryableErrors.length > 0 && pool) {
|
|
1307
1313
|
log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
|
|
1314
|
+
// Fresh WASM heaps for the retry phase. A retry that still crashes its
|
|
1315
|
+
// worker makes the pool respawn it, so later retries keep landing on clean
|
|
1316
|
+
// workers too.
|
|
1317
|
+
pool.recycleAll();
|
|
1308
1318
|
const stillFailing = [];
|
|
1309
1319
|
for (const errEntry of retryableErrors) {
|
|
1310
1320
|
const filePath = errEntry.filePath;
|
|
1311
1321
|
if (signal?.aborted)
|
|
1312
1322
|
break;
|
|
1313
|
-
// Fresh worker for every retry — maximum WASM headroom
|
|
1314
|
-
recycleWorker();
|
|
1315
1323
|
let content;
|
|
1316
1324
|
try {
|
|
1317
1325
|
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
@@ -1324,7 +1332,7 @@ class ExtractionOrchestrator {
|
|
|
1324
1332
|
}
|
|
1325
1333
|
let result;
|
|
1326
1334
|
try {
|
|
1327
|
-
result = await
|
|
1335
|
+
result = await parseFile(filePath, content);
|
|
1328
1336
|
}
|
|
1329
1337
|
catch {
|
|
1330
1338
|
stillFailing.push(errEntry);
|
|
@@ -1350,11 +1358,11 @@ class ExtractionOrchestrator {
|
|
|
1350
1358
|
// code nodes but consume parser memory.
|
|
1351
1359
|
if (stillFailing.length > 0) {
|
|
1352
1360
|
log(`${stillFailing.length} files still failing — retrying with comments stripped...`);
|
|
1361
|
+
pool.recycleAll();
|
|
1353
1362
|
for (const errEntry of stillFailing) {
|
|
1354
1363
|
const filePath = errEntry.filePath;
|
|
1355
1364
|
if (signal?.aborted)
|
|
1356
1365
|
break;
|
|
1357
|
-
recycleWorker();
|
|
1358
1366
|
let fullContent;
|
|
1359
1367
|
try {
|
|
1360
1368
|
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
@@ -1373,7 +1381,7 @@ class ExtractionOrchestrator {
|
|
|
1373
1381
|
.join('\n');
|
|
1374
1382
|
let result;
|
|
1375
1383
|
try {
|
|
1376
|
-
result = await
|
|
1384
|
+
result = await parseFile(filePath, stripped);
|
|
1377
1385
|
}
|
|
1378
1386
|
catch {
|
|
1379
1387
|
continue;
|
|
@@ -1394,11 +1402,9 @@ class ExtractionOrchestrator {
|
|
|
1394
1402
|
}
|
|
1395
1403
|
}
|
|
1396
1404
|
}
|
|
1397
|
-
// Shut down parse worker
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
parseWorker.terminate().catch(() => { });
|
|
1401
|
-
}
|
|
1405
|
+
// Shut down the parse worker pool.
|
|
1406
|
+
if (pool)
|
|
1407
|
+
await pool.destroy();
|
|
1402
1408
|
return {
|
|
1403
1409
|
success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
|
|
1404
1410
|
filesIndexed,
|