@colbymchenry/codegraph-darwin-x64 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/dist/bin/codegraph.js +99 -59
- package/lib/dist/bin/codegraph.js.map +1 -1
- package/lib/dist/bin/command-supervision.d.ts +12 -0
- package/lib/dist/bin/command-supervision.d.ts.map +1 -0
- package/lib/dist/bin/command-supervision.js +76 -0
- package/lib/dist/bin/command-supervision.js.map +1 -0
- package/lib/dist/db/migrations.d.ts +1 -1
- package/lib/dist/db/migrations.d.ts.map +1 -1
- package/lib/dist/db/migrations.js +25 -1
- package/lib/dist/db/migrations.js.map +1 -1
- package/lib/dist/db/queries.d.ts.map +1 -1
- package/lib/dist/db/queries.js +10 -2
- package/lib/dist/db/queries.js.map +1 -1
- package/lib/dist/db/schema.sql +11 -0
- package/lib/dist/directory.d.ts +32 -0
- package/lib/dist/directory.d.ts.map +1 -1
- package/lib/dist/directory.js +83 -0
- package/lib/dist/directory.js.map +1 -1
- package/lib/dist/extraction/index.d.ts +13 -1
- package/lib/dist/extraction/index.d.ts.map +1 -1
- package/lib/dist/extraction/index.js +310 -218
- package/lib/dist/extraction/index.js.map +1 -1
- package/lib/dist/extraction/languages/c-cpp.d.ts +16 -0
- package/lib/dist/extraction/languages/c-cpp.d.ts.map +1 -1
- package/lib/dist/extraction/languages/c-cpp.js +33 -0
- package/lib/dist/extraction/languages/c-cpp.js.map +1 -1
- package/lib/dist/extraction/parse-pool.d.ts +126 -0
- package/lib/dist/extraction/parse-pool.d.ts.map +1 -0
- package/lib/dist/extraction/parse-pool.js +319 -0
- package/lib/dist/extraction/parse-pool.js.map +1 -0
- package/lib/dist/extraction/tree-sitter.d.ts +21 -0
- package/lib/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/lib/dist/extraction/tree-sitter.js +106 -21
- package/lib/dist/extraction/tree-sitter.js.map +1 -1
- package/lib/dist/mcp/daemon-paths.d.ts +30 -3
- package/lib/dist/mcp/daemon-paths.d.ts.map +1 -1
- package/lib/dist/mcp/daemon-paths.js +50 -10
- package/lib/dist/mcp/daemon-paths.js.map +1 -1
- package/lib/dist/mcp/daemon-registry.d.ts.map +1 -1
- package/lib/dist/mcp/daemon-registry.js +7 -3
- package/lib/dist/mcp/daemon-registry.js.map +1 -1
- package/lib/dist/mcp/daemon.d.ts +48 -0
- package/lib/dist/mcp/daemon.d.ts.map +1 -1
- package/lib/dist/mcp/daemon.js +203 -32
- package/lib/dist/mcp/daemon.js.map +1 -1
- package/lib/dist/mcp/engine.d.ts +17 -0
- package/lib/dist/mcp/engine.d.ts.map +1 -1
- package/lib/dist/mcp/engine.js +73 -1
- package/lib/dist/mcp/engine.js.map +1 -1
- package/lib/dist/mcp/index.d.ts.map +1 -1
- package/lib/dist/mcp/index.js +25 -43
- package/lib/dist/mcp/index.js.map +1 -1
- package/lib/dist/mcp/ppid-watchdog.d.ts +18 -0
- package/lib/dist/mcp/ppid-watchdog.d.ts.map +1 -1
- package/lib/dist/mcp/ppid-watchdog.js +37 -0
- package/lib/dist/mcp/ppid-watchdog.js.map +1 -1
- package/lib/dist/mcp/query-pool.d.ts +94 -0
- package/lib/dist/mcp/query-pool.d.ts.map +1 -0
- package/lib/dist/mcp/query-pool.js +297 -0
- package/lib/dist/mcp/query-pool.js.map +1 -0
- package/lib/dist/mcp/query-worker.d.ts +24 -0
- package/lib/dist/mcp/query-worker.d.ts.map +1 -0
- package/lib/dist/mcp/query-worker.js +87 -0
- package/lib/dist/mcp/query-worker.js.map +1 -0
- package/lib/dist/mcp/tools.d.ts +57 -0
- package/lib/dist/mcp/tools.d.ts.map +1 -1
- package/lib/dist/mcp/tools.js +196 -40
- package/lib/dist/mcp/tools.js.map +1 -1
- package/lib/dist/project-config.d.ts +20 -0
- package/lib/dist/project-config.d.ts.map +1 -1
- package/lib/dist/project-config.js +42 -2
- package/lib/dist/project-config.js.map +1 -1
- package/lib/dist/resolution/c-fnptr-synthesizer.d.ts +0 -28
- package/lib/dist/resolution/c-fnptr-synthesizer.d.ts.map +1 -1
- package/lib/dist/resolution/c-fnptr-synthesizer.js +765 -79
- package/lib/dist/resolution/c-fnptr-synthesizer.js.map +1 -1
- package/lib/dist/resolution/name-matcher.d.ts.map +1 -1
- package/lib/dist/resolution/name-matcher.js +44 -0
- package/lib/dist/resolution/name-matcher.js.map +1 -1
- package/lib/dist/sync/worktree.d.ts +9 -0
- package/lib/dist/sync/worktree.d.ts.map +1 -1
- package/lib/dist/sync/worktree.js +40 -0
- package/lib/dist/sync/worktree.js.map +1 -1
- package/lib/dist/types.d.ts +6 -1
- package/lib/dist/types.d.ts.map +1 -1
- package/lib/node_modules/.package-lock.json +1 -1
- package/lib/package.json +1 -1
- package/package.json +1 -1
|
@@ -51,9 +51,11 @@ exports.scanDirectoryAsync = scanDirectoryAsync;
|
|
|
51
51
|
const fs = __importStar(require("fs"));
|
|
52
52
|
const fsp = __importStar(require("fs/promises"));
|
|
53
53
|
const path = __importStar(require("path"));
|
|
54
|
+
const os = __importStar(require("os"));
|
|
54
55
|
const crypto = __importStar(require("crypto"));
|
|
55
56
|
const child_process_1 = require("child_process");
|
|
56
57
|
const tree_sitter_1 = require("./tree-sitter");
|
|
58
|
+
const parse_pool_1 = require("./parse-pool");
|
|
57
59
|
const grammars_1 = require("./grammars");
|
|
58
60
|
const project_config_1 = require("../project-config");
|
|
59
61
|
const directory_1 = require("../directory");
|
|
@@ -155,12 +157,34 @@ const DEFAULT_IGNORE_DIRS = new Set([
|
|
|
155
157
|
// Generic cache
|
|
156
158
|
'.cache',
|
|
157
159
|
]);
|
|
160
|
+
/**
|
|
161
|
+
* Android resource directory types. A `res/` tree holds ONLY non-code resources —
|
|
162
|
+
* layouts, drawables, value bags (strings/colors/styles), menus, navigation
|
|
163
|
+
* graphs — split into one typed subdirectory per kind, optionally density/locale/
|
|
164
|
+
* version-qualified (`values-es`, `drawable-hdpi`, `layout-v21`, …). None of it
|
|
165
|
+
* yields an extractable code symbol, yet on an Android app it DOMINATES the tree
|
|
166
|
+
* (one report: 26k XML files = 97% of the project, 0 symbols), bloating the DB,
|
|
167
|
+
* slowing indexing, and skewing both the file count and `codegraph_explore`
|
|
168
|
+
* results (#1047). So these are excluded by default. The structure is
|
|
169
|
+
* self-identifying — a non-Android project has no `res/layout/` etc., so it's
|
|
170
|
+
* untouched — and the only XML that DOES produce symbols (MyBatis mappers) lives
|
|
171
|
+
* under `src/main/resources/`, never `res/`, so nothing useful is dropped.
|
|
172
|
+
* `res/raw/` is deliberately NOT here: it holds arbitrary bundled assets that can
|
|
173
|
+
* be code-ish (a `.sql` schema, a `.js`), so we leave it indexed. Override any of
|
|
174
|
+
* these with a `.gitignore` negation (e.g. `!res/values/`).
|
|
175
|
+
*/
|
|
176
|
+
const ANDROID_RES_TYPES = [
|
|
177
|
+
'anim', 'animator', 'color', 'drawable', 'font', 'layout',
|
|
178
|
+
'menu', 'mipmap', 'navigation', 'transition', 'values', 'xml',
|
|
179
|
+
];
|
|
158
180
|
/** Gitignore-style patterns for the `ignore` matcher: the dirs above plus a few globs. */
|
|
159
181
|
const DEFAULT_IGNORE_PATTERNS = [
|
|
160
182
|
...Array.from(DEFAULT_IGNORE_DIRS, (d) => `${d}/`),
|
|
161
183
|
'*.egg-info/', // Python packaging metadata
|
|
162
184
|
'cmake-build-*/', // CLion / CMake build trees
|
|
163
185
|
'bazel-*/', // Bazel output symlink trees
|
|
186
|
+
// Android resource dirs at any depth, with their qualifier variants (#1047).
|
|
187
|
+
...ANDROID_RES_TYPES.map((t) => `**/res/${t}*/`),
|
|
164
188
|
];
|
|
165
189
|
/** True if `buf` decodes as strict UTF-8 (no invalid byte sequences). */
|
|
166
190
|
function isValidUtf8(buf) {
|
|
@@ -265,6 +289,19 @@ function loadIncludeIgnoredMatcher(rootDir) {
|
|
|
265
289
|
const patterns = (0, project_config_1.loadIncludeIgnoredPatterns)(rootDir);
|
|
266
290
|
return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
|
|
267
291
|
}
|
|
292
|
+
/**
|
|
293
|
+
* Matcher for the project's `codegraph.json` `exclude` patterns — paths to keep
|
|
294
|
+
* OUT of the index even when git-tracked, which `.gitignore` cannot do (#999).
|
|
295
|
+
* The escape hatch for a committed vendor/theme/SDK directory. Returns `null`
|
|
296
|
+
* when nothing is excluded (the zero-config default → no overhead). Matched
|
|
297
|
+
* against project-root-relative paths, so it applies uniformly across the whole
|
|
298
|
+
* workspace, including inside embedded repos (excluding `static/` means gone
|
|
299
|
+
* everywhere). Built once per scan/sync/scope operation from the scan root.
|
|
300
|
+
*/
|
|
301
|
+
function loadExcludeMatcher(rootDir) {
|
|
302
|
+
const patterns = (0, project_config_1.loadExcludePatterns)(rootDir);
|
|
303
|
+
return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
|
|
304
|
+
}
|
|
268
305
|
/**
|
|
269
306
|
* `git ls-files --directory` collapses a wholly-untracked/ignored directory into
|
|
270
307
|
* one entry — and when the command's own cwd is such a directory (the indexed
|
|
@@ -404,14 +441,27 @@ function findNestedGitRepos(absDir, relPrefix) {
|
|
|
404
441
|
*/
|
|
405
442
|
class ScopeIgnore {
|
|
406
443
|
rootMatcher;
|
|
444
|
+
exclude;
|
|
407
445
|
embedded;
|
|
408
446
|
defaults = defaultsOnlyIgnore();
|
|
409
|
-
constructor(rootMatcher, embedded
|
|
447
|
+
constructor(rootMatcher, embedded,
|
|
448
|
+
/**
|
|
449
|
+
* Project `codegraph.json` `exclude` patterns (#999), matched against the
|
|
450
|
+
* full root-relative path. Wins over everything else — an explicit user
|
|
451
|
+
* exclude applies even to tracked files and even inside embedded repos.
|
|
452
|
+
*/
|
|
453
|
+
exclude = null) {
|
|
410
454
|
this.rootMatcher = rootMatcher;
|
|
455
|
+
this.exclude = exclude;
|
|
411
456
|
// Longest root first so paths in nested embedded repos hit the innermost matcher.
|
|
412
457
|
this.embedded = [...embedded].sort((a, b) => b.root.length - a.root.length);
|
|
413
458
|
}
|
|
414
459
|
ignores(rel) {
|
|
460
|
+
// User `exclude` (#999) is checked first and against the full root-relative
|
|
461
|
+
// path: it must drop git-TRACKED paths (which `.gitignore` can't) and apply
|
|
462
|
+
// everywhere, including ancestors of embedded repos.
|
|
463
|
+
if (this.exclude && this.exclude.ignores(rel))
|
|
464
|
+
return true;
|
|
415
465
|
for (const { root, matcher } of this.embedded) {
|
|
416
466
|
if (rel.startsWith(root)) {
|
|
417
467
|
const inner = rel.slice(root.length);
|
|
@@ -438,7 +488,7 @@ exports.ScopeIgnore = ScopeIgnore;
|
|
|
438
488
|
*/
|
|
439
489
|
function buildScopeIgnore(rootDir, embeddedRoots) {
|
|
440
490
|
const roots = embeddedRoots ? [...embeddedRoots] : discoverEmbeddedRepoRoots(rootDir);
|
|
441
|
-
return new ScopeIgnore(buildDefaultIgnore(rootDir), roots.map((root) => ({ root, matcher: buildDefaultIgnore(path.join(rootDir, root)) })));
|
|
491
|
+
return new ScopeIgnore(buildDefaultIgnore(rootDir), roots.map((root) => ({ root, matcher: buildDefaultIgnore(path.join(rootDir, root)) })), loadExcludeMatcher(rootDir));
|
|
442
492
|
}
|
|
443
493
|
/**
|
|
444
494
|
* Standalone discovery of every embedded repo root under `rootDir` (relative,
|
|
@@ -470,6 +520,28 @@ function discoverEmbeddedRepoRoots(rootDir) {
|
|
|
470
520
|
}
|
|
471
521
|
}
|
|
472
522
|
catch { /* untracked listing failed — ignored-side discovery still runs */ }
|
|
523
|
+
// Unexpanded gitlinks (mode 160000) with a real checkout on disk — embedded
|
|
524
|
+
// repos `git add`ed without `.gitmodules`, or submodules not active here. The
|
|
525
|
+
// untracked listing above can't see them (they're tracked), so find them the
|
|
526
|
+
// same way collectGitFiles does, keeping watcher scope == indexer scope.
|
|
527
|
+
// (#1031, #1033)
|
|
528
|
+
try {
|
|
529
|
+
const staged = (0, child_process_1.execFileSync)('git', ['ls-files', '-z', '-s', '--recurse-submodules'], { cwd: repoAbs, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
|
|
530
|
+
for (const entry of staged.split('\0')) {
|
|
531
|
+
if (!entry || entry.slice(0, 6) !== '160000')
|
|
532
|
+
continue;
|
|
533
|
+
const tab = entry.indexOf('\t');
|
|
534
|
+
if (tab === -1)
|
|
535
|
+
continue;
|
|
536
|
+
const rel = entry.slice(tab + 1);
|
|
537
|
+
const relDir = rel.endsWith('/') ? rel : rel + '/';
|
|
538
|
+
if (defaults.ignores(relDir))
|
|
539
|
+
continue;
|
|
540
|
+
if (classifyGitDir(path.join(repoAbs, rel)) === 'embedded')
|
|
541
|
+
candidates.push(relDir);
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
catch { /* staged listing failed — other discovery still runs */ }
|
|
473
545
|
candidates.push(...findIgnoredEmbeddedRepos(repoAbs, includeIgnored, prefix));
|
|
474
546
|
for (const rel of candidates) {
|
|
475
547
|
const full = (0, utils_1.normalizePath)(prefix + rel);
|
|
@@ -535,14 +607,37 @@ function collectGitFiles(repoDir, prefix, files, embeddedRoots, includeIgnored =
|
|
|
535
607
|
// Without this, monorepos using submodules index 0 files. (See issue #147.)
|
|
536
608
|
// Note: --recurse-submodules only supports -c/--cached and --stage modes — it
|
|
537
609
|
// can't be combined with -o, so untracked files are gathered separately below.
|
|
610
|
+
//
|
|
611
|
+
// We use --stage (-s) rather than -c so each entry carries its file mode. That
|
|
612
|
+
// lets us spot gitlink entries (mode 160000) that --recurse-submodules did NOT
|
|
613
|
+
// expand: a nested repo `git add`ed without a `.gitmodules` entry, or a
|
|
614
|
+
// submodule that isn't active/initialized in this checkout. Such a gitlink
|
|
615
|
+
// falls through every pass — it's tracked, so the untracked `-o` listing below
|
|
616
|
+
// never reports it, and --recurse-submodules only expands ACTIVE submodules —
|
|
617
|
+
// so its source would be silently skipped, leaving only the super-repo's own
|
|
618
|
+
// files indexed. We collect those gitlinks here and recurse into them below.
|
|
619
|
+
// (An active submodule is expanded inline by --recurse-submodules and so never
|
|
620
|
+
// surfaces as a 160000 entry — only the unhandled gitlinks do.) (#1031, #1033)
|
|
621
|
+
//
|
|
538
622
|
// -z gives NUL-separated, unquoted output so non-ASCII (e.g. CJK) paths
|
|
539
623
|
// survive verbatim. Without it git octal-escapes and double-quotes such paths
|
|
540
624
|
// (the core.quotepath default), and the quoted form never matches a real file
|
|
541
|
-
// on disk → those files are silently dropped from the index. (#541)
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
625
|
+
// on disk → those files are silently dropped from the index. (#541) With -s the
|
|
626
|
+
// path follows a TAB after the `<mode> <object> <stage>` prefix.
|
|
627
|
+
const gitlinkRels = [];
|
|
628
|
+
const tracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-z', '-s', '--recurse-submodules'], gitOpts);
|
|
629
|
+
for (const entry of tracked.split('\0')) {
|
|
630
|
+
if (!entry)
|
|
631
|
+
continue;
|
|
632
|
+
const tab = entry.indexOf('\t');
|
|
633
|
+
if (tab === -1)
|
|
634
|
+
continue; // --stage always emits "<mode> <object> <stage>\t<path>"
|
|
635
|
+
const rel = entry.slice(tab + 1);
|
|
636
|
+
if (entry.slice(0, 6) === '160000') {
|
|
637
|
+
gitlinkRels.push(rel); // an unexpanded gitlink — recursed into below, not a source file itself
|
|
638
|
+
continue;
|
|
639
|
+
}
|
|
640
|
+
files.add((0, utils_1.normalizePath)(prefix + rel));
|
|
546
641
|
}
|
|
547
642
|
// Untracked files (submodules manage their own untracked state). Embedded git
|
|
548
643
|
// repos surface here as a single "subdir/" entry that git refuses to descend
|
|
@@ -568,6 +663,25 @@ function collectGitFiles(repoDir, prefix, files, embeddedRoots, includeIgnored =
|
|
|
568
663
|
}
|
|
569
664
|
files.add((0, utils_1.normalizePath)(prefix + rel));
|
|
570
665
|
}
|
|
666
|
+
// Gitlink entries (mode 160000) that --recurse-submodules left unexpanded —
|
|
667
|
+
// an embedded repo `git add`ed without `.gitmodules`, or a submodule not
|
|
668
|
+
// active/initialized in this checkout. When such a gitlink has a real working
|
|
669
|
+
// tree on disk it is distinct first-party code we must index as its own
|
|
670
|
+
// embedded repo: the tracked pass skipped its contents and the untracked pass
|
|
671
|
+
// never sees it (it's tracked, not "other"). A gitlink with no checkout on disk
|
|
672
|
+
// (an uninitialized submodule — empty dir, no `.git`) has nothing to index and
|
|
673
|
+
// is left alone, as is a submodule worktree (a duplicate view, #945). (#1031, #1033)
|
|
674
|
+
for (const rel of gitlinkRels) {
|
|
675
|
+
const relDir = rel.endsWith('/') ? rel : rel + '/';
|
|
676
|
+
if (defaultsOnlyIgnore().ignores(relDir))
|
|
677
|
+
continue;
|
|
678
|
+
const childDir = path.join(repoDir, rel);
|
|
679
|
+
// 'embedded' = a real .git checkout on disk; 'worktree' and 'none' are skipped.
|
|
680
|
+
if (classifyGitDir(childDir) !== 'embedded')
|
|
681
|
+
continue;
|
|
682
|
+
embeddedRoots?.add((0, utils_1.normalizePath)(prefix + relDir));
|
|
683
|
+
collectGitFiles(childDir, prefix + relDir, files, embeddedRoots, includeIgnored);
|
|
684
|
+
}
|
|
571
685
|
// Embedded repos hidden by THIS repo's ignore rules (`/packages/` in a
|
|
572
686
|
// super-repo .gitignore) never appear in any listing above. By default they
|
|
573
687
|
// stay hidden — `.gitignore` is respected (#970, #976). They are recursed into
|
|
@@ -637,14 +751,14 @@ function getGitChangedFiles(rootDir) {
|
|
|
637
751
|
// Custom extension → language overrides from the project's codegraph.json,
|
|
638
752
|
// so change detection sees the same custom-extension files the full index does.
|
|
639
753
|
const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
|
|
640
|
-
collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir));
|
|
754
|
+
collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir), loadExcludeMatcher(rootDir));
|
|
641
755
|
return changes;
|
|
642
756
|
}
|
|
643
757
|
catch {
|
|
644
758
|
return null;
|
|
645
759
|
}
|
|
646
760
|
}
|
|
647
|
-
function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null) {
|
|
761
|
+
function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null, exclude = null) {
|
|
648
762
|
const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '--no-renames'], { cwd: repoDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
|
|
649
763
|
// This repo's own ignore rules — built-in defaults (#407) plus its .gitignore.
|
|
650
764
|
// Change detection must exclude the SAME files the full index does, but git
|
|
@@ -683,6 +797,12 @@ function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null
|
|
|
683
797
|
// index — match against the repo-relative path, same as the full scan. (#766)
|
|
684
798
|
if (ig.ignores(rel))
|
|
685
799
|
continue;
|
|
800
|
+
// User `codegraph.json` `exclude` (#999) is project-root-relative, so it's
|
|
801
|
+
// matched against the full path — sync must not re-add a tracked file the
|
|
802
|
+
// full index now keeps out. Deletions above stay unfiltered so a file that
|
|
803
|
+
// WAS indexed before an exclude was added still cleans itself out.
|
|
804
|
+
if (exclude && exclude.ignores(filePath))
|
|
805
|
+
continue;
|
|
686
806
|
if (statusCode === '??') {
|
|
687
807
|
out.added.push(filePath);
|
|
688
808
|
}
|
|
@@ -697,11 +817,11 @@ function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null
|
|
|
697
817
|
// and they are left alone (#970, #976), mirroring the full-index scan.
|
|
698
818
|
for (const rel of untrackedDirs) {
|
|
699
819
|
for (const repoRel of findNestedGitRepos(path.join(repoDir, rel), rel)) {
|
|
700
|
-
collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored);
|
|
820
|
+
collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored, exclude);
|
|
701
821
|
}
|
|
702
822
|
}
|
|
703
823
|
for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
|
|
704
|
-
collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored);
|
|
824
|
+
collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored, exclude);
|
|
705
825
|
}
|
|
706
826
|
}
|
|
707
827
|
/**
|
|
@@ -860,7 +980,14 @@ function scanDirectoryWalk(rootDir, onProgress) {
|
|
|
860
980
|
}
|
|
861
981
|
// Seed a base matcher with the built-in default ignores (merged with the root
|
|
862
982
|
// .gitignore so a negation can override). Nested .gitignores still layer per-dir.
|
|
863
|
-
|
|
983
|
+
const baseMatchers = [{ dir: rootDir, ig: buildDefaultIgnore(rootDir) }];
|
|
984
|
+
// Project `codegraph.json` `exclude` patterns (#999), rooted at the project so
|
|
985
|
+
// `isIgnored` matches them against root-relative paths — same coverage the
|
|
986
|
+
// git path gets via ScopeIgnore, for non-git projects.
|
|
987
|
+
const exclude = loadExcludeMatcher(rootDir);
|
|
988
|
+
if (exclude)
|
|
989
|
+
baseMatchers.push({ dir: rootDir, ig: exclude });
|
|
990
|
+
walk(rootDir, baseMatchers);
|
|
864
991
|
return files;
|
|
865
992
|
}
|
|
866
993
|
/**
|
|
@@ -1022,148 +1149,153 @@ class ExtractionOrchestrator {
|
|
|
1022
1149
|
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
1023
1150
|
neededLanguages.push('cpp');
|
|
1024
1151
|
}
|
|
1025
|
-
//
|
|
1026
|
-
// Falls back to in-process parsing
|
|
1152
|
+
// Parse files on a pool of worker threads (keeps the main thread free for UI
|
|
1153
|
+
// and uses every core). Falls back to in-process parsing when the compiled
|
|
1154
|
+
// worker is unavailable (e.g. running from source in tests).
|
|
1027
1155
|
const parseWorkerPath = path.join(__dirname, 'parse-worker.js');
|
|
1028
1156
|
const useWorker = fs.existsSync(parseWorkerPath);
|
|
1029
|
-
let
|
|
1157
|
+
let pool = null;
|
|
1030
1158
|
if (useWorker) {
|
|
1031
|
-
|
|
1032
|
-
|
|
1159
|
+
// CODEGRAPH_PARSE_WORKERS: explicit worker count; 1 = the old single-worker
|
|
1160
|
+
// behaviour (the conservative rollback). Unset → clamp(cores-1, 1, 8).
|
|
1161
|
+
const poolSize = (0, parse_pool_1.resolveParsePoolSize)(process.env.CODEGRAPH_PARSE_WORKERS, os.cpus().length);
|
|
1162
|
+
pool = new parse_pool_1.ParseWorkerPool({
|
|
1163
|
+
languages: neededLanguages,
|
|
1164
|
+
size: poolSize,
|
|
1165
|
+
workerScriptPath: parseWorkerPath,
|
|
1166
|
+
recycleInterval: WORKER_RECYCLE_INTERVAL,
|
|
1167
|
+
parseTimeoutMs: PARSE_TIMEOUT_MS,
|
|
1168
|
+
log,
|
|
1169
|
+
});
|
|
1170
|
+
log(`Parse worker pool: ${poolSize} worker(s)`);
|
|
1033
1171
|
}
|
|
1034
1172
|
else {
|
|
1035
|
-
// In-process fallback: load grammars locally
|
|
1173
|
+
// In-process fallback: load grammars locally and parse on the main thread.
|
|
1036
1174
|
await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
|
|
1037
1175
|
}
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1176
|
+
/**
|
|
1177
|
+
* Parse one file: on the pool when available (the promise REJECTS on a worker
|
|
1178
|
+
* crash/timeout — the caller records it and the retry pass re-attempts), or
|
|
1179
|
+
* in-process synchronously as the no-worker fallback. The language is resolved
|
|
1180
|
+
* here on the main thread, where the codegraph.json overrides are loaded.
|
|
1181
|
+
*/
|
|
1182
|
+
const parseFile = (filePath, content) => {
|
|
1183
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1184
|
+
if (!pool)
|
|
1185
|
+
return Promise.resolve((0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames));
|
|
1186
|
+
return pool.requestParse({ filePath, content, language, frameworkNames });
|
|
1187
|
+
};
|
|
1188
|
+
// --- Bounded rolling-window dispatch, ordered commit ---
|
|
1189
|
+
// Reads stay batched/parallel; parses run concurrently across the pool; the
|
|
1190
|
+
// SQLite store stays on the main thread (it isn't thread-safe). Crucially we
|
|
1191
|
+
// COMMIT results in original file order, not parse-completion order: the
|
|
1192
|
+
// resolution phase (run after indexing) resolves an ambiguous reference to one
|
|
1193
|
+
// of several same-named candidates by the nodes' DB insertion order, so a
|
|
1194
|
+
// stable commit order keeps the resulting graph deterministic — byte-identical
|
|
1195
|
+
// to the single-worker path — instead of drifting with parse timing. The
|
|
1196
|
+
// `completed` buffer holds at most ~windowSize out-of-order results, so memory
|
|
1197
|
+
// stays bounded.
|
|
1198
|
+
const windowSize = pool ? Math.max(4, pool.size * 2) : 1;
|
|
1199
|
+
const inFlight = new Set();
|
|
1200
|
+
const completed = new Map();
|
|
1201
|
+
let nextSeq = 0; // file-order sequence assigned at dispatch
|
|
1202
|
+
let nextToStore = 0; // cursor: next sequence to commit
|
|
1203
|
+
let aborted = false;
|
|
1204
|
+
const storeResult = (filePath, content, stats, result) => {
|
|
1205
|
+
processed++;
|
|
1206
|
+
// Store in database on main thread (SQLite is not thread-safe)
|
|
1207
|
+
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1208
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1209
|
+
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
1052
1210
|
}
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
const pending = pendingParses.get(msg.id);
|
|
1058
|
-
if (pending) {
|
|
1059
|
-
clearTimeout(pending.timer);
|
|
1060
|
-
pendingParses.delete(msg.id);
|
|
1061
|
-
pending.resolve(msg.result);
|
|
1062
|
-
}
|
|
1211
|
+
if (result.errors.length > 0) {
|
|
1212
|
+
for (const err of result.errors) {
|
|
1213
|
+
if (!err.filePath)
|
|
1214
|
+
err.filePath = filePath;
|
|
1063
1215
|
}
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1216
|
+
errors.push(...result.errors);
|
|
1217
|
+
}
|
|
1218
|
+
if (result.nodes.length > 0) {
|
|
1219
|
+
filesIndexed++;
|
|
1220
|
+
totalNodes += result.nodes.length;
|
|
1221
|
+
totalEdges += result.edges.length;
|
|
1222
|
+
}
|
|
1223
|
+
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
1224
|
+
filesErrored++;
|
|
1225
|
+
}
|
|
1226
|
+
else {
|
|
1227
|
+
// Files with no symbols but no errors (yaml, twig, properties) are
|
|
1228
|
+
// tracked at the file level — count them as indexed so the CLI doesn't
|
|
1229
|
+
// misleadingly report "No files found to index".
|
|
1230
|
+
const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1231
|
+
if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
|
|
1232
|
+
filesIndexed++;
|
|
1073
1233
|
}
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
if (parseWorker === w) {
|
|
1077
|
-
parseWorker = null;
|
|
1078
|
-
workerParseCount = 0;
|
|
1234
|
+
else {
|
|
1235
|
+
filesSkipped++;
|
|
1079
1236
|
}
|
|
1237
|
+
}
|
|
1238
|
+
onProgress?.({ phase: 'parsing', current: processed, total, currentFile: filePath });
|
|
1239
|
+
};
|
|
1240
|
+
const recordParseFailure = (filePath, err) => {
|
|
1241
|
+
processed++;
|
|
1242
|
+
filesErrored++;
|
|
1243
|
+
errors.push({
|
|
1244
|
+
message: err instanceof Error ? err.message : String(err),
|
|
1245
|
+
filePath,
|
|
1246
|
+
severity: 'error',
|
|
1247
|
+
code: 'parse_error',
|
|
1080
1248
|
});
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
await new Promise((resolve, reject) => {
|
|
1090
|
-
parseWorker.once('message', (msg) => {
|
|
1091
|
-
if (msg.type === 'grammars-loaded')
|
|
1092
|
-
resolve();
|
|
1093
|
-
else
|
|
1094
|
-
reject(new Error(`Unexpected message: ${msg.type}`));
|
|
1095
|
-
});
|
|
1096
|
-
parseWorker.postMessage({ type: 'load-grammars', languages: neededLanguages });
|
|
1097
|
-
});
|
|
1098
|
-
return parseWorker;
|
|
1099
|
-
}
|
|
1100
|
-
if (WorkerClass) {
|
|
1101
|
-
await ensureWorker();
|
|
1102
|
-
}
|
|
1103
|
-
/**
|
|
1104
|
-
* Recycle the worker thread to reclaim WASM memory.
|
|
1105
|
-
* Terminates the current worker and clears the reference so
|
|
1106
|
-
* ensureWorker() will spawn a fresh one on the next call.
|
|
1107
|
-
*/
|
|
1108
|
-
function recycleWorker() {
|
|
1109
|
-
if (!parseWorker)
|
|
1249
|
+
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1250
|
+
};
|
|
1251
|
+
// Commit buffered parses to the DB in file order, advancing the cursor over
|
|
1252
|
+
// contiguous completed results. Runs after each parse settles (and once more
|
|
1253
|
+
// after the drain). storeResult / recordParseFailure run here single-threaded,
|
|
1254
|
+
// so shared counters and SQLite writes never race despite parallel parsing.
|
|
1255
|
+
const flushOrdered = () => {
|
|
1256
|
+
if (aborted)
|
|
1110
1257
|
return;
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
// Resolve the language on the main thread (where the project's
|
|
1120
|
-
// codegraph.json overrides are loaded) and hand it to the worker, so the
|
|
1121
|
-
// worker never needs the override map itself.
|
|
1122
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1123
|
-
if (!WorkerClass) {
|
|
1124
|
-
// In-process fallback
|
|
1125
|
-
return (0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames);
|
|
1258
|
+
while (completed.has(nextToStore)) {
|
|
1259
|
+
const item = completed.get(nextToStore);
|
|
1260
|
+
completed.delete(nextToStore);
|
|
1261
|
+
nextToStore++;
|
|
1262
|
+
if (item.ok)
|
|
1263
|
+
storeResult(item.filePath, item.content, item.stats, item.result);
|
|
1264
|
+
else
|
|
1265
|
+
recordParseFailure(item.filePath, item.err);
|
|
1126
1266
|
}
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1267
|
+
};
|
|
1268
|
+
// Dispatch one file's parse (parses run concurrently across the pool), tagged
|
|
1269
|
+
// with its file-order sequence so flushOrdered commits results in order. The
|
|
1270
|
+
// backpressure below bounds how far parsing runs ahead of the in-order commit.
|
|
1271
|
+
const feed = async (filePath, content, stats) => {
|
|
1272
|
+
const seq = nextSeq++;
|
|
1273
|
+
const p = (async () => {
|
|
1274
|
+
try {
|
|
1275
|
+
const result = await parseFile(filePath, content);
|
|
1276
|
+
completed.set(seq, { ok: true, filePath, content, stats, result });
|
|
1277
|
+
}
|
|
1278
|
+
catch (parseErr) {
|
|
1279
|
+
completed.set(seq, { ok: false, filePath, err: parseErr });
|
|
1280
|
+
}
|
|
1281
|
+
flushOrdered();
|
|
1282
|
+
})();
|
|
1283
|
+
const tracked = p.finally(() => { inFlight.delete(tracked); });
|
|
1284
|
+
inFlight.add(tracked);
|
|
1285
|
+
// Backpressure on the dispatched-but-not-yet-committed count (in-flight +
|
|
1286
|
+
// buffered), not just in-flight: a slow file sitting at the commit cursor
|
|
1287
|
+
// lets later parses finish and buffer, which would otherwise grow without
|
|
1288
|
+
// bound. Wait for parses to settle (each may advance the cursor) until the
|
|
1289
|
+
// window has room. `inFlight.size > 0` guards against an empty race — the
|
|
1290
|
+
// cursor file is always still in flight when the window is full.
|
|
1291
|
+
while (nextSeq - nextToStore >= windowSize && inFlight.size > 0) {
|
|
1292
|
+
await Promise.race(inFlight);
|
|
1132
1293
|
}
|
|
1133
|
-
|
|
1134
|
-
const id = nextId++;
|
|
1135
|
-
workerParseCount++;
|
|
1136
|
-
// Scale timeout for large files: base 10s + 10s per 100KB
|
|
1137
|
-
const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
|
|
1138
|
-
return new Promise((resolve, reject) => {
|
|
1139
|
-
const timer = setTimeout(() => {
|
|
1140
|
-
pendingParses.delete(id);
|
|
1141
|
-
log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
|
|
1142
|
-
// Reject FIRST — worker.terminate() can hang if WASM is stuck
|
|
1143
|
-
parseWorker = null;
|
|
1144
|
-
workerParseCount = 0;
|
|
1145
|
-
reject(new Error(`Parse timed out after ${timeoutMs}ms`));
|
|
1146
|
-
// Fire-and-forget: kill the stuck worker in the background
|
|
1147
|
-
worker.terminate().catch(() => { });
|
|
1148
|
-
}, timeoutMs);
|
|
1149
|
-
pendingParses.set(id, { resolve, reject, timer });
|
|
1150
|
-
worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames, language });
|
|
1151
|
-
});
|
|
1152
|
-
}
|
|
1294
|
+
};
|
|
1153
1295
|
for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
|
|
1154
1296
|
if (signal?.aborted) {
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
return {
|
|
1158
|
-
success: false,
|
|
1159
|
-
filesIndexed,
|
|
1160
|
-
filesSkipped,
|
|
1161
|
-
filesErrored,
|
|
1162
|
-
nodesCreated: totalNodes,
|
|
1163
|
-
edgesCreated: totalEdges,
|
|
1164
|
-
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1165
|
-
durationMs: Date.now() - startTime,
|
|
1166
|
-
};
|
|
1297
|
+
aborted = true;
|
|
1298
|
+
break;
|
|
1167
1299
|
}
|
|
1168
1300
|
const batch = files.slice(i, i + FILE_IO_BATCH_SIZE);
|
|
1169
1301
|
// Read files in parallel (with path validation before any I/O)
|
|
@@ -1185,29 +1317,13 @@ class ExtractionOrchestrator {
|
|
|
1185
1317
|
return { filePath: fp, content: null, stats: null, error: err };
|
|
1186
1318
|
}
|
|
1187
1319
|
}));
|
|
1188
|
-
//
|
|
1320
|
+
// Dispatch each readable file into the bounded parse window; the window
|
|
1321
|
+
// stores results on the main thread as they arrive.
|
|
1189
1322
|
for (const { filePath, content, stats, error } of fileContents) {
|
|
1190
1323
|
if (signal?.aborted) {
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
return {
|
|
1194
|
-
success: false,
|
|
1195
|
-
filesIndexed,
|
|
1196
|
-
filesSkipped,
|
|
1197
|
-
filesErrored,
|
|
1198
|
-
nodesCreated: totalNodes,
|
|
1199
|
-
edgesCreated: totalEdges,
|
|
1200
|
-
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1201
|
-
durationMs: Date.now() - startTime,
|
|
1202
|
-
};
|
|
1324
|
+
aborted = true;
|
|
1325
|
+
break;
|
|
1203
1326
|
}
|
|
1204
|
-
// Report progress before parsing (show current file being worked on)
|
|
1205
|
-
onProgress?.({
|
|
1206
|
-
phase: 'parsing',
|
|
1207
|
-
current: processed,
|
|
1208
|
-
total,
|
|
1209
|
-
currentFile: filePath,
|
|
1210
|
-
});
|
|
1211
1327
|
if (error || content === null || stats === null) {
|
|
1212
1328
|
processed++;
|
|
1213
1329
|
filesErrored++;
|
|
@@ -1217,6 +1333,7 @@ class ExtractionOrchestrator {
|
|
|
1217
1333
|
severity: 'error',
|
|
1218
1334
|
code: 'read_error',
|
|
1219
1335
|
});
|
|
1336
|
+
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1220
1337
|
continue;
|
|
1221
1338
|
}
|
|
1222
1339
|
// Honour MAX_FILE_SIZE. Without this check, vendored generated
|
|
@@ -1236,57 +1353,32 @@ class ExtractionOrchestrator {
|
|
|
1236
1353
|
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1237
1354
|
continue;
|
|
1238
1355
|
}
|
|
1239
|
-
// Parse
|
|
1240
|
-
//
|
|
1241
|
-
|
|
1242
|
-
try {
|
|
1243
|
-
result = await requestParse(filePath, content);
|
|
1244
|
-
}
|
|
1245
|
-
catch (parseErr) {
|
|
1246
|
-
processed++;
|
|
1247
|
-
filesErrored++;
|
|
1248
|
-
errors.push({
|
|
1249
|
-
message: parseErr instanceof Error ? parseErr.message : String(parseErr),
|
|
1250
|
-
filePath,
|
|
1251
|
-
severity: 'error',
|
|
1252
|
-
code: 'parse_error',
|
|
1253
|
-
});
|
|
1254
|
-
continue;
|
|
1255
|
-
}
|
|
1256
|
-
processed++;
|
|
1257
|
-
// Store in database on main thread (SQLite is not thread-safe)
|
|
1258
|
-
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1259
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1260
|
-
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
1261
|
-
}
|
|
1262
|
-
if (result.errors.length > 0) {
|
|
1263
|
-
for (const err of result.errors) {
|
|
1264
|
-
if (!err.filePath)
|
|
1265
|
-
err.filePath = filePath;
|
|
1266
|
-
}
|
|
1267
|
-
errors.push(...result.errors);
|
|
1268
|
-
}
|
|
1269
|
-
if (result.nodes.length > 0) {
|
|
1270
|
-
filesIndexed++;
|
|
1271
|
-
totalNodes += result.nodes.length;
|
|
1272
|
-
totalEdges += result.edges.length;
|
|
1273
|
-
}
|
|
1274
|
-
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
1275
|
-
filesErrored++;
|
|
1276
|
-
}
|
|
1277
|
-
else {
|
|
1278
|
-
// Files with no symbols but no errors (yaml, twig, properties) are
|
|
1279
|
-
// tracked at the file level — count them as indexed so the CLI
|
|
1280
|
-
// doesn't misleadingly report "No files found to index".
|
|
1281
|
-
const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1282
|
-
if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
|
|
1283
|
-
filesIndexed++;
|
|
1284
|
-
}
|
|
1285
|
-
else {
|
|
1286
|
-
filesSkipped++;
|
|
1287
|
-
}
|
|
1288
|
-
}
|
|
1356
|
+
// Parse on the pool (main thread stays unblocked). Errors/timeouts are
|
|
1357
|
+
// handled inside feed() → recordParseFailure, feeding the retry pass.
|
|
1358
|
+
await feed(filePath, content, stats);
|
|
1289
1359
|
}
|
|
1360
|
+
if (aborted)
|
|
1361
|
+
break;
|
|
1362
|
+
}
|
|
1363
|
+
// Drain parses still in flight (skip on abort — we tear down below instead),
|
|
1364
|
+
// then commit any results the cursor hasn't reached yet.
|
|
1365
|
+
if (!aborted) {
|
|
1366
|
+
await Promise.all(inFlight);
|
|
1367
|
+
flushOrdered();
|
|
1368
|
+
}
|
|
1369
|
+
if (signal?.aborted || aborted) {
|
|
1370
|
+
if (pool)
|
|
1371
|
+
await pool.destroy();
|
|
1372
|
+
return {
|
|
1373
|
+
success: false,
|
|
1374
|
+
filesIndexed,
|
|
1375
|
+
filesSkipped,
|
|
1376
|
+
filesErrored,
|
|
1377
|
+
nodesCreated: totalNodes,
|
|
1378
|
+
edgesCreated: totalEdges,
|
|
1379
|
+
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1380
|
+
durationMs: Date.now() - startTime,
|
|
1381
|
+
};
|
|
1290
1382
|
}
|
|
1291
1383
|
// Report 100% so the progress bar doesn't hang at 99%
|
|
1292
1384
|
onProgress?.({
|
|
@@ -1303,15 +1395,17 @@ class ExtractionOrchestrator {
|
|
|
1303
1395
|
// every file gets the absolute cleanest WASM state possible.
|
|
1304
1396
|
const retryableErrors = errors.filter((e) => e.code === 'parse_error' && e.filePath &&
|
|
1305
1397
|
(e.message.includes('Worker exited') || e.message.includes('memory access out of bounds')));
|
|
1306
|
-
if (retryableErrors.length > 0 &&
|
|
1398
|
+
if (retryableErrors.length > 0 && pool) {
|
|
1307
1399
|
log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
|
|
1400
|
+
// Fresh WASM heaps for the retry phase. A retry that still crashes its
|
|
1401
|
+
// worker makes the pool respawn it, so later retries keep landing on clean
|
|
1402
|
+
// workers too.
|
|
1403
|
+
pool.recycleAll();
|
|
1308
1404
|
const stillFailing = [];
|
|
1309
1405
|
for (const errEntry of retryableErrors) {
|
|
1310
1406
|
const filePath = errEntry.filePath;
|
|
1311
1407
|
if (signal?.aborted)
|
|
1312
1408
|
break;
|
|
1313
|
-
// Fresh worker for every retry — maximum WASM headroom
|
|
1314
|
-
recycleWorker();
|
|
1315
1409
|
let content;
|
|
1316
1410
|
try {
|
|
1317
1411
|
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
@@ -1324,7 +1418,7 @@ class ExtractionOrchestrator {
|
|
|
1324
1418
|
}
|
|
1325
1419
|
let result;
|
|
1326
1420
|
try {
|
|
1327
|
-
result = await
|
|
1421
|
+
result = await parseFile(filePath, content);
|
|
1328
1422
|
}
|
|
1329
1423
|
catch {
|
|
1330
1424
|
stillFailing.push(errEntry);
|
|
@@ -1350,11 +1444,11 @@ class ExtractionOrchestrator {
|
|
|
1350
1444
|
// code nodes but consume parser memory.
|
|
1351
1445
|
if (stillFailing.length > 0) {
|
|
1352
1446
|
log(`${stillFailing.length} files still failing — retrying with comments stripped...`);
|
|
1447
|
+
pool.recycleAll();
|
|
1353
1448
|
for (const errEntry of stillFailing) {
|
|
1354
1449
|
const filePath = errEntry.filePath;
|
|
1355
1450
|
if (signal?.aborted)
|
|
1356
1451
|
break;
|
|
1357
|
-
recycleWorker();
|
|
1358
1452
|
let fullContent;
|
|
1359
1453
|
try {
|
|
1360
1454
|
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
@@ -1373,7 +1467,7 @@ class ExtractionOrchestrator {
|
|
|
1373
1467
|
.join('\n');
|
|
1374
1468
|
let result;
|
|
1375
1469
|
try {
|
|
1376
|
-
result = await
|
|
1470
|
+
result = await parseFile(filePath, stripped);
|
|
1377
1471
|
}
|
|
1378
1472
|
catch {
|
|
1379
1473
|
continue;
|
|
@@ -1394,11 +1488,9 @@ class ExtractionOrchestrator {
|
|
|
1394
1488
|
}
|
|
1395
1489
|
}
|
|
1396
1490
|
}
|
|
1397
|
-
// Shut down parse worker
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
parseWorker.terminate().catch(() => { });
|
|
1401
|
-
}
|
|
1491
|
+
// Shut down the parse worker pool.
|
|
1492
|
+
if (pool)
|
|
1493
|
+
await pool.destroy();
|
|
1402
1494
|
return {
|
|
1403
1495
|
success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
|
|
1404
1496
|
filesIndexed,
|