@colbymchenry/codegraph-darwin-x64 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/dist/bin/codegraph.js +79 -52
- package/lib/dist/bin/codegraph.js.map +1 -1
- package/lib/dist/bin/command-supervision.d.ts +12 -0
- package/lib/dist/bin/command-supervision.d.ts.map +1 -0
- package/lib/dist/bin/command-supervision.js +76 -0
- package/lib/dist/bin/command-supervision.js.map +1 -0
- package/lib/dist/db/queries.d.ts.map +1 -1
- package/lib/dist/db/queries.js +10 -2
- package/lib/dist/db/queries.js.map +1 -1
- package/lib/dist/directory.d.ts +32 -0
- package/lib/dist/directory.d.ts.map +1 -1
- package/lib/dist/directory.js +83 -0
- package/lib/dist/directory.js.map +1 -1
- package/lib/dist/extraction/index.d.ts +19 -4
- package/lib/dist/extraction/index.d.ts.map +1 -1
- package/lib/dist/extraction/index.js +287 -241
- package/lib/dist/extraction/index.js.map +1 -1
- package/lib/dist/extraction/parse-pool.d.ts +126 -0
- package/lib/dist/extraction/parse-pool.d.ts.map +1 -0
- package/lib/dist/extraction/parse-pool.js +319 -0
- package/lib/dist/extraction/parse-pool.js.map +1 -0
- package/lib/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/lib/dist/extraction/tree-sitter.js +48 -19
- package/lib/dist/extraction/tree-sitter.js.map +1 -1
- package/lib/dist/mcp/daemon-paths.d.ts +30 -3
- package/lib/dist/mcp/daemon-paths.d.ts.map +1 -1
- package/lib/dist/mcp/daemon-paths.js +50 -10
- package/lib/dist/mcp/daemon-paths.js.map +1 -1
- package/lib/dist/mcp/daemon-registry.d.ts.map +1 -1
- package/lib/dist/mcp/daemon-registry.js +7 -3
- package/lib/dist/mcp/daemon-registry.js.map +1 -1
- package/lib/dist/mcp/daemon.d.ts +38 -0
- package/lib/dist/mcp/daemon.d.ts.map +1 -1
- package/lib/dist/mcp/daemon.js +168 -19
- package/lib/dist/mcp/daemon.js.map +1 -1
- package/lib/dist/mcp/engine.d.ts +17 -0
- package/lib/dist/mcp/engine.d.ts.map +1 -1
- package/lib/dist/mcp/engine.js +73 -1
- package/lib/dist/mcp/engine.js.map +1 -1
- package/lib/dist/mcp/index.d.ts.map +1 -1
- package/lib/dist/mcp/index.js +25 -43
- package/lib/dist/mcp/index.js.map +1 -1
- package/lib/dist/mcp/ppid-watchdog.d.ts +18 -0
- package/lib/dist/mcp/ppid-watchdog.d.ts.map +1 -1
- package/lib/dist/mcp/ppid-watchdog.js +37 -0
- package/lib/dist/mcp/ppid-watchdog.js.map +1 -1
- package/lib/dist/mcp/proxy.d.ts.map +1 -1
- package/lib/dist/mcp/proxy.js +14 -1
- package/lib/dist/mcp/proxy.js.map +1 -1
- package/lib/dist/mcp/query-pool.d.ts +94 -0
- package/lib/dist/mcp/query-pool.d.ts.map +1 -0
- package/lib/dist/mcp/query-pool.js +297 -0
- package/lib/dist/mcp/query-pool.js.map +1 -0
- package/lib/dist/mcp/query-worker.d.ts +24 -0
- package/lib/dist/mcp/query-worker.d.ts.map +1 -0
- package/lib/dist/mcp/query-worker.js +87 -0
- package/lib/dist/mcp/query-worker.js.map +1 -0
- package/lib/dist/mcp/tools.d.ts +57 -0
- package/lib/dist/mcp/tools.d.ts.map +1 -1
- package/lib/dist/mcp/tools.js +147 -37
- package/lib/dist/mcp/tools.js.map +1 -1
- package/lib/dist/project-config.d.ts +37 -0
- package/lib/dist/project-config.d.ts.map +1 -1
- package/lib/dist/project-config.js +127 -32
- package/lib/dist/project-config.js.map +1 -1
- package/lib/dist/resolution/c-fnptr-synthesizer.d.ts +0 -28
- package/lib/dist/resolution/c-fnptr-synthesizer.d.ts.map +1 -1
- package/lib/dist/resolution/c-fnptr-synthesizer.js +765 -79
- package/lib/dist/resolution/c-fnptr-synthesizer.js.map +1 -1
- package/lib/dist/resolution/name-matcher.d.ts.map +1 -1
- package/lib/dist/resolution/name-matcher.js +44 -0
- package/lib/dist/resolution/name-matcher.js.map +1 -1
- package/lib/node_modules/.package-lock.json +1 -1
- package/lib/package.json +1 -1
- package/package.json +1 -1
|
@@ -51,9 +51,11 @@ exports.scanDirectoryAsync = scanDirectoryAsync;
|
|
|
51
51
|
const fs = __importStar(require("fs"));
|
|
52
52
|
const fsp = __importStar(require("fs/promises"));
|
|
53
53
|
const path = __importStar(require("path"));
|
|
54
|
+
const os = __importStar(require("os"));
|
|
54
55
|
const crypto = __importStar(require("crypto"));
|
|
55
56
|
const child_process_1 = require("child_process");
|
|
56
57
|
const tree_sitter_1 = require("./tree-sitter");
|
|
58
|
+
const parse_pool_1 = require("./parse-pool");
|
|
57
59
|
const grammars_1 = require("./grammars");
|
|
58
60
|
const project_config_1 = require("../project-config");
|
|
59
61
|
const directory_1 = require("../directory");
|
|
@@ -252,6 +254,32 @@ function buildDefaultIgnore(rootDir) {
|
|
|
252
254
|
function defaultsOnlyIgnore() {
|
|
253
255
|
return (0, ignore_1.default)().add(DEFAULT_IGNORE_PATTERNS);
|
|
254
256
|
}
|
|
257
|
+
/**
|
|
258
|
+
* Matcher for the project's `codegraph.json` `includeIgnored` patterns — the
|
|
259
|
+
* explicit opt-in to index embedded git repos living inside gitignored
|
|
260
|
+
* directories (#622, #699). Returns `null` when the project opted in nothing,
|
|
261
|
+
* which is the zero-config DEFAULT: `.gitignore` is then fully respected and a
|
|
262
|
+
* gitignored directory (even one holding nested repos) is never walked or
|
|
263
|
+
* indexed (#970, #976). Built once per scan/sync/scope operation from the scan
|
|
264
|
+
* root and threaded down — never global, so multi-project daemons stay isolated.
|
|
265
|
+
*/
|
|
266
|
+
function loadIncludeIgnoredMatcher(rootDir) {
|
|
267
|
+
const patterns = (0, project_config_1.loadIncludeIgnoredPatterns)(rootDir);
|
|
268
|
+
return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
|
|
269
|
+
}
|
|
270
|
+
/**
|
|
271
|
+
* Matcher for the project's `codegraph.json` `exclude` patterns — paths to keep
|
|
272
|
+
* OUT of the index even when git-tracked, which `.gitignore` cannot do (#999).
|
|
273
|
+
* The escape hatch for a committed vendor/theme/SDK directory. Returns `null`
|
|
274
|
+
* when nothing is excluded (the zero-config default → no overhead). Matched
|
|
275
|
+
* against project-root-relative paths, so it applies uniformly across the whole
|
|
276
|
+
* workspace, including inside embedded repos (excluding `static/` means gone
|
|
277
|
+
* everywhere). Built once per scan/sync/scope operation from the scan root.
|
|
278
|
+
*/
|
|
279
|
+
function loadExcludeMatcher(rootDir) {
|
|
280
|
+
const patterns = (0, project_config_1.loadExcludePatterns)(rootDir);
|
|
281
|
+
return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
|
|
282
|
+
}
|
|
255
283
|
/**
|
|
256
284
|
* `git ls-files --directory` collapses a wholly-untracked/ignored directory into
|
|
257
285
|
* one entry — and when the command's own cwd is such a directory (the indexed
|
|
@@ -391,14 +419,27 @@ function findNestedGitRepos(absDir, relPrefix) {
|
|
|
391
419
|
*/
|
|
392
420
|
class ScopeIgnore {
|
|
393
421
|
rootMatcher;
|
|
422
|
+
exclude;
|
|
394
423
|
embedded;
|
|
395
424
|
defaults = defaultsOnlyIgnore();
|
|
396
|
-
constructor(rootMatcher, embedded
|
|
425
|
+
constructor(rootMatcher, embedded,
|
|
426
|
+
/**
|
|
427
|
+
* Project `codegraph.json` `exclude` patterns (#999), matched against the
|
|
428
|
+
* full root-relative path. Wins over everything else — an explicit user
|
|
429
|
+
* exclude applies even to tracked files and even inside embedded repos.
|
|
430
|
+
*/
|
|
431
|
+
exclude = null) {
|
|
397
432
|
this.rootMatcher = rootMatcher;
|
|
433
|
+
this.exclude = exclude;
|
|
398
434
|
// Longest root first so paths in nested embedded repos hit the innermost matcher.
|
|
399
435
|
this.embedded = [...embedded].sort((a, b) => b.root.length - a.root.length);
|
|
400
436
|
}
|
|
401
437
|
ignores(rel) {
|
|
438
|
+
// User `exclude` (#999) is checked first and against the full root-relative
|
|
439
|
+
// path: it must drop git-TRACKED paths (which `.gitignore` can't) and apply
|
|
440
|
+
// everywhere, including ancestors of embedded repos.
|
|
441
|
+
if (this.exclude && this.exclude.ignores(rel))
|
|
442
|
+
return true;
|
|
402
443
|
for (const { root, matcher } of this.embedded) {
|
|
403
444
|
if (rel.startsWith(root)) {
|
|
404
445
|
const inner = rel.slice(root.length);
|
|
@@ -425,13 +466,16 @@ exports.ScopeIgnore = ScopeIgnore;
|
|
|
425
466
|
*/
|
|
426
467
|
function buildScopeIgnore(rootDir, embeddedRoots) {
|
|
427
468
|
const roots = embeddedRoots ? [...embeddedRoots] : discoverEmbeddedRepoRoots(rootDir);
|
|
428
|
-
return new ScopeIgnore(buildDefaultIgnore(rootDir), roots.map((root) => ({ root, matcher: buildDefaultIgnore(path.join(rootDir, root)) })));
|
|
469
|
+
return new ScopeIgnore(buildDefaultIgnore(rootDir), roots.map((root) => ({ root, matcher: buildDefaultIgnore(path.join(rootDir, root)) })), loadExcludeMatcher(rootDir));
|
|
429
470
|
}
|
|
430
471
|
/**
|
|
431
472
|
* Standalone discovery of every embedded repo root under `rootDir` (relative,
|
|
432
|
-
* trailing-slashed) —
|
|
433
|
-
* (#514)
|
|
434
|
-
*
|
|
473
|
+
* trailing-slashed) — the untracked kind (#193) always, and the gitignored kind
|
|
474
|
+
* (#514) only for directories the project opted in via `codegraph.json`
|
|
475
|
+
* `includeIgnored` (#622, #699); otherwise `.gitignore` is respected and they
|
|
476
|
+
* are not discovered (#970, #976). Recursive (an embedded repo can embed further
|
|
477
|
+
* repos). Returns [] for non-git roots: the filesystem walk handles nested repos
|
|
478
|
+
* there already.
|
|
435
479
|
*/
|
|
436
480
|
function discoverEmbeddedRepoRoots(rootDir) {
|
|
437
481
|
try {
|
|
@@ -442,6 +486,7 @@ function discoverEmbeddedRepoRoots(rootDir) {
|
|
|
442
486
|
}
|
|
443
487
|
const out = [];
|
|
444
488
|
const defaults = defaultsOnlyIgnore();
|
|
489
|
+
const includeIgnored = loadIncludeIgnoredMatcher(rootDir);
|
|
445
490
|
const visit = (repoAbs, prefix) => {
|
|
446
491
|
const candidates = [];
|
|
447
492
|
try {
|
|
@@ -453,7 +498,7 @@ function discoverEmbeddedRepoRoots(rootDir) {
|
|
|
453
498
|
}
|
|
454
499
|
}
|
|
455
500
|
catch { /* untracked listing failed — ignored-side discovery still runs */ }
|
|
456
|
-
candidates.push(...findIgnoredEmbeddedRepos(repoAbs));
|
|
501
|
+
candidates.push(...findIgnoredEmbeddedRepos(repoAbs, includeIgnored, prefix));
|
|
457
502
|
for (const rel of candidates) {
|
|
458
503
|
const full = (0, utils_1.normalizePath)(prefix + rel);
|
|
459
504
|
out.push(full);
|
|
@@ -464,16 +509,30 @@ function discoverEmbeddedRepoRoots(rootDir) {
|
|
|
464
509
|
return out;
|
|
465
510
|
}
|
|
466
511
|
/**
|
|
467
|
-
* Discover embedded repos hidden by `repoDir`'s OWN
|
|
468
|
-
* gitignored directory
|
|
469
|
-
*
|
|
512
|
+
* Discover embedded repos hidden by `repoDir`'s OWN gitignore rules: for each
|
|
513
|
+
* gitignored directory, search for nested `.git` roots. Returns repo paths
|
|
514
|
+
* relative to `repoDir`, trailing-slashed.
|
|
515
|
+
*
|
|
516
|
+
* OPT-IN ONLY. Walking into a gitignored directory contradicts what every other
|
|
517
|
+
* tool (and CodeGraph's own `git ls-files` foundation) does — `.gitignore`
|
|
518
|
+
* excludes. So this returns `[]` unless the project opted the directory in via
|
|
519
|
+
* `codegraph.json` `includeIgnored`; without that, a gitignored dir — including
|
|
520
|
+
* a huge reference/data dir full of nested clones — is left untouched (#970,
|
|
521
|
+
* #976). When opted in, it restores the super-repo-of-clones behavior (#622,
|
|
522
|
+
* #699). `prefix` is the scan-root-relative path of `repoDir`, so a pattern like
|
|
523
|
+
* `services/` opts that whole subtree in at any recursion depth. Built-in
|
|
524
|
+
* default excludes (`node_modules`, …) are always skipped.
|
|
470
525
|
*/
|
|
471
|
-
function findIgnoredEmbeddedRepos(repoDir) {
|
|
526
|
+
function findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix) {
|
|
527
|
+
if (!includeIgnored)
|
|
528
|
+
return [];
|
|
472
529
|
const defaults = defaultsOnlyIgnore();
|
|
473
530
|
const repos = [];
|
|
474
531
|
for (const dir of listIgnoredDirs(repoDir)) {
|
|
475
532
|
if (defaults.ignores(dir))
|
|
476
533
|
continue;
|
|
534
|
+
if (!includeIgnored.ignores((0, utils_1.normalizePath)(prefix + dir)))
|
|
535
|
+
continue;
|
|
477
536
|
repos.push(...findNestedGitRepos(path.join(repoDir, dir), dir));
|
|
478
537
|
}
|
|
479
538
|
return repos;
|
|
@@ -489,12 +548,15 @@ function findIgnoredEmbeddedRepos(repoDir) {
|
|
|
489
548
|
* skips them entirely, and untracked output reports them only as an opaque
|
|
490
549
|
* "subdir/" entry (trailing slash) rather than expanding their files. Each
|
|
491
550
|
* embedded repo is its own git boundary, so we re-run `git ls-files` inside it.
|
|
492
|
-
* (See issue #193.) GITIGNORED embedded repos are invisible even to that
|
|
493
|
-
*
|
|
494
|
-
*
|
|
495
|
-
*
|
|
551
|
+
* (See issue #193.) GITIGNORED embedded repos are invisible even to that; they
|
|
552
|
+
* are discovered separately via `findIgnoredEmbeddedRepos` (#514) but ONLY for
|
|
553
|
+
* directories the project opted in through `codegraph.json` `includeIgnored`
|
|
554
|
+
* (`includeIgnored` here, threaded from the scan root) — by default `.gitignore`
|
|
555
|
+
* is respected and they stay out (#970, #976). Every embedded repo root (however
|
|
556
|
+
* found) is recorded in `embeddedRoots` so callers can exempt its files from the
|
|
557
|
+
* parent's own gitignore rules.
|
|
496
558
|
*/
|
|
497
|
-
function collectGitFiles(repoDir, prefix, files, embeddedRoots) {
|
|
559
|
+
function collectGitFiles(repoDir, prefix, files, embeddedRoots, includeIgnored = null) {
|
|
498
560
|
const gitOpts = { cwd: repoDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true };
|
|
499
561
|
// Tracked files. --recurse-submodules pulls in files from active submodules,
|
|
500
562
|
// which the index would otherwise represent only as a commit pointer.
|
|
@@ -528,18 +590,20 @@ function collectGitFiles(repoDir, prefix, files, embeddedRoots) {
|
|
|
528
590
|
// it's a duplicate working view of an already-indexed repo (#848).
|
|
529
591
|
if (classifyGitDir(childDir) === 'embedded' && !defaultsOnlyIgnore().ignores(rel)) {
|
|
530
592
|
embeddedRoots?.add((0, utils_1.normalizePath)(prefix + rel));
|
|
531
|
-
collectGitFiles(childDir, prefix + rel, files, embeddedRoots);
|
|
593
|
+
collectGitFiles(childDir, prefix + rel, files, embeddedRoots, includeIgnored);
|
|
532
594
|
}
|
|
533
595
|
continue;
|
|
534
596
|
}
|
|
535
597
|
files.add((0, utils_1.normalizePath)(prefix + rel));
|
|
536
598
|
}
|
|
537
599
|
// Embedded repos hidden by THIS repo's ignore rules (`/packages/` in a
|
|
538
|
-
// super-repo .gitignore) never appear in any listing above
|
|
539
|
-
//
|
|
540
|
-
|
|
600
|
+
// super-repo .gitignore) never appear in any listing above. By default they
|
|
601
|
+
// stay hidden — `.gitignore` is respected (#970, #976). They are recursed into
|
|
602
|
+
// only when the project opted the directory in via `codegraph.json`
|
|
603
|
+
// `includeIgnored` (#622, #699), which `findIgnoredEmbeddedRepos` enforces.
|
|
604
|
+
for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
|
|
541
605
|
embeddedRoots?.add((0, utils_1.normalizePath)(prefix + rel));
|
|
542
|
-
collectGitFiles(path.join(repoDir, rel), prefix + rel, files, embeddedRoots);
|
|
606
|
+
collectGitFiles(path.join(repoDir, rel), prefix + rel, files, embeddedRoots, includeIgnored);
|
|
543
607
|
}
|
|
544
608
|
}
|
|
545
609
|
/**
|
|
@@ -567,7 +631,7 @@ function getGitVisibleFiles(rootDir) {
|
|
|
567
631
|
}
|
|
568
632
|
const files = new Set();
|
|
569
633
|
const embeddedRoots = new Set();
|
|
570
|
-
collectGitFiles(rootDir, '', files, embeddedRoots);
|
|
634
|
+
collectGitFiles(rootDir, '', files, embeddedRoots, loadIncludeIgnoredMatcher(rootDir));
|
|
571
635
|
// Apply built-in default ignores uniformly — to tracked files too, since
|
|
572
636
|
// committing a dependency/build dir doesn't make it project code. A
|
|
573
637
|
// `.gitignore` negation (e.g. `!vendor/`) is the explicit opt-in. (issue #407)
|
|
@@ -585,13 +649,15 @@ function getGitVisibleFiles(rootDir) {
|
|
|
585
649
|
* Use `git status` to detect changed files instead of scanning every file.
|
|
586
650
|
* Returns null on failure so callers fall back to full scan.
|
|
587
651
|
*
|
|
588
|
-
* Recurses into embedded repos —
|
|
589
|
-
*
|
|
590
|
-
* kind (#514: they never appear in the parent's status at all)
|
|
591
|
-
*
|
|
592
|
-
*
|
|
593
|
-
*
|
|
594
|
-
*
|
|
652
|
+
* Recurses into embedded repos — the untracked kind (#193: the parent's status
|
|
653
|
+
* collapses them to an opaque `?? subdir/` entry) always, and the gitignored
|
|
654
|
+
* kind (#514: they never appear in the parent's status at all) only for
|
|
655
|
+
* directories opted in via `codegraph.json` `includeIgnored` (#622, #699) —
|
|
656
|
+
* running `git status` inside each, so changes in a multi-repo workspace sync
|
|
657
|
+
* without a full rescan. By default a gitignored dir is left alone, matching the
|
|
658
|
+
* full-index scan (#970, #976). Deleting an ENTIRE embedded repo dir is the one
|
|
659
|
+
* case this cannot see (the child status that would report the deletions is gone
|
|
660
|
+
* with it); a full `codegraph index` reconciles that.
|
|
595
661
|
*/
|
|
596
662
|
function getGitChangedFiles(rootDir) {
|
|
597
663
|
try {
|
|
@@ -599,14 +665,14 @@ function getGitChangedFiles(rootDir) {
|
|
|
599
665
|
// Custom extension → language overrides from the project's codegraph.json,
|
|
600
666
|
// so change detection sees the same custom-extension files the full index does.
|
|
601
667
|
const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
|
|
602
|
-
collectGitStatus(rootDir, '', changes, overrides);
|
|
668
|
+
collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir), loadExcludeMatcher(rootDir));
|
|
603
669
|
return changes;
|
|
604
670
|
}
|
|
605
671
|
catch {
|
|
606
672
|
return null;
|
|
607
673
|
}
|
|
608
674
|
}
|
|
609
|
-
function collectGitStatus(repoDir, prefix, out, overrides) {
|
|
675
|
+
function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null, exclude = null) {
|
|
610
676
|
const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '--no-renames'], { cwd: repoDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
|
|
611
677
|
// This repo's own ignore rules — built-in defaults (#407) plus its .gitignore.
|
|
612
678
|
// Change detection must exclude the SAME files the full index does, but git
|
|
@@ -645,6 +711,12 @@ function collectGitStatus(repoDir, prefix, out, overrides) {
|
|
|
645
711
|
// index — match against the repo-relative path, same as the full scan. (#766)
|
|
646
712
|
if (ig.ignores(rel))
|
|
647
713
|
continue;
|
|
714
|
+
// User `codegraph.json` `exclude` (#999) is project-root-relative, so it's
|
|
715
|
+
// matched against the full path — sync must not re-add a tracked file the
|
|
716
|
+
// full index now keeps out. Deletions above stay unfiltered so a file that
|
|
717
|
+
// WAS indexed before an exclude was added still cleans itself out.
|
|
718
|
+
if (exclude && exclude.ignores(filePath))
|
|
719
|
+
continue;
|
|
648
720
|
if (statusCode === '??') {
|
|
649
721
|
out.added.push(filePath);
|
|
650
722
|
}
|
|
@@ -654,14 +726,16 @@ function collectGitStatus(repoDir, prefix, out, overrides) {
|
|
|
654
726
|
}
|
|
655
727
|
}
|
|
656
728
|
// Recurse embedded repos found under untracked dirs (at the dir itself or
|
|
657
|
-
// nested deeper)
|
|
729
|
+
// nested deeper). Gitignored dirs are walked only for the directories the
|
|
730
|
+
// project opted in via `includeIgnored`; by default `.gitignore` is respected
|
|
731
|
+
// and they are left alone (#970, #976), mirroring the full-index scan.
|
|
658
732
|
for (const rel of untrackedDirs) {
|
|
659
733
|
for (const repoRel of findNestedGitRepos(path.join(repoDir, rel), rel)) {
|
|
660
|
-
collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides);
|
|
734
|
+
collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored, exclude);
|
|
661
735
|
}
|
|
662
736
|
}
|
|
663
|
-
for (const rel of findIgnoredEmbeddedRepos(repoDir)) {
|
|
664
|
-
collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides);
|
|
737
|
+
for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
|
|
738
|
+
collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored, exclude);
|
|
665
739
|
}
|
|
666
740
|
}
|
|
667
741
|
/**
|
|
@@ -820,7 +894,14 @@ function scanDirectoryWalk(rootDir, onProgress) {
|
|
|
820
894
|
}
|
|
821
895
|
// Seed a base matcher with the built-in default ignores (merged with the root
|
|
822
896
|
// .gitignore so a negation can override). Nested .gitignores still layer per-dir.
|
|
823
|
-
|
|
897
|
+
const baseMatchers = [{ dir: rootDir, ig: buildDefaultIgnore(rootDir) }];
|
|
898
|
+
// Project `codegraph.json` `exclude` patterns (#999), rooted at the project so
|
|
899
|
+
// `isIgnored` matches them against root-relative paths — same coverage the
|
|
900
|
+
// git path gets via ScopeIgnore, for non-git projects.
|
|
901
|
+
const exclude = loadExcludeMatcher(rootDir);
|
|
902
|
+
if (exclude)
|
|
903
|
+
baseMatchers.push({ dir: rootDir, ig: exclude });
|
|
904
|
+
walk(rootDir, baseMatchers);
|
|
824
905
|
return files;
|
|
825
906
|
}
|
|
826
907
|
/**
|
|
@@ -982,148 +1063,153 @@ class ExtractionOrchestrator {
|
|
|
982
1063
|
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
983
1064
|
neededLanguages.push('cpp');
|
|
984
1065
|
}
|
|
985
|
-
//
|
|
986
|
-
// Falls back to in-process parsing
|
|
1066
|
+
// Parse files on a pool of worker threads (keeps the main thread free for UI
|
|
1067
|
+
// and uses every core). Falls back to in-process parsing when the compiled
|
|
1068
|
+
// worker is unavailable (e.g. running from source in tests).
|
|
987
1069
|
const parseWorkerPath = path.join(__dirname, 'parse-worker.js');
|
|
988
1070
|
const useWorker = fs.existsSync(parseWorkerPath);
|
|
989
|
-
let
|
|
1071
|
+
let pool = null;
|
|
990
1072
|
if (useWorker) {
|
|
991
|
-
|
|
992
|
-
|
|
1073
|
+
// CODEGRAPH_PARSE_WORKERS: explicit worker count; 1 = the old single-worker
|
|
1074
|
+
// behaviour (the conservative rollback). Unset → clamp(cores-1, 1, 8).
|
|
1075
|
+
const poolSize = (0, parse_pool_1.resolveParsePoolSize)(process.env.CODEGRAPH_PARSE_WORKERS, os.cpus().length);
|
|
1076
|
+
pool = new parse_pool_1.ParseWorkerPool({
|
|
1077
|
+
languages: neededLanguages,
|
|
1078
|
+
size: poolSize,
|
|
1079
|
+
workerScriptPath: parseWorkerPath,
|
|
1080
|
+
recycleInterval: WORKER_RECYCLE_INTERVAL,
|
|
1081
|
+
parseTimeoutMs: PARSE_TIMEOUT_MS,
|
|
1082
|
+
log,
|
|
1083
|
+
});
|
|
1084
|
+
log(`Parse worker pool: ${poolSize} worker(s)`);
|
|
993
1085
|
}
|
|
994
1086
|
else {
|
|
995
|
-
// In-process fallback: load grammars locally
|
|
1087
|
+
// In-process fallback: load grammars locally and parse on the main thread.
|
|
996
1088
|
await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
|
|
997
1089
|
}
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1090
|
+
/**
|
|
1091
|
+
* Parse one file: on the pool when available (the promise REJECTS on a worker
|
|
1092
|
+
* crash/timeout — the caller records it and the retry pass re-attempts), or
|
|
1093
|
+
* in-process synchronously as the no-worker fallback. The language is resolved
|
|
1094
|
+
* here on the main thread, where the codegraph.json overrides are loaded.
|
|
1095
|
+
*/
|
|
1096
|
+
const parseFile = (filePath, content) => {
|
|
1097
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1098
|
+
if (!pool)
|
|
1099
|
+
return Promise.resolve((0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames));
|
|
1100
|
+
return pool.requestParse({ filePath, content, language, frameworkNames });
|
|
1101
|
+
};
|
|
1102
|
+
// --- Bounded rolling-window dispatch, ordered commit ---
|
|
1103
|
+
// Reads stay batched/parallel; parses run concurrently across the pool; the
|
|
1104
|
+
// SQLite store stays on the main thread (it isn't thread-safe). Crucially we
|
|
1105
|
+
// COMMIT results in original file order, not parse-completion order: the
|
|
1106
|
+
// resolution phase (run after indexing) resolves an ambiguous reference to one
|
|
1107
|
+
// of several same-named candidates by the nodes' DB insertion order, so a
|
|
1108
|
+
// stable commit order keeps the resulting graph deterministic — byte-identical
|
|
1109
|
+
// to the single-worker path — instead of drifting with parse timing. The
|
|
1110
|
+
// `completed` buffer holds at most ~windowSize out-of-order results, so memory
|
|
1111
|
+
// stays bounded.
|
|
1112
|
+
const windowSize = pool ? Math.max(4, pool.size * 2) : 1;
|
|
1113
|
+
const inFlight = new Set();
|
|
1114
|
+
const completed = new Map();
|
|
1115
|
+
let nextSeq = 0; // file-order sequence assigned at dispatch
|
|
1116
|
+
let nextToStore = 0; // cursor: next sequence to commit
|
|
1117
|
+
let aborted = false;
|
|
1118
|
+
const storeResult = (filePath, content, stats, result) => {
|
|
1119
|
+
processed++;
|
|
1120
|
+
// Store in database on main thread (SQLite is not thread-safe)
|
|
1121
|
+
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1122
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1123
|
+
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
1012
1124
|
}
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
const pending = pendingParses.get(msg.id);
|
|
1018
|
-
if (pending) {
|
|
1019
|
-
clearTimeout(pending.timer);
|
|
1020
|
-
pendingParses.delete(msg.id);
|
|
1021
|
-
pending.resolve(msg.result);
|
|
1022
|
-
}
|
|
1125
|
+
if (result.errors.length > 0) {
|
|
1126
|
+
for (const err of result.errors) {
|
|
1127
|
+
if (!err.filePath)
|
|
1128
|
+
err.filePath = filePath;
|
|
1023
1129
|
}
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1130
|
+
errors.push(...result.errors);
|
|
1131
|
+
}
|
|
1132
|
+
if (result.nodes.length > 0) {
|
|
1133
|
+
filesIndexed++;
|
|
1134
|
+
totalNodes += result.nodes.length;
|
|
1135
|
+
totalEdges += result.edges.length;
|
|
1136
|
+
}
|
|
1137
|
+
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
1138
|
+
filesErrored++;
|
|
1139
|
+
}
|
|
1140
|
+
else {
|
|
1141
|
+
// Files with no symbols but no errors (yaml, twig, properties) are
|
|
1142
|
+
// tracked at the file level — count them as indexed so the CLI doesn't
|
|
1143
|
+
// misleadingly report "No files found to index".
|
|
1144
|
+
const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1145
|
+
if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
|
|
1146
|
+
filesIndexed++;
|
|
1033
1147
|
}
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
if (parseWorker === w) {
|
|
1037
|
-
parseWorker = null;
|
|
1038
|
-
workerParseCount = 0;
|
|
1148
|
+
else {
|
|
1149
|
+
filesSkipped++;
|
|
1039
1150
|
}
|
|
1151
|
+
}
|
|
1152
|
+
onProgress?.({ phase: 'parsing', current: processed, total, currentFile: filePath });
|
|
1153
|
+
};
|
|
1154
|
+
const recordParseFailure = (filePath, err) => {
|
|
1155
|
+
processed++;
|
|
1156
|
+
filesErrored++;
|
|
1157
|
+
errors.push({
|
|
1158
|
+
message: err instanceof Error ? err.message : String(err),
|
|
1159
|
+
filePath,
|
|
1160
|
+
severity: 'error',
|
|
1161
|
+
code: 'parse_error',
|
|
1040
1162
|
});
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
await new Promise((resolve, reject) => {
|
|
1050
|
-
parseWorker.once('message', (msg) => {
|
|
1051
|
-
if (msg.type === 'grammars-loaded')
|
|
1052
|
-
resolve();
|
|
1053
|
-
else
|
|
1054
|
-
reject(new Error(`Unexpected message: ${msg.type}`));
|
|
1055
|
-
});
|
|
1056
|
-
parseWorker.postMessage({ type: 'load-grammars', languages: neededLanguages });
|
|
1057
|
-
});
|
|
1058
|
-
return parseWorker;
|
|
1059
|
-
}
|
|
1060
|
-
if (WorkerClass) {
|
|
1061
|
-
await ensureWorker();
|
|
1062
|
-
}
|
|
1063
|
-
/**
|
|
1064
|
-
* Recycle the worker thread to reclaim WASM memory.
|
|
1065
|
-
* Terminates the current worker and clears the reference so
|
|
1066
|
-
* ensureWorker() will spawn a fresh one on the next call.
|
|
1067
|
-
*/
|
|
1068
|
-
function recycleWorker() {
|
|
1069
|
-
if (!parseWorker)
|
|
1163
|
+
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1164
|
+
};
|
|
1165
|
+
// Commit buffered parses to the DB in file order, advancing the cursor over
|
|
1166
|
+
// contiguous completed results. Runs after each parse settles (and once more
|
|
1167
|
+
// after the drain). storeResult / recordParseFailure run here single-threaded,
|
|
1168
|
+
// so shared counters and SQLite writes never race despite parallel parsing.
|
|
1169
|
+
const flushOrdered = () => {
|
|
1170
|
+
if (aborted)
|
|
1070
1171
|
return;
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
// Resolve the language on the main thread (where the project's
|
|
1080
|
-
// codegraph.json overrides are loaded) and hand it to the worker, so the
|
|
1081
|
-
// worker never needs the override map itself.
|
|
1082
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1083
|
-
if (!WorkerClass) {
|
|
1084
|
-
// In-process fallback
|
|
1085
|
-
return (0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames);
|
|
1172
|
+
while (completed.has(nextToStore)) {
|
|
1173
|
+
const item = completed.get(nextToStore);
|
|
1174
|
+
completed.delete(nextToStore);
|
|
1175
|
+
nextToStore++;
|
|
1176
|
+
if (item.ok)
|
|
1177
|
+
storeResult(item.filePath, item.content, item.stats, item.result);
|
|
1178
|
+
else
|
|
1179
|
+
recordParseFailure(item.filePath, item.err);
|
|
1086
1180
|
}
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1181
|
+
};
|
|
1182
|
+
// Dispatch one file's parse (parses run concurrently across the pool), tagged
|
|
1183
|
+
// with its file-order sequence so flushOrdered commits results in order. The
|
|
1184
|
+
// backpressure below bounds how far parsing runs ahead of the in-order commit.
|
|
1185
|
+
const feed = async (filePath, content, stats) => {
|
|
1186
|
+
const seq = nextSeq++;
|
|
1187
|
+
const p = (async () => {
|
|
1188
|
+
try {
|
|
1189
|
+
const result = await parseFile(filePath, content);
|
|
1190
|
+
completed.set(seq, { ok: true, filePath, content, stats, result });
|
|
1191
|
+
}
|
|
1192
|
+
catch (parseErr) {
|
|
1193
|
+
completed.set(seq, { ok: false, filePath, err: parseErr });
|
|
1194
|
+
}
|
|
1195
|
+
flushOrdered();
|
|
1196
|
+
})();
|
|
1197
|
+
const tracked = p.finally(() => { inFlight.delete(tracked); });
|
|
1198
|
+
inFlight.add(tracked);
|
|
1199
|
+
// Backpressure on the dispatched-but-not-yet-committed count (in-flight +
|
|
1200
|
+
// buffered), not just in-flight: a slow file sitting at the commit cursor
|
|
1201
|
+
// lets later parses finish and buffer, which would otherwise grow without
|
|
1202
|
+
// bound. Wait for parses to settle (each may advance the cursor) until the
|
|
1203
|
+
// window has room. `inFlight.size > 0` guards against an empty race — the
|
|
1204
|
+
// cursor file is always still in flight when the window is full.
|
|
1205
|
+
while (nextSeq - nextToStore >= windowSize && inFlight.size > 0) {
|
|
1206
|
+
await Promise.race(inFlight);
|
|
1092
1207
|
}
|
|
1093
|
-
|
|
1094
|
-
const id = nextId++;
|
|
1095
|
-
workerParseCount++;
|
|
1096
|
-
// Scale timeout for large files: base 10s + 10s per 100KB
|
|
1097
|
-
const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
|
|
1098
|
-
return new Promise((resolve, reject) => {
|
|
1099
|
-
const timer = setTimeout(() => {
|
|
1100
|
-
pendingParses.delete(id);
|
|
1101
|
-
log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
|
|
1102
|
-
// Reject FIRST — worker.terminate() can hang if WASM is stuck
|
|
1103
|
-
parseWorker = null;
|
|
1104
|
-
workerParseCount = 0;
|
|
1105
|
-
reject(new Error(`Parse timed out after ${timeoutMs}ms`));
|
|
1106
|
-
// Fire-and-forget: kill the stuck worker in the background
|
|
1107
|
-
worker.terminate().catch(() => { });
|
|
1108
|
-
}, timeoutMs);
|
|
1109
|
-
pendingParses.set(id, { resolve, reject, timer });
|
|
1110
|
-
worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames, language });
|
|
1111
|
-
});
|
|
1112
|
-
}
|
|
1208
|
+
};
|
|
1113
1209
|
for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
|
|
1114
1210
|
if (signal?.aborted) {
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
return {
|
|
1118
|
-
success: false,
|
|
1119
|
-
filesIndexed,
|
|
1120
|
-
filesSkipped,
|
|
1121
|
-
filesErrored,
|
|
1122
|
-
nodesCreated: totalNodes,
|
|
1123
|
-
edgesCreated: totalEdges,
|
|
1124
|
-
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1125
|
-
durationMs: Date.now() - startTime,
|
|
1126
|
-
};
|
|
1211
|
+
aborted = true;
|
|
1212
|
+
break;
|
|
1127
1213
|
}
|
|
1128
1214
|
const batch = files.slice(i, i + FILE_IO_BATCH_SIZE);
|
|
1129
1215
|
// Read files in parallel (with path validation before any I/O)
|
|
@@ -1145,29 +1231,13 @@ class ExtractionOrchestrator {
|
|
|
1145
1231
|
return { filePath: fp, content: null, stats: null, error: err };
|
|
1146
1232
|
}
|
|
1147
1233
|
}));
|
|
1148
|
-
//
|
|
1234
|
+
// Dispatch each readable file into the bounded parse window; the window
|
|
1235
|
+
// stores results on the main thread as they arrive.
|
|
1149
1236
|
for (const { filePath, content, stats, error } of fileContents) {
|
|
1150
1237
|
if (signal?.aborted) {
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
return {
|
|
1154
|
-
success: false,
|
|
1155
|
-
filesIndexed,
|
|
1156
|
-
filesSkipped,
|
|
1157
|
-
filesErrored,
|
|
1158
|
-
nodesCreated: totalNodes,
|
|
1159
|
-
edgesCreated: totalEdges,
|
|
1160
|
-
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1161
|
-
durationMs: Date.now() - startTime,
|
|
1162
|
-
};
|
|
1238
|
+
aborted = true;
|
|
1239
|
+
break;
|
|
1163
1240
|
}
|
|
1164
|
-
// Report progress before parsing (show current file being worked on)
|
|
1165
|
-
onProgress?.({
|
|
1166
|
-
phase: 'parsing',
|
|
1167
|
-
current: processed,
|
|
1168
|
-
total,
|
|
1169
|
-
currentFile: filePath,
|
|
1170
|
-
});
|
|
1171
1241
|
if (error || content === null || stats === null) {
|
|
1172
1242
|
processed++;
|
|
1173
1243
|
filesErrored++;
|
|
@@ -1177,6 +1247,7 @@ class ExtractionOrchestrator {
|
|
|
1177
1247
|
severity: 'error',
|
|
1178
1248
|
code: 'read_error',
|
|
1179
1249
|
});
|
|
1250
|
+
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1180
1251
|
continue;
|
|
1181
1252
|
}
|
|
1182
1253
|
// Honour MAX_FILE_SIZE. Without this check, vendored generated
|
|
@@ -1196,57 +1267,32 @@ class ExtractionOrchestrator {
|
|
|
1196
1267
|
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
1197
1268
|
continue;
|
|
1198
1269
|
}
|
|
1199
|
-
// Parse
|
|
1200
|
-
//
|
|
1201
|
-
|
|
1202
|
-
try {
|
|
1203
|
-
result = await requestParse(filePath, content);
|
|
1204
|
-
}
|
|
1205
|
-
catch (parseErr) {
|
|
1206
|
-
processed++;
|
|
1207
|
-
filesErrored++;
|
|
1208
|
-
errors.push({
|
|
1209
|
-
message: parseErr instanceof Error ? parseErr.message : String(parseErr),
|
|
1210
|
-
filePath,
|
|
1211
|
-
severity: 'error',
|
|
1212
|
-
code: 'parse_error',
|
|
1213
|
-
});
|
|
1214
|
-
continue;
|
|
1215
|
-
}
|
|
1216
|
-
processed++;
|
|
1217
|
-
// Store in database on main thread (SQLite is not thread-safe)
|
|
1218
|
-
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1219
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1220
|
-
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
1221
|
-
}
|
|
1222
|
-
if (result.errors.length > 0) {
|
|
1223
|
-
for (const err of result.errors) {
|
|
1224
|
-
if (!err.filePath)
|
|
1225
|
-
err.filePath = filePath;
|
|
1226
|
-
}
|
|
1227
|
-
errors.push(...result.errors);
|
|
1228
|
-
}
|
|
1229
|
-
if (result.nodes.length > 0) {
|
|
1230
|
-
filesIndexed++;
|
|
1231
|
-
totalNodes += result.nodes.length;
|
|
1232
|
-
totalEdges += result.edges.length;
|
|
1233
|
-
}
|
|
1234
|
-
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
1235
|
-
filesErrored++;
|
|
1236
|
-
}
|
|
1237
|
-
else {
|
|
1238
|
-
// Files with no symbols but no errors (yaml, twig, properties) are
|
|
1239
|
-
// tracked at the file level — count them as indexed so the CLI
|
|
1240
|
-
// doesn't misleadingly report "No files found to index".
|
|
1241
|
-
const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1242
|
-
if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
|
|
1243
|
-
filesIndexed++;
|
|
1244
|
-
}
|
|
1245
|
-
else {
|
|
1246
|
-
filesSkipped++;
|
|
1247
|
-
}
|
|
1248
|
-
}
|
|
1270
|
+
// Parse on the pool (main thread stays unblocked). Errors/timeouts are
|
|
1271
|
+
// handled inside feed() → recordParseFailure, feeding the retry pass.
|
|
1272
|
+
await feed(filePath, content, stats);
|
|
1249
1273
|
}
|
|
1274
|
+
if (aborted)
|
|
1275
|
+
break;
|
|
1276
|
+
}
|
|
1277
|
+
// Drain parses still in flight (skip on abort — we tear down below instead),
|
|
1278
|
+
// then commit any results the cursor hasn't reached yet.
|
|
1279
|
+
if (!aborted) {
|
|
1280
|
+
await Promise.all(inFlight);
|
|
1281
|
+
flushOrdered();
|
|
1282
|
+
}
|
|
1283
|
+
if (signal?.aborted || aborted) {
|
|
1284
|
+
if (pool)
|
|
1285
|
+
await pool.destroy();
|
|
1286
|
+
return {
|
|
1287
|
+
success: false,
|
|
1288
|
+
filesIndexed,
|
|
1289
|
+
filesSkipped,
|
|
1290
|
+
filesErrored,
|
|
1291
|
+
nodesCreated: totalNodes,
|
|
1292
|
+
edgesCreated: totalEdges,
|
|
1293
|
+
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
1294
|
+
durationMs: Date.now() - startTime,
|
|
1295
|
+
};
|
|
1250
1296
|
}
|
|
1251
1297
|
// Report 100% so the progress bar doesn't hang at 99%
|
|
1252
1298
|
onProgress?.({
|
|
@@ -1263,15 +1309,17 @@ class ExtractionOrchestrator {
|
|
|
1263
1309
|
// every file gets the absolute cleanest WASM state possible.
|
|
1264
1310
|
const retryableErrors = errors.filter((e) => e.code === 'parse_error' && e.filePath &&
|
|
1265
1311
|
(e.message.includes('Worker exited') || e.message.includes('memory access out of bounds')));
|
|
1266
|
-
if (retryableErrors.length > 0 &&
|
|
1312
|
+
if (retryableErrors.length > 0 && pool) {
|
|
1267
1313
|
log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
|
|
1314
|
+
// Fresh WASM heaps for the retry phase. A retry that still crashes its
|
|
1315
|
+
// worker makes the pool respawn it, so later retries keep landing on clean
|
|
1316
|
+
// workers too.
|
|
1317
|
+
pool.recycleAll();
|
|
1268
1318
|
const stillFailing = [];
|
|
1269
1319
|
for (const errEntry of retryableErrors) {
|
|
1270
1320
|
const filePath = errEntry.filePath;
|
|
1271
1321
|
if (signal?.aborted)
|
|
1272
1322
|
break;
|
|
1273
|
-
// Fresh worker for every retry — maximum WASM headroom
|
|
1274
|
-
recycleWorker();
|
|
1275
1323
|
let content;
|
|
1276
1324
|
try {
|
|
1277
1325
|
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
@@ -1284,7 +1332,7 @@ class ExtractionOrchestrator {
|
|
|
1284
1332
|
}
|
|
1285
1333
|
let result;
|
|
1286
1334
|
try {
|
|
1287
|
-
result = await
|
|
1335
|
+
result = await parseFile(filePath, content);
|
|
1288
1336
|
}
|
|
1289
1337
|
catch {
|
|
1290
1338
|
stillFailing.push(errEntry);
|
|
@@ -1310,11 +1358,11 @@ class ExtractionOrchestrator {
|
|
|
1310
1358
|
// code nodes but consume parser memory.
|
|
1311
1359
|
if (stillFailing.length > 0) {
|
|
1312
1360
|
log(`${stillFailing.length} files still failing — retrying with comments stripped...`);
|
|
1361
|
+
pool.recycleAll();
|
|
1313
1362
|
for (const errEntry of stillFailing) {
|
|
1314
1363
|
const filePath = errEntry.filePath;
|
|
1315
1364
|
if (signal?.aborted)
|
|
1316
1365
|
break;
|
|
1317
|
-
recycleWorker();
|
|
1318
1366
|
let fullContent;
|
|
1319
1367
|
try {
|
|
1320
1368
|
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
@@ -1333,7 +1381,7 @@ class ExtractionOrchestrator {
|
|
|
1333
1381
|
.join('\n');
|
|
1334
1382
|
let result;
|
|
1335
1383
|
try {
|
|
1336
|
-
result = await
|
|
1384
|
+
result = await parseFile(filePath, stripped);
|
|
1337
1385
|
}
|
|
1338
1386
|
catch {
|
|
1339
1387
|
continue;
|
|
@@ -1354,11 +1402,9 @@ class ExtractionOrchestrator {
|
|
|
1354
1402
|
}
|
|
1355
1403
|
}
|
|
1356
1404
|
}
|
|
1357
|
-
// Shut down parse worker
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
parseWorker.terminate().catch(() => { });
|
|
1361
|
-
}
|
|
1405
|
+
// Shut down the parse worker pool.
|
|
1406
|
+
if (pool)
|
|
1407
|
+
await pool.destroy();
|
|
1362
1408
|
return {
|
|
1363
1409
|
success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
|
|
1364
1410
|
filesIndexed,
|