@yemi33/minions 0.1.1950 → 0.1.1952
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/command-center.js +13 -2
- package/dashboard/js/modal-qa.js +10 -0
- package/dashboard/js/refresh.js +4 -0
- package/dashboard/js/render-dispatch.js +25 -0
- package/dashboard/js/render-other.js +109 -2
- package/dashboard/js/settings.js +1 -1
- package/dashboard/layout.html +2 -2
- package/dashboard/pages/engine.html +6 -0
- package/dashboard/slim.html +1987 -0
- package/dashboard/styles.css +8 -0
- package/dashboard.js +450 -40
- package/docs/completion-reports.md +25 -0
- package/docs/design-state-storage.md +1 -1
- package/docs/slim-ux/architecture-suggestions.md +467 -0
- package/docs/slim-ux/concepts.md +824 -0
- package/engine/ado-mcp-wrapper.js +33 -7
- package/engine/ado.js +123 -15
- package/engine/cc-worker-pool.js +41 -0
- package/engine/cleanup.js +71 -34
- package/engine/cli.js +37 -0
- package/engine/dispatch.js +32 -9
- package/engine/features.js +6 -0
- package/engine/gh-token.js +137 -0
- package/engine/github.js +166 -29
- package/engine/issues.js +29 -0
- package/engine/keep-process-sweep.js +397 -0
- package/engine/lifecycle.js +150 -33
- package/engine/playbook.js +17 -0
- package/engine/queries.js +71 -0
- package/engine/recovery.js +6 -0
- package/engine/shared.js +446 -14
- package/engine/spawn-agent.js +44 -2
- package/engine/timeout.js +34 -11
- package/engine/worktree-pool.js +410 -0
- package/engine.js +643 -119
- package/package.json +6 -3
- package/playbooks/review.md +2 -0
- package/playbooks/shared-rules.md +3 -1
- package/prompts/cc-system.md +24 -0
- package/engine/copilot-models.json +0 -5
package/engine/shared.js
CHANGED
|
@@ -632,7 +632,23 @@ function sleepMs(ms) {
|
|
|
632
632
|
}
|
|
633
633
|
}
|
|
634
634
|
|
|
635
|
-
|
|
635
|
+
// P-b7d4e8f2 — bumped from 60_000 to 300_000 once the reaper grew a PID-liveness
|
|
636
|
+
// guard (below). Holders that record their pid are protected from reap up to
|
|
637
|
+
// 5×LOCK_STALE_MS as long as `process.kill(pid, 0)` succeeds; the bump removes
|
|
638
|
+
// false-positive kills of legitimate slow operations (worktree adds, large state
|
|
639
|
+
// rewrites) while the PID guard keeps crashed-holder recovery fast.
|
|
640
|
+
const LOCK_STALE_MS = 300000; // 5 minutes — force-remove locks older than this
|
|
641
|
+
|
|
642
|
+
// Shared.js-local PID liveness check. Avoids a circular require on engine/cli.js
// (which has its own isPidAlive) and engine/timeout.js (which has
// isOsPidAliveForDispatch — but that one looks up pid from a side-channel
// pid-file, whereas the lock reaper already has the holder pid in-hand from the
// lock contents).
/**
 * Report whether a process with the given PID currently exists.
 *
 * Uses `process.kill(pid, 0)`: signal 0 performs the existence/permission
 * check without delivering a signal.
 *
 * @param {number} pid — candidate process id; anything that is not a finite
 *   positive number is reported as not alive without probing the OS.
 * @returns {boolean} true when the probe succeeds OR fails with EPERM (the
 *   process exists but is owned by another user); false for every other
 *   failure (e.g. ESRCH, invalid argument).
 */
function isPidAlive(pid) {
  if (!Number.isFinite(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM means "exists, but we may not signal it". Treating that as dead
    // would let the stale-lock reaper steal a lock from a live holder that
    // runs under a different account.
    return err?.code === 'EPERM';
  }
}
|
|
636
652
|
|
|
637
653
|
function withFileLock(lockPath, fn, {
|
|
638
654
|
timeoutMs = 5000,
|
|
@@ -655,20 +671,54 @@ function withFileLock(lockPath, fn, {
|
|
|
655
671
|
const dir = path.dirname(lockPath);
|
|
656
672
|
if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
|
|
657
673
|
fd = fs.openSync(lockPath, 'wx');
|
|
674
|
+
// P-b7d4e8f2 — record holder identity so the stale-lock reaper can
|
|
675
|
+
// distinguish a still-alive slow holder from a crashed one. Best-effort:
|
|
676
|
+
// the lock's existence (not its contents) provides mutual exclusion, so
|
|
677
|
+
// a write failure here must NOT abort acquisition.
|
|
678
|
+
try {
|
|
679
|
+
fs.writeSync(fd, JSON.stringify({ pid: process.pid, ts: Date.now() }));
|
|
680
|
+
} catch { /* payload is advisory; lock semantics unaffected */ }
|
|
658
681
|
break;
|
|
659
682
|
} catch (err) {
|
|
660
683
|
if (err.code !== 'EEXIST') throw err;
|
|
661
|
-
//
|
|
684
|
+
// P-b7d4e8f2 — Stale-lock check combines mtime age with PID liveness:
|
|
685
|
+
// 1. If mtime <= LOCK_STALE_MS → never reap (recently active).
|
|
686
|
+
// 2. If JSON-parsable {pid, ts}:
|
|
687
|
+
// - dead PID → reap.
|
|
688
|
+
// - alive PID, mtime <= 5× → don't reap (legitimate slow holder).
|
|
689
|
+
// - alive PID, mtime > 5× → reap (last-resort guard against
|
|
690
|
+
// stuck holders that never released).
|
|
691
|
+
// 3. Legacy / empty / non-JSON lockfile → mtime-only path (reap).
|
|
662
692
|
try {
|
|
663
693
|
const stat = fs.statSync(lockPath);
|
|
664
|
-
|
|
694
|
+
const mtimeAge = Date.now() - stat.mtimeMs;
|
|
695
|
+
if (mtimeAge > LOCK_STALE_MS) {
|
|
696
|
+
let holderPid = null;
|
|
665
697
|
try {
|
|
666
|
-
fs.
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
698
|
+
const raw = fs.readFileSync(lockPath, 'utf8');
|
|
699
|
+
const parsed = JSON.parse(raw);
|
|
700
|
+
if (parsed && Number.isFinite(parsed.pid) && parsed.pid > 0) {
|
|
701
|
+
holderPid = parsed.pid;
|
|
702
|
+
}
|
|
703
|
+
} catch { /* legacy/empty/corrupt lock → fall through to mtime-only */ }
|
|
704
|
+
|
|
705
|
+
let shouldReap;
|
|
706
|
+
if (holderPid !== null) {
|
|
707
|
+
shouldReap = !isPidAlive(holderPid) || mtimeAge > LOCK_STALE_MS * 5;
|
|
708
|
+
} else {
|
|
709
|
+
// Legacy empty/non-JSON lockfile: trust mtime alone
|
|
710
|
+
shouldReap = true;
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
if (shouldReap) {
|
|
714
|
+
try {
|
|
715
|
+
fs.unlinkSync(lockPath);
|
|
716
|
+
} catch (unlinkErr) {
|
|
717
|
+
// ENOENT: another process deleted the lock between stat and unlink — safe to retry
|
|
718
|
+
if (unlinkErr.code !== 'ENOENT') throw unlinkErr;
|
|
719
|
+
}
|
|
720
|
+
continue; // lock just removed — retry immediately
|
|
670
721
|
}
|
|
671
|
-
continue; // lock just removed — retry immediately
|
|
672
722
|
}
|
|
673
723
|
} catch (staleErr) {
|
|
674
724
|
// ENOENT from statSync: lock file disappeared between EEXIST and stat — retry will succeed
|
|
@@ -683,10 +733,24 @@ function withFileLock(lockPath, fn, {
|
|
|
683
733
|
}
|
|
684
734
|
|
|
685
735
|
try {
|
|
686
|
-
|
|
736
|
+
const result = fn();
|
|
737
|
+
// P-a3f9b2c1 — Defensive: detect a thenable return and throw synchronously.
|
|
738
|
+
// The `finally` below releases the lock immediately after `fn()` returns;
|
|
739
|
+
// an async callback would let the lock be released before its body
|
|
740
|
+
// completes, silently breaking mutual exclusion. Clean up our own fd /
|
|
741
|
+
// lock first, then throw so the caller cannot ignore the failure.
|
|
742
|
+
if (result && typeof result.then === 'function') {
|
|
743
|
+
try { fs.closeSync(fd); } catch { /* cleanup */ }
|
|
744
|
+
try { fs.unlinkSync(lockPath); } catch { /* cleanup */ }
|
|
745
|
+
fd = null; // suppress double-cleanup in `finally`
|
|
746
|
+
throw new Error('withFileLock: fn must be synchronous; got Promise. Use synchronous operations only.');
|
|
747
|
+
}
|
|
748
|
+
return result;
|
|
687
749
|
} finally {
|
|
688
|
-
|
|
689
|
-
|
|
750
|
+
if (fd !== null) {
|
|
751
|
+
try { fs.closeSync(fd); } catch { /* cleanup */ }
|
|
752
|
+
try { fs.unlinkSync(lockPath); } catch { /* cleanup */ }
|
|
753
|
+
}
|
|
690
754
|
}
|
|
691
755
|
}
|
|
692
756
|
throw lastErr;
|
|
@@ -867,7 +931,9 @@ function writeToInbox(agentId, slug, content, _inboxDir, metadata) {
|
|
|
867
931
|
// ── Process Spawning ────────────────────────────────────────────────────────
|
|
868
932
|
// All child process calls go through these to ensure windowsHide: true
|
|
869
933
|
|
|
870
|
-
const { execSync: _execSync, spawnSync: _spawnSync, spawn: _spawn, exec: _cbExec } = require('child_process');
|
|
934
|
+
const { execSync: _execSync, spawnSync: _spawnSync, spawn: _spawn, exec: _cbExec, execFile: _cbExecFile } = require('child_process');
|
|
935
|
+
const { promisify: _promisify } = require('util');
|
|
936
|
+
const _execFileAsync = _promisify(_cbExecFile);
|
|
871
937
|
|
|
872
938
|
function exec(cmd, opts = {}) {
|
|
873
939
|
return _execSync(cmd, { windowsHide: true, ...opts });
|
|
@@ -908,6 +974,149 @@ function execAsync(cmd, opts = {}) {
|
|
|
908
974
|
});
|
|
909
975
|
}
|
|
910
976
|
|
|
977
|
+
// ── Argv-form (shell:false) helpers + ref/slug validators (P-a7c4d2e8) ──────
|
|
978
|
+
// These eliminate shell-injection vectors in `gh`/`git` calls that previously
|
|
979
|
+
// interpolated untrusted PR data (slugs from PR-link regex matches, branch
|
|
980
|
+
// names from GitHub/ADO API responses, agent stdout, etc.) into a shell
|
|
981
|
+
// string. Use these instead of execAsync wherever any argument is derived
|
|
982
|
+
// from an untrusted source.
|
|
983
|
+
//
|
|
984
|
+
// const out = await shared.shellSafeGh(['api', `repos/${shared.validateGhSlug(slug)}/pulls/${prNum}`]);
|
|
985
|
+
// await shared.shellSafeGit(['fetch', 'origin', shared.validateGitRef(branch)], { cwd });
|
|
986
|
+
//
|
|
987
|
+
// Validators throw on rejection; the wrapper helpers spawn via execFile
|
|
988
|
+
// (shell:false) so shell metacharacters in argv elements are inert.
|
|
989
|
+
|
|
990
|
+
// Stricter than the spec baseline regex: besides limiting the character set,
// this also rejects argument injection (leading dash) and ref-format quirks a
// shell-less spawn cannot protect against — `..` traversal, `@{`, leading /
// trailing / doubled slashes, a trailing dot, and path components that start
// with a dot or end in `.lock`. A conservative subset of
// `git check-ref-format`.
/**
 * Validate a git ref / branch name before it is used as an argv element.
 *
 * @param {string} ref — candidate ref name
 * @returns {string} the same `ref`, unchanged, when every rule passes
 * @throws {Error} `Invalid git ref (<why>): "<first 64 chars>"` for the first
 *   rule that fails, evaluated in the order listed below
 */
function validateGitRef(ref) {
  const reject = (why) => {
    throw new Error(`Invalid git ref (${why}): ${JSON.stringify(String(ref).slice(0, 64))}`);
  };

  // Must come first: every later rule calls string methods on `ref`.
  if (typeof ref !== 'string') reject('not a string');

  // Rules applied to the ref as a whole, first failure wins.
  const wholeRefRules = [
    ['empty', () => ref.length === 0],
    ['too long', () => ref.length > 256],
    ['disallowed character', () => !/^[A-Za-z0-9._\/-]+$/.test(ref)],
    ['leading dash', () => ref.startsWith('-')],
    ['leading or trailing slash', () => ref.startsWith('/') || ref.endsWith('/')],
    ['double slash', () => ref.includes('//')],
    ['trailing dot', () => ref.endsWith('.')],
    ['ref expression @{', () => ref.includes('@{')],
  ];
  for (const [why, isViolated] of wholeRefRules) {
    if (isViolated()) reject(why);
  }

  // Rules applied to each `/`-separated component, first failure wins.
  const componentRules = [
    ['empty path component', (segment) => segment.length === 0],
    ['dot path component', (segment) => segment === '..' || segment === '.'],
    ['double-dot in component', (segment) => segment.includes('..')],
    ['component ends with .lock', (segment) => segment.endsWith('.lock')],
    ['component starts with dot', (segment) => segment.startsWith('.')],
  ];
  for (const segment of ref.split('/')) {
    for (const [why, isViolated] of componentRules) {
      if (isViolated(segment)) reject(why);
    }
  }

  return ref;
}
|
|
1019
|
+
|
|
1020
|
+
/**
 * Validate a GitHub `owner/repo` slug before it is interpolated into a `gh`
 * argument or API path (e.g. `repos/${slug}/pulls/${n}`).
 *
 * @param {string} slug — candidate `owner/repo` string
 * @returns {string} the same `slug`, unchanged, when every rule passes
 * @throws {Error} `Invalid GitHub slug (<why>): "<first 64 chars>"` when the
 *   value is not a string, is empty, exceeds 256 characters, has surrounding
 *   whitespace, is not exactly two `[A-Za-z0-9._-]+` components joined by one
 *   `/`, has a component starting with `-`, or has a component that is
 *   exactly `.` or `..`
 */
function validateGhSlug(slug) {
  const fail = (why) => {
    throw new Error(`Invalid GitHub slug (${why}): ${JSON.stringify(String(slug).slice(0, 64))}`);
  };
  if (typeof slug !== 'string') fail('not a string');
  if (slug.length === 0) fail('empty');
  if (slug.length > 256) fail('too long');
  if (slug !== slug.trim()) fail('surrounding whitespace');
  if (!/^[A-Za-z0-9._-]+\/[A-Za-z0-9._-]+$/.test(slug)) fail('disallowed character or shape');
  if (slug.startsWith('-') || slug.includes('/-')) fail('leading dash in component');
  // The shape regex allows dots, so `.` / `..` would pass as owner or repo.
  // Inside a URL path those are dot-segments: `repos/../../x` can be
  // normalised to a different endpoint. Names that merely start with a dot
  // (e.g. `.github`) stay valid.
  for (const part of slug.split('/')) {
    if (part === '.' || part === '..') fail('dot path component');
  }
  return slug;
}
|
|
1032
|
+
|
|
1033
|
+
// W-mp6k7ywi000fa33c — pure helper. Returns { ok: boolean, reason?: string }.
// `ok: true` when `dirPath` exists AND contains either a `.git` directory OR
// a `.git` worktree pointer file (a real file whose first line is
// `gitdir: <path>`). Anything else — missing dir, missing `.git`, `.git` as a
// random non-pointer file — returns `ok: false` with a human-readable reason.
//
// No shelling out (no `git rev-parse`); just `fs.statSync` and a tiny content
// sniff for the worktree pointer case. This catches the failure mode where an
// agent ran in a partial copy of a repo (selective `cp -r`) instead of
// `git worktree add`, which produced a directory that looks file-by-file like
// a worktree but has no git linkage. See W-mp6ha6q9000d58a5 for the
// real-world incident this prevents.
/**
 * @param {string} dirPath — directory to inspect
 * @returns {{ ok: true } | { ok: false, reason: string }}
 *   Never throws for filesystem errors; they are reported through `reason`.
 */
function isValidGitWorktree(dirPath) {
  if (typeof dirPath !== 'string' || dirPath.length === 0) {
    return { ok: false, reason: 'cwd missing or not a string' };
  }

  let dirStat;
  try { dirStat = fs.statSync(dirPath); }
  catch (_e) { return { ok: false, reason: 'directory does not exist: ' + dirPath }; }
  if (!dirStat.isDirectory()) {
    return { ok: false, reason: 'path is not a directory: ' + dirPath };
  }

  const gitPath = path.join(dirPath, '.git');
  let gitStat;
  try { gitStat = fs.statSync(gitPath); }
  catch (_e) { return { ok: false, reason: 'no .git directory or worktree pointer at ' + dirPath }; }

  // A `.git` directory is accepted as-is; its contents are not inspected.
  if (gitStat.isDirectory()) return { ok: true };

  if (gitStat.isFile()) {
    // Worktree pointer files contain "gitdir: <abs path>" on the first line.
    // A `.git` file that doesn't match this shape is a normal file, not a
    // valid worktree linkage — reject it.
    let head = '';
    try { head = fs.readFileSync(gitPath, { encoding: 'utf8', flag: 'r' }).slice(0, 256); }
    catch (e) { return { ok: false, reason: '.git file unreadable: ' + e.message }; }
    // Only spaces/tabs may separate `gitdir:` from its target. `\s*` would
    // also skip newlines and accept a bare `gitdir:` line followed by
    // unrelated content on later lines.
    if (/^gitdir:[ \t]*\S/.test(head)) return { ok: true };
    return { ok: false, reason: '.git file present but not a worktree pointer (no "gitdir:" prefix): ' + dirPath };
  }

  return { ok: false, reason: '.git entry is neither a file nor a directory: ' + gitPath };
}
|
|
1072
|
+
|
|
1073
|
+
/**
 * Run `gh` with an argv array via the promisified `execFile`, never through a
 * shell, so shell metacharacters inside argv elements are inert.
 *
 * @param {string[]} args — argv elements passed verbatim to `gh`
 * @param {object} [opts] — extra `execFile` options (cwd, env, …). `timeout`
 *   defaults to 30000 ms when missing or falsy. A caller-supplied `shell`
 *   value is ignored: the helper always spawns with `shell: false`.
 * @returns {Promise<string>} resolves with the command's stdout; rejects with
 *   a TypeError when `args` is not an array, otherwise with whatever the
 *   underlying `execFile` call rejects with.
 */
function shellSafeGh(args, opts = {}) {
  if (!Array.isArray(args)) {
    return Promise.reject(new TypeError('shellSafeGh: args must be an array'));
  }
  const { timeout, ...rest } = opts;
  return _execFileAsync('gh', args, {
    windowsHide: true,
    encoding: 'utf8',
    ...rest,
    // After `...rest` on purpose: this helper exists to guarantee a
    // shell-less spawn, so callers must not be able to switch the shell back on.
    shell: false,
    timeout: timeout || 30000,
  }).then(({ stdout }) => stdout);
}
|
|
1086
|
+
|
|
1087
|
+
/**
 * Run `git` with an argv array via the promisified `execFile`, never through
 * a shell, so shell metacharacters inside argv elements are inert.
 *
 * @param {string[]} args — argv elements passed verbatim to `git`
 * @param {object} [opts] — extra `execFile` options (cwd, env, …). `timeout`
 *   defaults to 30000 ms when missing or falsy. A caller-supplied `shell`
 *   value is ignored: the helper always spawns with `shell: false`.
 * @returns {Promise<string>} resolves with the command's stdout; rejects with
 *   a TypeError when `args` is not an array, otherwise with whatever the
 *   underlying `execFile` call rejects with.
 */
function shellSafeGit(args, opts = {}) {
  if (!Array.isArray(args)) {
    return Promise.reject(new TypeError('shellSafeGit: args must be an array'));
  }
  const { timeout, ...rest } = opts;
  return _execFileAsync('git', args, {
    windowsHide: true,
    encoding: 'utf8',
    ...rest,
    // After `...rest` on purpose: this helper exists to guarantee a
    // shell-less spawn, so callers must not be able to switch the shell back on.
    shell: false,
    timeout: timeout || 30000,
  }).then(({ stdout }) => stdout);
}
|
|
1100
|
+
|
|
1101
|
+
// Sync argv-form helper for callers that aren't async (e.g. plan
// materialization in materializePlansAsWorkItems). Uses execFileSync with
// shell:false so argv elements are passed verbatim.
/**
 * @param {string[]} args — argv elements passed verbatim to `git`
 * @param {object} [opts] — extra `execFileSync` options (cwd, env, …).
 *   `timeout` defaults to 30000 ms when missing or falsy. A caller-supplied
 *   `shell` value is ignored: the helper always spawns with `shell: false`.
 * @returns {string|Buffer} whatever `execFileSync` returns (a string with the
 *   default `encoding: 'utf8'`)
 * @throws {TypeError} when `args` is not an array; other errors propagate
 *   from `execFileSync`
 */
function shellSafeGitSync(args, opts = {}) {
  if (!Array.isArray(args)) {
    throw new TypeError('shellSafeGitSync: args must be an array');
  }
  const { timeout, ...rest } = opts;
  const { execFileSync: _execFileSync } = require('child_process');
  return _execFileSync('git', args, {
    windowsHide: true,
    encoding: 'utf8',
    stdio: 'pipe',
    ...rest,
    // After `...rest` on purpose: this helper exists to guarantee a
    // shell-less spawn, so callers must not be able to switch the shell back on.
    shell: false,
    timeout: timeout || 30000,
  });
}
|
|
1119
|
+
|
|
911
1120
|
/**
|
|
912
1121
|
* Detect the default branch for a git repo. Tries in order:
|
|
913
1122
|
* 1. The configured mainBranch (if it exists as a local or remote ref)
|
|
@@ -1136,9 +1345,18 @@ const ENGINE_DEFAULTS = {
|
|
|
1136
1345
|
prMergeMethod: 'squash', // merge method: squash, merge, rebase
|
|
1137
1346
|
ignoredCommentAuthors: [], // comments from these authors are auto-closed and never trigger fixes
|
|
1138
1347
|
botCommentLogins: [], // P-a3f9b2c1: opt-in shared-minions GH login list — comments from these logins are suppressed ONLY when body matches positive-signal markers (Verification SUCCESS / VERDICT:APPROVE / noop:true). Narrower than ignoredCommentAuthors which suppresses all comments by login.
|
|
1348
|
+
// W-mp76pw7a001da7c1 — Per-slug GitHub PAT routing. Map of `<owner>` (or `<owner>/*`,
|
|
1349
|
+
// or `*` for fleet default) to a `gh auth` account name. `engine/gh-token.js`
|
|
1350
|
+
// resolves the right token via `gh auth token --user <account> --hostname github.com`
|
|
1351
|
+
// and threads it as `GH_TOKEN` for that one shell-out, so the engine never depends
|
|
1352
|
+
// on which gh account is globally active. Empty `{}` (default) preserves legacy
|
|
1353
|
+
// behavior — every `gh` call uses the ambient identity. Example:
|
|
1354
|
+
// { "opg-microsoft": "yemishin_microsoft", "yemi33": "yemi33", "*": "yemi33" }
|
|
1355
|
+
ghAccounts: {},
|
|
1139
1356
|
agentBusyReassignMs: 600000, // 10min — reassign work item to another agent if preferred agent is busy beyond this threshold
|
|
1140
1357
|
ccEffort: null, // effort level for CC/doc-chat (null, 'low', 'medium', 'high')
|
|
1141
1358
|
enablePreDispatchEval: true, // P-d2a9f6e5: cheap LLM gate before queueing — on by default. See engine/pre-dispatch-eval.js (Ripley §3 recommendation, 2026-05-11 architecture review). Validates from acceptance_criteria when present, falls back to description when criteria are absent but description is rich (≥80 chars). Fail-open on any validator error.
|
|
1359
|
+
completionNonceRequired: false, // P-d2a8f6c1 (agent trust boundary F8): when true, a missing `nonce` field in the completion JSON hard-fails the dispatch with failure_class:'completion-nonce-mismatch'. Default false for one release so older agents/runtime caches that haven't picked up the prompt change degrade with a warning instead of breaking. Mismatched nonces hard-fail regardless of this flag. See docs/completion-reports.md → "Trust boundary".
|
|
1142
1360
|
|
|
1143
1361
|
// ── Runtime fleet (P-3b8e5f1d) ──────────────────────────────────────────────
|
|
1144
1362
|
// Single source of truth for which CLI runtime + model every spawn uses.
|
|
@@ -1181,6 +1399,38 @@ const ENGINE_DEFAULTS = {
|
|
|
1181
1399
|
maxMeetingHumanNotesBytes: 2 * 1024, // cap human note bullet lists injected into meeting prompts
|
|
1182
1400
|
maxPipelineMeetingContextBytes: 16 * 1024, // cap aggregated meeting/dependency context for pipeline plan generation
|
|
1183
1401
|
notesArchiveMaxFiles: 2000, // keep notes/archive bounded during periodic cleanup
|
|
1402
|
+
// ── Worktree pool (W-mp73ya3e000me6c5) ─────────────────────────────────────
|
|
1403
|
+
// Per-project warm pool: completed worktrees are parked detached at
|
|
1404
|
+
// origin/<main> instead of torn down, then re-borrowed by the next dispatch
|
|
1405
|
+
// for the same project. Saves the cold install/build cost on heavy projects
|
|
1406
|
+
// (constellation: bun install + Vite warmup; minions: npm install + test cache).
|
|
1407
|
+
// Default off — opt-in fleet-wide via engine.worktreePoolSize or per-project
|
|
1408
|
+
// via projects[].worktreePoolSize. See engine/worktree-pool.js + CLAUDE.md
|
|
1409
|
+
// "Worktree pool" section for the lifecycle and edge cases.
|
|
1410
|
+
worktreePoolSize: 0, // 0 = disabled (default); per-project override beats this
|
|
1411
|
+
worktreePoolIdleTtlMs: 6 * 3600 * 1000, // 6h — idle entries past TTL are evicted by cleanup
|
|
1412
|
+
// ── keep_processes (W-mp68q6ke0010de68) ────────────────────────────────────
|
|
1413
|
+
// Opt-in per-WI (`meta.keep_processes: true`) feature that lets an agent
|
|
1414
|
+
// declare specific descendant PIDs the engine MUST NOT reap on close. The
|
|
1415
|
+
// agent writes `agents/<id>/keep-pids.json` before exiting; spawn-agent's
|
|
1416
|
+
// close handler subtracts those PIDs from the reap set and the MCP sweep
|
|
1417
|
+
// adds them as anchors so the reachability walk doesn't classify their
|
|
1418
|
+
// children as stray. Hard caps below bound abuse and audit-log churn.
|
|
1419
|
+
keepProcesses: {
|
|
1420
|
+
enabled: true, // global kill switch; default ON since the feature is opt-in per-WI
|
|
1421
|
+
maxPerAgent: 5, // max PIDs honored per keep-pids.json file
|
|
1422
|
+
maxTtlMinutes: 1440, // 24h hard cap on expires_at
|
|
1423
|
+
defaultTtlMinutes: 60, // default TTL when meta.keep_processes_ttl_minutes is unset
|
|
1424
|
+
sweepEvery: 30, // ticks between TTL/dead-PID sweeps
|
|
1425
|
+
// W-mp6k7ywi000fa33c — when true (default), validateKeepPidsRecord rejects
|
|
1426
|
+
// a keep-pids.json whose `cwd` does not look like a real git worktree
|
|
1427
|
+
// (no `.git` dir or worktree-pointer file). Catches the failure mode where
|
|
1428
|
+
// an agent runs in a partial copy of a repo (selective `cp -r`) instead
|
|
1429
|
+
// of using `git worktree add`. Per-WI override: set
|
|
1430
|
+
// `meta.keep_processes_skip_workdir_check: true` for legitimate non-git
|
|
1431
|
+
// keep_processes use cases.
|
|
1432
|
+
requireGitWorkdir: true,
|
|
1433
|
+
},
|
|
1184
1434
|
// Backward-compat: keep `engine.claude.*` field family deprecation tracker. Listed here so preflight
|
|
1185
1435
|
// knows which subkeys to flag as deprecated. Do not consume `claude.*` in new code — use the runtime
|
|
1186
1436
|
// adapter system (engine/runtimes/) and the resolveAgent*/resolveCc* helpers instead.
|
|
@@ -1586,6 +1836,27 @@ const WORK_TYPE = {
|
|
|
1586
1836
|
MEETING: 'meeting', EXPLORE: 'explore', ASK: 'ask', TEST: 'test', DOCS: 'docs',
|
|
1587
1837
|
};
|
|
1588
1838
|
|
|
1839
|
+
// Work types whose dispatch path requires a per-project git worktree. The
|
|
1840
|
+
// engine's spawnAgent uses the project's `localPath` as the worktree root —
|
|
1841
|
+
// without an owning project the rootDir falls back to MINIONS_DIR's parent,
|
|
1842
|
+
// which on Windows can collapse to a drive root and forever-fail
|
|
1843
|
+
// assertWorktreeOutsideProject. The dashboard ingress (POST /api/work-items
|
|
1844
|
+
// and /api/work-items/retry) refuses to create or re-spawn a project-less WI
|
|
1845
|
+
// of any type in this set when PROJECTS.length !== 1.
|
|
1846
|
+
//
|
|
1847
|
+
// Complement of engine.js READ_ONLY_ROOT_TASK_TYPES; `docs` is intentionally
|
|
1848
|
+
// also exempt because docs edits run at the Minions root, not in a project
|
|
1849
|
+
// worktree.
|
|
1850
|
+
const WORKTREE_REQUIRING_TYPES = new Set([
|
|
1851
|
+
WORK_TYPE.FIX,
|
|
1852
|
+
WORK_TYPE.IMPLEMENT,
|
|
1853
|
+
WORK_TYPE.IMPLEMENT_LARGE,
|
|
1854
|
+
WORK_TYPE.TEST,
|
|
1855
|
+
WORK_TYPE.VERIFY,
|
|
1856
|
+
WORK_TYPE.REVIEW,
|
|
1857
|
+
WORK_TYPE.DECOMPOSE,
|
|
1858
|
+
]);
|
|
1859
|
+
|
|
1589
1860
|
const PLAN_STATUS = {
|
|
1590
1861
|
ACTIVE: 'active', AWAITING_APPROVAL: 'awaiting-approval', APPROVED: 'approved',
|
|
1591
1862
|
PAUSED: 'paused', REJECTED: 'rejected', COMPLETED: 'completed',
|
|
@@ -1806,6 +2077,9 @@ const FAILURE_CLASS = {
|
|
|
1806
2077
|
NETWORK_ERROR: 'network-error', // API rate limit, DNS, connectivity
|
|
1807
2078
|
OUT_OF_CONTEXT: 'out-of-context', // Context window exhausted (token limit, context length)
|
|
1808
2079
|
MAX_TURNS: 'max-turns', // Claude CLI error_max_turns — work in progress, retryable
|
|
2080
|
+
COMPLETION_NONCE_MISMATCH: 'completion-nonce-mismatch', // P-d2a8f6c1: completion JSON nonce did not match the per-spawn value injected via MINIONS_COMPLETION_NONCE — treat as forged/untrusted; ignore PR/noop/status fields from the report
|
|
2081
|
+
WORKTREE_PREFLIGHT: 'worktree-preflight', // Pre-spawn worktree validation rejected (nested-in-project, drive-root collapse) — never retryable
|
|
2082
|
+
INVALID_KEEP_PROCESSES_WORKDIR: 'invalid-keep-processes-workdir', // W-mp6k7ywi000fa33c: keep-pids.json declared a cwd that is not a real git worktree (likely a selective copy of the repo) — never retryable; agent must rerun in a real worktree
|
|
1809
2083
|
UNKNOWN: 'unknown', // Unclassified failure
|
|
1810
2084
|
};
|
|
1811
2085
|
const ESCALATION_POLICY = {
|
|
@@ -1817,7 +2091,7 @@ const ESCALATION_POLICY = {
|
|
|
1817
2091
|
};
|
|
1818
2092
|
|
|
1819
2093
|
// Structured completion protocol — fields agents must produce in ```completion blocks
|
|
1820
|
-
const COMPLETION_FIELDS = ['status', 'summary', 'files_changed', 'tests', 'pr', 'not_changed', 'failure_class', 'retryable', 'needs_rerun', 'verdict', 'artifacts'];
|
|
2094
|
+
const COMPLETION_FIELDS = ['status', 'summary', 'files_changed', 'tests', 'pr', 'not_changed', 'failure_class', 'retryable', 'needs_rerun', 'verdict', 'artifacts', 'nonce'];
|
|
1821
2095
|
|
|
1822
2096
|
const DEFAULT_AGENT_METRICS = {
|
|
1823
2097
|
tasksCompleted: 0, tasksErrored: 0,
|
|
@@ -2532,6 +2806,110 @@ function assertWorktreeOutsideProject(worktreePath, projectRoot) {
|
|
|
2532
2806
|
throw err;
|
|
2533
2807
|
}
|
|
2534
2808
|
|
|
2809
|
+
/**
 * Pick the directory that serves as the parent for git worktree paths during
 * dispatch. Replaces the inline expression spawnAgent used to carry
 * (`project.localPath ? path.resolve(project.localPath) : path.resolve(MINIONS_DIR, '..')`).
 *
 * Background: a central work item (no `project` field) falls back to the
 * parent of MINIONS_DIR. When MINIONS_DIR sits directly under a drive or
 * filesystem root (e.g. `D:\squad`), that parent IS the root (`D:\`), so the
 * resulting worktree path (`D:\worktrees\…`) lies inside it and
 * `assertWorktreeOutsideProject` rejects it. Because that rejection happened
 * inside spawnAgent without being surfaced as non-retryable, the dispatch
 * looped forever (W-mp62taw2000ubcc3).
 *
 * This helper detects the collapse up front and throws an error carrying the
 * code `WORKTREE_ROOTDIR_COLLAPSED_TO_DRIVE_ROOT`. Callers should translate it
 * into a non-retryable WORKTREE_PREFLIGHT failure so the dispatch fails fast
 * rather than being re-dispatched every tick.
 *
 * @param {string|null|undefined} localPath — `project.localPath`, if any
 * @param {string} minionsDir — the MINIONS_DIR fallback anchor
 * @returns {string} — absolute path to use as rootDir
 * @throws {Error} — code WORKTREE_ROOTDIR_MISSING_BASE when both arguments are
 *   falsy; code WORKTREE_ROOTDIR_COLLAPSED_TO_DRIVE_ROOT on collapse
 */
function resolveProjectRootDir(localPath, minionsDir) {
  const codedError = (message, code) => Object.assign(new Error(message), { code });

  // An explicit project path always wins and is returned without further checks.
  if (localPath) {
    return path.resolve(String(localPath));
  }

  if (!minionsDir) {
    throw codedError(
      'Cannot resolve project rootDir: no project.localPath and no MINIONS_DIR provided.',
      'WORKTREE_ROOTDIR_MISSING_BASE',
    );
  }

  const parentOfMinions = path.resolve(String(minionsDir), '..');
  const { root } = path.parse(parentOfMinions);

  // When a path equals its own parsed root we are at the drive/filesystem
  // root (Windows `D:\`, POSIX `/`, or a UNC share root). Every sibling such
  // as `D:\worktrees\…` counts as "inside" that root, so it can never host
  // worktrees.
  if (root !== parentOfMinions) {
    return parentOfMinions;
  }

  throw codedError(
    [
      `Cannot resolve project rootDir for dispatch — MINIONS_DIR="${minionsDir}" parent collapses `,
      `to filesystem/drive root "${parentOfMinions}", which cannot host worktrees. `,
      `Either attach the work item to a project (POST /api/work-items with "project") or `,
      `move MINIONS_DIR deeper than one directory below the drive root.`,
    ].join(''),
    'WORKTREE_ROOTDIR_COLLAPSED_TO_DRIVE_ROOT',
  );
}
|
|
2856
|
+
|
|
2857
|
+
// ── Spawn cwd vs worktree placement (W-mp73x32w000l143d) ──────────────────────
// Work types that run without a git worktree: they read repo state but do not
// produce code changes. Kept here so engine.js spawnAgent and any later caller
// (e.g. pipeline preflight) share a single definition.
//
// `docs` is deliberately absent: docs edits run at the Minions root without a
// project worktree, yet they ARE write-capable (they push commits to the
// minions repo itself). Apart from that one case this set is the complement of
// WORKTREE_REQUIRING_TYPES.
const READ_ONLY_ROOT_TASK_TYPES = new Set(['meeting', 'ask', 'explore', 'plan-to-prd', 'plan']);

/**
 * Work out where an agent process should run and, when a worktree will be
 * created, which directory `git worktree add` is run from. These are two
 * separate concerns that spawnAgent used to conflate (W-mp73x32w000l143d):
 *
 *   1. **cwd** — the agent's working directory. Read-only types run at the
 *      project root (or MINIONS_DIR for rootless WIs). Code-mutating types
 *      get `null` here; the caller starts from the worktree placement parent
 *      and switches to the worktree path once `git worktree add` succeeds.
 *
 *   2. **worktreeRootDir** — the parent directory for worktree placement.
 *      Read-only types get `null`, which lets the caller skip the drive-root
 *      preflight that fires when MINIONS_DIR sits one level below a
 *      filesystem root (the collapse case in resolveProjectRootDir).
 *
 * NOTE: Pipeline branches (engine.js `isPipelineBranchName`) are an exception
 * — they always need a worktree, even for read-only types, because the
 * worktree IS the pipeline's isolated workspace. The caller must detect that
 * case and recompute worktreeRootDir via `resolveProjectRootDir`.
 *
 * @param {{ localPath?: string|null }|null|undefined} project
 * @param {string} type — work type (e.g. 'fix', 'explore', 'meeting')
 * @param {string} minionsDir — MINIONS_DIR fallback anchor
 * @returns {{ cwd: string|null, worktreeRootDir: string|null }}
 *   - For read-only types: { cwd: <project dir or MINIONS_DIR>, worktreeRootDir: null }
 *   - For code-mutating types: { cwd: null, worktreeRootDir: <project root> }
 *     (caller defaults cwd to worktreeRootDir before worktree creation)
 * @throws {Error} WORKTREE_ROOTDIR_COLLAPSED_TO_DRIVE_ROOT (code-mutating only)
 *   or WORKTREE_ROOTDIR_MISSING_BASE if neither anchor present.
 */
function resolveSpawnPaths(project, type, minionsDir) {
  const projectPath = project?.localPath;

  // Code-mutating types: only the worktree parent is decided here.
  if (!READ_ONLY_ROOT_TASK_TYPES.has(type)) {
    return { cwd: null, worktreeRootDir: resolveProjectRootDir(projectPath, minionsDir) };
  }

  // Read-only types: prefer the project directory, fall back to MINIONS_DIR.
  const anchor = projectPath || minionsDir;
  if (!anchor) {
    throw Object.assign(
      new Error('Cannot resolve cwd for read-only spawn: no project.localPath and no MINIONS_DIR provided.'),
      { code: 'WORKTREE_ROOTDIR_MISSING_BASE' },
    );
  }
  return { cwd: path.resolve(String(anchor)), worktreeRootDir: null };
}
|
|
2912
|
+
|
|
2535
2913
|
// ── HTTP Origin Allowlist & Security Headers ─────────────────────────────────
|
|
2536
2914
|
// Pure helpers used by dashboard.js to gate mutating requests against an
|
|
2537
2915
|
// explicit allowlist of local origins and to attach uniform security response
|
|
@@ -3329,6 +3707,49 @@ function listAllProcesses() {
|
|
|
3329
3707
|
return process.platform === 'win32' ? _winListProcesses() : _unixListProcesses();
|
|
3330
3708
|
}
|
|
3331
3709
|
|
|
3710
|
+
// Cross-check a single PID's command line for a Minions agent invocation
// (`claude` or `copilot`, including the `node spawn-agent.js --runtime <name>`
// wrapper and `gh copilot` fallback). Used by orphan/recycled-PID safety:
//   - engine/cleanup.js: gate before killing a PID found in engine/tmp/pid-*.pid
//   - engine/timeout.js: gate before parking a dispatch as still-alive when
//     the OS PID is alive but may belong to an unrelated recycled-PID process
//
// How the command line is read:
//   Windows: PowerShell Get-CimInstance for the full CommandLine of one PID.
//   Linux:   /proc/<pid>/cmdline (NUL-separated).
//   macOS / when /proc isn't available: fallback `ps -p <pid> -o command=`.
//
// `pid` may be a number or a numeric string. Any other type (boolean, array,
// object, null, undefined) and any value that is not a positive safe integer
// is rejected before touching the OS, so coercion quirks such as
// `Number(true) === 1` can never cause an unrelated PID to be probed and
// exponent-formatted values (e.g. `1e+21`) are never interpolated into a
// shell command.
//
// Returns false when the PID is invalid, the process doesn't exist, the
// command line can't be read, or the cmdline contains neither `claude` nor
// `copilot`. False is the safe default for both call sites: cleanup falls
// through to "skip kill" and timeout falls through to "treat PID as dead".
function isProcessCommandLineMatchingAgent(pid) {
  if (typeof pid !== 'number' && typeof pid !== 'string') return false;
  const n = Number(pid);
  if (!Number.isSafeInteger(n) || n <= 0) return false;

  let cmdline = '';
  try {
    if (process.platform === 'win32') {
      const out = _execSync(
        `powershell -NoProfile -NonInteractive -Command "(Get-CimInstance Win32_Process -Filter 'ProcessId=${n}').CommandLine"`,
        { stdio: ['ignore', 'pipe', 'ignore'], timeout: 5000, windowsHide: true, encoding: 'utf8' }
      );
      cmdline = String(out || '').trim();
    } else {
      try {
        // argv entries are NUL-separated in procfs; join them with spaces.
        const buf = fs.readFileSync(`/proc/${n}/cmdline`);
        cmdline = buf.toString('utf8').replace(/\0/g, ' ').trim();
      } catch {
        // No readable /proc entry (e.g. macOS): ask ps instead. A failure
        // here propagates to the outer catch, which reports "no match".
        cmdline = String(_execSync(`ps -p ${n} -o command=`,
          { stdio: ['ignore', 'pipe', 'ignore'], timeout: 3000, encoding: 'utf8' }) || '').trim();
      }
    }
  } catch {
    // Any probe failure (process gone, timeout, tool missing) → safe default.
    return false;
  }

  if (!cmdline) return false;
  const lower = cmdline.toLowerCase();
  return lower.includes('claude') || lower.includes('copilot');
}
|
|
3752
|
+
|
|
3332
3753
|
function _buildChildMap(processes) {
|
|
3333
3754
|
const childMap = new Map();
|
|
3334
3755
|
for (const p of processes) {
|
|
@@ -3755,6 +4176,12 @@ module.exports = {
|
|
|
3755
4176
|
exec,
|
|
3756
4177
|
execAsync,
|
|
3757
4178
|
execSilent,
|
|
4179
|
+
shellSafeGh,
|
|
4180
|
+
shellSafeGit,
|
|
4181
|
+
shellSafeGitSync,
|
|
4182
|
+
validateGitRef,
|
|
4183
|
+
validateGhSlug,
|
|
4184
|
+
isValidGitWorktree,
|
|
3758
4185
|
resolveMainBranch,
|
|
3759
4186
|
run,
|
|
3760
4187
|
runFile,
|
|
@@ -3770,7 +4197,7 @@ module.exports = {
|
|
|
3770
4197
|
runtimeConfigWarnings,
|
|
3771
4198
|
projectWorkSourceWarnings,
|
|
3772
4199
|
backfillProjectWorkSourceDefaults,
|
|
3773
|
-
WI_STATUS, DONE_STATUSES, PLAN_TERMINAL_STATUSES, WORK_TYPE, PLAN_STATUS, PRD_ITEM_STATUS, PRD_MATERIALIZABLE, PR_STATUS, PR_POLLABLE_STATUSES, PR_PENDING_REASON, DISPATCH_RESULT, trackReviewMetric, queuePlanToPrd, extractPlanDeclaredProject,
|
|
4200
|
+
WI_STATUS, DONE_STATUSES, PLAN_TERMINAL_STATUSES, WORK_TYPE, WORKTREE_REQUIRING_TYPES, PLAN_STATUS, PRD_ITEM_STATUS, PRD_MATERIALIZABLE, PR_STATUS, PR_POLLABLE_STATUSES, PR_PENDING_REASON, DISPATCH_RESULT, trackReviewMetric, queuePlanToPrd, extractPlanDeclaredProject,
|
|
3774
4201
|
WATCH_STATUS, WATCH_TARGET_TYPE, WATCH_CONDITION, WATCH_ABSOLUTE_CONDITIONS, WATCH_ACTION_TYPE,
|
|
3775
4202
|
WATCH_STALLED_DEFAULT_TICKS, WATCH_STUCK_STAGE_DEFAULT_TICKS,
|
|
3776
4203
|
PIPELINE_STATUS, STAGE_TYPE, MEETING_STATUS, AGENT_STATUS,
|
|
@@ -3830,6 +4257,9 @@ module.exports = {
|
|
|
3830
4257
|
isPathInsideOrEqual,
|
|
3831
4258
|
parseWorktreePorcelain,
|
|
3832
4259
|
assertWorktreeOutsideProject,
|
|
4260
|
+
resolveProjectRootDir,
|
|
4261
|
+
resolveSpawnPaths,
|
|
4262
|
+
READ_ONLY_ROOT_TASK_TYPES,
|
|
3833
4263
|
isLiveCommandCenterPath,
|
|
3834
4264
|
describeCcProtectedPaths,
|
|
3835
4265
|
renderCcSystemPrompt,
|
|
@@ -3855,6 +4285,7 @@ module.exports = {
|
|
|
3855
4285
|
killImmediate,
|
|
3856
4286
|
killByPidImmediate,
|
|
3857
4287
|
killByPidsImmediate,
|
|
4288
|
+
isProcessCommandLineMatchingAgent,
|
|
3858
4289
|
listAllProcesses,
|
|
3859
4290
|
listProcessDescendants,
|
|
3860
4291
|
listProcessReachable,
|
|
@@ -3862,6 +4293,7 @@ module.exports = {
|
|
|
3862
4293
|
_purgeReservedFiles, // exported for testing
|
|
3863
4294
|
_WIN_RESERVED_NAMES, // exported for testing
|
|
3864
4295
|
LOCK_STALE_MS,
|
|
4296
|
+
isPidAlive,
|
|
3865
4297
|
flushLogs,
|
|
3866
4298
|
redactSecrets,
|
|
3867
4299
|
slugify,
|
package/engine/spawn-agent.js
CHANGED
|
@@ -38,6 +38,7 @@ const path = require('path');
|
|
|
38
38
|
const { runFile, cleanChildEnv, killGracefully, killImmediate, killByPidsImmediate, listProcessDescendants, ts, resolveEngineCacheDir } = require('./shared');
|
|
39
39
|
const { resolveRuntime } = require('./runtimes');
|
|
40
40
|
const { acquireAdoTokenSync, isLikelyAdoToken } = require('./ado-token');
|
|
41
|
+
const keepProcessSweep = require('./keep-process-sweep');
|
|
41
42
|
|
|
42
43
|
// ─── Pure helpers (exported for tests) ──────────────────────────────────────
|
|
43
44
|
|
|
@@ -534,8 +535,49 @@ function main() {
|
|
|
534
535
|
if (trackedDescendants.size || gotFirstOutput) {
|
|
535
536
|
snapshotDescendants();
|
|
536
537
|
if (trackedDescendants.size) {
|
|
537
|
-
|
|
538
|
-
|
|
538
|
+
// W-mp68q6ke0010de68 — opt-in keep_processes flag: agents whose work
|
|
539
|
+
// item carried `meta.keep_processes: true` may have written
|
|
540
|
+
// `agents/<id>/keep-pids.json` declaring specific descendant PIDs the
|
|
541
|
+
// engine MUST NOT reap. Resolve agentId from MINIONS_LIVE_OUTPUT_PATH
|
|
542
|
+
// (engine.js:1451 sets it to agents/<agentId>/live-output.log) and
|
|
543
|
+
// subtract validated, alive PIDs from the kill set. Missing or
|
|
544
|
+
// invalid file → fall through to today's behavior (reap everything).
|
|
545
|
+
let toKillPids = [...trackedDescendants];
|
|
546
|
+
let kept = [];
|
|
547
|
+
let keepRecord = null;
|
|
548
|
+
let keepReason = null;
|
|
549
|
+
try {
|
|
550
|
+
const liveOut = process.env.MINIONS_LIVE_OUTPUT_PATH;
|
|
551
|
+
const agentId = liveOut ? path.basename(path.dirname(liveOut)) : '';
|
|
552
|
+
if (agentId) {
|
|
553
|
+
// W-mp6k7ywi000fa33c — per-WI override (set by engine when
|
|
554
|
+
// meta.keep_processes_skip_workdir_check is true) bypasses the
|
|
555
|
+
// requireGitWorkdir check so legitimate non-git keep_processes
|
|
556
|
+
// use cases (e.g., a daemon under /tmp) still anchor.
|
|
557
|
+
const reapOpts = process.env.MINIONS_KEEP_PROCESSES_SKIP_WORKDIR_CHECK === '1'
|
|
558
|
+
? { requireGitWorkdir: false }
|
|
559
|
+
: {};
|
|
560
|
+
const plan = keepProcessSweep.computeReapPlan(toKillPids, agentId, reapOpts);
|
|
561
|
+
toKillPids = plan.toKill;
|
|
562
|
+
kept = plan.kept;
|
|
563
|
+
keepRecord = plan.record;
|
|
564
|
+
keepReason = plan.reason;
|
|
565
|
+
}
|
|
566
|
+
} catch (e) {
|
|
567
|
+
try { fs.appendFileSync(debugPath, `KEEP-PIDS error: ${e.message}\n`); } catch {}
|
|
568
|
+
}
|
|
569
|
+
if (kept.length) {
|
|
570
|
+
try {
|
|
571
|
+
fs.appendFileSync(
|
|
572
|
+
debugPath,
|
|
573
|
+
`KEPT pids=[${kept.join(',')}] purpose="${(keepRecord?.purpose || '').slice(0, 200)}" wi=${keepRecord?.wi_id || ''}\n`,
|
|
574
|
+
);
|
|
575
|
+
} catch {}
|
|
576
|
+
} else if (keepReason) {
|
|
577
|
+
try { fs.appendFileSync(debugPath, `KEEP-PIDS skipped: ${keepReason}\n`); } catch {}
|
|
578
|
+
}
|
|
579
|
+
const reaped = toKillPids.length ? killByPidsImmediate(toKillPids) : 0;
|
|
580
|
+
try { fs.appendFileSync(debugPath, `DESCENDANTS reaped=${reaped}/${toKillPids.length} kept=${kept.length}\n`); } catch {}
|
|
539
581
|
}
|
|
540
582
|
}
|
|
541
583
|
// Prefer the 'exit' event's code/signal when present — see note above.
|