@yemi33/minions 0.1.1981 → 0.1.1983

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,12 @@
13
13
  // about why the agent live tab moved off SSE).
14
14
 
15
15
  let _managedProcessesEtag = null;
16
+ // Cache the last successfully-fetched items so 304 (no-op) ticks can still
17
+ // re-render the time-derived columns (uptime, TTL) against the current
18
+ // Date.now(). The server payload only changes on spawn/kill/restart, so
19
+ // without this cache the strings would freeze between events
20
+ // (W-mpbt3wfs002l3a09).
21
+ let _managedProcessesLastItems = null;
16
22
  let _managedLogES = null;
17
23
 
18
24
  function _fmtAgo(ms) {
@@ -84,23 +90,27 @@ async function renderManagedProcesses() {
84
90
  if (!root) return;
85
91
  let items;
86
92
  let fetchErr = null;
87
- let notModified = false;
88
93
  try {
89
94
  const headers = {};
90
95
  if (_managedProcessesEtag) headers['If-None-Match'] = _managedProcessesEtag;
91
96
  const res = await fetch('/api/managed-processes', { headers });
92
97
  if (res.status === 304) {
93
- notModified = true;
98
+ // Server payload unchanged — reuse cached items so uptime/TTL columns
99
+ // recompute against the current Date.now(). Defensive null check: a
100
+ // first-tick 304 shouldn't happen (no prior ETag => no If-None-Match)
101
+ // but if it does, skip this frame rather than wiping the panel.
102
+ if (!_managedProcessesLastItems) return;
103
+ items = _managedProcessesLastItems;
94
104
  } else {
95
105
  const et = res.headers.get('ETag');
96
106
  if (et) _managedProcessesEtag = et;
97
107
  const data = await res.json();
98
108
  items = (data && Array.isArray(data.items)) ? data.items : [];
109
+ _managedProcessesLastItems = items;
99
110
  }
100
111
  } catch (e) {
101
112
  fetchErr = e;
102
113
  }
103
- if (notModified) return; // nothing changed since last render
104
114
  let html;
105
115
  if (fetchErr) {
106
116
  if (countEl) countEl.textContent = '?';
@@ -35,7 +35,7 @@ The sidecar lives at `<MINIONS_DIR>/agents/<agentId>/managed-spawn.json` and is
35
35
  "name": "constellation-host", // kebab-case, ≤64 chars, unique within file
36
36
  "cmd": "bun", // must be on engine.managedSpawn.executableAllowlist
37
37
  "args": ["run", "dev"], // ≤64 entries
38
- "cwd": "D:/repos/constellation", // must be a real git worktree (requireGitWorkdir: true)
38
+ "cwd": "D:/repos/constellation", // must be inside a real git worktree (requireGitWorkdir: true) — monorepo subdirs ok, ancestor walked up to gitWorktreeMaxParentDepth parents
39
39
  "env": { "CONSTELLATION_SERVER": "http://localhost:3000" }, // ≤32 keys; POSIX-shape + denylist enforced
40
40
  "ports": [3001], // 1024-65535; ≤20 per spec; advisory only (engine doesn't bind)
41
41
  "ttl_minutes": 240, // ≤1440 (24h hard cap); defaults to 240 (4h)
@@ -217,7 +217,8 @@ All knobs live under `engine.managedSpawn` in `engine/shared.js:1500` (`ENGINE_D
217
217
  | `logRotateBytes` | `10485760` | Rotation threshold for `<name>.log`. |
218
218
  | `bootReconcileMaxMs` | `2000` | Boot-time reconcile hard timeout. |
219
219
  | `promptContextMaxBytes` | `2048` | Auto-injected `## Live managed processes` block cap. |
220
- | `requireGitWorkdir` | `true` | Reject specs whose `cwd` isn't a git worktree. |
220
+ | `requireGitWorkdir` | `true` | Reject specs whose `cwd` isn't inside a git worktree (root or any ancestor up to `gitWorktreeMaxParentDepth`). |
221
+ | `gitWorktreeMaxParentDepth` | `6` | How many parent directories `shared.isValidGitWorktree` walks when probing for `.git`. Lets monorepo specs pin a per-package `cwd` (`<root>/packages/<pkg>/...`); set to `0` to disable the walk. |
221
222
  | `executableAllowlist` | `[node, bun, npm, …]` | Single global. Applies to `spec.cmd` AND `command` healthcheck `cmd`. |
222
223
  | `envKeyDenyPatterns` | `[^AWS_, ^AZURE_, _SECRET, _TOKEN, _API_KEY, …]` | Regex source strings, matched case-insensitively. Keys matching ANY pattern are rejected unless exact-listed in `envKeyDenyOverrides`. Threat model: credential leakage, not env-key enumeration — plain project vars (`CONSTELLATION_SERVER`, `DATABASE_URL`, …) pass with no config. |
223
224
  | `envKeyDenyOverrides` | `[AWS_REGION, AWS_DEFAULT_REGION, AZURE_REGION, GCP_REGION, AWS_PROFILE]` | Exact-match exemptions for known-safe keys that would otherwise be caught by a broad prefix pattern. Case-sensitive. |
package/engine/cli.js CHANGED
@@ -818,6 +818,38 @@ const commands = {
818
818
  }
819
819
  })();
820
820
 
821
+ // W-mpbqhstz001lf518 — Boot reconcile for orphan worktrees (closes #2627).
822
+ // Walks each project's `<root>/<worktreeRoot>/` dir and removes any
823
+ // `W-*` (or posix `<proj>-<branch>-<hash>`) subdir whose dispatch id
824
+ // is NOT in dispatch.active/pending AND that is NOT a pool member.
825
+ // Such dirs are left over from a prior crash and block retries with
826
+ // `fatal: 'work/W-<id>' is already used by worktree at …`. Runs AFTER
827
+ // worktreePool.pruneStale so the pool snapshot we cross-reference is
828
+ // already settled (otherwise we could evict a dir whose pool entry was
829
+ // about to be pruned but still appeared as a pool member).
830
+ (function startupReconcileOrphanWorktrees() {
831
+ try {
832
+ const sharedMod = require('./shared');
833
+ const worktreeGc = require('./worktree-gc');
834
+ const dispatchSnap = require('./queries').getDispatch();
835
+ const cfg = require('./queries').getConfig();
836
+ const projects = sharedMod.getProjects(cfg);
837
+ if (projects.length === 0) return;
838
+ const worktreeRootRel = cfg?.engine?.worktreeRoot || sharedMod.ENGINE_DEFAULTS.worktreeRoot;
839
+ const result = worktreeGc.pruneOrphanWorktrees({
840
+ projects,
841
+ dispatchSnap,
842
+ worktreeRootRel,
843
+ log: (lvl, msg) => e.log(lvl, msg),
844
+ });
845
+ if (result.evicted > 0 || result.failed > 0) {
846
+ console.log(` Worktree boot reconcile: evicted ${result.evicted} orphan worktree(s)${result.failed ? `, ${result.failed} failed` : ''} (scanned ${result.scanned}, kept ${result.kept})`);
847
+ }
848
+ } catch (err) {
849
+ e.log('warn', `Worktree boot reconcile failed: ${err.message}`);
850
+ }
851
+ })();
852
+
821
853
  // W-mp7gox8w000n8936 — Boot reconcile for kb-sweep state: clear stale
822
854
  // `in-flight`/`starting` records left over from a crashed runner (or a
823
855
  // legacy pre-pid runner). Without this, the record sits there clogging
@@ -286,9 +286,20 @@ function _validateSpec(spec, index, limits, opts) {
286
286
  return { ok: false, reason: 'cwd-too-long' };
287
287
  }
288
288
  if (_resolveRequireGitWorkdir(opts) && typeof spec.cwd === 'string' && spec.cwd.length > 0) {
289
- const wt = shared.isValidGitWorktree(spec.cwd);
289
+ const wt = shared.isValidGitWorktree(spec.cwd, { memo: opts._gitWorktreeMemo });
290
290
  if (!wt.ok) {
291
- return { ok: false, reason: INVALID_WORKDIR_REASON_PREFIX + wt.reason };
291
+ // W-mpbpa01y000qcdc2 enrich the reject reason so the dispatcher prompt
292
+ // can distinguish a stale `cwd` (typoed / belonged to a torn-down
293
+ // worktree) from a real subdir whose `.git` lives at an ancestor. The
294
+ // legacy substring `invalid-workdir:` is preserved so the engine close-
295
+ // handler gate still matches.
296
+ const detail = [
297
+ wt.reason,
298
+ 'cwd-exists:' + (wt.exists ? 'true' : 'false'),
299
+ 'nearest-ancestor:' + (wt.nearestGitAncestor || 'null'),
300
+ 'worktree-root:' + (wt.worktreeRoot || 'null'),
301
+ ].join(' | ');
302
+ return { ok: false, reason: INVALID_WORKDIR_REASON_PREFIX + detail };
292
303
  }
293
304
  }
294
305
 
@@ -420,10 +431,14 @@ function validateManagedSpawnRecord(parsed, opts) {
420
431
  try { projects = shared.getProjects(); }
421
432
  catch (_e) { projects = []; }
422
433
  }
423
- const specOpts = Object.assign({}, opts, { projects: projects });
434
+ const specOpts = Object.assign({}, opts, { projects: projects, _gitWorktreeMemo: new Map() });
424
435
 
425
436
  const seen = new Set();
426
437
  const out = [];
438
+ // W-mpbpa01y000qcdc2 — share a memo across all specs in this file so
439
+ // adjacent `cwd`s under the same worktree root don't re-stat the same
440
+ // ancestors. Threaded through `opts._gitWorktreeMemo` into `_validateSpec`,
441
+ // then into `shared.isValidGitWorktree`.
427
442
  for (let i = 0; i < parsed.specs.length; i++) {
428
443
  const v = _validateSpec(parsed.specs[i], i, limits, specOpts);
429
444
  if (!v.ok) {
@@ -645,6 +660,22 @@ function buildManagedSpawnHint(opts) {
645
660
  '',
646
661
  'A passing smoke-test is the entry gate to writing the sidecar — not a nice-to-have. If you skip it, you are betting the WI completion against a command you never ran.',
647
662
  '',
663
+ '### Monorepo `cwd` tip (W-mpbpa01y000qcdc2)',
664
+ '',
665
+ '**Only worktree ROOTS have a `.git` entry.** In a multi-package repo (yarn workspaces, pnpm, bun workspaces, lerna, lage, turborepo), `packages/<pkg>/` does NOT contain its own `.git` — the engine\'s workdir validator walks up to ' + (limits.gitWorktreeMaxParentDepth || 6) + ' parent directories looking for one, so a per-package `cwd` IS accepted, but the **canonical recipe** is to keep `cwd` at the worktree root and target the package via the runtime\'s workspace flag:',
666
+ '',
667
+ '```jsonc',
668
+ '{',
669
+ ' "name": "constellation-host",',
670
+ ' "cmd": "bun",',
671
+ ' "args": ["-F", "@scope/server", "run", "dev"], // bun workspace filter',
672
+ ' "cwd": "<worktree-root>", // NOT <worktree-root>/packages/server',
673
+ ' "healthcheck": { "type": "http", "url": "http://localhost:3001/health", "expect_status": 200, "interval_s": 1, "timeout_s": 60 }',
674
+ '}',
675
+ '```',
676
+ '',
677
+ 'Equivalent flags for other runtimes: `pnpm --filter <pkg>`, `yarn workspace <pkg>`, `npm run -w <pkg>`, `lage run <task> --to <pkg>`. Keeping `cwd` at the root makes the spec portable across machines (no hard-coded package path) and avoids per-package `node_modules` resolution surprises.',
678
+ '',
648
679
  '### Verify before exit',
649
680
  '',
650
681
  'After you write the file, query the engine to confirm acceptance:',
package/engine/shared.js CHANGED
@@ -1089,11 +1089,20 @@ function validateGhSlug(slug) {
1089
1089
  return slug;
1090
1090
  }
1091
1091
 
1092
- // W-mp6k7ywi000fa33c — pure helper. Returns { ok: boolean, reason?: string }.
1093
- // `ok: true` when `dirPath` exists AND contains either a `.git` directory OR
1094
- // a `.git` worktree pointer file (a real file whose first line starts with
1095
- // `gitdir:`). Anything else missing dir, missing `.git`, `.git` as a
1096
- // random non-pointer file returns `ok: false` with a human-readable reason.
1092
+ // W-mp6k7ywi000fa33c — pure helper. Returns:
1093
+ // { ok: boolean,
1094
+ // reason?: string,
1095
+ // exists: boolean, // whether dirPath itself exists on disk
1096
+ // nearestGitAncestor: string|null, // path to nearest ancestor .git found
1097
+ // worktreeRoot: string|null, // dir that owns the nearest .git
1098
+ // depth: number // 0 = dirPath itself; >0 = parents walked; -1 = not found
1099
+ // }
1100
+ // `ok: true` when `dirPath` (or one of its ancestors up to a configurable
1101
+ // depth) contains either a `.git` directory OR a `.git` worktree pointer
1102
+ // file (a real file whose first line starts with `gitdir:`). The ancestor
1103
+ // walk lets monorepo agents pin a per-package `cwd` (e.g.
1104
+ // `<worktree-root>/packages/server`) — only the worktree root carries `.git`
1105
+ // but the package subdir is still inside a real worktree (W-mpbpa01y000qcdc2).
1097
1106
  //
1098
1107
  // No shelling out (no `git rev-parse`); just `fs.existsSync`/`fs.statSync`
1099
1108
  // and a tiny content sniff for the worktree pointer case. This catches the
@@ -1101,32 +1110,108 @@ function validateGhSlug(slug) {
1101
1110
  // `cp -r`) instead of `git worktree add`, which produced a directory that
1102
1111
  // looks file-by-file like a worktree but has no git linkage. See
1103
1112
  // W-mp6ha6q9000d58a5 for the real-world incident this prevents.
1104
- function isValidGitWorktree(dirPath) {
1105
- if (typeof dirPath !== 'string' || dirPath.length === 0) {
1106
- return { ok: false, reason: 'cwd missing or not a string' };
1107
- }
1113
+ //
1114
+ // Options:
1115
+ // maxParentDepth — number of parent directories to walk after the direct
1116
+ // probe; defaults to ENGINE_DEFAULTS.managedSpawn.gitWorktreeMaxParentDepth
1117
+ // (6). Pass 0 to disable the walk and restore the legacy behavior.
1118
+ // memo — optional Map shared across multiple calls (e.g. validating N
1119
+ // specs in one sidecar file) so adjacent paths don't re-stat the same
1120
+ // ancestors. Keys are absolute path strings; values are result objects.
1121
+ function _probeGitAtDir(dirPath) {
1108
1122
  let dirStat;
1109
1123
  try { dirStat = fs.statSync(dirPath); }
1110
- catch (_e) { return { ok: false, reason: 'directory does not exist: ' + dirPath }; }
1124
+ catch (_e) { return { ok: false, exists: false, reason: 'directory does not exist: ' + dirPath }; }
1111
1125
  if (!dirStat.isDirectory()) {
1112
- return { ok: false, reason: 'path is not a directory: ' + dirPath };
1126
+ return { ok: false, exists: true, reason: 'path is not a directory: ' + dirPath };
1113
1127
  }
1114
1128
  const gitPath = path.join(dirPath, '.git');
1115
1129
  let gitStat;
1116
1130
  try { gitStat = fs.statSync(gitPath); }
1117
- catch (_e) { return { ok: false, reason: 'no .git directory or worktree pointer at ' + dirPath }; }
1118
- if (gitStat.isDirectory()) return { ok: true };
1131
+ catch (_e) { return { ok: false, exists: true, reason: 'no .git directory or worktree pointer at ' + dirPath }; }
1132
+ if (gitStat.isDirectory()) return { ok: true, exists: true, gitPath: gitPath };
1119
1133
  if (gitStat.isFile()) {
1120
1134
  // Worktree pointer files contain "gitdir: <abs path>" on the first line.
1121
1135
  // A `.git` file that doesn't match this shape is a normal file, not a
1122
1136
  // valid worktree linkage — reject it.
1123
1137
  let head = '';
1124
1138
  try { head = fs.readFileSync(gitPath, { encoding: 'utf8', flag: 'r' }).slice(0, 256); }
1125
- catch (e) { return { ok: false, reason: '.git file unreadable: ' + e.message }; }
1126
- if (/^gitdir:\s*\S/.test(head)) return { ok: true };
1127
- return { ok: false, reason: '.git file present but not a worktree pointer (no "gitdir:" prefix): ' + dirPath };
1139
+ catch (e) { return { ok: false, exists: true, reason: '.git file unreadable: ' + e.message }; }
1140
+ if (/^gitdir:\s*\S/.test(head)) return { ok: true, exists: true, gitPath: gitPath };
1141
+ return { ok: false, exists: true, reason: '.git file present but not a worktree pointer (no "gitdir:" prefix): ' + dirPath };
1142
+ }
1143
+ return { ok: false, exists: true, reason: '.git entry is neither a file nor a directory: ' + gitPath };
1144
+ }
1145
+
1146
+ function isValidGitWorktree(dirPath, opts) {
1147
+ if (typeof dirPath !== 'string' || dirPath.length === 0) {
1148
+ return { ok: false, exists: false, nearestGitAncestor: null, worktreeRoot: null, depth: -1, reason: 'cwd missing or not a string' };
1149
+ }
1150
+ opts = opts || {};
1151
+ const limits = (ENGINE_DEFAULTS && ENGINE_DEFAULTS.managedSpawn) || {};
1152
+ const defaultDepth = Number.isFinite(limits.gitWorktreeMaxParentDepth) ? limits.gitWorktreeMaxParentDepth : 6;
1153
+ const maxDepth = Math.max(0, Number.isFinite(opts.maxParentDepth) ? opts.maxParentDepth : defaultDepth);
1154
+ const memo = opts.memo instanceof Map ? opts.memo : null;
1155
+
1156
+ if (memo && memo.has(dirPath)) return memo.get(dirPath);
1157
+
1158
+ const direct = _probeGitAtDir(dirPath);
1159
+ if (direct.ok) {
1160
+ const out = { ok: true, exists: true, nearestGitAncestor: direct.gitPath, worktreeRoot: dirPath, depth: 0 };
1161
+ if (memo) memo.set(dirPath, out);
1162
+ return out;
1128
1163
  }
1129
- return { ok: false, reason: '.git entry is neither a file nor a directory: ' + gitPath };
1164
+
1165
+ // Don't walk ancestors when the path itself doesn't exist — that's a stale
1166
+ // / typoed cwd, not a monorepo subdir, and walking would mask the real
1167
+ // problem. The legacy "directory does not exist" reason is preserved.
1168
+ if (!direct.exists) {
1169
+ const out = { ok: false, exists: false, nearestGitAncestor: null, worktreeRoot: null, depth: -1, reason: direct.reason };
1170
+ if (memo) memo.set(dirPath, out);
1171
+ return out;
1172
+ }
1173
+
1174
+ // Walk parents up to maxDepth, looking for any ancestor whose direct probe
1175
+ // succeeds. Reuse the memo for previously-seen ancestors (matters when
1176
+ // validating several specs that share a common worktree-root prefix).
1177
+ let cur = dirPath;
1178
+ let walked = 0;
1179
+ for (let i = 1; i <= maxDepth; i++) {
1180
+ const parent = path.dirname(cur);
1181
+ if (parent === cur) break; // hit the filesystem root
1182
+ walked = i;
1183
+ if (memo && memo.has(parent)) {
1184
+ const cached = memo.get(parent);
1185
+ if (cached.ok) {
1186
+ const out = { ok: true, exists: true, nearestGitAncestor: cached.nearestGitAncestor, worktreeRoot: cached.worktreeRoot, depth: i };
1187
+ memo.set(dirPath, out);
1188
+ return out;
1189
+ }
1190
+ cur = parent;
1191
+ continue;
1192
+ }
1193
+ const probe = _probeGitAtDir(parent);
1194
+ if (probe.ok) {
1195
+ const ancestorResult = { ok: true, exists: true, nearestGitAncestor: probe.gitPath, worktreeRoot: parent, depth: 0 };
1196
+ if (memo) memo.set(parent, ancestorResult);
1197
+ const out = { ok: true, exists: true, nearestGitAncestor: probe.gitPath, worktreeRoot: parent, depth: i };
1198
+ if (memo) memo.set(dirPath, out);
1199
+ return out;
1200
+ }
1201
+ cur = parent;
1202
+ }
1203
+
1204
+ // No ancestor satisfied the .git probe. Synthesize a reason that keeps the
1205
+ // direct-probe detail (so legacy substring assertions like "no .git" and
1206
+ // "not a worktree pointer" still match) AND adds the ancestor-walk context
1207
+ // the dispatcher prompt needs to distinguish "stale path" from
1208
+ // "real subdir of an unrelated dir".
1209
+ const reason = direct.reason
1210
+ + ' (and no .git directory or worktree pointer in any of '
1211
+ + walked + ' parent directories up to depth ' + maxDepth + ')';
1212
+ const out = { ok: false, exists: true, nearestGitAncestor: null, worktreeRoot: null, depth: -1, reason: reason };
1213
+ if (memo) memo.set(dirPath, out);
1214
+ return out;
1130
1215
  }
1131
1216
 
1132
1217
  function shellSafeGh(args, opts = {}) {
@@ -1513,6 +1598,12 @@ const ENGINE_DEFAULTS = {
1513
1598
  bootReconcileMaxMs: 2000, // boot-time reconcile timeout (don't block engine boot)
1514
1599
  promptContextMaxBytes: 2048, // cap on auto-injected `## Live managed processes` block
1515
1600
  requireGitWorkdir: true, // reject specs whose `cwd` isn't a real git worktree
1601
+ // W-mpbpa01y000qcdc2 — how many parent directories `isValidGitWorktree`
1602
+ // walks before giving up. In monorepos only the worktree ROOT has `.git`;
1603
+ // a per-package `cwd` like `<root>/packages/server` needs the walk to
1604
+ // succeed. 6 covers `packages/<pkg>/src/<sub>/<sub>/<sub>` without
1605
+ // walking out of any realistic project; set to 0 to disable.
1606
+ gitWorktreeMaxParentDepth: 6,
1516
1607
  // Single global executable allowlist. Applies to both `spec.cmd` and any
1517
1608
  // `command` healthcheck's `cmd`. Keep narrow — adding a binary here lets
1518
1609
  // any agent's sidecar invoke it under engine ownership.
@@ -0,0 +1,360 @@
1
+ // engine/worktree-gc.js — orphan-worktree garbage collection
2
+ //
3
+ // W-mpbqhstz001lf518 (closes yemi33/minions#2627). Two callers, one shared
4
+ // decision surface:
5
+ //
6
+ // 1. On dispatch end (engine.js onAgentClose): remove the dispatch's
7
+ // worktree once the agent process has truly exited and all sidecar
8
+ // reads (keep_processes, managed_spawn) are done. Skip when the
9
+ // worktree was borrowed from the pool, returned to the pool, or is
10
+ // anchored by live keep_processes PIDs / managed_spawn cwds.
11
+ //
12
+ // 2. On engine boot (engine/cli.js): scan each project's worktree root
13
+ // for orphan W-* dirs whose dispatch is not in dispatch.json (active
14
+ // or pending) AND that are not pool entries. Such dirs are leftovers
15
+ // from a prior crash that block retries with
16
+ // `fatal: 'work/W-…' is already used by worktree at …`.
17
+ //
18
+ // All git ops route through `shared.removeWorktree` (which already runs
19
+ // `git worktree remove --force` + `git worktree prune`). All git ops run
20
+ // OUTSIDE `mutateJsonFileLocked` callbacks — per the lock-callback
21
+ // contract in CLAUDE.md ("do not run network calls, git commands, process
22
+ // kills, or `await` while holding a file lock").
23
+
24
+ const fs = require('fs');
25
+ const path = require('path');
26
+
27
+ const shared = require('./shared');
28
+ const worktreePool = require('./worktree-pool');
29
+
30
+ let keepProcessSweep = null;
31
+ function _keepProcessSweep() {
32
+ if (!keepProcessSweep) keepProcessSweep = require('./keep-process-sweep');
33
+ return keepProcessSweep;
34
+ }
35
+
36
+ let managedSpawn = null;
37
+ function _managedSpawn() {
38
+ if (!managedSpawn) managedSpawn = require('./managed-spawn');
39
+ return managedSpawn;
40
+ }
41
+
42
+ const _noopLog = () => {};
43
+
44
+ /**
45
+ * Decide whether a dispatch-end worktree should be GC'd.
46
+ *
47
+ * Inputs are explicit so callers can supply mocks. Returns
48
+ * { gc: boolean, reason: string }
49
+ * where `reason` is a short tag suitable for logging.
50
+ *
51
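+ * Skip rules (any one wins; the body additionally returns 'force-skip' right after missing-on-disk when forceSkip is set, and tests the in-memory managedSpawnSpawnedCount before the keep_processes check):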
52
+ * - no-worktree-path — worktreePath empty / null
53
+ * - missing-on-disk — worktreePath does not exist (already gone)
54
+ * - pool-member — worktree-pool currently owns this path
55
+ * (covers borrowed + idle + returned states;
56
+ * authoritative over the legacy in-memory
57
+ * borrowedFromPool/returnedToPool flags
58
+ * because the pool can evict between borrow
59
+ * and dispatch-end — see PR #2627 review)
60
+ * - keep-processes-anchored — agent declared keep_processes PIDs that are live
61
+ * - managed-spawn-anchored — agent placed managed_spawn services with cwd
62
+ * inside (or equal to) the worktree (checked
63
+ * BOTH against in-memory `managedSpawnSpawnedCount`
64
+ * for services this dispatch just spawned AND
65
+ * against the global state file for legacy
66
+ * services that recordManagedBatch persisted)
67
+ *
68
+ * Otherwise: `{ gc: true, reason: 'orphan' }`.
69
+ */
70
+ function shouldGcDispatchWorktree(opts) {
71
+ const {
72
+ worktreePath,
73
+ agentId = '',
74
+ // Belt+suspenders: defensive override when caller wants to forcibly
75
+ // protect this worktree regardless of pool/anchor state.
76
+ forceSkip = false,
77
+ // In-memory hint from the dispatch-end caller: when > 0, services were
78
+ // spawned by THIS dispatch but recordManagedBatch may have failed to
79
+ // persist them; skip GC unconditionally to prevent yanking their cwd.
80
+ managedSpawnSpawnedCount = 0,
81
+ // Test injection points — production callers leave undefined.
82
+ isPoolMember = null,
83
+ listManagedSpecs = null,
84
+ getActiveAnchorPidsForAgent = null,
85
+ fileExists = null,
86
+ } = opts || {};
87
+
88
+ if (!worktreePath) return { gc: false, reason: 'no-worktree-path' };
89
+ const exists = typeof fileExists === 'function'
90
+ ? !!fileExists(worktreePath)
91
+ : fs.existsSync(worktreePath);
92
+ if (!exists) return { gc: false, reason: 'missing-on-disk' };
93
+ if (forceSkip) return { gc: false, reason: 'force-skip' };
94
+
95
+ // Pool ownership is authoritative: the pool's on-disk state file tells us
96
+ // whether ANY pool entry (idle, borrowed, just-returned) currently claims
97
+ // this path. Subsumes the legacy in-memory borrowedFromPool/returnedToPool
98
+ // flags and covers the pool-return-throw eviction case correctly.
99
+ try {
100
+ const fn = typeof isPoolMember === 'function'
101
+ ? isPoolMember
102
+ : (wt) => worktreePool.isPoolMember(wt);
103
+ if (fn(worktreePath)) return { gc: false, reason: 'pool-member' };
104
+ } catch (_e) { /* pool readable optional */ }
105
+
106
+ // In-memory managed-spawn signal: this dispatch's own spawn batch.
107
+ // `managedSpawnSpawnedCount > 0` means recordManagedBatch was called (or
108
+ // attempted) — the cwd of those services lives inside this worktree, so
109
+ // even if the state-file persistence failed we must protect them.
110
+ if (Number(managedSpawnSpawnedCount) > 0) {
111
+ return { gc: false, reason: 'managed-spawn-anchored' };
112
+ }
113
+
114
+ // keep_processes anchor check: any live PID for this agent → skip.
115
+ if (agentId) {
116
+ try {
117
+ const fn = typeof getActiveAnchorPidsForAgent === 'function'
118
+ ? getActiveAnchorPidsForAgent
119
+ : _keepProcessSweep().getActiveAnchorPidsForAgent;
120
+ const res = fn(agentId);
121
+ if (res && res.pids && res.pids.size > 0) {
122
+ return { gc: false, reason: 'keep-processes-anchored' };
123
+ }
124
+ } catch (_e) { /* sidecar reader optional */ }
125
+ }
126
+
127
+ // managed_spawn anchor check: any LIVE spec (from any agent) whose cwd
128
+ // is == or under the worktree → skip. Covers the "completed dispatch
129
+ // with long-running managed-spawn services survives boot" case.
130
+ try {
131
+ const fn = typeof listManagedSpecs === 'function'
132
+ ? listManagedSpecs
133
+ : _managedSpawn().listManagedSpecs;
134
+ const specs = fn() || [];
135
+ const wtAbs = path.resolve(worktreePath);
136
+ const wtPrefix = wtAbs + path.sep;
137
+ for (const rec of specs) {
138
+ if (!rec || typeof rec.cwd !== 'string' || rec.cwd.length === 0) continue;
139
+ let cwdAbs;
140
+ try { cwdAbs = path.resolve(rec.cwd); } catch { continue; }
141
+ if (cwdAbs === wtAbs || cwdAbs.startsWith(wtPrefix)) {
142
+ return { gc: false, reason: 'managed-spawn-anchored' };
143
+ }
144
+ }
145
+ } catch (_e) { /* sidecar reader optional */ }
146
+
147
+ return { gc: true, reason: 'orphan' };
148
+ }
149
+
150
+ /**
151
+ * Run shouldGcDispatchWorktree + (if eligible) `shared.removeWorktree`.
152
+ *
153
+ * Returns `{ outcome, reason, removed }`:
154
+ * - outcome: 'gc'|'skip'|'gc-failed'
155
+ * - reason: short tag from shouldGcDispatchWorktree, or one of 'no-git-root' / 'remove-failed' / 'remove-threw'
156
+ * - removed: boolean — true when removeWorktree returned truthy
157
+ */
158
+ function gcDispatchWorktreeIfOrphan(opts) {
159
+ const {
160
+ worktreePath,
161
+ gitRoot,
162
+ worktreeRoot,
163
+ log = _noopLog,
164
+ removeWorktree = null,
165
+ } = opts || {};
166
+ const decision = shouldGcDispatchWorktree(opts);
167
+ if (!decision.gc) {
168
+ return { outcome: 'skip', reason: decision.reason, removed: false };
169
+ }
170
+ if (!gitRoot || !worktreeRoot) {
171
+ return { outcome: 'skip', reason: 'no-git-root', removed: false };
172
+ }
173
+ const _removeFn = typeof removeWorktree === 'function' ? removeWorktree : shared.removeWorktree;
174
+ try {
175
+ const removed = _removeFn(worktreePath, gitRoot, worktreeRoot);
176
+ if (removed) {
177
+ log('info', `worktree-gc: dispatch-end removed ${path.basename(worktreePath)}`);
178
+ return { outcome: 'gc', reason: decision.reason, removed: true };
179
+ }
180
+ log('warn', `worktree-gc: dispatch-end remove returned false for ${worktreePath}`);
181
+ return { outcome: 'gc-failed', reason: 'remove-failed', removed: false };
182
+ } catch (gcErr) {
183
+ log('warn', `worktree-gc: dispatch-end remove threw for ${worktreePath}: ${gcErr.message}`);
184
+ return { outcome: 'gc-failed', reason: 'remove-threw', removed: false };
185
+ }
186
+ }
187
+
188
+ /**
189
+ * Walk each project's worktree root and evict orphan W-* dirs.
190
+ *
191
+ * - `projects` — array of `{ name, localPath }`
192
+ * - `dispatchSnap` — `{ active: [...], pending: [...] }` (work-item dispatch.json)
193
+ * - `worktreeRootRel` — relative dir under each project's localPath (default '../worktrees')
194
+ * - `log` — log(level, msg) function (optional)
195
+ * - `fs`/`removeWorktree`/`buildWorktreeDirName`/`listManagedSpecs` — test injection points
196
+ *
197
+ * Protection rules (any hit → keep):
198
+ * 1. Dir name matches the wtDirName of any active/pending dispatch in
199
+ * `dispatchSnap`, computed using THAT DISPATCH'S OWN project name.
200
+ * The live-name set is built once, across all dispatches, so two projects sharing
201
+ * a parent `worktreeRoot` (monorepo-style) don't cross-evict each
202
+ * other's dispatches. (PR #2627 review — Issue 2)
203
+ * 2. Normalized abs path is currently in any `worktreePool` entry (idle
204
+ * or borrowed) — see `extraPoolPaths` for the test override.
205
+ * 3. Worktree contains (or equals) the cwd of any live managed_spawn
206
+ * spec — matches `engine/cleanup.js`'s tick-time protection so a
207
+ * completed dispatch with long-running services survives engine
208
+ * restart. (PR #2627 review — Issue 1)
209
+ *
210
+ * Returns `{ scanned, kept, evicted, failed, perProject }`.
211
+ */
212
+ function pruneOrphanWorktrees(opts) {
213
+ opts = opts || {};
214
+ const projects = Array.isArray(opts.projects) ? opts.projects : [];
215
+ const dispatchSnap = opts.dispatchSnap || { active: [], pending: [] };
216
+ const worktreeRootRel = typeof opts.worktreeRootRel === 'string' && opts.worktreeRootRel.length > 0
217
+ ? opts.worktreeRootRel
218
+ : (shared.ENGINE_DEFAULTS && shared.ENGINE_DEFAULTS.worktreeRoot) || '../worktrees';
219
+ const log = typeof opts.log === 'function' ? opts.log : _noopLog;
220
+ const _fs = opts.fs || fs;
221
+ const _removeWorktree = typeof opts.removeWorktree === 'function'
222
+ ? opts.removeWorktree
223
+ : shared.removeWorktree;
224
+ const _buildWorktreeDirName = typeof opts.buildWorktreeDirName === 'function'
225
+ ? opts.buildWorktreeDirName
226
+ : shared.buildWorktreeDirName;
227
+ const _listManagedSpecs = typeof opts.listManagedSpecs === 'function'
228
+ ? opts.listManagedSpecs
229
+ : (() => {
230
+ try { return _managedSpawn().listManagedSpecs(); }
231
+ catch (_e) { return []; }
232
+ });
233
+
234
+ // Pool-known paths (idle + borrowed + any stale entry still on disk). The
235
+ // worktree-pool's own pruneStale() runs FIRST in cli boot so this snapshot
236
+ // is post-prune.
237
+ const poolPaths = new Set();
238
+ try {
239
+ const entries = (worktreePool.readPool() || { entries: [] }).entries || [];
240
+ for (const e of entries) {
241
+ if (e && e.path) poolPaths.add(worktreePool._normalizePath(e.path));
242
+ }
243
+ } catch (_e) { /* pool readable optional */ }
244
+ if (Array.isArray(opts.extraPoolPaths)) {
245
+ for (const p of opts.extraPoolPaths) poolPaths.add(worktreePool._normalizePath(p));
246
+ }
247
+
248
+ // Cross-project live dir names: compute the expected wtDirName for every
249
+ // active/pending dispatch using THAT DISPATCH'S OWN project name. Each
250
+ // dir name's hash depends on (id, projectName, branchName), so a dir
251
+ // belonging to project B will not appear when we compute names for A.
252
+ // This prevents the cross-project sweep from yanking another project's
253
+ // active worktree when they share a parent `worktreeRoot`. (PR #2627
254
+ // review — Issue 2)
255
+ const globalLiveDirNames = new Set();
256
+ for (const d of [...(dispatchSnap.active || []), ...(dispatchSnap.pending || [])]) {
257
+ if (!d || !d.id || !d.meta || !d.meta.branch) continue;
258
+ const dispatchProject = (d.meta.project && typeof d.meta.project === 'string')
259
+ ? d.meta.project
260
+ : 'default';
261
+ try {
262
+ globalLiveDirNames.add(_buildWorktreeDirName({
263
+ dispatchId: d.id,
264
+ projectName: dispatchProject,
265
+ branchName: d.meta.branch,
266
+ }));
267
+ } catch (_e) { /* defensive */ }
268
+ }
269
+
270
+ // Resolved managed-spawn cwds (per-cwd → resolved abs path). Used to
271
+ // reject GC of any dir that contains (or equals) a live service's cwd.
272
+ // (PR #2627 review — Issue 1)
273
+ const managedSpawnCwds = [];
274
+ try {
275
+ for (const rec of (_listManagedSpecs() || [])) {
276
+ if (!rec || typeof rec.cwd !== 'string' || rec.cwd.length === 0) continue;
277
+ try { managedSpawnCwds.push(path.resolve(rec.cwd)); }
278
+ catch (_e) { /* malformed cwd — skip */ }
279
+ }
280
+ } catch (_e) { /* optional */ }
281
+
282
+ const result = { scanned: 0, kept: 0, evicted: 0, failed: 0, perProject: {} };
283
+
284
+ for (const project of projects) {
285
+ if (!project || !project.localPath) continue;
286
+ let rootDir;
287
+ try { rootDir = path.resolve(String(project.localPath)); } catch { continue; }
288
+ let rootExists = false;
289
+ try { rootExists = _fs.existsSync(rootDir); } catch { rootExists = false; }
290
+ if (!rootExists) continue;
291
+ const wtParent = path.resolve(rootDir, worktreeRootRel);
292
+ let parentExists = false;
293
+ try { parentExists = _fs.existsSync(wtParent); } catch { parentExists = false; }
294
+ if (!parentExists) continue;
295
+
296
+ let entries;
297
+ try { entries = _fs.readdirSync(wtParent, { withFileTypes: true }); }
298
+ catch (readErr) {
299
+ log('warn', `worktree-gc: readdir failed for ${wtParent}: ${readErr.message}`);
300
+ continue;
301
+ }
302
+
303
+ const projStats = { scanned: 0, kept: 0, evicted: 0, failed: 0 };
304
+ for (const ent of entries) {
305
+ if (!ent || (typeof ent.isDirectory === 'function' && !ent.isDirectory())) continue;
306
+ const name = ent.name || ent;
307
+ if (typeof name !== 'string' || name.length === 0) continue;
308
+ projStats.scanned++;
309
+ result.scanned++;
310
+ const wtPath = path.join(wtParent, name);
311
+
312
+ // 1. Live-dispatch protection (cross-project safe).
313
+ if (globalLiveDirNames.has(name)) {
314
+ projStats.kept++; result.kept++;
315
+ continue;
316
+ }
317
+ // 2. Pool membership protection.
318
+ const normPath = worktreePool._normalizePath(wtPath);
319
+ if (poolPaths.has(normPath)) {
320
+ projStats.kept++; result.kept++;
321
+ continue;
322
+ }
323
+ // 3. managed_spawn cwd anchor protection.
324
+ if (managedSpawnCwds.length > 0) {
325
+ const wtPathNorm = path.resolve(wtPath);
326
+ const wtPrefix = wtPathNorm + path.sep;
327
+ let anchored = false;
328
+ for (const cwd of managedSpawnCwds) {
329
+ if (cwd === wtPathNorm || cwd.startsWith(wtPrefix)) { anchored = true; break; }
330
+ }
331
+ if (anchored) {
332
+ projStats.kept++; result.kept++;
333
+ continue;
334
+ }
335
+ }
336
+
337
+ try {
338
+ const removed = _removeWorktree(wtPath, rootDir, wtParent);
339
+ if (removed) {
340
+ projStats.evicted++; result.evicted++;
341
+ log('info', `worktree-gc: boot-evicted orphan ${name} for project ${project.name || 'default'}`);
342
+ } else {
343
+ projStats.failed++; result.failed++;
344
+ log('warn', `worktree-gc: boot-evict returned false for ${wtPath}`);
345
+ }
346
+ } catch (rmErr) {
347
+ projStats.failed++; result.failed++;
348
+ log('warn', `worktree-gc: boot-evict threw for ${wtPath}: ${rmErr.message}`);
349
+ }
350
+ }
351
+ result.perProject[project.name || rootDir] = projStats;
352
+ }
353
+ return result;
354
+ }
355
+
356
+ module.exports = {
357
+ shouldGcDispatchWorktree,
358
+ gcDispatchWorktreeIfOrphan,
359
+ pruneOrphanWorktrees,
360
+ };
package/engine.js CHANGED
@@ -2600,9 +2600,15 @@ async function spawnAgent(dispatchItem, config) {
2600
2600
  // Capacity rejected — drop any stale entry so cleanup can reap normally.
2601
2601
  worktreePool.evictEntry(worktreePath, 'capacity-rejected');
2602
2602
  }
2603
+ // W-mpbqhstz001lf518: 'inserted'/'flipped' both leave the pool
2604
+ // owning the worktree on disk; the dispatch-end GC below queries
2605
+ // `worktreePool.isPoolMember(worktreePath)` to honor that.
2603
2606
  } catch (returnErr) {
2604
2607
  log('warn', `worktree-pool: return failed for ${worktreePath}: ${returnErr.message} — evicting from pool`);
2605
2608
  worktreePool.evictEntry(worktreePath, 'return-git-failed');
2609
+ // After eviction, the pool no longer owns this worktree — the
2610
+ // dispatch-end GC below will pick it up via the isPoolMember
2611
+ // check (which now correctly returns false).
2606
2612
  }
2607
2613
  } else if (_keepPidsAlive || _managedSpawnAlive) {
2608
2614
  // Skip the pool — the worktree is in use by left-running processes
@@ -2613,6 +2619,42 @@ async function spawnAgent(dispatchItem, config) {
2613
2619
  }
2614
2620
  }
2615
2621
 
2622
+ // W-mpbqhstz001lf518 — dispatch-end orphan worktree GC (closes #2627).
2623
+ // Runs AFTER all sidecar reads (keep_processes / managed_spawn
2624
+ // acceptance + healthcheck) and AFTER the pool-return block has settled
2625
+ // its decision, BEFORE completeDispatch finalizes the dispatch. The GC
2626
+ // decision queries `worktreePool.isPoolMember(worktreePath)` directly
2627
+ // — that on-disk state is authoritative over the in-memory
2628
+ // borrowedFromPool/returnedToPool flags because the pool can evict the
2629
+ // entry mid-dispatch (e.g. when returnToPool throws). We also pass
2630
+ // `managedSpawnSpawnedCount` so a spawn batch where recordManagedBatch
2631
+ // silently failed still protects the cwd from GC. Without this GC, a
2632
+ // crashed dispatch leaks `work/W-<id>` worktree entries that block
2633
+ // every retry with `fatal: 'work/W-<id>' is already used by worktree
2634
+ // at …` until the 2-hour age sweep in cleanup.js eventually catches up.
2635
+ if (worktreePath && rootDir && branchName) {
2636
+ try {
2637
+ const _wgc = require('./engine/worktree-gc');
2638
+ const _wtRoot = path.resolve(rootDir, engineConfig.worktreeRoot || ENGINE_DEFAULTS.worktreeRoot);
2639
+ const _gcResult = _wgc.gcDispatchWorktreeIfOrphan({
2640
+ worktreePath,
2641
+ gitRoot: rootDir,
2642
+ worktreeRoot: _wtRoot,
2643
+ agentId,
2644
+ managedSpawnSpawnedCount: Array.isArray(managedSpawnSpawned) ? managedSpawnSpawned.length : 0,
2645
+ log,
2646
+ });
2647
+ if (_gcResult.outcome === 'gc') {
2648
+ log('info', `worktree-gc: dispatch-end GC of ${path.basename(worktreePath)} (id=${id}, agent=${agentId})`);
2649
+ } else if (_gcResult.outcome === 'gc-failed') {
2650
+ log('warn', `worktree-gc: dispatch-end GC failed for ${worktreePath} (id=${id}): ${_gcResult.reason}`);
2651
+ }
2652
+ // 'skip' is silent — every skip reason is expected behavior.
2653
+ } catch (gcErr) {
2654
+ log('warn', `worktree-gc: dispatch-end check threw for ${worktreePath} (id=${id}): ${gcErr.message}`);
2655
+ }
2656
+ }
2657
+
2616
2658
  completeDispatch(id, effectiveResult, errorReason, resultSummary, completeOpts);
2617
2659
 
2618
2660
  // W-mpbpexrg00110661 — surface managed-spawn partial-healthcheck failures
@@ -4462,11 +4504,21 @@ function discoverFromWorkItems(config, project) {
4462
4504
  if (item.depends_on && item.depends_on.length > 0) {
4463
4505
  const depStatus = areDependenciesMet(item, config);
4464
4506
  if (depStatus === 'failed' && !isItemCompleted(item)) {
4465
- item.status = WI_STATUS.FAILED;
4466
- item.failReason = 'Dependency failed cannot proceed';
4467
- delete item._pendingReason;
4468
- log('warn', `Marking ${item.id} as failed: dependency failed (plan: ${item.sourcePlan})`);
4469
- needsWrite = true;
4507
+ // W-mpbqhstz001lf518 (closes #2627) — do NOT cascade-fail the
4508
+ // dependent. Previously we set status=FAILED here, which burned the
4509
+ // dependent's _retryCount every time the dep was auto-retried and
4510
+ // failed again, and left the dependent stuck at `failed` requiring
4511
+ // manual intervention even after the dep eventually succeeded.
4512
+ //
4513
+ // New behavior: surface `_pendingReason: 'dependency_failed'` and
4514
+ // skip dispatch this tick. The dependent stays PENDING so when the
4515
+ // dep flips to done (e.g. via the stall-recovery sweep auto-retry
4516
+ // at engine.js:5577), the dependent picks up naturally on the next
4517
+ // dispatch tick. No _retryCount increment, no FAILED mark.
4518
+ if (item._pendingReason !== 'dependency_failed') {
4519
+ item._pendingReason = 'dependency_failed';
4520
+ needsWrite = true;
4521
+ }
4470
4522
  continue;
4471
4523
  }
4472
4524
  if (!depStatus) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.1981",
3
+ "version": "0.1.1983",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"