@yemi33/minions 0.1.1982 → 0.1.1983
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/render-managed.js +13 -3
- package/engine/cli.js +32 -0
- package/engine/worktree-gc.js +360 -0
- package/engine.js +57 -5
- package/package.json +1 -1
|
@@ -13,6 +13,12 @@
|
|
|
13
13
|
// about why the agent live tab moved off SSE).
|
|
14
14
|
|
|
15
15
|
let _managedProcessesEtag = null;
|
|
16
|
+
// Cache the last successfully-fetched items so 304 (no-op) ticks can still
|
|
17
|
+
// re-render the time-derived columns (uptime, TTL) against the current
|
|
18
|
+
// Date.now(). The server payload only changes on spawn/kill/restart, so
|
|
19
|
+
// without this cache the strings would freeze between events
|
|
20
|
+
// (W-mpbt3wfs002l3a09).
|
|
21
|
+
let _managedProcessesLastItems = null;
|
|
16
22
|
let _managedLogES = null;
|
|
17
23
|
|
|
18
24
|
function _fmtAgo(ms) {
|
|
@@ -84,23 +90,27 @@ async function renderManagedProcesses() {
|
|
|
84
90
|
if (!root) return;
|
|
85
91
|
let items;
|
|
86
92
|
let fetchErr = null;
|
|
87
|
-
let notModified = false;
|
|
88
93
|
try {
|
|
89
94
|
const headers = {};
|
|
90
95
|
if (_managedProcessesEtag) headers['If-None-Match'] = _managedProcessesEtag;
|
|
91
96
|
const res = await fetch('/api/managed-processes', { headers });
|
|
92
97
|
if (res.status === 304) {
|
|
93
|
-
|
|
98
|
+
// Server payload unchanged — reuse cached items so uptime/TTL columns
|
|
99
|
+
// recompute against the current Date.now(). Defensive null check: a
|
|
100
|
+
// first-tick 304 shouldn't happen (no prior ETag => no If-None-Match)
|
|
101
|
+
// but if it does, skip this frame rather than wiping the panel.
|
|
102
|
+
if (!_managedProcessesLastItems) return;
|
|
103
|
+
items = _managedProcessesLastItems;
|
|
94
104
|
} else {
|
|
95
105
|
const et = res.headers.get('ETag');
|
|
96
106
|
if (et) _managedProcessesEtag = et;
|
|
97
107
|
const data = await res.json();
|
|
98
108
|
items = (data && Array.isArray(data.items)) ? data.items : [];
|
|
109
|
+
_managedProcessesLastItems = items;
|
|
99
110
|
}
|
|
100
111
|
} catch (e) {
|
|
101
112
|
fetchErr = e;
|
|
102
113
|
}
|
|
103
|
-
if (notModified) return; // nothing changed since last render
|
|
104
114
|
let html;
|
|
105
115
|
if (fetchErr) {
|
|
106
116
|
if (countEl) countEl.textContent = '?';
|
package/engine/cli.js
CHANGED
|
@@ -818,6 +818,38 @@ const commands = {
|
|
|
818
818
|
}
|
|
819
819
|
})();
|
|
820
820
|
|
|
821
|
+
// W-mpbqhstz001lf518 — Boot-time reconcile of orphan worktrees (closes #2627).
// Hands every configured project to worktree-gc's pruneOrphanWorktrees, which
// scans `<root>/<worktreeRoot>/` and removes subdirs that are neither claimed
// by an active/pending dispatch nor owned by the worktree pool. Leftovers from
// a prior crash otherwise block retries with
// `fatal: 'work/W-<id>' is already used by worktree at …`.
// Ordering matters: this must run AFTER worktreePool.pruneStale so the pool
// snapshot used for cross-referencing has already settled.
(function startupReconcileOrphanWorktrees() {
  try {
    const sharedMod = require('./shared');
    const worktreeGc = require('./worktree-gc');
    const queries = require('./queries');
    const dispatchSnap = queries.getDispatch();
    const cfg = queries.getConfig();
    const projects = sharedMod.getProjects(cfg);
    // Nothing configured — nothing to scan.
    if (projects.length === 0) return;

    const configuredRoot = cfg?.engine?.worktreeRoot;
    const worktreeRootRel = configuredRoot || sharedMod.ENGINE_DEFAULTS.worktreeRoot;
    const result = worktreeGc.pruneOrphanWorktrees({
      projects,
      dispatchSnap,
      worktreeRootRel,
      log: (lvl, msg) => e.log(lvl, msg),
    });

    // Stay quiet on a clean boot; only report when something was evicted or failed.
    const somethingHappened = result.evicted > 0 || result.failed > 0;
    if (somethingHappened) {
      const failedNote = result.failed ? `, ${result.failed} failed` : '';
      console.log(` Worktree boot reconcile: evicted ${result.evicted} orphan worktree(s)${failedNote} (scanned ${result.scanned}, kept ${result.kept})`);
    }
  } catch (err) {
    // Best effort: a failed reconcile must not prevent the engine from booting.
    e.log('warn', `Worktree boot reconcile failed: ${err.message}`);
  }
})();
|
|
852
|
+
|
|
821
853
|
// W-mp7gox8w000n8936 — Boot reconcile for kb-sweep state: clear stale
|
|
822
854
|
// `in-flight`/`starting` records left over from a crashed runner (or a
|
|
823
855
|
// legacy pre-pid runner). Without this, the record sits there clogging
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
// engine/worktree-gc.js — orphan-worktree garbage collection
|
|
2
|
+
//
|
|
3
|
+
// W-mpbqhstz001lf518 (closes yemi33/minions#2627). Two callers, one shared
|
|
4
|
+
// decision surface:
|
|
5
|
+
//
|
|
6
|
+
// 1. On dispatch end (engine.js onAgentClose): remove the dispatch's
|
|
7
|
+
// worktree once the agent process has truly exited and all sidecar
|
|
8
|
+
// reads (keep_processes, managed_spawn) are done. Skip when the
|
|
9
|
+
// worktree was borrowed from the pool, returned to the pool, or is
|
|
10
|
+
// anchored by live keep_processes PIDs / managed_spawn cwds.
|
|
11
|
+
//
|
|
12
|
+
// 2. On engine boot (engine/cli.js): scan each project's worktree root
|
|
13
|
+
// for orphan W-* dirs whose dispatch is not in dispatch.json (active
|
|
14
|
+
// or pending) AND that are not pool entries. Such dirs are leftovers
|
|
15
|
+
// from a prior crash that block retries with
|
|
16
|
+
// `fatal: 'work/W-…' is already used by worktree at …`.
|
|
17
|
+
//
|
|
18
|
+
// All git ops route through `shared.removeWorktree` (which already runs
|
|
19
|
+
// `git worktree remove --force` + `git worktree prune`). All git ops run
|
|
20
|
+
// OUTSIDE `mutateJsonFileLocked` callbacks — per the lock-callback
|
|
21
|
+
// contract in CLAUDE.md ("do not run network calls, git commands, process
|
|
22
|
+
// kills, or `await` while holding a file lock").
|
|
23
|
+
|
|
24
|
+
const fs = require('fs');
|
|
25
|
+
const path = require('path');
|
|
26
|
+
|
|
27
|
+
const shared = require('./shared');
|
|
28
|
+
const worktreePool = require('./worktree-pool');
|
|
29
|
+
|
|
30
|
+
// Cached handle to ./keep-process-sweep; stays null until the first lookup.
let keepProcessSweep = null;

/**
 * Return the keep-process-sweep module, requiring it on first use and
 * reusing the cached handle afterwards.
 * NOTE(review): the deferred require is presumably there to avoid a load-order
 * cycle between engine modules — confirm before hoisting it to the top.
 */
function _keepProcessSweep() {
  if (keepProcessSweep) return keepProcessSweep;
  keepProcessSweep = require('./keep-process-sweep');
  return keepProcessSweep;
}
|
|
35
|
+
|
|
36
|
+
// Cached handle to ./managed-spawn; stays null until the first lookup.
let managedSpawn = null;

/**
 * Return the managed-spawn module, requiring it on first use and reusing
 * the cached handle afterwards.
 * NOTE(review): the deferred require is presumably there to avoid a load-order
 * cycle between engine modules — confirm before hoisting it to the top.
 */
function _managedSpawn() {
  if (managedSpawn) return managedSpawn;
  managedSpawn = require('./managed-spawn');
  return managedSpawn;
}
|
|
41
|
+
|
|
42
|
+
const _noopLog = () => {};
|
|
43
|
+
|
|
44
|
+
/**
 * Decide whether a dispatch-end worktree should be GC'd.
 *
 * Inputs are explicit so callers can supply mocks. Returns
 *   { gc: boolean, reason: string }
 * where `reason` is a short tag suitable for logging.
 *
 * Skip rules, in the order they are evaluated (first hit wins):
 *   - no-worktree-path        — worktreePath empty / null
 *   - missing-on-disk         — worktreePath does not exist (already gone)
 *   - force-skip              — caller passed `forceSkip: true`
 *   - pool-member             — worktree-pool currently claims this path
 *   - managed-spawn-anchored  — `managedSpawnSpawnedCount > 0`: this dispatch
 *                               spawned services itself (in-memory hint, used
 *                               even when the state file was never written)
 *   - keep-processes-anchored — the agent has live keep_processes PIDs
 *   - managed-spawn-anchored  — some managed_spawn spec has its cwd inside
 *                               (or equal to) the worktree
 *
 * Otherwise: `{ gc: true, reason: 'orphan' }`.
 *
 * The pool / keep_processes / managed_spawn readers are best-effort: if one
 * throws, that rule simply grants no protection and evaluation continues.
 *
 * @param {object} [opts]
 * @param {string} [opts.worktreePath] Path of the worktree under consideration.
 * @param {string} [opts.agentId=''] Agent whose keep_processes anchors are checked; empty skips that rule.
 * @param {boolean} [opts.forceSkip=false] Forcibly protect the worktree regardless of pool/anchor state.
 * @param {number} [opts.managedSpawnSpawnedCount=0] Number of services this dispatch spawned.
 * @param {Function} [opts.isPoolMember] Test override: `(path) => boolean`.
 * @param {Function} [opts.listManagedSpecs] Test override: `() => Array<{cwd: string}>`.
 * @param {Function} [opts.getActiveAnchorPidsForAgent] Test override: `(agentId) => { pids }`,
 *   where `pids` is a Set (or an array) of live PIDs.
 * @param {Function} [opts.fileExists] Test override: `(path) => boolean`.
 * @returns {{gc: boolean, reason: string}}
 */
function shouldGcDispatchWorktree(opts) {
  const {
    worktreePath,
    agentId = '',
    // Belt+suspenders: defensive override when caller wants to forcibly
    // protect this worktree regardless of pool/anchor state.
    forceSkip = false,
    // In-memory hint from the dispatch-end caller: when > 0, services were
    // spawned by THIS dispatch but recordManagedBatch may have failed to
    // persist them; skip GC unconditionally to prevent yanking their cwd.
    managedSpawnSpawnedCount = 0,
    // Test injection points — production callers leave undefined.
    isPoolMember = null,
    listManagedSpecs = null,
    getActiveAnchorPidsForAgent = null,
    fileExists = null,
  } = opts || {};

  if (!worktreePath) return { gc: false, reason: 'no-worktree-path' };
  const exists = typeof fileExists === 'function'
    ? !!fileExists(worktreePath)
    : fs.existsSync(worktreePath);
  if (!exists) return { gc: false, reason: 'missing-on-disk' };
  if (forceSkip) return { gc: false, reason: 'force-skip' };

  // Pool ownership is authoritative: ask the pool whether ANY entry currently
  // claims this path. Subsumes the legacy in-memory borrowedFromPool /
  // returnedToPool flags and covers the pool-return-throw eviction case.
  try {
    const poolOwns = typeof isPoolMember === 'function'
      ? isPoolMember
      : (wt) => worktreePool.isPoolMember(wt);
    if (poolOwns(worktreePath)) return { gc: false, reason: 'pool-member' };
  } catch (_e) { /* pool readable optional */ }

  // In-memory managed-spawn signal: this dispatch's own spawn batch. The cwd
  // of those services lives inside this worktree, so even if the state-file
  // persistence failed we must protect them.
  if (Number(managedSpawnSpawnedCount) > 0) {
    return { gc: false, reason: 'managed-spawn-anchored' };
  }

  // keep_processes anchor check: any live PID for this agent → skip.
  if (agentId) {
    try {
      // Call through the module (not a detached method reference) so the
      // reader keeps its `this`; a lost receiver would throw into the catch
      // below and silently drop this protection.
      const readAnchors = typeof getActiveAnchorPidsForAgent === 'function'
        ? getActiveAnchorPidsForAgent
        : (id) => _keepProcessSweep().getActiveAnchorPidsForAgent(id);
      const res = readAnchors(agentId);
      const pids = res && res.pids;
      // Accept both Set-like (`size`) and array-like (`length`) collections so
      // a reader returning a plain array is not mistaken for "no anchors".
      const liveCount = pids
        ? (typeof pids.size === 'number' ? pids.size : pids.length)
        : 0;
      if (liveCount > 0) {
        return { gc: false, reason: 'keep-processes-anchored' };
      }
    } catch (_e) { /* sidecar reader optional */ }
  }

  // managed_spawn anchor check: any spec (from any agent) whose cwd is == or
  // under the worktree → skip. Covers the "completed dispatch with
  // long-running managed-spawn services" case.
  try {
    const readSpecs = typeof listManagedSpecs === 'function'
      ? listManagedSpecs
      : () => _managedSpawn().listManagedSpecs();
    const specs = readSpecs() || [];
    const wtAbs = path.resolve(worktreePath);
    // Trailing separator so `/wt/W-1` does not match a sibling `/wt/W-10`.
    const wtPrefix = wtAbs + path.sep;
    for (const rec of specs) {
      if (!rec || typeof rec.cwd !== 'string' || rec.cwd.length === 0) continue;
      let cwdAbs;
      try { cwdAbs = path.resolve(rec.cwd); } catch { continue; }
      if (cwdAbs === wtAbs || cwdAbs.startsWith(wtPrefix)) {
        return { gc: false, reason: 'managed-spawn-anchored' };
      }
    }
  } catch (_e) { /* sidecar reader optional */ }

  return { gc: true, reason: 'orphan' };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Run shouldGcDispatchWorktree and, if the worktree is eligible, remove it
 * via `shared.removeWorktree` (or the injected `removeWorktree`).
 *
 * `opts` is forwarded unchanged to shouldGcDispatchWorktree, so every option
 * that function accepts (agentId, forceSkip, managedSpawnSpawnedCount, test
 * injection points, …) may be supplied here as well.
 *
 * @param {object} [opts]
 * @param {string} [opts.worktreePath] Worktree to consider.
 * @param {string} [opts.gitRoot] Passed to the remover as its second argument.
 * @param {string} [opts.worktreeRoot] Passed to the remover as its third argument.
 * @param {Function} [opts.log] `log(level, msg)`; any non-function value disables logging.
 * @param {Function} [opts.removeWorktree] Test override for `shared.removeWorktree`.
 * @returns {{outcome: 'gc'|'skip'|'gc-failed', reason: string, removed: boolean}}
 *   - outcome 'skip': `reason` is the tag from shouldGcDispatchWorktree, or
 *     'no-git-root' when gitRoot / worktreeRoot is missing
 *   - outcome 'gc': `reason` is the decision tag ('orphan'), `removed` is true
 *   - outcome 'gc-failed': `reason` is 'remove-failed' (remover returned falsy)
 *     or 'remove-threw' (remover threw)
 */
function gcDispatchWorktreeIfOrphan(opts) {
  const {
    worktreePath,
    gitRoot,
    worktreeRoot,
    log = null,
    removeWorktree = null,
  } = opts || {};
  // A destructuring default only covers `undefined`; type-check instead so an
  // explicit `log: null` cannot crash the GC path.
  const emit = typeof log === 'function' ? log : _noopLog;
  // Thrown values are not guaranteed to be Error objects (or even non-null).
  const describeErr = (err) => (err && err.message ? err.message : String(err));

  const decision = shouldGcDispatchWorktree(opts);
  if (!decision.gc) {
    return { outcome: 'skip', reason: decision.reason, removed: false };
  }
  if (!gitRoot || !worktreeRoot) {
    return { outcome: 'skip', reason: 'no-git-root', removed: false };
  }

  const removeFn = typeof removeWorktree === 'function' ? removeWorktree : shared.removeWorktree;
  let removed;
  // Only the removal itself is guarded, so a logging failure after a
  // successful removal is never misreported as 'remove-threw'.
  try {
    removed = removeFn(worktreePath, gitRoot, worktreeRoot);
  } catch (gcErr) {
    emit('warn', `worktree-gc: dispatch-end remove threw for ${worktreePath}: ${describeErr(gcErr)}`);
    return { outcome: 'gc-failed', reason: 'remove-threw', removed: false };
  }

  if (removed) {
    emit('info', `worktree-gc: dispatch-end removed ${path.basename(worktreePath)}`);
    return { outcome: 'gc', reason: decision.reason, removed: true };
  }
  emit('warn', `worktree-gc: dispatch-end remove returned false for ${worktreePath}`);
  return { outcome: 'gc-failed', reason: 'remove-failed', removed: false };
}
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Walk each project's worktree root and evict orphan W-* dirs.
|
|
190
|
+
*
|
|
191
|
+
* - `projects` — array of `{ name, localPath }`
|
|
192
|
+
* - `dispatchSnap` — `{ active: [...], pending: [...] }` (work-item dispatch.json)
|
|
193
|
+
* - `worktreeRootRel` — relative dir under each project's localPath (default '../worktrees')
|
|
194
|
+
* - `log` — log(level, msg) function (optional)
|
|
195
|
+
* - `fs`/`removeWorktree`/`buildWorktreeDirName`/`listManagedSpecs` — test injection points
|
|
196
|
+
*
|
|
197
|
+
* Protection rules (any hit → keep):
|
|
198
|
+
* 1. Dir name matches the wtDirName of any active/pending dispatch in
|
|
199
|
+
* `dispatchSnap`, computed using THAT DISPATCH'S OWN project name.
|
|
200
|
+
* We iterate every (dispatch, project) combo so two projects sharing
|
|
201
|
+
* a parent `worktreeRoot` (monorepo-style) don't cross-evict each
|
|
202
|
+
* other's dispatches. (PR #2627 review — Issue 2)
|
|
203
|
+
* 2. Normalized abs path is currently in any `worktreePool` entry (idle
|
|
204
|
+
* or borrowed) — see `extraPoolPaths` for the test override.
|
|
205
|
+
* 3. Worktree contains (or equals) the cwd of any live managed_spawn
|
|
206
|
+
* spec — matches `engine/cleanup.js`'s tick-time protection so a
|
|
207
|
+
* completed dispatch with long-running services survives engine
|
|
208
|
+
* restart. (PR #2627 review — Issue 1)
|
|
209
|
+
*
|
|
210
|
+
* Returns `{ scanned, kept, evicted, failed, perProject }`.
|
|
211
|
+
*/
|
|
212
|
+
function pruneOrphanWorktrees(opts) {
|
|
213
|
+
opts = opts || {};
|
|
214
|
+
const projects = Array.isArray(opts.projects) ? opts.projects : [];
|
|
215
|
+
const dispatchSnap = opts.dispatchSnap || { active: [], pending: [] };
|
|
216
|
+
const worktreeRootRel = typeof opts.worktreeRootRel === 'string' && opts.worktreeRootRel.length > 0
|
|
217
|
+
? opts.worktreeRootRel
|
|
218
|
+
: (shared.ENGINE_DEFAULTS && shared.ENGINE_DEFAULTS.worktreeRoot) || '../worktrees';
|
|
219
|
+
const log = typeof opts.log === 'function' ? opts.log : _noopLog;
|
|
220
|
+
const _fs = opts.fs || fs;
|
|
221
|
+
const _removeWorktree = typeof opts.removeWorktree === 'function'
|
|
222
|
+
? opts.removeWorktree
|
|
223
|
+
: shared.removeWorktree;
|
|
224
|
+
const _buildWorktreeDirName = typeof opts.buildWorktreeDirName === 'function'
|
|
225
|
+
? opts.buildWorktreeDirName
|
|
226
|
+
: shared.buildWorktreeDirName;
|
|
227
|
+
const _listManagedSpecs = typeof opts.listManagedSpecs === 'function'
|
|
228
|
+
? opts.listManagedSpecs
|
|
229
|
+
: (() => {
|
|
230
|
+
try { return _managedSpawn().listManagedSpecs(); }
|
|
231
|
+
catch (_e) { return []; }
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
// Pool-known paths (idle + borrowed + any stale entry still on disk). The
|
|
235
|
+
// worktree-pool's own pruneStale() runs FIRST in cli boot so this snapshot
|
|
236
|
+
// is post-prune.
|
|
237
|
+
const poolPaths = new Set();
|
|
238
|
+
try {
|
|
239
|
+
const entries = (worktreePool.readPool() || { entries: [] }).entries || [];
|
|
240
|
+
for (const e of entries) {
|
|
241
|
+
if (e && e.path) poolPaths.add(worktreePool._normalizePath(e.path));
|
|
242
|
+
}
|
|
243
|
+
} catch (_e) { /* pool readable optional */ }
|
|
244
|
+
if (Array.isArray(opts.extraPoolPaths)) {
|
|
245
|
+
for (const p of opts.extraPoolPaths) poolPaths.add(worktreePool._normalizePath(p));
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// Cross-project live dir names: compute the expected wtDirName for every
|
|
249
|
+
// active/pending dispatch using THAT DISPATCH'S OWN project name. Each
|
|
250
|
+
// dir name's hash depends on (id, projectName, branchName), so a dir
|
|
251
|
+
// belonging to project B will not appear when we compute names for A.
|
|
252
|
+
// This prevents the cross-project sweep from yanking another project's
|
|
253
|
+
// active worktree when they share a parent `worktreeRoot`. (PR #2627
|
|
254
|
+
// review — Issue 2)
|
|
255
|
+
const globalLiveDirNames = new Set();
|
|
256
|
+
for (const d of [...(dispatchSnap.active || []), ...(dispatchSnap.pending || [])]) {
|
|
257
|
+
if (!d || !d.id || !d.meta || !d.meta.branch) continue;
|
|
258
|
+
const dispatchProject = (d.meta.project && typeof d.meta.project === 'string')
|
|
259
|
+
? d.meta.project
|
|
260
|
+
: 'default';
|
|
261
|
+
try {
|
|
262
|
+
globalLiveDirNames.add(_buildWorktreeDirName({
|
|
263
|
+
dispatchId: d.id,
|
|
264
|
+
projectName: dispatchProject,
|
|
265
|
+
branchName: d.meta.branch,
|
|
266
|
+
}));
|
|
267
|
+
} catch (_e) { /* defensive */ }
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// Resolved managed-spawn cwds (per-cwd → resolved abs path). Used to
|
|
271
|
+
// reject GC of any dir that contains (or equals) a live service's cwd.
|
|
272
|
+
// (PR #2627 review — Issue 1)
|
|
273
|
+
const managedSpawnCwds = [];
|
|
274
|
+
try {
|
|
275
|
+
for (const rec of (_listManagedSpecs() || [])) {
|
|
276
|
+
if (!rec || typeof rec.cwd !== 'string' || rec.cwd.length === 0) continue;
|
|
277
|
+
try { managedSpawnCwds.push(path.resolve(rec.cwd)); }
|
|
278
|
+
catch (_e) { /* malformed cwd — skip */ }
|
|
279
|
+
}
|
|
280
|
+
} catch (_e) { /* optional */ }
|
|
281
|
+
|
|
282
|
+
const result = { scanned: 0, kept: 0, evicted: 0, failed: 0, perProject: {} };
|
|
283
|
+
|
|
284
|
+
for (const project of projects) {
|
|
285
|
+
if (!project || !project.localPath) continue;
|
|
286
|
+
let rootDir;
|
|
287
|
+
try { rootDir = path.resolve(String(project.localPath)); } catch { continue; }
|
|
288
|
+
let rootExists = false;
|
|
289
|
+
try { rootExists = _fs.existsSync(rootDir); } catch { rootExists = false; }
|
|
290
|
+
if (!rootExists) continue;
|
|
291
|
+
const wtParent = path.resolve(rootDir, worktreeRootRel);
|
|
292
|
+
let parentExists = false;
|
|
293
|
+
try { parentExists = _fs.existsSync(wtParent); } catch { parentExists = false; }
|
|
294
|
+
if (!parentExists) continue;
|
|
295
|
+
|
|
296
|
+
let entries;
|
|
297
|
+
try { entries = _fs.readdirSync(wtParent, { withFileTypes: true }); }
|
|
298
|
+
catch (readErr) {
|
|
299
|
+
log('warn', `worktree-gc: readdir failed for ${wtParent}: ${readErr.message}`);
|
|
300
|
+
continue;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
const projStats = { scanned: 0, kept: 0, evicted: 0, failed: 0 };
|
|
304
|
+
for (const ent of entries) {
|
|
305
|
+
if (!ent || (typeof ent.isDirectory === 'function' && !ent.isDirectory())) continue;
|
|
306
|
+
const name = ent.name || ent;
|
|
307
|
+
if (typeof name !== 'string' || name.length === 0) continue;
|
|
308
|
+
projStats.scanned++;
|
|
309
|
+
result.scanned++;
|
|
310
|
+
const wtPath = path.join(wtParent, name);
|
|
311
|
+
|
|
312
|
+
// 1. Live-dispatch protection (cross-project safe).
|
|
313
|
+
if (globalLiveDirNames.has(name)) {
|
|
314
|
+
projStats.kept++; result.kept++;
|
|
315
|
+
continue;
|
|
316
|
+
}
|
|
317
|
+
// 2. Pool membership protection.
|
|
318
|
+
const normPath = worktreePool._normalizePath(wtPath);
|
|
319
|
+
if (poolPaths.has(normPath)) {
|
|
320
|
+
projStats.kept++; result.kept++;
|
|
321
|
+
continue;
|
|
322
|
+
}
|
|
323
|
+
// 3. managed_spawn cwd anchor protection.
|
|
324
|
+
if (managedSpawnCwds.length > 0) {
|
|
325
|
+
const wtPathNorm = path.resolve(wtPath);
|
|
326
|
+
const wtPrefix = wtPathNorm + path.sep;
|
|
327
|
+
let anchored = false;
|
|
328
|
+
for (const cwd of managedSpawnCwds) {
|
|
329
|
+
if (cwd === wtPathNorm || cwd.startsWith(wtPrefix)) { anchored = true; break; }
|
|
330
|
+
}
|
|
331
|
+
if (anchored) {
|
|
332
|
+
projStats.kept++; result.kept++;
|
|
333
|
+
continue;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
try {
|
|
338
|
+
const removed = _removeWorktree(wtPath, rootDir, wtParent);
|
|
339
|
+
if (removed) {
|
|
340
|
+
projStats.evicted++; result.evicted++;
|
|
341
|
+
log('info', `worktree-gc: boot-evicted orphan ${name} for project ${project.name || 'default'}`);
|
|
342
|
+
} else {
|
|
343
|
+
projStats.failed++; result.failed++;
|
|
344
|
+
log('warn', `worktree-gc: boot-evict returned false for ${wtPath}`);
|
|
345
|
+
}
|
|
346
|
+
} catch (rmErr) {
|
|
347
|
+
projStats.failed++; result.failed++;
|
|
348
|
+
log('warn', `worktree-gc: boot-evict threw for ${wtPath}: ${rmErr.message}`);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
result.perProject[project.name || rootDir] = projStats;
|
|
352
|
+
}
|
|
353
|
+
return result;
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
module.exports = {
|
|
357
|
+
shouldGcDispatchWorktree,
|
|
358
|
+
gcDispatchWorktreeIfOrphan,
|
|
359
|
+
pruneOrphanWorktrees,
|
|
360
|
+
};
|
package/engine.js
CHANGED
|
@@ -2600,9 +2600,15 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
2600
2600
|
// Capacity rejected — drop any stale entry so cleanup can reap normally.
|
|
2601
2601
|
worktreePool.evictEntry(worktreePath, 'capacity-rejected');
|
|
2602
2602
|
}
|
|
2603
|
+
// W-mpbqhstz001lf518: 'inserted'/'flipped' both leave the pool
|
|
2604
|
+
// owning the worktree on disk; the dispatch-end GC below queries
|
|
2605
|
+
// `worktreePool.isPoolMember(worktreePath)` to honor that.
|
|
2603
2606
|
} catch (returnErr) {
|
|
2604
2607
|
log('warn', `worktree-pool: return failed for ${worktreePath}: ${returnErr.message} — evicting from pool`);
|
|
2605
2608
|
worktreePool.evictEntry(worktreePath, 'return-git-failed');
|
|
2609
|
+
// After eviction, the pool no longer owns this worktree — the
|
|
2610
|
+
// dispatch-end GC below will pick it up via the isPoolMember
|
|
2611
|
+
// check (which now correctly returns false).
|
|
2606
2612
|
}
|
|
2607
2613
|
} else if (_keepPidsAlive || _managedSpawnAlive) {
|
|
2608
2614
|
// Skip the pool — the worktree is in use by left-running processes
|
|
@@ -2613,6 +2619,42 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
2613
2619
|
}
|
|
2614
2620
|
}
|
|
2615
2621
|
|
|
2622
|
+
// W-mpbqhstz001lf518 — dispatch-end orphan worktree GC (closes #2627).
|
|
2623
|
+
// Runs AFTER all sidecar reads (keep_processes / managed_spawn
|
|
2624
|
+
// acceptance + healthcheck) and AFTER the pool-return block has settled
|
|
2625
|
+
// its decision, BEFORE completeDispatch finalizes the dispatch. The GC
|
|
2626
|
+
// decision queries `worktreePool.isPoolMember(worktreePath)` directly
|
|
2627
|
+
// — that on-disk state is authoritative over the in-memory
|
|
2628
|
+
// borrowedFromPool/returnedToPool flags because the pool can evict the
|
|
2629
|
+
// entry mid-dispatch (e.g. when returnToPool throws). We also pass
|
|
2630
|
+
// `managedSpawnSpawnedCount` so a spawn batch where recordManagedBatch
|
|
2631
|
+
// silently failed still protects the cwd from GC. Without this GC, a
|
|
2632
|
+
// crashed dispatch leaks `work/W-<id>` worktree entries that block
|
|
2633
|
+
// every retry with `fatal: 'work/W-<id>' is already used by worktree
|
|
2634
|
+
// at …` until the 2-hour age sweep in cleanup.js eventually catches up.
|
|
2635
|
+
if (worktreePath && rootDir && branchName) {
|
|
2636
|
+
try {
|
|
2637
|
+
const _wgc = require('./engine/worktree-gc');
|
|
2638
|
+
const _wtRoot = path.resolve(rootDir, engineConfig.worktreeRoot || ENGINE_DEFAULTS.worktreeRoot);
|
|
2639
|
+
const _gcResult = _wgc.gcDispatchWorktreeIfOrphan({
|
|
2640
|
+
worktreePath,
|
|
2641
|
+
gitRoot: rootDir,
|
|
2642
|
+
worktreeRoot: _wtRoot,
|
|
2643
|
+
agentId,
|
|
2644
|
+
managedSpawnSpawnedCount: Array.isArray(managedSpawnSpawned) ? managedSpawnSpawned.length : 0,
|
|
2645
|
+
log,
|
|
2646
|
+
});
|
|
2647
|
+
if (_gcResult.outcome === 'gc') {
|
|
2648
|
+
log('info', `worktree-gc: dispatch-end GC of ${path.basename(worktreePath)} (id=${id}, agent=${agentId})`);
|
|
2649
|
+
} else if (_gcResult.outcome === 'gc-failed') {
|
|
2650
|
+
log('warn', `worktree-gc: dispatch-end GC failed for ${worktreePath} (id=${id}): ${_gcResult.reason}`);
|
|
2651
|
+
}
|
|
2652
|
+
// 'skip' is silent — every skip reason is expected behavior.
|
|
2653
|
+
} catch (gcErr) {
|
|
2654
|
+
log('warn', `worktree-gc: dispatch-end check threw for ${worktreePath} (id=${id}): ${gcErr.message}`);
|
|
2655
|
+
}
|
|
2656
|
+
}
|
|
2657
|
+
|
|
2616
2658
|
completeDispatch(id, effectiveResult, errorReason, resultSummary, completeOpts);
|
|
2617
2659
|
|
|
2618
2660
|
// W-mpbpexrg00110661 — surface managed-spawn partial-healthcheck failures
|
|
@@ -4462,11 +4504,21 @@ function discoverFromWorkItems(config, project) {
|
|
|
4462
4504
|
if (item.depends_on && item.depends_on.length > 0) {
|
|
4463
4505
|
const depStatus = areDependenciesMet(item, config);
|
|
4464
4506
|
if (depStatus === 'failed' && !isItemCompleted(item)) {
|
|
4465
|
-
|
|
4466
|
-
|
|
4467
|
-
|
|
4468
|
-
|
|
4469
|
-
|
|
4507
|
+
// W-mpbqhstz001lf518 (closes #2627) — do NOT cascade-fail the
|
|
4508
|
+
// dependent. Previously we set status=FAILED here, which burned the
|
|
4509
|
+
// dependent's _retryCount every time the dep was auto-retried and
|
|
4510
|
+
// failed again, and left the dependent stuck at `failed` requiring
|
|
4511
|
+
// manual intervention even after the dep eventually succeeded.
|
|
4512
|
+
//
|
|
4513
|
+
// New behavior: surface `_pendingReason: 'dependency_failed'` and
|
|
4514
|
+
// skip dispatch this tick. The dependent stays PENDING so when the
|
|
4515
|
+
// dep flips to done (e.g. via the stall-recovery sweep auto-retry
|
|
4516
|
+
// at engine.js:5577), the dependent picks up naturally on the next
|
|
4517
|
+
// dispatch tick. No _retryCount increment, no FAILED mark.
|
|
4518
|
+
if (item._pendingReason !== 'dependency_failed') {
|
|
4519
|
+
item._pendingReason = 'dependency_failed';
|
|
4520
|
+
needsWrite = true;
|
|
4521
|
+
}
|
|
4470
4522
|
continue;
|
|
4471
4523
|
}
|
|
4472
4524
|
if (!depStatus) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.1982",
|
|
3
|
+
"version": "0.1.1983",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|