@yemi33/minions 0.1.1872 → 0.1.1874
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -3
- package/dashboard.js +1 -1
- package/engine/cleanup.js +41 -1
- package/engine/lifecycle.js +254 -47
- package/engine/shared.js +1 -0
- package/engine/timeout.js +7 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,10 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## 0.1.
|
|
3
|
+
## 0.1.1874 (2026-05-11)
|
|
4
|
+
|
|
5
|
+
### Fixes
|
|
6
|
+
- add horizontal margin to inline page toast
|
|
7
|
+
|
|
8
|
+
## 0.1.1873 (2026-05-11)
|
|
9
|
+
|
|
10
|
+
### Other
|
|
11
|
+
- Implement: Phantom completion work preservation (P-e0b4f7a5) (#2356)
|
|
12
|
+
|
|
13
|
+
## 0.1.1871 (2026-05-11)
|
|
4
14
|
|
|
5
15
|
### Features
|
|
6
|
-
- Stale-HEAD guard on fix-task pushes (P-c8f2d5e3) (#2360)
|
|
7
|
-
- Cached buildStatus invalidation on no-op completion (#2355)
|
|
8
16
|
- per-agent memory file architecture (P-f1c5a8b6) (#2354)
|
|
9
17
|
- Implement pre-dispatch acceptance criteria validation gate (P-a2d6b9c7) (#2352)
|
|
10
18
|
|
package/dashboard.js
CHANGED
|
@@ -837,7 +837,7 @@ function buildDashboardHtml() {
|
|
|
837
837
|
// the top — showToast('cmd-toast', …) auto-routes here when a page is active,
|
|
838
838
|
// so feedback lands near the action instead of the floating top-right toast.
|
|
839
839
|
const pages = ['home', 'work', 'prs', 'plans', 'inbox', 'tools', 'schedule', 'watches', 'pipelines', 'meetings', 'engine'];
|
|
840
|
-
const pageToast = ' <div class="cmd-toast cmd-toast-inline page-toast" style="margin:6px
|
|
840
|
+
const pageToast = ' <div class="cmd-toast cmd-toast-inline page-toast" style="margin:6px 16px"></div>\n';
|
|
841
841
|
let pageHtml = '';
|
|
842
842
|
for (const p of pages) {
|
|
843
843
|
const content = safeRead(path.join(dashDir, 'pages', p + '.html'));
|
package/engine/cleanup.js
CHANGED
|
@@ -79,6 +79,26 @@ function localBranchWorktreeInUse(root, branch) {
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
// P-e0b4f7a5 — collect branches of work items currently in the
|
|
83
|
+
// phantom-completion retry state for a given project. Returns a Set of
|
|
84
|
+
// branch strings. Used by the worktree cleanup loop to protect worktrees
|
|
85
|
+
// belonging to in-flight phantom retries from the 2-hour age sweep —
|
|
86
|
+
// without this protection the agent's pushed branch reference could be
|
|
87
|
+
// destroyed alongside the worktree before the retry runs.
|
|
88
|
+
/**
 * Gather the branch names of a project's work items that carry the
 * phantom-completion markers.
 *
 * A work item contributes only when `_phantomCompletion` is strictly `true`
 * and `_phantomBranch` is truthy; the branch is stored stringified.
 *
 * @param {object} project - project descriptor handed to projectWorkItemsPath.
 * @returns {Set<string>} branch names; empty when the work-item file is
 *   missing, is not an array, or cannot be read.
 */
function collectPhantomBranchesForProject(project) {
  const protectedBranches = new Set();
  try {
    const workItems = safeJson(projectWorkItemsPath(project)) || [];
    if (Array.isArray(workItems)) {
      workItems.forEach((item) => {
        const isPhantom = Boolean(item) && item._phantomCompletion === true;
        if (!isPhantom || !item._phantomBranch) return;
        protectedBranches.add(String(item._phantomBranch));
      });
    }
  } catch { /* best-effort — never let cleanup crash on a missing/corrupt WI file */ }
  return protectedBranches;
}
|
|
101
|
+
|
|
82
102
|
function cleanupMergedPrLocalBranch(root, project, pr) {
|
|
83
103
|
const branch = normalizeLocalBranchName(pr?.branch);
|
|
84
104
|
const result = { deleted: false, forced: false, skipped: null };
|
|
@@ -451,6 +471,11 @@ async function runCleanup(config, verbose = false) {
|
|
|
451
471
|
const wtEntries = []; // { dir, wtPath, mtime, shouldClean, isProtected }
|
|
452
472
|
const dispatch = getDispatch();
|
|
453
473
|
const activeDispatchIds = new Set((dispatch.active || []).map(d => d.id));
|
|
474
|
+
// P-e0b4f7a5 — branches whose work item is mid-phantom-retry. Their
|
|
475
|
+
// worktrees must survive the age/cap sweep until the retry completes
|
|
476
|
+
// (or exhausts its budget) so the agent's already-pushed branch ref
|
|
477
|
+
// isn't destroyed alongside the worktree.
|
|
478
|
+
const phantomBranches = collectPhantomBranchesForProject(project);
|
|
454
479
|
|
|
455
480
|
// Probe `git branch --show-current` for every worktree in chunks of 5.
|
|
456
481
|
// Sequential probing was the dominant cost in the cleanup phase
|
|
@@ -492,6 +517,20 @@ async function runCleanup(config, verbose = false) {
|
|
|
492
517
|
});
|
|
493
518
|
if (isReferenced) isProtected = true;
|
|
494
519
|
|
|
520
|
+
// P-e0b4f7a5 — protect worktrees whose branch matches a work item in
|
|
521
|
+
// the phantom-completion retry state. The dispatch may have already
|
|
522
|
+
// moved to dispatch.completed (so isReferenced is false) but the
|
|
523
|
+
// retry will re-dispatch on the same branch shortly.
|
|
524
|
+
if (!isProtected && phantomBranches.size > 0) {
|
|
525
|
+
for (const branch of phantomBranches) {
|
|
526
|
+
if (worktreeMatchesBranch(dirLower, branch, actualBranch)) {
|
|
527
|
+
isProtected = true;
|
|
528
|
+
if (verbose) console.log(` Skipping worktree ${dir}: phantom-completion retry pending`);
|
|
529
|
+
break;
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
|
|
495
534
|
// Also clean worktrees older than 2 hours with no active dispatch referencing them
|
|
496
535
|
let mtime = Date.now();
|
|
497
536
|
if (!shouldClean) {
|
|
@@ -499,7 +538,7 @@ async function runCleanup(config, verbose = false) {
|
|
|
499
538
|
const stat = fs.statSync(wtPath);
|
|
500
539
|
mtime = stat.mtimeMs;
|
|
501
540
|
const ageMs = Date.now() - mtime;
|
|
502
|
-
if (ageMs > 7200000 && !isReferenced) { // 2 hours
|
|
541
|
+
if (ageMs > 7200000 && !isReferenced && !isProtected) { // 2 hours — P-e0b4f7a5: phantom-protected worktrees survive the age sweep too
|
|
503
542
|
shouldClean = true;
|
|
504
543
|
}
|
|
505
544
|
} catch { /* optional */ }
|
|
@@ -1080,4 +1119,5 @@ module.exports = {
|
|
|
1080
1119
|
worktreeMatchesBranch, // exported for testing
|
|
1081
1120
|
getWorktreeBranch, // exported for lifecycle cleanup
|
|
1082
1121
|
cleanupMergedPrLocalBranch, // exported for lifecycle cleanup and testing
|
|
1122
|
+
collectPhantomBranchesForProject, // P-e0b4f7a5 — exported for testing
|
|
1083
1123
|
};
|
package/engine/lifecycle.js
CHANGED
|
@@ -543,6 +543,11 @@ function updateWorkItemStatus(meta, status, reason) {
|
|
|
543
543
|
delete target.failReason;
|
|
544
544
|
delete target.failedAt;
|
|
545
545
|
delete target._retryCount;
|
|
546
|
+
// P-e0b4f7a5 — successful completion (including a phantom-retry
|
|
547
|
+
// succeeding) clears the phantom markers so cleanup can reap the
|
|
548
|
+
// worktree on the next sweep.
|
|
549
|
+
delete target._phantomCompletion;
|
|
550
|
+
delete target._phantomBranch;
|
|
546
551
|
target.completedAt = ts();
|
|
547
552
|
// Restore agent info from dispatch metadata (cleared on retry reset)
|
|
548
553
|
if (meta._agentId && !target.dispatched_to) target.dispatched_to = meta._agentId;
|
|
@@ -1087,6 +1092,90 @@ async function findOpenPrForBranch(meta, config) {
|
|
|
1087
1092
|
return null;
|
|
1088
1093
|
}
|
|
1089
1094
|
|
|
1095
|
+
// P-e0b4f7a5 — quick "did the agent push the branch before the runtime
|
|
1096
|
+
// crashed?" probe. `git ls-remote origin <branch>` returns a non-empty
|
|
1097
|
+
// "<sha>\trefs/heads/<branch>" line when the branch exists on the remote and
|
|
1098
|
+
// nothing when it doesn't. Used by enforcePrAttachmentContract to gate the
|
|
1099
|
+
// phantom-recovery PR auto-link: if the branch isn't there, no PR can exist
|
|
1100
|
+
// either and there's no point burning another `gh pr list` round-trip.
|
|
1101
|
+
/**
 * Probe whether `meta.branch` exists as a head on the `origin` remote.
 *
 * Runs `git ls-remote --heads origin <branch>` from the resolved project's
 * `localPath`, falling back to `meta.cwd` and then `process.cwd()`.
 *
 * `git ls-remote` treats the branch argument as a tail-matching pattern, so
 * asking for `feat` can also list `refs/heads/user/feat`. Each output line is
 * therefore compared against the exact `refs/heads/<branch>` ref instead of
 * accepting any listed head.
 *
 * @param {object} meta - dispatch metadata; reads `branch` and `cwd`.
 * @param {object} config - engine config forwarded to resolvePrFallbackProject.
 * @returns {Promise<boolean>} true only when the exact branch ref is listed;
 *   false when no branch is set, the ref is absent, or the probe fails.
 */
async function _phantomBranchExistsOnRemote(meta, config) {
  if (!meta?.branch) return false;
  const branch = String(meta.branch);
  const projectObj = resolvePrFallbackProject(meta, config);
  // Prefer the project root; fall back to meta.cwd, then the ambient cwd.
  const cwd = projectObj?.localPath || meta?.cwd || process.cwd();
  // Accept a branch that is already fully qualified without double-prefixing.
  const wantedRef = branch.startsWith('refs/heads/') ? branch : `refs/heads/${branch}`;
  try {
    const out = await runFileCapture('git', ['ls-remote', '--heads', 'origin', branch], { cwd, timeout: 15000 });
    // Output lines look like "<sha>\t<ref>"; require an exact ref match.
    return String(out || '')
      .split(/\r?\n/)
      .some((line) => {
        const tabAt = line.indexOf('\t');
        return tabAt > 0 && line.slice(tabAt + 1).trim() === wantedRef;
      });
  } catch (err) {
    log('debug', `Phantom ls-remote probe failed for ${meta.branch}: ${err.message}`);
    return false;
  }
}
|
|
1117
|
+
|
|
1118
|
+
// P-e0b4f7a5 — extracted from enforcePrAttachmentContract so the phantom
|
|
1119
|
+
// recovery path can reuse the same canonical-attach upsert without
|
|
1120
|
+
// duplicating the entry construction. Returns null if the link succeeded,
|
|
1121
|
+
// or a contract-failure object if the verification step couldn't read the
|
|
1122
|
+
// PR tracking state (state-error path mirrors the original inline behavior).
|
|
1123
|
+
/**
 * Upsert a PR tracking record linking `found` to the work item in `meta`,
 * then re-check canonical attachment.
 *
 * @param {object} found - discovered PR; reads `project`, `prNumber`, `url`.
 * @param {object} meta - dispatch metadata; reads `item` (id, title,
 *   sourcePlan, itemType) and `branch`. `meta.item.id` must be present.
 * @param {string} agentId - agent recorded on the PR entry.
 * @param {string} resultSummary - forwarded to markPrAttachmentVerificationError.
 * @param {object} config - forwarded to hasCanonicalPrAttachment.
 * @returns {null|object} null once the record has been written and the
 *   verification read did not throw; a hard `stateError` failure object when
 *   the verification read throws.
 */
function _attachFoundPrToWi(found, meta, agentId, resultSummary, config) {
  const itemId = meta.item.id;
  const prId = shared.getCanonicalPrId(found.project, found.prNumber, found.url);
  const entry = {
    id: prId,
    prNumber: found.prNumber,
    title: meta.item?.title || `PR #${found.prNumber}`,
    agent: agentId,
    branch: meta.branch || '',
    reviewStatus: 'pending',
    status: PR_STATUS.ACTIVE,
    created: ts(),
    url: found.url,
    prdItems: [itemId],
    sourcePlan: meta.item?.sourcePlan || '',
    itemType: meta.item?.itemType || '',
  };
  shared.upsertPullRequestRecord(shared.projectPrPath(found.project), entry, {
    project: found.project,
    itemId,
  });

  let verified;
  try {
    verified = hasCanonicalPrAttachment(itemId, config);
  } catch (err) {
    const reason = `${itemId} auto-linked a PR but PR attachment verification could not read PR tracking state: ${err.message}`;
    markPrAttachmentVerificationError(meta, agentId, reason, resultSummary);
    log('warn', reason);
    return { reason, itemId, severity: 'hard', stateError: true };
  }

  if (!verified) {
    // The return contract (null) is kept for existing callers, but a record
    // that was written yet still fails the canonical check must not pass
    // silently.
    // NOTE(review): callers treat null as "linked"; confirm whether this case
    // should instead be routed through the missing-attachment path.
    log('warn', `${itemId} auto-linked PR ${prId} but the canonical attachment check still reports no attached PR — verify PR tracking state`);
  }
  return null;
}
|
|
1152
|
+
|
|
1153
|
+
// P-e0b4f7a5 — phantom-completion recovery: when the runtime crashes before
|
|
1154
|
+
// emitting its terminating result event, the agent may still have pushed
|
|
1155
|
+
// the branch (and possibly opened the PR) seconds beforehand. Verify with
|
|
1156
|
+
// `git ls-remote origin <branch>` and, if the branch landed on the remote,
|
|
1157
|
+
// attempt one final canonical PR attachment via the existing
|
|
1158
|
+
// findOpenPrForBranch helper. Returns true if a PR was found and linked
|
|
1159
|
+
// (work is recoverable — caller should treat as success), false otherwise.
|
|
1160
|
+
/**
 * Try to recover a phantom completion by linking an already-open PR.
 *
 * Steps: confirm the branch is on the remote (_phantomBranchExistsOnRemote),
 * look for an open PR on it (findOpenPrForBranch), then attach that PR to
 * the work item (_attachFoundPrToWi).
 *
 * @param {object} meta - dispatch metadata; requires `branch` and `item.id`.
 * @param {string} agentId - agent forwarded to the attach helper.
 * @param {string} resultSummary - forwarded to the attach helper.
 * @param {object} config - engine config forwarded to the helpers.
 * @returns {Promise<boolean>} true when a PR was found and the attach helper
 *   returned null; false in every other case.
 */
async function _attemptPhantomPrRecovery(meta, agentId, resultSummary, config) {
  if (!meta?.branch || !meta?.item?.id) return false;
  const branchOnRemote = await _phantomBranchExistsOnRemote(meta, config);
  if (!branchOnRemote) return false;
  const recovered = await findOpenPrForBranch(meta, config);
  if (!recovered) {
    log('info', `Phantom-completion: branch ${meta.branch} exists on remote for ${meta.item.id} but no open PR found — routing through phantom retry budget`);
    return false;
  }
  const attachResult = _attachFoundPrToWi(recovered, meta, agentId, resultSummary, config);
  const prId = shared.getCanonicalPrId(recovered.project, recovered.prNumber, recovered.url);
  if (attachResult !== null) {
    // A non-null result is a failure object from the attach helper. Do not
    // announce a successful auto-link for a recovery reported as failed.
    log('warn', `Phantom-completion recovery: found PR ${prId} on branch ${meta.branch} for ${meta.item.id} but the link could not be verified — routing through phantom retry budget`);
    return false;
  }
  log('info', `Phantom-completion recovery: auto-linked existing PR ${prId} on branch ${meta.branch} for ${meta.item.id} (runtime crashed but agent had pushed the PR)`);
  return true;
}
|
|
1178
|
+
|
|
1090
1179
|
// Lightweight probe for "did the agent's output contain ANY PR URL?". Used by
|
|
1091
1180
|
// the PR-attachment contract to distinguish silent-failure (no URL anywhere)
|
|
1092
1181
|
// from auto-link-miss (URL present but engine couldn't canonically attach it).
|
|
@@ -1113,10 +1202,79 @@ function _outputHasRuntimeResultEvent(output) {
|
|
|
1113
1202
|
return /"type":\s*"result"/.test(output);
|
|
1114
1203
|
}
|
|
1115
1204
|
|
|
1116
|
-
function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity) {
|
|
1205
|
+
function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity, opts) {
|
|
1117
1206
|
const noPrWiPath = resolveWorkItemPath(meta);
|
|
1118
1207
|
const isHard = severity !== 'soft';
|
|
1208
|
+
const isPhantom = !!(opts && opts.phantom);
|
|
1119
1209
|
let syncFailedToPrd = false;
|
|
1210
|
+
// Phantom branch: a runtime crash that hard-fails for "no PR attached" should
|
|
1211
|
+
// not bypass the retry budget — the agent never got a chance to do the work.
|
|
1212
|
+
// Track these separately on `_phantomRetryCount` so they don't pollute the
|
|
1213
|
+
// PR-attachment retry counter (`_retryCount`). Cap at maxPhantomRetries; only
|
|
1214
|
+
// hard-fail once the phantom budget is exhausted.
|
|
1215
|
+
let phantomRetryDeferred = false;
|
|
1216
|
+
let phantomRetryExhausted = false;
|
|
1217
|
+
let phantomRetryCount = 0;
|
|
1218
|
+
if (isHard && isPhantom && noPrWiPath) {
|
|
1219
|
+
mutateJsonFileLocked(noPrWiPath, data => {
|
|
1220
|
+
if (!Array.isArray(data)) return data;
|
|
1221
|
+
const w = data.find(i => i.id === meta.item.id);
|
|
1222
|
+
if (!w) return data;
|
|
1223
|
+
const phantomRetries = w._phantomRetryCount || 0;
|
|
1224
|
+
if (phantomRetries < ENGINE_DEFAULTS.maxPhantomRetries) {
|
|
1225
|
+
w.status = WI_STATUS.PENDING;
|
|
1226
|
+
w._phantomRetryCount = phantomRetries + 1;
|
|
1227
|
+
w._lastRetryAt = ts();
|
|
1228
|
+
w._lastRetryReason = reason;
|
|
1229
|
+
w._pendingReason = 'phantom_completion';
|
|
1230
|
+
// P-e0b4f7a5 — _phantomCompletion + _phantomBranch let cleanup.js
|
|
1231
|
+
// protect the worktree of an in-flight phantom retry. Without these
|
|
1232
|
+
// markers the 2-hour age sweep can wipe the worktree (and the agent's
|
|
1233
|
+
// already-pushed branch reference) between phantom detection and
|
|
1234
|
+
// re-dispatch.
|
|
1235
|
+
w._phantomCompletion = true;
|
|
1236
|
+
if (meta.branch) w._phantomBranch = meta.branch;
|
|
1237
|
+
delete w.completedAt;
|
|
1238
|
+
delete w.dispatched_at;
|
|
1239
|
+
delete w.dispatched_to;
|
|
1240
|
+
delete w.failReason;
|
|
1241
|
+
delete w.failedAt;
|
|
1242
|
+
delete w._missingPrAttachment;
|
|
1243
|
+
phantomRetryDeferred = true;
|
|
1244
|
+
phantomRetryCount = phantomRetries + 1;
|
|
1245
|
+
log('warn', `Work item ${meta.item.id} hit phantom-completion path — retry ${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries} (runtime likely crashed before emitting result event)`);
|
|
1246
|
+
} else {
|
|
1247
|
+
phantomRetryExhausted = true;
|
|
1248
|
+
phantomRetryCount = phantomRetries;
|
|
1249
|
+
}
|
|
1250
|
+
return data;
|
|
1251
|
+
}, { skipWriteIfUnchanged: true });
|
|
1252
|
+
if (phantomRetryDeferred) {
|
|
1253
|
+
// Soft inbox note: the runtime crashed but we're retrying; surface the
|
|
1254
|
+
// event without flagging the WI as silent failure.
|
|
1255
|
+
shared.writeToInbox('engine', `phantom-completion-retry-${meta.item.id}`,
|
|
1256
|
+
`# Phantom completion retry for ${meta.item.id}\n\n` +
|
|
1257
|
+
`**Agent:** ${agentId}\n` +
|
|
1258
|
+
`**Work item:** \`${meta.item.id}\` — ${meta.item.title || ''}\n` +
|
|
1259
|
+
`**Type:** ${meta.item.type || 'unknown'}\n` +
|
|
1260
|
+
`**Branch:** ${meta.branch || '(none)'}\n` +
|
|
1261
|
+
`**Phantom retry:** ${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries}\n\n` +
|
|
1262
|
+
`${reason}\n` +
|
|
1263
|
+
(resultSummary ? `\n## Agent summary\n${resultSummary}\n` : ''),
|
|
1264
|
+
null,
|
|
1265
|
+
{ sourceItem: meta.item.id, reason: 'phantom-completion-retry' });
|
|
1266
|
+
// Sync PRD back to pending so dependent flow doesn't see it as failed.
|
|
1267
|
+
if (meta.item?.sourcePlan) {
|
|
1268
|
+
try { syncPrdItemStatus(meta.item.id, WI_STATUS.PENDING, meta.item.sourcePlan); } catch (e) { log('warn', 'phantom retry PRD sync: ' + e.message); }
|
|
1269
|
+
}
|
|
1270
|
+
return;
|
|
1271
|
+
}
|
|
1272
|
+
if (phantomRetryExhausted) {
|
|
1273
|
+
// Fall through to the regular hard-fail path with augmented reason so
|
|
1274
|
+
// operators see "phantom retries exhausted" instead of the generic msg.
|
|
1275
|
+
reason = `${reason} — phantom retries exhausted (${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries})`;
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1120
1278
|
if (noPrWiPath) {
|
|
1121
1279
|
mutateJsonFileLocked(noPrWiPath, data => {
|
|
1122
1280
|
if (!Array.isArray(data)) return data;
|
|
@@ -1132,6 +1290,11 @@ function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity)
|
|
|
1132
1290
|
delete w.completedAt;
|
|
1133
1291
|
delete w._noPr;
|
|
1134
1292
|
delete w._noPrReason;
|
|
1293
|
+
// P-e0b4f7a5 — terminal hard-fail (genuine missing PR or phantom
|
|
1294
|
+
// retries exhausted) clears the in-flight phantom markers so cleanup
|
|
1295
|
+
// can finally reap the worktree.
|
|
1296
|
+
delete w._phantomCompletion;
|
|
1297
|
+
delete w._phantomBranch;
|
|
1135
1298
|
} else {
|
|
1136
1299
|
// Soft: don't change status or failReason — the agent did the work,
|
|
1137
1300
|
// we just couldn't auto-attach the PR. Surface a flag for the dashboard
|
|
@@ -1208,7 +1371,8 @@ function markPrAttachmentVerificationError(meta, agentId, reason, resultSummary)
|
|
|
1208
1371
|
{ sourceItem: meta.item.id, reason: 'pr-attachment-state-error' });
|
|
1209
1372
|
}
|
|
1210
1373
|
|
|
1211
|
-
async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output) {
|
|
1374
|
+
async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output, opts) {
|
|
1375
|
+
const detectPhantom = !!(opts && opts.detectPhantom);
|
|
1212
1376
|
if (!isPrAttachmentRequired(type, meta?.item, meta)) return null;
|
|
1213
1377
|
try {
|
|
1214
1378
|
if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
|
|
@@ -1221,39 +1385,35 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
|
|
|
1221
1385
|
|
|
1222
1386
|
const found = await findOpenPrForBranch(meta, config);
|
|
1223
1387
|
if (found) {
|
|
1224
|
-
const
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
agent: agentId,
|
|
1229
|
-
branch: meta.branch || '',
|
|
1230
|
-
reviewStatus: 'pending',
|
|
1231
|
-
status: PR_STATUS.ACTIVE,
|
|
1232
|
-
created: ts(),
|
|
1233
|
-
url: found.url,
|
|
1234
|
-
prdItems: [meta.item.id],
|
|
1235
|
-
sourcePlan: meta.item?.sourcePlan || '',
|
|
1236
|
-
itemType: meta.item?.itemType || '',
|
|
1237
|
-
};
|
|
1238
|
-
shared.upsertPullRequestRecord(shared.projectPrPath(found.project), entry, {
|
|
1239
|
-
project: found.project,
|
|
1240
|
-
itemId: meta.item.id,
|
|
1241
|
-
});
|
|
1242
|
-
log('info', `Auto-linked existing PR ${entry.id} on branch ${meta.branch} for ${meta.item.id}`);
|
|
1243
|
-
try {
|
|
1244
|
-
if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
|
|
1245
|
-
} catch (err) {
|
|
1246
|
-
const reason = `${meta.item.id} auto-linked a PR but PR attachment verification could not read PR tracking state: ${err.message}`;
|
|
1247
|
-
markPrAttachmentVerificationError(meta, agentId, reason, resultSummary);
|
|
1248
|
-
log('warn', reason);
|
|
1249
|
-
return { reason, itemId: meta.item.id, severity: 'hard', stateError: true };
|
|
1250
|
-
}
|
|
1388
|
+
const attachResult = _attachFoundPrToWi(found, meta, agentId, resultSummary, config);
|
|
1389
|
+
log('info', `Auto-linked existing PR ${shared.getCanonicalPrId(found.project, found.prNumber, found.url)} on branch ${meta.branch} for ${meta.item.id}`);
|
|
1390
|
+
if (attachResult === null) return null;
|
|
1391
|
+
return attachResult;
|
|
1251
1392
|
}
|
|
1252
1393
|
|
|
1253
1394
|
// Distinguish "agent never claimed a PR" (hard — silent failure the contract
|
|
1254
1395
|
// was designed to catch) from "agent claimed a PR but engine couldn't attach
|
|
1255
1396
|
// it canonically" (soft — verification gap, not a failure).
|
|
1256
1397
|
const severity = _outputContainsPrUrl(output) ? 'soft' : 'hard';
|
|
1398
|
+
// Phantom completion = hard severity + opt-in detectPhantom + no terminating
|
|
1399
|
+
// result event in stream. The runtime CLI crashed mid-conversation; the
|
|
1400
|
+
// agent never got a chance to open a PR. Hard-failing here would bypass the
|
|
1401
|
+
// retry budget for a runtime bug. Surface phantom: true to
|
|
1402
|
+
// markMissingPrAttachment so it routes through the _phantomRetryCount path.
|
|
1403
|
+
const isPhantom = severity === 'hard' && detectPhantom && !_outputHasRuntimeResultEvent(output);
|
|
1404
|
+
|
|
1405
|
+
// P-e0b4f7a5 — phantom-completion recovery: an agent may have pushed its
|
|
1406
|
+
// branch (and even opened the PR) seconds before the runtime crashed.
|
|
1407
|
+
// Verify with `git ls-remote origin <branch>` and, if the branch landed,
|
|
1408
|
+
// make one final canonical-attach attempt before burning a phantom retry.
|
|
1409
|
+
// This recovers work that would otherwise be lost — both the worktree
|
|
1410
|
+
// (cleanup would reap it) and the orphan PR link (no WI ever points at it).
|
|
1411
|
+
if (isPhantom) {
|
|
1412
|
+
if (await _attemptPhantomPrRecovery(meta, agentId, resultSummary, config)) {
|
|
1413
|
+
return null;
|
|
1414
|
+
}
|
|
1415
|
+
}
|
|
1416
|
+
|
|
1257
1417
|
// Hard-fail messaging: if the runtime never emitted its terminating result
|
|
1258
1418
|
// event, the failure is a phantom completion (runtime CLI crashed), not the
|
|
1259
1419
|
// agent silently skipping work. Surface that truthfully so operators don't
|
|
@@ -1268,9 +1428,9 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
|
|
|
1268
1428
|
} else {
|
|
1269
1429
|
reason = `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
|
|
1270
1430
|
}
|
|
1271
|
-
markMissingPrAttachment(meta, agentId, reason, resultSummary, severity);
|
|
1431
|
+
markMissingPrAttachment(meta, agentId, reason, resultSummary, severity, { phantom: isPhantom });
|
|
1272
1432
|
log(severity === 'hard' ? 'warn' : 'info', reason);
|
|
1273
|
-
return { reason, itemId: meta.item.id, severity };
|
|
1433
|
+
return { reason, itemId: meta.item.id, severity, phantom: isPhantom };
|
|
1274
1434
|
}
|
|
1275
1435
|
|
|
1276
1436
|
// ─── Post-Completion Hooks ──────────────────────────────────────────────────
|
|
@@ -2564,6 +2724,20 @@ function detectNonTerminalResultSummary(_resultSummary, structuredCompletion, co
|
|
|
2564
2724
|
}
|
|
2565
2725
|
|
|
2566
2726
|
/**
 * Defer a non-terminal completion through the shared retry helper, counting
 * against `_retryCount` with a cap of ENGINE_DEFAULTS.maxRetries and the
 * pending reason 'nonterminal_completion'.
 *
 * @param {object} meta - dispatch metadata forwarded unchanged.
 * @param {object} detection - detection result forwarded unchanged.
 * @returns {*} whatever _deferRetryWithCounter returns.
 */
function deferNonTerminalCompletion(meta, detection) {
  const retryCap = ENGINE_DEFAULTS.maxRetries;
  const outcome = _deferRetryWithCounter(meta, detection, '_retryCount', retryCap, 'nonterminal_completion');
  return outcome;
}
|
|
2729
|
+
|
|
2730
|
+
// Phantom-completion variant — uses _phantomRetryCount + maxPhantomRetries so
|
|
2731
|
+
// runtime-crash retries don't share a budget with the PR-attachment contract's
|
|
2732
|
+
// retries. Cap is independent (ENGINE_DEFAULTS.maxPhantomRetries) so the two
|
|
2733
|
+
// failure modes can be tuned separately. This path is triggered when the
|
|
2734
|
+
// runtime exits cleanly but emits no result event, no structured completion,
|
|
2735
|
+
// and no completion report — see detectNonTerminalResultSummary.
|
|
2736
|
+
/**
 * Defer a phantom completion through the shared retry helper, counting
 * against `_phantomRetryCount` with a cap of ENGINE_DEFAULTS.maxPhantomRetries
 * and the pending reason 'phantom_completion'.
 *
 * @param {object} meta - dispatch metadata forwarded unchanged.
 * @param {object} detection - detection result forwarded unchanged.
 * @returns {*} whatever _deferRetryWithCounter returns.
 */
function deferPhantomCompletion(meta, detection) {
  const phantomCap = ENGINE_DEFAULTS.maxPhantomRetries;
  const outcome = _deferRetryWithCounter(meta, detection, '_phantomRetryCount', phantomCap, 'phantom_completion');
  return outcome;
}
|
|
2739
|
+
|
|
2740
|
+
function _deferRetryWithCounter(meta, detection, counterField, maxCount, pendingReason) {
|
|
2567
2741
|
const itemId = meta?.item?.id;
|
|
2568
2742
|
const reason = detection?.reason || 'Nonterminal completion summary';
|
|
2569
2743
|
if (!itemId) return reason;
|
|
@@ -2576,35 +2750,49 @@ function deferNonTerminalCompletion(meta, detection) {
|
|
|
2576
2750
|
if (!Array.isArray(data)) return data;
|
|
2577
2751
|
const w = data.find(i => i.id === itemId);
|
|
2578
2752
|
if (!w) return data;
|
|
2579
|
-
const retries = w
|
|
2580
|
-
if (retries <
|
|
2753
|
+
const retries = w[counterField] || 0;
|
|
2754
|
+
if (retries < maxCount) {
|
|
2581
2755
|
w.status = WI_STATUS.PENDING;
|
|
2582
|
-
w
|
|
2756
|
+
w[counterField] = retries + 1;
|
|
2583
2757
|
w._lastRetryAt = ts();
|
|
2584
2758
|
w._lastRetryReason = reason;
|
|
2585
|
-
w._pendingReason =
|
|
2759
|
+
w._pendingReason = pendingReason;
|
|
2760
|
+
// P-e0b4f7a5 — phantom-retry path stamps _phantomCompletion +
|
|
2761
|
+
// _phantomBranch so cleanup.js can preserve the worktree across the
|
|
2762
|
+
// re-dispatch window. Only set for the phantom counter; nonterminal
|
|
2763
|
+
// retries don't share this protection.
|
|
2764
|
+
if (counterField === '_phantomRetryCount') {
|
|
2765
|
+
w._phantomCompletion = true;
|
|
2766
|
+
if (meta?.branch) w._phantomBranch = meta.branch;
|
|
2767
|
+
}
|
|
2586
2768
|
delete w.completedAt;
|
|
2587
2769
|
delete w.dispatched_at;
|
|
2588
2770
|
delete w.dispatched_to;
|
|
2589
2771
|
delete w.failedAt;
|
|
2590
2772
|
finalStatus = WI_STATUS.PENDING;
|
|
2591
|
-
log('warn', `Work item ${itemId} reported
|
|
2773
|
+
log('warn', `Work item ${itemId} reported ${pendingReason} — retry ${retries + 1}/${maxCount} (${counterField}): ${reason}`);
|
|
2592
2774
|
} else {
|
|
2593
2775
|
w.status = WI_STATUS.FAILED;
|
|
2594
|
-
w.failReason = `${reason} after ${
|
|
2776
|
+
w.failReason = `${reason} after ${maxCount} attempts`;
|
|
2595
2777
|
w.failedAt = ts();
|
|
2596
2778
|
delete w.completedAt;
|
|
2597
2779
|
delete w.dispatched_at;
|
|
2598
2780
|
delete w.dispatched_to;
|
|
2599
2781
|
delete w._pendingReason;
|
|
2782
|
+
// Exhausted phantom retries: clear the in-flight markers so cleanup
|
|
2783
|
+
// can reap the worktree on the next sweep.
|
|
2784
|
+
if (counterField === '_phantomRetryCount') {
|
|
2785
|
+
delete w._phantomCompletion;
|
|
2786
|
+
delete w._phantomBranch;
|
|
2787
|
+
}
|
|
2600
2788
|
finalStatus = WI_STATUS.FAILED;
|
|
2601
|
-
log('warn', `Work item ${itemId} failed — repeated
|
|
2789
|
+
log('warn', `Work item ${itemId} failed — repeated ${pendingReason} after ${maxCount} attempts`);
|
|
2602
2790
|
}
|
|
2603
2791
|
return data;
|
|
2604
2792
|
}, { defaultValue: [], skipWriteIfUnchanged: true });
|
|
2605
2793
|
syncPrdItemStatus(itemId, finalStatus, meta.item?.sourcePlan);
|
|
2606
2794
|
} catch (err) {
|
|
2607
|
-
log('warn',
|
|
2795
|
+
log('warn', `${pendingReason} gate: ${err.message}`);
|
|
2608
2796
|
}
|
|
2609
2797
|
return reason;
|
|
2610
2798
|
}
|
|
@@ -2814,8 +3002,9 @@ function handleDecompositionResult(stdout, meta, config, runtimeName) {
|
|
|
2814
3002
|
return 0;
|
|
2815
3003
|
}
|
|
2816
3004
|
|
|
2817
|
-
async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, config) {
|
|
3005
|
+
async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, config, opts) {
|
|
2818
3006
|
|
|
3007
|
+
const detectPhantom = !!(opts && opts.detectPhantom);
|
|
2819
3008
|
const type = dispatchItem.type;
|
|
2820
3009
|
const meta = dispatchItem.meta;
|
|
2821
3010
|
const isSuccess = code === 0;
|
|
@@ -3055,13 +3244,27 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
3055
3244
|
|
|
3056
3245
|
let completionContractFailure = null;
|
|
3057
3246
|
if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
|
|
3058
|
-
const nonTerminalCompletion = detectNonTerminalResultSummary(completionGateSummary, structuredCompletion, reportCompletion);
|
|
3247
|
+
const nonTerminalCompletion = detectNonTerminalResultSummary(completionGateSummary, structuredCompletion, reportCompletion, { detectPhantom });
|
|
3059
3248
|
if (nonTerminalCompletion) {
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
|
|
3249
|
+
const isPhantomDetection = nonTerminalCompletion.phrase === 'phantom-completion';
|
|
3250
|
+
// P-e0b4f7a5 — before deferring a phantom retry, attempt to recover
|
|
3251
|
+
// the agent's work via the ls-remote + canonical-attach probe. If the
|
|
3252
|
+
// agent had pushed its branch (and possibly opened the PR) seconds
|
|
3253
|
+
// before the runtime crashed, link the PR and treat the WI as a
|
|
3254
|
+
// normal successful completion. This preserves work that would
|
|
3255
|
+
// otherwise be lost and avoids burning a phantom retry on something
|
|
3256
|
+
// that already shipped.
|
|
3257
|
+
if (isPhantomDetection && await _attemptPhantomPrRecovery(meta, agentId, resultSummary, config)) {
|
|
3258
|
+
log('info', `Phantom-completion recovered for ${meta.item.id} via ls-remote + PR auto-link — no retry needed`);
|
|
3259
|
+
} else {
|
|
3260
|
+
skipDoneStatus = true;
|
|
3261
|
+
const reason = isPhantomDetection
|
|
3262
|
+
? deferPhantomCompletion(meta, nonTerminalCompletion)
|
|
3263
|
+
: deferNonTerminalCompletion(meta, nonTerminalCompletion);
|
|
3264
|
+
completionContractFailure = { reason, itemId: meta.item.id, nonTerminal: true, processWorkItemFailure: false, phantom: isPhantomDetection };
|
|
3265
|
+
if (!nonCleanReportWritten) {
|
|
3266
|
+
writeNonCleanAgentReport(dispatchItem, agentId, 'partial', structuredCompletion, completionGateSummary, code);
|
|
3267
|
+
}
|
|
3065
3268
|
}
|
|
3066
3269
|
}
|
|
3067
3270
|
}
|
|
@@ -3077,7 +3280,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
3077
3280
|
}
|
|
3078
3281
|
|
|
3079
3282
|
if (effectiveSuccess && meta?.item?.id && !skipDoneStatus && !noopRationale) {
|
|
3080
|
-
completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout);
|
|
3283
|
+
completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout, { detectPhantom });
|
|
3081
3284
|
if (completionContractFailure?.severity === 'hard' || completionContractFailure?.nonTerminal) {
|
|
3082
3285
|
skipDoneStatus = true;
|
|
3083
3286
|
}
|
|
@@ -3460,6 +3663,10 @@ module.exports = {
|
|
|
3460
3663
|
parseCompletionFieldSummary,
|
|
3461
3664
|
parseCompletionNoop,
|
|
3462
3665
|
detectNonTerminalResultSummary,
|
|
3666
|
+
deferNonTerminalCompletion,
|
|
3667
|
+
deferPhantomCompletion,
|
|
3668
|
+
enforcePrAttachmentContract,
|
|
3669
|
+
markMissingPrAttachment,
|
|
3463
3670
|
parseCompletionReportFile,
|
|
3464
3671
|
persistCompletionReport,
|
|
3465
3672
|
runPostCompletionHooks,
|
package/engine/shared.js
CHANGED
|
@@ -1078,6 +1078,7 @@ const ENGINE_DEFAULTS = {
|
|
|
1078
1078
|
evalMaxIterations: 3, // legacy UI/config field; engine discovery no longer enforces review→fix cycle caps
|
|
1079
1079
|
evalMaxCost: null, // USD ceiling per work item across all eval iterations; null = no limit (gather baseline data first)
|
|
1080
1080
|
maxRetries: 3, // max dispatch retries before marking work item as failed
|
|
1081
|
+
maxPhantomRetries: 3, // max retries for "phantom completion" (runtime crashed before emitting type:"result"); tracked separately from _retryCount so phantom retries don't pollute the normal PR-attachment retry budget. See engine/lifecycle.markMissingPrAttachment + detectNonTerminalResultSummary.
|
|
1081
1082
|
minRetryGapMs: 120000, // 2min — minimum gap between retry dispatches for the same work item; prevents tight retry loops when an idempotent agent (e.g. review bailing out on a duplicate) cannot produce the expected output (#1770)
|
|
1082
1083
|
pipelineApiRetries: 2, // max attempts for pipeline API calls
|
|
1083
1084
|
pipelineApiRetryDelay: 2000, // ms delay between pipeline API retries
|
package/engine/timeout.js
CHANGED
|
@@ -318,7 +318,13 @@ function checkTimeouts(config) {
|
|
|
318
318
|
|
|
319
319
|
// Run post-completion hooks via shared helper (async — fire and forget in timeout context).
|
|
320
320
|
// Pass the actual exit code so autoRecovery (PR-created-but-failed) still works correctly.
|
|
321
|
-
|
|
321
|
+
// detectPhantom: true mirrors the earlier detectNonTerminalResultSummary call —
|
|
322
|
+
// when the timeout path completes a dispatch via the [process-exit] sentinel,
|
|
323
|
+
// we have no guarantee the runtime emitted a result event. Propagating
|
|
324
|
+
// detectPhantom downstream lets enforcePrAttachmentContract route phantom
|
|
325
|
+
// hard-fails through the _phantomRetryCount budget instead of bypassing
|
|
326
|
+
// the retry counter entirely (P-d9a3e6f4).
|
|
327
|
+
runPostCompletionHooks(item, item.agent, processExitCode, fullLogForHooks, config, { detectPhantom: true }).catch(e => log('warn', 'post-completion hooks: ' + e.message));
|
|
322
328
|
|
|
323
329
|
if (hasProcess) {
|
|
324
330
|
shared.killImmediate(activeProcesses.get(item.id)?.proc);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.1874",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|