@yemi33/minions 0.1.1871 → 0.1.1873
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/engine/cleanup.js +41 -1
- package/engine/lifecycle.js +280 -50
- package/engine/shared.js +1 -0
- package/engine/spawn-agent.js +76 -1
- package/engine/timeout.js +7 -1
- package/engine.js +50 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
package/engine/cleanup.js
CHANGED
|
@@ -79,6 +79,26 @@ function localBranchWorktreeInUse(root, branch) {
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
// P-e0b4f7a5 — collect branches of work items currently in the
|
|
83
|
+
// phantom-completion retry state for a given project. Returns a Set of
|
|
84
|
+
// branch strings. Used by the worktree cleanup loop to protect worktrees
|
|
85
|
+
// belonging to in-flight phantom retries from the 2-hour age sweep —
|
|
86
|
+
// without this protection the agent's pushed branch reference could be
|
|
87
|
+
// destroyed alongside the worktree before the retry runs.
|
|
88
|
+
/**
 * Gather the branches of a project's work items that are flagged as being in
 * the phantom-completion retry state.
 *
 * A work item contributes its branch when `_phantomCompletion === true` and
 * `_phantomBranch` is truthy; the branch is stored in its string form.
 *
 * @param {object} project - project descriptor handed to projectWorkItemsPath.
 * @returns {Set<string>} branch names; empty when the work-item file is
 *   missing, unreadable, or not an array.
 */
function collectPhantomBranchesForProject(project) {
  const phantomBranches = new Set();
  try {
    const workItems = safeJson(projectWorkItemsPath(project)) || [];
    if (Array.isArray(workItems)) {
      workItems
        .filter(item => item && item._phantomCompletion === true && item._phantomBranch)
        .forEach(item => phantomBranches.add(String(item._phantomBranch)));
    }
  } catch {
    // Best-effort: a missing or corrupt work-item file must never crash cleanup.
  }
  return phantomBranches;
}
|
|
101
|
+
|
|
82
102
|
function cleanupMergedPrLocalBranch(root, project, pr) {
|
|
83
103
|
const branch = normalizeLocalBranchName(pr?.branch);
|
|
84
104
|
const result = { deleted: false, forced: false, skipped: null };
|
|
@@ -451,6 +471,11 @@ async function runCleanup(config, verbose = false) {
|
|
|
451
471
|
const wtEntries = []; // { dir, wtPath, mtime, shouldClean, isProtected }
|
|
452
472
|
const dispatch = getDispatch();
|
|
453
473
|
const activeDispatchIds = new Set((dispatch.active || []).map(d => d.id));
|
|
474
|
+
// P-e0b4f7a5 — branches whose work item is mid-phantom-retry. Their
|
|
475
|
+
// worktrees must survive the age/cap sweep until the retry completes
|
|
476
|
+
// (or exhausts its budget) so the agent's already-pushed branch ref
|
|
477
|
+
// isn't destroyed alongside the worktree.
|
|
478
|
+
const phantomBranches = collectPhantomBranchesForProject(project);
|
|
454
479
|
|
|
455
480
|
// Probe `git branch --show-current` for every worktree in chunks of 5.
|
|
456
481
|
// Sequential probing was the dominant cost in the cleanup phase
|
|
@@ -492,6 +517,20 @@ async function runCleanup(config, verbose = false) {
|
|
|
492
517
|
});
|
|
493
518
|
if (isReferenced) isProtected = true;
|
|
494
519
|
|
|
520
|
+
// P-e0b4f7a5 — protect worktrees whose branch matches a work item in
|
|
521
|
+
// the phantom-completion retry state. The dispatch may have already
|
|
522
|
+
// moved to dispatch.completed (so isReferenced is false) but the
|
|
523
|
+
// retry will re-dispatch on the same branch shortly.
|
|
524
|
+
if (!isProtected && phantomBranches.size > 0) {
|
|
525
|
+
for (const branch of phantomBranches) {
|
|
526
|
+
if (worktreeMatchesBranch(dirLower, branch, actualBranch)) {
|
|
527
|
+
isProtected = true;
|
|
528
|
+
if (verbose) console.log(` Skipping worktree ${dir}: phantom-completion retry pending`);
|
|
529
|
+
break;
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
|
|
495
534
|
// Also clean worktrees older than 2 hours with no active dispatch referencing them
|
|
496
535
|
let mtime = Date.now();
|
|
497
536
|
if (!shouldClean) {
|
|
@@ -499,7 +538,7 @@ async function runCleanup(config, verbose = false) {
|
|
|
499
538
|
const stat = fs.statSync(wtPath);
|
|
500
539
|
mtime = stat.mtimeMs;
|
|
501
540
|
const ageMs = Date.now() - mtime;
|
|
502
|
-
if (ageMs > 7200000 && !isReferenced) { // 2 hours
|
|
541
|
+
if (ageMs > 7200000 && !isReferenced && !isProtected) { // 2 hours — P-e0b4f7a5: phantom-protected worktrees survive the age sweep too
|
|
503
542
|
shouldClean = true;
|
|
504
543
|
}
|
|
505
544
|
} catch { /* optional */ }
|
|
@@ -1080,4 +1119,5 @@ module.exports = {
|
|
|
1080
1119
|
worktreeMatchesBranch, // exported for testing
|
|
1081
1120
|
getWorktreeBranch, // exported for lifecycle cleanup
|
|
1082
1121
|
cleanupMergedPrLocalBranch, // exported for lifecycle cleanup and testing
|
|
1122
|
+
collectPhantomBranchesForProject, // P-e0b4f7a5 — exported for testing
|
|
1083
1123
|
};
|
package/engine/lifecycle.js
CHANGED
|
@@ -543,6 +543,11 @@ function updateWorkItemStatus(meta, status, reason) {
|
|
|
543
543
|
delete target.failReason;
|
|
544
544
|
delete target.failedAt;
|
|
545
545
|
delete target._retryCount;
|
|
546
|
+
// P-e0b4f7a5 — successful completion (including a phantom-retry
|
|
547
|
+
// succeeding) clears the phantom markers so cleanup can reap the
|
|
548
|
+
// worktree on the next sweep.
|
|
549
|
+
delete target._phantomCompletion;
|
|
550
|
+
delete target._phantomBranch;
|
|
546
551
|
target.completedAt = ts();
|
|
547
552
|
// Restore agent info from dispatch metadata (cleared on retry reset)
|
|
548
553
|
if (meta._agentId && !target.dispatched_to) target.dispatched_to = meta._agentId;
|
|
@@ -1087,6 +1092,90 @@ async function findOpenPrForBranch(meta, config) {
|
|
|
1087
1092
|
return null;
|
|
1088
1093
|
}
|
|
1089
1094
|
|
|
1095
|
+
// P-e0b4f7a5 — quick "did the agent push the branch before the runtime
|
|
1096
|
+
// crashed?" probe. `git ls-remote origin <branch>` returns a non-empty
|
|
1097
|
+
// "<sha>\trefs/heads/<branch>" line when the branch exists on the remote and
|
|
1098
|
+
// nothing when it doesn't. Used by enforcePrAttachmentContract to gate the
|
|
1099
|
+
// phantom-recovery PR auto-link: if the branch isn't there, no PR can exist
|
|
1100
|
+
// either and there's no point burning another `gh pr list` round-trip.
|
|
1101
|
+
/**
 * Probe whether `meta.branch` exists as a head on the `origin` remote.
 *
 * Runs `git ls-remote --heads origin <branch>` and reports true only when one
 * of the returned lines names exactly `refs/heads/<branch>`. ls-remote treats
 * its argument as a pattern matched against the tail of each ref, so a lookup
 * for `fix` can also list `refs/heads/feature/fix`; matching on the bare
 * `refs/heads/` prefix would therefore report a branch that is not there.
 *
 * @param {object} meta - dispatch metadata; reads `branch` and `cwd`.
 * @param {object} config - engine config, forwarded to resolvePrFallbackProject.
 * @returns {Promise<boolean>} true when the exact branch ref is on the remote;
 *   false when it is absent, `meta.branch` is empty, or the probe fails.
 */
async function _phantomBranchExistsOnRemote(meta, config) {
  if (!meta?.branch) return false;
  const branch = String(meta.branch);
  // Accept either a short branch name or an already fully-qualified head ref.
  const wantedRef = branch.startsWith('refs/heads/') ? branch : `refs/heads/${branch}`;
  const projectObj = resolvePrFallbackProject(meta, config);
  // Prefer the project root (dispatch worktrees may not have origin wired yet);
  // otherwise fall back to the dispatch cwd, then the ambient process cwd.
  const cwd = projectObj?.localPath || meta?.cwd || process.cwd();
  try {
    const out = await runFileCapture('git', ['ls-remote', '--heads', 'origin', branch], { cwd, timeout: 15000 });
    // Each output line is "<sha>\t<ref>"; require the ref column to match exactly.
    return String(out || '')
      .split(/\r?\n/)
      .some(line => {
        const tabAt = line.indexOf('\t');
        return tabAt > 0 && line.slice(tabAt + 1).trim() === wantedRef;
      });
  } catch (err) {
    log('debug', `Phantom ls-remote probe failed for ${meta.branch}: ${err.message}`);
    return false;
  }
}
|
|
1117
|
+
|
|
1118
|
+
// P-e0b4f7a5 — extracted from enforcePrAttachmentContract so the phantom
|
|
1119
|
+
// recovery path can reuse the same canonical-attach upsert without
|
|
1120
|
+
// duplicating the entry construction. Returns null if the link succeeded,
|
|
1121
|
+
// or a contract-failure object if the verification step couldn't read the
|
|
1122
|
+
// PR tracking state (state-error path mirrors the original inline behavior).
|
|
1123
|
+
/**
 * Upsert a PR record for a PR discovered on the work item's branch, then
 * re-check the canonical PR attachment for that work item.
 *
 * @param {object} found - lookup result; reads `project`, `prNumber`, `url`.
 * @param {object} meta - dispatch metadata; `meta.item.id` must be present.
 * @param {string} agentId - agent recorded on the PR entry.
 * @param {string} resultSummary - forwarded to markPrAttachmentVerificationError.
 * @param {object} config - engine config, forwarded to hasCanonicalPrAttachment.
 * @returns {null|{reason: string, itemId: string, severity: string, stateError: boolean}}
 *   a contract-failure object when the verification step throws; null otherwise.
 *
 * NOTE(review): null is also returned when verification completes but reports
 * the attachment as still missing. The return value is left as-is because
 * callers treat null as "linked"; that case is now logged instead of passing
 * silently — confirm whether it should instead become a contract failure.
 */
function _attachFoundPrToWi(found, meta, agentId, resultSummary, config) {
  const itemId = meta.item.id;
  const entry = {
    id: shared.getCanonicalPrId(found.project, found.prNumber, found.url),
    prNumber: found.prNumber,
    title: meta.item?.title || `PR #${found.prNumber}`,
    agent: agentId,
    branch: meta.branch || '',
    reviewStatus: 'pending',
    status: PR_STATUS.ACTIVE,
    created: ts(),
    url: found.url,
    prdItems: [itemId],
    sourcePlan: meta.item?.sourcePlan || '',
    itemType: meta.item?.itemType || '',
  };
  shared.upsertPullRequestRecord(shared.projectPrPath(found.project), entry, {
    project: found.project,
    itemId,
  });

  let verified;
  try {
    verified = hasCanonicalPrAttachment(itemId, config);
  } catch (err) {
    // PR tracking state could not be read: surface it as a hard state error.
    const reason = `${itemId} auto-linked a PR but PR attachment verification could not read PR tracking state: ${err.message}`;
    markPrAttachmentVerificationError(meta, agentId, reason, resultSummary);
    log('warn', reason);
    return { reason, itemId, severity: 'hard', stateError: true };
  }
  if (!verified) {
    // The upsert ran but the canonical lookup still can't see the link; make
    // that visible to operators rather than reporting an unqualified success.
    log('warn', `${itemId} auto-linked PR ${entry.id} but canonical PR attachment still reads as missing after the upsert`);
  }
  return null;
}
|
|
1152
|
+
|
|
1153
|
+
// P-e0b4f7a5 — phantom-completion recovery: when the runtime crashes before
|
|
1154
|
+
// emitting its terminating result event, the agent may still have pushed
|
|
1155
|
+
// the branch (and possibly opened the PR) seconds beforehand. Verify with
|
|
1156
|
+
// `git ls-remote origin <branch>` and, if the branch landed on the remote,
|
|
1157
|
+
// attempt one final canonical PR attachment via the existing
|
|
1158
|
+
// findOpenPrForBranch helper. Returns true if a PR was found and linked
|
|
1159
|
+
// (work is recoverable — caller should treat as success), false otherwise.
|
|
1160
|
+
/**
 * Phantom-completion recovery: try to link an already-open PR to the work item
 * when the run ended without a terminating result.
 *
 * Steps: confirm the branch is on the remote, look up an open PR for it, then
 * attach that PR to the work item via _attachFoundPrToWi.
 *
 * @param {object} meta - dispatch metadata; needs `branch` and `item.id`.
 * @param {string} agentId - agent recorded on the PR entry.
 * @param {string} resultSummary - forwarded to the attach helper.
 * @param {object} config - engine config.
 * @returns {Promise<boolean>} true only when a PR was found and the attach
 *   helper returned null (linked); false in every other case.
 */
async function _attemptPhantomPrRecovery(meta, agentId, resultSummary, config) {
  if (!meta?.branch || !meta?.item?.id) return false;

  const branchOnRemote = await _phantomBranchExistsOnRemote(meta, config);
  if (!branchOnRemote) return false;

  const recovered = await findOpenPrForBranch(meta, config);
  if (!recovered) {
    log('info', `Phantom-completion: branch ${meta.branch} exists on remote for ${meta.item.id} but no open PR found — routing through phantom retry budget`);
    return false;
  }

  const attachResult = _attachFoundPrToWi(recovered, meta, agentId, resultSummary, config);
  const prId = shared.getCanonicalPrId(recovered.project, recovered.prNumber, recovered.url);
  if (attachResult !== null) {
    // The attach helper reported a failure object. Do not announce a
    // successful auto-link; the caller falls through to its failure path.
    log('warn', `Phantom-completion recovery: found PR ${prId} on branch ${meta.branch} for ${meta.item.id} but the attachment could not be verified — not treating as recovered`);
    return false;
  }
  log('info', `Phantom-completion recovery: auto-linked existing PR ${prId} on branch ${meta.branch} for ${meta.item.id} (runtime crashed but agent had pushed the PR)`);
  return true;
}
|
|
1178
|
+
|
|
1090
1179
|
// Lightweight probe for "did the agent's output contain ANY PR URL?". Used by
|
|
1091
1180
|
// the PR-attachment contract to distinguish silent-failure (no URL anywhere)
|
|
1092
1181
|
// from auto-link-miss (URL present but engine couldn't canonically attach it).
|
|
@@ -1113,10 +1202,79 @@ function _outputHasRuntimeResultEvent(output) {
|
|
|
1113
1202
|
return /"type":\s*"result"/.test(output);
|
|
1114
1203
|
}
|
|
1115
1204
|
|
|
1116
|
-
function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity) {
|
|
1205
|
+
function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity, opts) {
|
|
1117
1206
|
const noPrWiPath = resolveWorkItemPath(meta);
|
|
1118
1207
|
const isHard = severity !== 'soft';
|
|
1208
|
+
const isPhantom = !!(opts && opts.phantom);
|
|
1119
1209
|
let syncFailedToPrd = false;
|
|
1210
|
+
// Phantom branch: a runtime crash that hard-fails for "no PR attached" should
|
|
1211
|
+
// not bypass the retry budget — the agent never got a chance to do the work.
|
|
1212
|
+
// Track these separately on `_phantomRetryCount` so they don't pollute the
|
|
1213
|
+
// PR-attachment retry counter (`_retryCount`). Cap at maxPhantomRetries; only
|
|
1214
|
+
// hard-fail once the phantom budget is exhausted.
|
|
1215
|
+
let phantomRetryDeferred = false;
|
|
1216
|
+
let phantomRetryExhausted = false;
|
|
1217
|
+
let phantomRetryCount = 0;
|
|
1218
|
+
if (isHard && isPhantom && noPrWiPath) {
|
|
1219
|
+
mutateJsonFileLocked(noPrWiPath, data => {
|
|
1220
|
+
if (!Array.isArray(data)) return data;
|
|
1221
|
+
const w = data.find(i => i.id === meta.item.id);
|
|
1222
|
+
if (!w) return data;
|
|
1223
|
+
const phantomRetries = w._phantomRetryCount || 0;
|
|
1224
|
+
if (phantomRetries < ENGINE_DEFAULTS.maxPhantomRetries) {
|
|
1225
|
+
w.status = WI_STATUS.PENDING;
|
|
1226
|
+
w._phantomRetryCount = phantomRetries + 1;
|
|
1227
|
+
w._lastRetryAt = ts();
|
|
1228
|
+
w._lastRetryReason = reason;
|
|
1229
|
+
w._pendingReason = 'phantom_completion';
|
|
1230
|
+
// P-e0b4f7a5 — _phantomCompletion + _phantomBranch let cleanup.js
|
|
1231
|
+
// protect the worktree of an in-flight phantom retry. Without these
|
|
1232
|
+
// markers the 2-hour age sweep can wipe the worktree (and the agent's
|
|
1233
|
+
// already-pushed branch reference) between phantom detection and
|
|
1234
|
+
// re-dispatch.
|
|
1235
|
+
w._phantomCompletion = true;
|
|
1236
|
+
if (meta.branch) w._phantomBranch = meta.branch;
|
|
1237
|
+
delete w.completedAt;
|
|
1238
|
+
delete w.dispatched_at;
|
|
1239
|
+
delete w.dispatched_to;
|
|
1240
|
+
delete w.failReason;
|
|
1241
|
+
delete w.failedAt;
|
|
1242
|
+
delete w._missingPrAttachment;
|
|
1243
|
+
phantomRetryDeferred = true;
|
|
1244
|
+
phantomRetryCount = phantomRetries + 1;
|
|
1245
|
+
log('warn', `Work item ${meta.item.id} hit phantom-completion path — retry ${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries} (runtime likely crashed before emitting result event)`);
|
|
1246
|
+
} else {
|
|
1247
|
+
phantomRetryExhausted = true;
|
|
1248
|
+
phantomRetryCount = phantomRetries;
|
|
1249
|
+
}
|
|
1250
|
+
return data;
|
|
1251
|
+
}, { skipWriteIfUnchanged: true });
|
|
1252
|
+
if (phantomRetryDeferred) {
|
|
1253
|
+
// Soft inbox note: the runtime crashed but we're retrying; surface the
|
|
1254
|
+
// event without flagging the WI as silent failure.
|
|
1255
|
+
shared.writeToInbox('engine', `phantom-completion-retry-${meta.item.id}`,
|
|
1256
|
+
`# Phantom completion retry for ${meta.item.id}\n\n` +
|
|
1257
|
+
`**Agent:** ${agentId}\n` +
|
|
1258
|
+
`**Work item:** \`${meta.item.id}\` — ${meta.item.title || ''}\n` +
|
|
1259
|
+
`**Type:** ${meta.item.type || 'unknown'}\n` +
|
|
1260
|
+
`**Branch:** ${meta.branch || '(none)'}\n` +
|
|
1261
|
+
`**Phantom retry:** ${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries}\n\n` +
|
|
1262
|
+
`${reason}\n` +
|
|
1263
|
+
(resultSummary ? `\n## Agent summary\n${resultSummary}\n` : ''),
|
|
1264
|
+
null,
|
|
1265
|
+
{ sourceItem: meta.item.id, reason: 'phantom-completion-retry' });
|
|
1266
|
+
// Sync PRD back to pending so dependent flow doesn't see it as failed.
|
|
1267
|
+
if (meta.item?.sourcePlan) {
|
|
1268
|
+
try { syncPrdItemStatus(meta.item.id, WI_STATUS.PENDING, meta.item.sourcePlan); } catch (e) { log('warn', 'phantom retry PRD sync: ' + e.message); }
|
|
1269
|
+
}
|
|
1270
|
+
return;
|
|
1271
|
+
}
|
|
1272
|
+
if (phantomRetryExhausted) {
|
|
1273
|
+
// Fall through to the regular hard-fail path with augmented reason so
|
|
1274
|
+
// operators see "phantom retries exhausted" instead of the generic msg.
|
|
1275
|
+
reason = `${reason} — phantom retries exhausted (${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries})`;
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1120
1278
|
if (noPrWiPath) {
|
|
1121
1279
|
mutateJsonFileLocked(noPrWiPath, data => {
|
|
1122
1280
|
if (!Array.isArray(data)) return data;
|
|
@@ -1132,6 +1290,11 @@ function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity)
|
|
|
1132
1290
|
delete w.completedAt;
|
|
1133
1291
|
delete w._noPr;
|
|
1134
1292
|
delete w._noPrReason;
|
|
1293
|
+
// P-e0b4f7a5 — terminal hard-fail (genuine missing PR or phantom
|
|
1294
|
+
// retries exhausted) clears the in-flight phantom markers so cleanup
|
|
1295
|
+
// can finally reap the worktree.
|
|
1296
|
+
delete w._phantomCompletion;
|
|
1297
|
+
delete w._phantomBranch;
|
|
1135
1298
|
} else {
|
|
1136
1299
|
// Soft: don't change status or failReason — the agent did the work,
|
|
1137
1300
|
// we just couldn't auto-attach the PR. Surface a flag for the dashboard
|
|
@@ -1208,7 +1371,8 @@ function markPrAttachmentVerificationError(meta, agentId, reason, resultSummary)
|
|
|
1208
1371
|
{ sourceItem: meta.item.id, reason: 'pr-attachment-state-error' });
|
|
1209
1372
|
}
|
|
1210
1373
|
|
|
1211
|
-
async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output) {
|
|
1374
|
+
async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output, opts) {
|
|
1375
|
+
const detectPhantom = !!(opts && opts.detectPhantom);
|
|
1212
1376
|
if (!isPrAttachmentRequired(type, meta?.item, meta)) return null;
|
|
1213
1377
|
try {
|
|
1214
1378
|
if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
|
|
@@ -1221,39 +1385,35 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
|
|
|
1221
1385
|
|
|
1222
1386
|
const found = await findOpenPrForBranch(meta, config);
|
|
1223
1387
|
if (found) {
|
|
1224
|
-
const
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
agent: agentId,
|
|
1229
|
-
branch: meta.branch || '',
|
|
1230
|
-
reviewStatus: 'pending',
|
|
1231
|
-
status: PR_STATUS.ACTIVE,
|
|
1232
|
-
created: ts(),
|
|
1233
|
-
url: found.url,
|
|
1234
|
-
prdItems: [meta.item.id],
|
|
1235
|
-
sourcePlan: meta.item?.sourcePlan || '',
|
|
1236
|
-
itemType: meta.item?.itemType || '',
|
|
1237
|
-
};
|
|
1238
|
-
shared.upsertPullRequestRecord(shared.projectPrPath(found.project), entry, {
|
|
1239
|
-
project: found.project,
|
|
1240
|
-
itemId: meta.item.id,
|
|
1241
|
-
});
|
|
1242
|
-
log('info', `Auto-linked existing PR ${entry.id} on branch ${meta.branch} for ${meta.item.id}`);
|
|
1243
|
-
try {
|
|
1244
|
-
if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
|
|
1245
|
-
} catch (err) {
|
|
1246
|
-
const reason = `${meta.item.id} auto-linked a PR but PR attachment verification could not read PR tracking state: ${err.message}`;
|
|
1247
|
-
markPrAttachmentVerificationError(meta, agentId, reason, resultSummary);
|
|
1248
|
-
log('warn', reason);
|
|
1249
|
-
return { reason, itemId: meta.item.id, severity: 'hard', stateError: true };
|
|
1250
|
-
}
|
|
1388
|
+
const attachResult = _attachFoundPrToWi(found, meta, agentId, resultSummary, config);
|
|
1389
|
+
log('info', `Auto-linked existing PR ${shared.getCanonicalPrId(found.project, found.prNumber, found.url)} on branch ${meta.branch} for ${meta.item.id}`);
|
|
1390
|
+
if (attachResult === null) return null;
|
|
1391
|
+
return attachResult;
|
|
1251
1392
|
}
|
|
1252
1393
|
|
|
1253
1394
|
// Distinguish "agent never claimed a PR" (hard — silent failure the contract
|
|
1254
1395
|
// was designed to catch) from "agent claimed a PR but engine couldn't attach
|
|
1255
1396
|
// it canonically" (soft — verification gap, not a failure).
|
|
1256
1397
|
const severity = _outputContainsPrUrl(output) ? 'soft' : 'hard';
|
|
1398
|
+
// Phantom completion = hard severity + opt-in detectPhantom + no terminating
|
|
1399
|
+
// result event in stream. The runtime CLI crashed mid-conversation; the
|
|
1400
|
+
// agent never got a chance to open a PR. Hard-failing here would bypass the
|
|
1401
|
+
// retry budget for a runtime bug. Surface phantom: true to
|
|
1402
|
+
// markMissingPrAttachment so it routes through the _phantomRetryCount path.
|
|
1403
|
+
const isPhantom = severity === 'hard' && detectPhantom && !_outputHasRuntimeResultEvent(output);
|
|
1404
|
+
|
|
1405
|
+
// P-e0b4f7a5 — phantom-completion recovery: an agent may have pushed its
|
|
1406
|
+
// branch (and even opened the PR) seconds before the runtime crashed.
|
|
1407
|
+
// Verify with `git ls-remote origin <branch>` and, if the branch landed,
|
|
1408
|
+
// make one final canonical-attach attempt before burning a phantom retry.
|
|
1409
|
+
// This recovers work that would otherwise be lost — both the worktree
|
|
1410
|
+
// (cleanup would reap it) and the orphan PR link (no WI ever points at it).
|
|
1411
|
+
if (isPhantom) {
|
|
1412
|
+
if (await _attemptPhantomPrRecovery(meta, agentId, resultSummary, config)) {
|
|
1413
|
+
return null;
|
|
1414
|
+
}
|
|
1415
|
+
}
|
|
1416
|
+
|
|
1257
1417
|
// Hard-fail messaging: if the runtime never emitted its terminating result
|
|
1258
1418
|
// event, the failure is a phantom completion (runtime CLI crashed), not the
|
|
1259
1419
|
// agent silently skipping work. Surface that truthfully so operators don't
|
|
@@ -1268,9 +1428,9 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
|
|
|
1268
1428
|
} else {
|
|
1269
1429
|
reason = `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
|
|
1270
1430
|
}
|
|
1271
|
-
markMissingPrAttachment(meta, agentId, reason, resultSummary, severity);
|
|
1431
|
+
markMissingPrAttachment(meta, agentId, reason, resultSummary, severity, { phantom: isPhantom });
|
|
1272
1432
|
log(severity === 'hard' ? 'warn' : 'info', reason);
|
|
1273
|
-
return { reason, itemId: meta.item.id, severity };
|
|
1433
|
+
return { reason, itemId: meta.item.id, severity, phantom: isPhantom };
|
|
1274
1434
|
}
|
|
1275
1435
|
|
|
1276
1436
|
// ─── Post-Completion Hooks ──────────────────────────────────────────────────
|
|
@@ -1592,7 +1752,7 @@ async function detectPrFixBranchChange(meta, config) {
|
|
|
1592
1752
|
return { changed: null, beforeHead, afterHead: remoteHead || '', reason: 'unable to prove branch head after fix' };
|
|
1593
1753
|
}
|
|
1594
1754
|
|
|
1595
|
-
function recordPrNoOpFixAttempt(target, cause, source, dispatchItem, branchChange, config) {
|
|
1755
|
+
function recordPrNoOpFixAttempt(target, cause, source, dispatchItem, branchChange, config, noopReason) {
|
|
1596
1756
|
const evidenceFingerprint = shared.prFixEvidenceFingerprint(target, cause);
|
|
1597
1757
|
const prior = shared.getPrNoOpFixRecord(target, cause);
|
|
1598
1758
|
const sameEvidence = prior?.evidenceFingerprint === evidenceFingerprint;
|
|
@@ -1623,6 +1783,20 @@ function recordPrNoOpFixAttempt(target, cause, source, dispatchItem, branchChang
|
|
|
1623
1783
|
afterHead: branchChange?.afterHead || '',
|
|
1624
1784
|
};
|
|
1625
1785
|
|
|
1786
|
+
// Record a same-SHA dispatch outcome on the PR record so the eligibility
|
|
1787
|
+
// filter can short-circuit duplicate build-fix dispatches against an
|
|
1788
|
+
// unchanged commit. Reset happens implicitly when headSha advances and the
|
|
1789
|
+
// discovery filter compares lastDispatchHeadSha to the current head.
|
|
1790
|
+
const headSha = getPrFixBaselineHead(target);
|
|
1791
|
+
target.lastDispatchedAt = now;
|
|
1792
|
+
target.lastDispatchOutcome = 'noop';
|
|
1793
|
+
target.lastDispatchHeadSha = headSha;
|
|
1794
|
+
target.lastDispatchReason = String(
|
|
1795
|
+
noopReason
|
|
1796
|
+
|| branchChange?.reason
|
|
1797
|
+
|| 'fix completed without changing the PR branch'
|
|
1798
|
+
).slice(0, 500);
|
|
1799
|
+
|
|
1626
1800
|
if (cause === shared.PR_FIX_CAUSE.HUMAN_FEEDBACK && target.humanFeedback) {
|
|
1627
1801
|
target.humanFeedback.pendingFix = !paused;
|
|
1628
1802
|
if (paused) target.humanFeedback.noOpPaused = true;
|
|
@@ -1639,6 +1813,14 @@ function clearPrNoOpFixAttempt(target, cause) {
|
|
|
1639
1813
|
if (Object.keys(target._noOpFixes).length === 0) delete target._noOpFixes;
|
|
1640
1814
|
if (target._lastNoOpFix?.cause === cause) delete target._lastNoOpFix;
|
|
1641
1815
|
if (target.humanFeedback) delete target.humanFeedback.noOpPaused;
|
|
1816
|
+
// The lastDispatch* trackers exist to prevent duplicate noop dispatches at
|
|
1817
|
+
// the same head; once the agent actually pushed a fix we no longer want them
|
|
1818
|
+
// to suppress a fresh dispatch (the SHA may have moved or the next failure
|
|
1819
|
+
// is genuinely new).
|
|
1820
|
+
delete target.lastDispatchedAt;
|
|
1821
|
+
delete target.lastDispatchOutcome;
|
|
1822
|
+
delete target.lastDispatchHeadSha;
|
|
1823
|
+
delete target.lastDispatchReason;
|
|
1642
1824
|
}
|
|
1643
1825
|
|
|
1644
1826
|
function updatePrAfterFix(pr, project, source, options = {}, legacyDispatchId = '') {
|
|
@@ -1666,7 +1848,7 @@ function updatePrAfterFix(pr, project, source, options = {}, legacyDispatchId =
|
|
|
1666
1848
|
target.minionsReview = next;
|
|
1667
1849
|
};
|
|
1668
1850
|
if (explicitlyChangedBranch && options.branchChange?.changed === false) {
|
|
1669
|
-
const record = recordPrNoOpFixAttempt(target, cause, source, options.dispatchItem, options.branchChange, options.config);
|
|
1851
|
+
const record = recordPrNoOpFixAttempt(target, cause, source, options.dispatchItem, options.branchChange, options.config, options.noopReason);
|
|
1670
1852
|
result = { noOp: true, cause, paused: !!record.paused, count: record.count };
|
|
1671
1853
|
log('warn', `Updated ${pr.id} → recorded no-op ${cause} fix attempt ${record.count}${record.paused ? ' (paused)' : ''}; PR branch was unchanged`);
|
|
1672
1854
|
return prs;
|
|
@@ -1678,7 +1860,7 @@ function updatePrAfterFix(pr, project, source, options = {}, legacyDispatchId =
|
|
|
1678
1860
|
// automation cause handled — a future tick with working detection must
|
|
1679
1861
|
// be free to re-dispatch.
|
|
1680
1862
|
if (explicitlyChangedBranch && options.branchChange?.changed === null) {
|
|
1681
|
-
const record = recordPrNoOpFixAttempt(target, cause, source, options.dispatchItem, options.branchChange, options.config);
|
|
1863
|
+
const record = recordPrNoOpFixAttempt(target, cause, source, options.dispatchItem, options.branchChange, options.config, options.noopReason);
|
|
1682
1864
|
result = { noOp: true, cause, paused: !!record.paused, count: record.count, indeterminate: true };
|
|
1683
1865
|
log('warn', `Updated ${pr.id} → recorded indeterminate ${cause} fix attempt ${record.count}${record.paused ? ' (paused)' : ''}; PR branch advance could not be verified${options.branchChange?.reason ? ` (${options.branchChange.reason})` : ''}`);
|
|
1684
1866
|
return prs;
|
|
@@ -2542,6 +2724,20 @@ function detectNonTerminalResultSummary(_resultSummary, structuredCompletion, co
|
|
|
2542
2724
|
}
|
|
2543
2725
|
|
|
2544
2726
|
function deferNonTerminalCompletion(meta, detection) {
|
|
2727
|
+
return _deferRetryWithCounter(meta, detection, '_retryCount', ENGINE_DEFAULTS.maxRetries, 'nonterminal_completion');
|
|
2728
|
+
}
|
|
2729
|
+
|
|
2730
|
+
// Phantom-completion variant — uses _phantomRetryCount + maxPhantomRetries so
|
|
2731
|
+
// runtime-crash retries don't share a budget with the PR-attachment contract's
|
|
2732
|
+
// retries. Cap is independent (ENGINE_DEFAULTS.maxPhantomRetries) so the two
|
|
2733
|
+
// failure modes can be tuned separately. Failure mode triggered when the
|
|
2734
|
+
// runtime exits cleanly but emits no result event, no structured completion,
|
|
2735
|
+
// and no completion report — see detectNonTerminalResultSummary.
|
|
2736
|
+
function deferPhantomCompletion(meta, detection) {
|
|
2737
|
+
return _deferRetryWithCounter(meta, detection, '_phantomRetryCount', ENGINE_DEFAULTS.maxPhantomRetries, 'phantom_completion');
|
|
2738
|
+
}
|
|
2739
|
+
|
|
2740
|
+
function _deferRetryWithCounter(meta, detection, counterField, maxCount, pendingReason) {
|
|
2545
2741
|
const itemId = meta?.item?.id;
|
|
2546
2742
|
const reason = detection?.reason || 'Nonterminal completion summary';
|
|
2547
2743
|
if (!itemId) return reason;
|
|
@@ -2554,35 +2750,49 @@ function deferNonTerminalCompletion(meta, detection) {
|
|
|
2554
2750
|
if (!Array.isArray(data)) return data;
|
|
2555
2751
|
const w = data.find(i => i.id === itemId);
|
|
2556
2752
|
if (!w) return data;
|
|
2557
|
-
const retries = w
|
|
2558
|
-
if (retries <
|
|
2753
|
+
const retries = w[counterField] || 0;
|
|
2754
|
+
if (retries < maxCount) {
|
|
2559
2755
|
w.status = WI_STATUS.PENDING;
|
|
2560
|
-
w
|
|
2756
|
+
w[counterField] = retries + 1;
|
|
2561
2757
|
w._lastRetryAt = ts();
|
|
2562
2758
|
w._lastRetryReason = reason;
|
|
2563
|
-
w._pendingReason =
|
|
2759
|
+
w._pendingReason = pendingReason;
|
|
2760
|
+
// P-e0b4f7a5 — phantom-retry path stamps _phantomCompletion +
|
|
2761
|
+
// _phantomBranch so cleanup.js can preserve the worktree across the
|
|
2762
|
+
// re-dispatch window. Only set for the phantom counter; nonterminal
|
|
2763
|
+
// retries don't share this protection.
|
|
2764
|
+
if (counterField === '_phantomRetryCount') {
|
|
2765
|
+
w._phantomCompletion = true;
|
|
2766
|
+
if (meta?.branch) w._phantomBranch = meta.branch;
|
|
2767
|
+
}
|
|
2564
2768
|
delete w.completedAt;
|
|
2565
2769
|
delete w.dispatched_at;
|
|
2566
2770
|
delete w.dispatched_to;
|
|
2567
2771
|
delete w.failedAt;
|
|
2568
2772
|
finalStatus = WI_STATUS.PENDING;
|
|
2569
|
-
log('warn', `Work item ${itemId} reported
|
|
2773
|
+
log('warn', `Work item ${itemId} reported ${pendingReason} — retry ${retries + 1}/${maxCount} (${counterField}): ${reason}`);
|
|
2570
2774
|
} else {
|
|
2571
2775
|
w.status = WI_STATUS.FAILED;
|
|
2572
|
-
w.failReason = `${reason} after ${
|
|
2776
|
+
w.failReason = `${reason} after ${maxCount} attempts`;
|
|
2573
2777
|
w.failedAt = ts();
|
|
2574
2778
|
delete w.completedAt;
|
|
2575
2779
|
delete w.dispatched_at;
|
|
2576
2780
|
delete w.dispatched_to;
|
|
2577
2781
|
delete w._pendingReason;
|
|
2782
|
+
// Exhausted phantom retries: clear the in-flight markers so cleanup
|
|
2783
|
+
// can reap the worktree on the next sweep.
|
|
2784
|
+
if (counterField === '_phantomRetryCount') {
|
|
2785
|
+
delete w._phantomCompletion;
|
|
2786
|
+
delete w._phantomBranch;
|
|
2787
|
+
}
|
|
2578
2788
|
finalStatus = WI_STATUS.FAILED;
|
|
2579
|
-
log('warn', `Work item ${itemId} failed — repeated
|
|
2789
|
+
log('warn', `Work item ${itemId} failed — repeated ${pendingReason} after ${maxCount} attempts`);
|
|
2580
2790
|
}
|
|
2581
2791
|
return data;
|
|
2582
2792
|
}, { defaultValue: [], skipWriteIfUnchanged: true });
|
|
2583
2793
|
syncPrdItemStatus(itemId, finalStatus, meta.item?.sourcePlan);
|
|
2584
2794
|
} catch (err) {
|
|
2585
|
-
log('warn',
|
|
2795
|
+
log('warn', `${pendingReason} gate: ${err.message}`);
|
|
2586
2796
|
}
|
|
2587
2797
|
return reason;
|
|
2588
2798
|
}
|
|
@@ -2792,8 +3002,9 @@ function handleDecompositionResult(stdout, meta, config, runtimeName) {
|
|
|
2792
3002
|
return 0;
|
|
2793
3003
|
}
|
|
2794
3004
|
|
|
2795
|
-
async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, config) {
|
|
3005
|
+
async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, config, opts) {
|
|
2796
3006
|
|
|
3007
|
+
const detectPhantom = !!(opts && opts.detectPhantom);
|
|
2797
3008
|
const type = dispatchItem.type;
|
|
2798
3009
|
const meta = dispatchItem.meta;
|
|
2799
3010
|
const isSuccess = code === 0;
|
|
@@ -3033,13 +3244,27 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
3033
3244
|
|
|
3034
3245
|
let completionContractFailure = null;
|
|
3035
3246
|
if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
|
|
3036
|
-
const nonTerminalCompletion = detectNonTerminalResultSummary(completionGateSummary, structuredCompletion, reportCompletion);
|
|
3247
|
+
const nonTerminalCompletion = detectNonTerminalResultSummary(completionGateSummary, structuredCompletion, reportCompletion, { detectPhantom });
|
|
3037
3248
|
if (nonTerminalCompletion) {
|
|
3038
|
-
|
|
3039
|
-
|
|
3040
|
-
|
|
3041
|
-
|
|
3042
|
-
|
|
3249
|
+
const isPhantomDetection = nonTerminalCompletion.phrase === 'phantom-completion';
|
|
3250
|
+
// P-e0b4f7a5 — before deferring a phantom retry, attempt to recover
|
|
3251
|
+
// the agent's work via the ls-remote + canonical-attach probe. If the
|
|
3252
|
+
// agent had pushed its branch (and possibly opened the PR) seconds
|
|
3253
|
+
// before the runtime crashed, link the PR and treat the WI as a
|
|
3254
|
+
// normal successful completion. This preserves work that would
|
|
3255
|
+
// otherwise be lost and avoids burning a phantom retry on something
|
|
3256
|
+
// that already shipped.
|
|
3257
|
+
if (isPhantomDetection && await _attemptPhantomPrRecovery(meta, agentId, resultSummary, config)) {
|
|
3258
|
+
log('info', `Phantom-completion recovered for ${meta.item.id} via ls-remote + PR auto-link — no retry needed`);
|
|
3259
|
+
} else {
|
|
3260
|
+
skipDoneStatus = true;
|
|
3261
|
+
const reason = isPhantomDetection
|
|
3262
|
+
? deferPhantomCompletion(meta, nonTerminalCompletion)
|
|
3263
|
+
: deferNonTerminalCompletion(meta, nonTerminalCompletion);
|
|
3264
|
+
completionContractFailure = { reason, itemId: meta.item.id, nonTerminal: true, processWorkItemFailure: false, phantom: isPhantomDetection };
|
|
3265
|
+
if (!nonCleanReportWritten) {
|
|
3266
|
+
writeNonCleanAgentReport(dispatchItem, agentId, 'partial', structuredCompletion, completionGateSummary, code);
|
|
3267
|
+
}
|
|
3043
3268
|
}
|
|
3044
3269
|
}
|
|
3045
3270
|
}
|
|
@@ -3055,7 +3280,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
3055
3280
|
}
|
|
3056
3281
|
|
|
3057
3282
|
if (effectiveSuccess && meta?.item?.id && !skipDoneStatus && !noopRationale) {
|
|
3058
|
-
completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout);
|
|
3283
|
+
completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout, { detectPhantom });
|
|
3059
3284
|
if (completionContractFailure?.severity === 'hard' || completionContractFailure?.nonTerminal) {
|
|
3060
3285
|
skipDoneStatus = true;
|
|
3061
3286
|
}
|
|
@@ -3208,6 +3433,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
3208
3433
|
dispatchItem,
|
|
3209
3434
|
branchChange: prFixBranchChange,
|
|
3210
3435
|
config,
|
|
3436
|
+
noopReason: noopRationale || meta?._noopReason || '',
|
|
3211
3437
|
});
|
|
3212
3438
|
// (#984) Sync PRD status for PR-linked features: fix work items have a different ID
|
|
3213
3439
|
// than the original PRD feature, so syncPrdItemStatus(fixWiId, ...) finds nothing.
|
|
@@ -3437,6 +3663,10 @@ module.exports = {
|
|
|
3437
3663
|
parseCompletionFieldSummary,
|
|
3438
3664
|
parseCompletionNoop,
|
|
3439
3665
|
detectNonTerminalResultSummary,
|
|
3666
|
+
deferNonTerminalCompletion,
|
|
3667
|
+
deferPhantomCompletion,
|
|
3668
|
+
enforcePrAttachmentContract,
|
|
3669
|
+
markMissingPrAttachment,
|
|
3440
3670
|
parseCompletionReportFile,
|
|
3441
3671
|
persistCompletionReport,
|
|
3442
3672
|
runPostCompletionHooks,
|
package/engine/shared.js
CHANGED
|
@@ -1078,6 +1078,7 @@ const ENGINE_DEFAULTS = {
|
|
|
1078
1078
|
evalMaxIterations: 3, // legacy UI/config field; engine discovery no longer enforces review→fix cycle caps
|
|
1079
1079
|
evalMaxCost: null, // USD ceiling per work item across all eval iterations; null = no limit (gather baseline data first)
|
|
1080
1080
|
maxRetries: 3, // max dispatch retries before marking work item as failed
|
|
1081
|
+
maxPhantomRetries: 3, // max retries for "phantom completion" (runtime crashed before emitting type:"result"); tracked separately from _retryCount so phantom retries don't pollute the normal PR-attachment retry budget. See engine/lifecycle.markMissingPrAttachment + detectNonTerminalResultSummary.
|
|
1081
1082
|
minRetryGapMs: 120000, // 2min — minimum gap between retry dispatches for the same work item; prevents tight retry loops when an idempotent agent (e.g. review bailing out on a duplicate) cannot produce the expected output (#1770)
|
|
1082
1083
|
pipelineApiRetries: 2, // max attempts for pipeline API calls
|
|
1083
1084
|
pipelineApiRetryDelay: 2000, // ms delay between pipeline API retries
|
package/engine/spawn-agent.js
CHANGED
|
@@ -162,6 +162,81 @@ function formatProcessExitSentinel(exitCode, signal) {
|
|
|
162
162
|
return `\n[process-exit] code=${exitCode}${signal ? ` signal=${signal}` : ''}\n`;
|
|
163
163
|
}
|
|
164
164
|
|
|
165
|
+
/**
 * Reports whether `branch` can be embedded in a double-quoted shell argument
 * without changing what the command does.
 *
 * Rejected: control characters, `"` (ends the quoting), `` ` `` and `$`
 * (POSIX command/variable expansion), `\` (escapes), `%` (cmd.exe variable
 * expansion), and a leading `-` (would be parsed by git as an option).
 *
 * @param {string} branch - Candidate branch name
 * @returns {boolean} true when the name is safe to interpolate
 */
function isShellSafeBranchName(branch) {
  const name = String(branch);
  if (name.startsWith('-')) return false;
  return !/[\u0000-\u001f\u007f"`$\\%]/.test(name);
}

/**
 * Pre-push stale-HEAD guard for fix-task dispatches (P-c8f2d5e3).
 *
 * When the engine reuses an existing worktree on a PR branch that was rebased
 * upstream (force-push), the local HEAD can sit behind origin/<branch>. The
 * first push from that worktree silently overwrites the rebased history.
 *
 * This helper runs, inside the worktree:
 *   git fetch origin "<branch>"
 *   git rev-list --count "HEAD..origin/<branch>"
 * When the count is > 0 it throws a clear, actionable error so the caller can
 * abort the dispatch before the agent has a chance to push.
 *
 * The guard never blocks on its own infrastructure problems. Instead of
 * throwing it resolves with `ok: true` and a `skipped` reason:
 *   - 'unsafe-branch'   — the branch name contains characters that could alter
 *                         the shell command; no command is executed at all.
 *   - 'no-upstream'     — origin does not have the ref yet (first push on a
 *                         fresh branch), so there is no rebased tip to overwrite.
 *   - 'fetch-failed'    — any other fetch failure (network/auth/timeout).
 *   - 'rev-list-failed' — origin/<branch> could not be resolved after the fetch.
 *
 * @param {object} args
 * @param {string} args.branch - PR branch name (expected to be already sanitized)
 * @param {string} args.cwd - Worktree path
 * @param {function} [args.exec] - Async exec(cmd, opts) — injectable for tests;
 *   defaults to `execAsync` from ./shared
 * @param {object} [args.gitOpts] - Options passed through to exec (`cwd` is
 *   always overridden with `args.cwd`)
 * @returns {Promise<{ok: true, behindCount: number, skipped?: string}>}
 * @throws {Error} when `branch` or `cwd` is missing
 * @throws {Error & {code: 'STALE_HEAD'}} when local HEAD is behind origin
 */
async function assertStaleHeadOk({ branch, cwd, exec, gitOpts } = {}) {
  if (!branch) throw new Error('assertStaleHeadOk: branch is required');
  if (!cwd) throw new Error('assertStaleHeadOk: cwd is required');

  // The commands below are built as strings (presumably shell-interpreted by
  // exec), so refuse to interpolate anything that could escape the quoting.
  if (!isShellSafeBranchName(branch)) {
    return { ok: true, behindCount: 0, skipped: 'unsafe-branch' };
  }

  const execFn = typeof exec === 'function'
    ? exec
    : require('./shared').execAsync;
  const opts = { ...(gitOpts || {}), cwd };

  // Best-effort fetch. Branch-missing-on-origin is a legitimate state (first
  // push on a freshly-cut feature branch) and must NOT block dispatch.
  try {
    await execFn(`git fetch origin "${branch}"`, opts);
  } catch (err) {
    const msg = String((err && (err.stderr?.toString?.() || err.message)) || '');
    if (/couldn'?t find remote ref|not found in upstream|unknown revision/i.test(msg)) {
      return { ok: true, behindCount: 0, skipped: 'no-upstream' };
    }
    // Other failures (network/auth/timeout) — skip rather than block.
    return { ok: true, behindCount: 0, skipped: 'fetch-failed' };
  }

  let countOut;
  try {
    // Quoted the same way as the fetch above so both commands treat the
    // branch name identically.
    countOut = await execFn(`git rev-list --count "HEAD..origin/${branch}"`, opts);
  } catch {
    // origin/<branch> resolution failed AFTER fetch — treat as no-upstream.
    return { ok: true, behindCount: 0, skipped: 'rev-list-failed' };
  }

  // exec implementations differ: accept a plain string or an object
  // carrying `stdout` (string or Buffer).
  const raw = typeof countOut === 'string'
    ? countOut
    : (countOut?.stdout?.toString?.() ?? String(countOut ?? ''));
  const behindCount = Number.parseInt(String(raw).trim(), 10);
  if (!Number.isFinite(behindCount) || behindCount <= 0) {
    // Unparseable output is reported as 0 commits behind rather than an error.
    return { ok: true, behindCount: Number.isFinite(behindCount) ? behindCount : 0 };
  }

  const err = new Error(
    `PR branch was rebased; local HEAD is stale (${behindCount} commits behind origin). ` +
    `Run \`git pull --rebase origin ${branch}\` first.`
  );
  err.code = 'STALE_HEAD';
  err.behindCount = behindCount;
  err.branch = branch;
  throw err;
}
|
|
239
|
+
|
|
165
240
|
// The orphan reaper recovers an agent's exit code by scanning live-output.log for
|
|
166
241
|
// `[process-exit] code=N`. The previous design wrote the sentinel to stdout, hoping
|
|
167
242
|
// the engine's stdout consumer (engine.js) would copy it into the file — but when
|
|
@@ -456,6 +531,6 @@ function main() {
|
|
|
456
531
|
});
|
|
457
532
|
}
|
|
458
533
|
|
|
459
|
-
module.exports = { parseSpawnArgs, buildSpawnInvocation, normalizeRuntimeExit, shouldInjectAdoTokenEnv, injectAdoTokenEnv, injectAdoTokenEnvForRepoHost, writeProcessExitSentinel, computeAddDirs, createParentPipeForwarder };
|
|
534
|
+
module.exports = { parseSpawnArgs, buildSpawnInvocation, normalizeRuntimeExit, shouldInjectAdoTokenEnv, injectAdoTokenEnv, injectAdoTokenEnvForRepoHost, writeProcessExitSentinel, computeAddDirs, createParentPipeForwarder, assertStaleHeadOk };
|
|
460
535
|
|
|
461
536
|
if (require.main === module) main();
|
package/engine/timeout.js
CHANGED
|
@@ -318,7 +318,13 @@ function checkTimeouts(config) {
|
|
|
318
318
|
|
|
319
319
|
// Run post-completion hooks via shared helper (async — fire and forget in timeout context).
|
|
320
320
|
// Pass the actual exit code so autoRecovery (PR-created-but-failed) still works correctly.
|
|
321
|
-
|
|
321
|
+
// detectPhantom: true mirrors the line 310 detectNonTerminalResultSummary call —
|
|
322
|
+
// when the timeout path completes a dispatch via the [process-exit] sentinel,
|
|
323
|
+
// we have no guarantee the runtime emitted a result event. Propagating
|
|
324
|
+
// detectPhantom downstream lets enforcePrAttachmentContract route phantom
|
|
325
|
+
// hard-fails through the _phantomRetryCount budget instead of bypassing
|
|
326
|
+
// the retry counter entirely (P-d9a3e6f4).
|
|
327
|
+
runPostCompletionHooks(item, item.agent, processExitCode, fullLogForHooks, config, { detectPhantom: true }).catch(e => log('warn', 'post-completion hooks: ' + e.message));
|
|
322
328
|
|
|
323
329
|
if (hasProcess) {
|
|
324
330
|
shared.killImmediate(activeProcesses.get(item.id)?.proc);
|
package/engine.js
CHANGED
|
@@ -28,6 +28,7 @@ const { exec, execAsync, execSilent, runFile, ts, ENGINE_DEFAULTS,
|
|
|
28
28
|
WI_STATUS, DONE_STATUSES, WORK_TYPE, PLAN_STATUS, PRD_ITEM_STATUS, PRD_MATERIALIZABLE, PR_STATUS, DISPATCH_RESULT, AGENT_STATUS,
|
|
29
29
|
FAILURE_CLASS } = shared;
|
|
30
30
|
const { resolveRuntime } = require('./engine/runtimes');
|
|
31
|
+
const { assertStaleHeadOk } = require('./engine/spawn-agent');
|
|
31
32
|
const queries = require('./engine/queries');
|
|
32
33
|
|
|
33
34
|
// ─── Paths ──────────────────────────────────────────────────────────────────
|
|
@@ -1114,6 +1115,41 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1114
1115
|
log('warn', `Agent ${agentId} running ${type} task in main repo (no worktree) for ${id} — changes may land on master directly`);
|
|
1115
1116
|
}
|
|
1116
1117
|
|
|
1118
|
+
// ── Stale-HEAD guard for fix-task pushes (P-c8f2d5e3) ────────────────────
|
|
1119
|
+
// When a PR branch is rebased upstream (force-push), a reused worktree can
|
|
1120
|
+
// sit on local HEAD that's behind origin/<branch>. The first push from that
|
|
1121
|
+
// worktree silently overwrites the rebased history. Fix-task dispatches are
|
|
1122
|
+
// the canonical case: they always target an existing PR branch the engine
|
|
1123
|
+
// already polled. Abort dispatch BEFORE invoking the runtime CLI so the
|
|
1124
|
+
// agent never gets a chance to push over the rebased tip.
|
|
1125
|
+
// Read-only and non-fix dispatches are out of scope — implement tasks cut
|
|
1126
|
+
// their own branch from main, and review/verify don't push.
|
|
1127
|
+
if (type === WORK_TYPE.FIX && branchName && worktreePath && cwd === worktreePath) {
|
|
1128
|
+
try {
|
|
1129
|
+
const guard = await assertStaleHeadOk({
|
|
1130
|
+
branch: branchName,
|
|
1131
|
+
cwd: worktreePath,
|
|
1132
|
+
exec: execAsync,
|
|
1133
|
+
gitOpts: { ..._gitOpts, timeout: 15000 },
|
|
1134
|
+
});
|
|
1135
|
+
if (guard.skipped) {
|
|
1136
|
+
log('info', `Stale-HEAD guard skipped for ${id} (${branchName}): ${guard.skipped}`);
|
|
1137
|
+
}
|
|
1138
|
+
} catch (err) {
|
|
1139
|
+
if (err && err.code === 'STALE_HEAD') {
|
|
1140
|
+
log('error', `Stale-HEAD guard rejected fix dispatch ${id} on ${branchName}: ${err.message}`);
|
|
1141
|
+
_cleanupPromptFiles();
|
|
1142
|
+
completeDispatch(id, DISPATCH_RESULT.ERROR, err.message.slice(0, 300));
|
|
1143
|
+
cleanupTempAgent(agentId);
|
|
1144
|
+
return null;
|
|
1145
|
+
}
|
|
1146
|
+
// Non-STALE_HEAD failures from the guard itself shouldn't block dispatch
|
|
1147
|
+
// (the guard is conservative by design — fetch/network issues fall through
|
|
1148
|
+
// to skipped:'fetch-failed'). Log and continue.
|
|
1149
|
+
log('warn', `Stale-HEAD guard error for ${id} (${branchName}): ${err.message}`);
|
|
1150
|
+
}
|
|
1151
|
+
}
|
|
1152
|
+
|
|
1117
1153
|
// ── Runtime + opts resolution (P-2a6d9c4f) ────────────────────────────────
|
|
1118
1154
|
// Every CLI-specific knob flows through the runtime adapter resolved from
|
|
1119
1155
|
// resolveAgentCli(agent, engine). Engine code MUST NOT branch on
|
|
@@ -2936,6 +2972,20 @@ async function discoverFromPrs(config, project) {
|
|
|
2936
2972
|
const autoFixBuilds = config.engine?.autoFixBuilds ?? ENGINE_DEFAULTS.autoFixBuilds;
|
|
2937
2973
|
if (pollEnabled && autoFixBuilds && pr.status === PR_STATUS.ACTIVE && pr.buildStatus === 'failing'
|
|
2938
2974
|
&& !isPrNoOpFixCauseSuppressed(pr, shared.PR_FIX_CAUSE.BUILD_FAILURE)) {
|
|
2975
|
+
// P-b7e1c4d2: skip when the most recent dispatch already noop'd against
|
|
2976
|
+
// the same head SHA — chronic across PRs #2315–#2323 where every fix
|
|
2977
|
+
// agent rebutted "this is a pre-existing master baseline" but the
|
|
2978
|
+
// cached buildStatus:failing kept re-triggering the loop. The check
|
|
2979
|
+
// clears automatically once a new commit lands (lastDispatchHeadSha
|
|
2980
|
+
// stops matching the current head).
|
|
2981
|
+
const currentHeadSha = String(pr.headSha || pr._adoSourceCommit || pr._adoHeadCommit || '').trim();
|
|
2982
|
+
if (pr.lastDispatchOutcome === 'noop'
|
|
2983
|
+
&& pr.lastDispatchHeadSha
|
|
2984
|
+
&& currentHeadSha
|
|
2985
|
+
&& pr.lastDispatchHeadSha === currentHeadSha) {
|
|
2986
|
+
log('info', `Skipping build-fix for ${pr.id}: last dispatch was noop on the same head ${currentHeadSha.slice(0, 8)} (${(pr.lastDispatchReason || '').slice(0, 120)})`);
|
|
2987
|
+
continue;
|
|
2988
|
+
}
|
|
2939
2989
|
const buildCauseKey = getPrAutomationCauseKey('build', pr);
|
|
2940
2990
|
const key = getPrAutomationDispatchKey(`build-fix-${project?.name || 'default'}-${prDisplayId}`, buildCauseKey);
|
|
2941
2991
|
if (isPrAutomationCauseHandledOrPending(project, pr, buildCauseKey)) continue;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.1871",
|
|
3
|
+
"version": "0.1.1873",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|