npm - @yemi33/minions - Versions diffs - 0.1.1872 → 0.1.1873 - Mend

@yemi33/minions 0.1.1872 → 0.1.1873

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -1,10 +1,13 @@
 # Changelog
-## 0.1.1872 (2026-05-11)
+## 0.1.1873 (2026-05-11)
+### Other
+- Implement: Phantom completion work preservation (P-e0b4f7a5) (#2356)
+## 0.1.1871 (2026-05-11)
 ### Features
--  Stale-HEAD guard on fix-task pushes (P-c8f2d5e3) (#2360)
-- Cached buildStatus invalidation on no-op completion (#2355)
 -  per-agent memory file architecture (P-f1c5a8b6) (#2354)
 -  Implement pre-dispatch acceptance criteria validation gate (P-a2d6b9c7) (#2352)

package/engine/cleanup.js CHANGED Viewed

@@ -79,6 +79,26 @@ function localBranchWorktreeInUse(root, branch) {
   }
 }
+// P-e0b4f7a5 — gather the branches of a project's work items that are
+// currently flagged as phantom-completion retries (`_phantomCompletion ===
+// true` with a non-empty `_phantomBranch`). The worktree cleanup loop uses
+// the returned Set to shield matching worktrees from the 2-hour age sweep
+// while the retry is still pending. Always returns a Set of strings; if the
+// work-item file is not an array or anything throws, the Set holds whatever
+// was collected up to that point (usually nothing).
+function collectPhantomBranchesForProject(project) {
+  const protectedBranches = new Set();
+  try {
+    const workItems = safeJson(projectWorkItemsPath(project)) || [];
+    if (Array.isArray(workItems)) {
+      workItems
+        .filter(item => item && item._phantomCompletion === true && item._phantomBranch)
+        .forEach(item => protectedBranches.add(String(item._phantomBranch)));
+    }
+  } catch { /* best-effort — never let cleanup crash on a missing/corrupt WI file */ }
+  return protectedBranches;
+}
 function cleanupMergedPrLocalBranch(root, project, pr) {
   const branch = normalizeLocalBranchName(pr?.branch);
   const result = { deleted: false, forced: false, skipped: null };
@@ -451,6 +471,11 @@ async function runCleanup(config, verbose = false) {
       const wtEntries = []; // { dir, wtPath, mtime, shouldClean, isProtected }
       const dispatch = getDispatch();
       const activeDispatchIds = new Set((dispatch.active || []).map(d => d.id));
+      // P-e0b4f7a5 — branches whose work item is mid-phantom-retry. Their
+      // worktrees must survive the age/cap sweep until the retry completes
+      // (or exhausts its budget) so the agent's already-pushed branch ref
+      // isn't destroyed alongside the worktree.
+      const phantomBranches = collectPhantomBranchesForProject(project);
       // Probe `git branch --show-current` for every worktree in chunks of 5.
       // Sequential probing was the dominant cost in the cleanup phase
@@ -492,6 +517,20 @@ async function runCleanup(config, verbose = false) {
         });
         if (isReferenced) isProtected = true;
+        // P-e0b4f7a5 — protect worktrees whose branch matches a work item in
+        // the phantom-completion retry state. The dispatch may have already
+        // moved to dispatch.completed (so isReferenced is false) but the
+        // retry will re-dispatch on the same branch shortly.
+        if (!isProtected && phantomBranches.size > 0) {
+          for (const branch of phantomBranches) {
+            if (worktreeMatchesBranch(dirLower, branch, actualBranch)) {
+              isProtected = true;
+              if (verbose) console.log(`  Skipping worktree ${dir}: phantom-completion retry pending`);
+              break;
+            }
+          }
+        }
         // Also clean worktrees older than 2 hours with no active dispatch referencing them
         let mtime = Date.now();
         if (!shouldClean) {
@@ -499,7 +538,7 @@ async function runCleanup(config, verbose = false) {
             const stat = fs.statSync(wtPath);
             mtime = stat.mtimeMs;
             const ageMs = Date.now() - mtime;
-            if (ageMs > 7200000 && !isReferenced) { // 2 hours
+            if (ageMs > 7200000 && !isReferenced && !isProtected) { // 2 hours — P-e0b4f7a5: phantom-protected worktrees survive the age sweep too
               shouldClean = true;
             }
           } catch { /* optional */ }
@@ -1080,4 +1119,5 @@ module.exports = {
   worktreeMatchesBranch,     // exported for testing
   getWorktreeBranch,         // exported for lifecycle cleanup
   cleanupMergedPrLocalBranch, // exported for lifecycle cleanup and testing
+  collectPhantomBranchesForProject, // P-e0b4f7a5 — exported for testing
 };

package/engine/lifecycle.js CHANGED Viewed

@@ -543,6 +543,11 @@ function updateWorkItemStatus(meta, status, reason) {
         delete target.failReason;
         delete target.failedAt;
         delete target._retryCount;
+        // P-e0b4f7a5 — successful completion (including a phantom-retry
+        // succeeding) clears the phantom markers so cleanup can reap the
+        // worktree on the next sweep.
+        delete target._phantomCompletion;
+        delete target._phantomBranch;
         target.completedAt = ts();
         // Restore agent info from dispatch metadata (cleared on retry reset)
         if (meta._agentId && !target.dispatched_to) target.dispatched_to = meta._agentId;
@@ -1087,6 +1092,90 @@ async function findOpenPrForBranch(meta, config) {
   return null;
 }
+// P-e0b4f7a5 — quick "did the agent push the branch before the runtime
+// crashed?" probe that gates the phantom-recovery PR auto-link. Runs
+// `git ls-remote --heads origin <branch>` and resolves true only when an
+// output line names exactly refs/heads/<branch>. ls-remote matches patterns
+// against the tail of each ref, so probing `fix` also lists
+// refs/heads/feature/fix; that must not count as a hit. Resolves false when
+// meta has no branch or the git call fails.
+async function _phantomBranchExistsOnRemote(meta, config) {
+  if (!meta?.branch) return false;
+  const branch = String(meta.branch);
+  const wantedRef = branch.startsWith('refs/heads/') ? branch : `refs/heads/${branch}`;
+  // Prefer the project root as cwd, then meta.cwd, then the process cwd.
+  const cwd = resolvePrFallbackProject(meta, config)?.localPath || meta?.cwd || process.cwd();
+  try {
+    const out = await runFileCapture('git', ['ls-remote', '--heads', 'origin', branch], { cwd, timeout: 15000 });
+    // Output lines are "<sha>\t<ref>"; compare the ref column verbatim.
+    return String(out || '').split(/\r?\n/).some(line => line.split('\t')[1] === wantedRef);
+  } catch (err) {
+    log('debug', `Phantom ls-remote probe failed for ${meta.branch}: ${err.message}`);
+    return false;
+  }
+}
+// P-e0b4f7a5 — canonical-attach upsert shared by enforcePrAttachmentContract
+// and the phantom recovery path. Builds the PR tracking record for `found`
+// ({ project, prNumber, url }), upserts it, then re-reads the attachment
+// state. Returns a hard, stateError contract-failure object only when that
+// re-read throws; every other outcome returns null.
+function _attachFoundPrToWi(found, meta, agentId, resultSummary, config) {
+  const { project, prNumber, url } = found;
+  const prRecord = {
+    id: shared.getCanonicalPrId(project, prNumber, url),
+    prNumber,
+    title: meta.item?.title || `PR #${prNumber}`,
+    agent: agentId,
+    branch: meta.branch || '',
+    reviewStatus: 'pending',
+    status: PR_STATUS.ACTIVE,
+    created: ts(),
+    url,
+    prdItems: [meta.item.id],
+    sourcePlan: meta.item?.sourcePlan || '',
+    itemType: meta.item?.itemType || '',
+  };
+  const upsertScope = { project, itemId: meta.item.id };
+  shared.upsertPullRequestRecord(shared.projectPrPath(project), prRecord, upsertScope);
+  try {
+    // NOTE(review): the boolean is not acted on — a false result also yields null.
+    hasCanonicalPrAttachment(meta.item.id, config);
+  } catch (err) {
+    const reason = `${meta.item.id} auto-linked a PR but PR attachment verification could not read PR tracking state: ${err.message}`;
+    markPrAttachmentVerificationError(meta, agentId, reason, resultSummary);
+    log('warn', reason);
+    return { reason, itemId: meta.item.id, severity: 'hard', stateError: true };
+  }
+  return null;
+}
+// P-e0b4f7a5 — phantom-completion recovery. A runtime that died before
+// emitting its terminating result event may still have pushed the branch
+// (and opened the PR) moments earlier. Confirm the branch is on the remote
+// via _phantomBranchExistsOnRemote, look the PR up with findOpenPrForBranch,
+// and attach it through _attachFoundPrToWi. Resolves true only when a PR was
+// found and the attach step returned null; resolves false for a missing
+// branch/item id, a branch absent from the remote, no open PR, or an attach
+// verification state error (already surfaced inside _attachFoundPrToWi).
+async function _attemptPhantomPrRecovery(meta, agentId, resultSummary, config) {
+  // Both the branch and the work-item id are required to probe and to link.
+  if (!(meta?.branch && meta?.item?.id)) return false;
+  // No branch on the remote means no PR can exist: bail out without logging.
+  if (!(await _phantomBranchExistsOnRemote(meta, config))) return false;
+  const openPr = await findOpenPrForBranch(meta, config);
+  if (openPr) {
+    const linkFailure = _attachFoundPrToWi(openPr, meta, agentId, resultSummary, config);
+    const prId = shared.getCanonicalPrId(openPr.project, openPr.prNumber, openPr.url);
+    log('info', `Phantom-completion recovery: auto-linked existing PR ${prId} on branch ${meta.branch} for ${meta.item.id} (runtime crashed but agent had pushed the PR)`);
+    // A non-null attach result is a state error: report "not recovered" so
+    // the caller continues down its normal failure path.
+    return linkFailure === null;
+  }
+  log('info', `Phantom-completion: branch ${meta.branch} exists on remote for ${meta.item.id} but no open PR found — routing through phantom retry budget`);
+  return false;
+}
 // Lightweight probe for "did the agent's output contain ANY PR URL?". Used by
 // the PR-attachment contract to distinguish silent-failure (no URL anywhere)
 // from auto-link-miss (URL present but engine couldn't canonically attach it).
@@ -1113,10 +1202,79 @@ function _outputHasRuntimeResultEvent(output) {
   return /"type":\s*"result"/.test(output);
 }
-function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity) {
+function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity, opts) {
   const noPrWiPath = resolveWorkItemPath(meta);
   const isHard = severity !== 'soft';
+  const isPhantom = !!(opts && opts.phantom);
   let syncFailedToPrd = false;
+  // Phantom branch: a runtime crash that hard-fails for "no PR attached" should
+  // not bypass the retry budget — the agent never got a chance to do the work.
+  // Track these separately on `_phantomRetryCount` so they don't pollute the
+  // PR-attachment retry counter (`_retryCount`). Cap at maxPhantomRetries; only
+  // hard-fail once the phantom budget is exhausted.
+  let phantomRetryDeferred = false;
+  let phantomRetryExhausted = false;
+  let phantomRetryCount = 0;
+  if (isHard && isPhantom && noPrWiPath) {
+    mutateJsonFileLocked(noPrWiPath, data => {
+      if (!Array.isArray(data)) return data;
+      const w = data.find(i => i.id === meta.item.id);
+      if (!w) return data;
+      const phantomRetries = w._phantomRetryCount || 0;
+      if (phantomRetries < ENGINE_DEFAULTS.maxPhantomRetries) {
+        w.status = WI_STATUS.PENDING;
+        w._phantomRetryCount = phantomRetries + 1;
+        w._lastRetryAt = ts();
+        w._lastRetryReason = reason;
+        w._pendingReason = 'phantom_completion';
+        // P-e0b4f7a5 — _phantomCompletion + _phantomBranch let cleanup.js
+        // protect the worktree of an in-flight phantom retry. Without these
+        // markers the 2-hour age sweep can wipe the worktree (and the agent's
+        // already-pushed branch reference) between phantom detection and
+        // re-dispatch.
+        w._phantomCompletion = true;
+        if (meta.branch) w._phantomBranch = meta.branch;
+        delete w.completedAt;
+        delete w.dispatched_at;
+        delete w.dispatched_to;
+        delete w.failReason;
+        delete w.failedAt;
+        delete w._missingPrAttachment;
+        phantomRetryDeferred = true;
+        phantomRetryCount = phantomRetries + 1;
+        log('warn', `Work item ${meta.item.id} hit phantom-completion path — retry ${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries} (runtime likely crashed before emitting result event)`);
+      } else {
+        phantomRetryExhausted = true;
+        phantomRetryCount = phantomRetries;
+      }
+      return data;
+    }, { skipWriteIfUnchanged: true });
+    if (phantomRetryDeferred) {
+      // Soft inbox note: the runtime crashed but we're retrying; surface the
+      // event without flagging the WI as silent failure.
+      shared.writeToInbox('engine', `phantom-completion-retry-${meta.item.id}`,
+        `# Phantom completion retry for ${meta.item.id}\n\n` +
+        `**Agent:** ${agentId}\n` +
+        `**Work item:** \`${meta.item.id}\` — ${meta.item.title || ''}\n` +
+        `**Type:** ${meta.item.type || 'unknown'}\n` +
+        `**Branch:** ${meta.branch || '(none)'}\n` +
+        `**Phantom retry:** ${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries}\n\n` +
+        `${reason}\n` +
+        (resultSummary ? `\n## Agent summary\n${resultSummary}\n` : ''),
+        null,
+        { sourceItem: meta.item.id, reason: 'phantom-completion-retry' });
+      // Sync PRD back to pending so dependent flow doesn't see it as failed.
+      if (meta.item?.sourcePlan) {
+        try { syncPrdItemStatus(meta.item.id, WI_STATUS.PENDING, meta.item.sourcePlan); } catch (e) { log('warn', 'phantom retry PRD sync: ' + e.message); }
+      }
+      return;
+    }
+    if (phantomRetryExhausted) {
+      // Fall through to the regular hard-fail path with augmented reason so
+      // operators see "phantom retries exhausted" instead of the generic msg.
+      reason = `${reason} — phantom retries exhausted (${phantomRetryCount}/${ENGINE_DEFAULTS.maxPhantomRetries})`;
+    }
+  }
   if (noPrWiPath) {
     mutateJsonFileLocked(noPrWiPath, data => {
       if (!Array.isArray(data)) return data;
@@ -1132,6 +1290,11 @@ function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity)
         delete w.completedAt;
         delete w._noPr;
         delete w._noPrReason;
+        // P-e0b4f7a5 — terminal hard-fail (genuine missing PR or phantom
+        // retries exhausted) clears the in-flight phantom markers so cleanup
+        // can finally reap the worktree.
+        delete w._phantomCompletion;
+        delete w._phantomBranch;
       } else {
         // Soft: don't change status or failReason — the agent did the work,
         // we just couldn't auto-attach the PR. Surface a flag for the dashboard
@@ -1208,7 +1371,8 @@ function markPrAttachmentVerificationError(meta, agentId, reason, resultSummary)
     { sourceItem: meta.item.id, reason: 'pr-attachment-state-error' });
 }
-async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output) {
+async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output, opts) {
+  const detectPhantom = !!(opts && opts.detectPhantom);
   if (!isPrAttachmentRequired(type, meta?.item, meta)) return null;
   try {
     if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
@@ -1221,39 +1385,35 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
   const found = await findOpenPrForBranch(meta, config);
   if (found) {
-    const entry = {
-      id: shared.getCanonicalPrId(found.project, found.prNumber, found.url),
-      prNumber: found.prNumber,
-      title: meta.item?.title || `PR #${found.prNumber}`,
-      agent: agentId,
-      branch: meta.branch || '',
-      reviewStatus: 'pending',
-      status: PR_STATUS.ACTIVE,
-      created: ts(),
-      url: found.url,
-      prdItems: [meta.item.id],
-      sourcePlan: meta.item?.sourcePlan || '',
-      itemType: meta.item?.itemType || '',
-    };
-    shared.upsertPullRequestRecord(shared.projectPrPath(found.project), entry, {
-      project: found.project,
-      itemId: meta.item.id,
-    });
-    log('info', `Auto-linked existing PR ${entry.id} on branch ${meta.branch} for ${meta.item.id}`);
-    try {
-      if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
-    } catch (err) {
-      const reason = `${meta.item.id} auto-linked a PR but PR attachment verification could not read PR tracking state: ${err.message}`;
-      markPrAttachmentVerificationError(meta, agentId, reason, resultSummary);
-      log('warn', reason);
-      return { reason, itemId: meta.item.id, severity: 'hard', stateError: true };
-    }
+    const attachResult = _attachFoundPrToWi(found, meta, agentId, resultSummary, config);
+    log('info', `Auto-linked existing PR ${shared.getCanonicalPrId(found.project, found.prNumber, found.url)} on branch ${meta.branch} for ${meta.item.id}`);
+    if (attachResult === null) return null;
+    return attachResult;
   }
   // Distinguish "agent never claimed a PR" (hard — silent failure the contract
   // was designed to catch) from "agent claimed a PR but engine couldn't attach
   // it canonically" (soft — verification gap, not a failure).
   const severity = _outputContainsPrUrl(output) ? 'soft' : 'hard';
+  // Phantom completion = hard severity + opt-in detectPhantom + no terminating
+  // result event in stream. The runtime CLI crashed mid-conversation; the
+  // agent never got a chance to open a PR. Hard-failing here would bypass the
+  // retry budget for a runtime bug. Surface phantom: true to
+  // markMissingPrAttachment so it routes through the _phantomRetryCount path.
+  const isPhantom = severity === 'hard' && detectPhantom && !_outputHasRuntimeResultEvent(output);
+  // P-e0b4f7a5 — phantom-completion recovery: an agent may have pushed its
+  // branch (and even opened the PR) seconds before the runtime crashed.
+  // Verify with `git ls-remote origin <branch>` and, if the branch landed,
+  // make one final canonical-attach attempt before burning a phantom retry.
+  // This recovers work that would otherwise be lost — both the worktree
+  // (cleanup would reap it) and the orphan PR link (no WI ever points at it).
+  if (isPhantom) {
+    if (await _attemptPhantomPrRecovery(meta, agentId, resultSummary, config)) {
+      return null;
+    }
+  }
   // Hard-fail messaging: if the runtime never emitted its terminating result
   // event, the failure is a phantom completion (runtime CLI crashed), not the
   // agent silently skipping work. Surface that truthfully so operators don't
@@ -1268,9 +1428,9 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
   } else {
     reason = `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
   }
-  markMissingPrAttachment(meta, agentId, reason, resultSummary, severity);
+  markMissingPrAttachment(meta, agentId, reason, resultSummary, severity, { phantom: isPhantom });
   log(severity === 'hard' ? 'warn' : 'info', reason);
-  return { reason, itemId: meta.item.id, severity };
+  return { reason, itemId: meta.item.id, severity, phantom: isPhantom };
 }
 // ─── Post-Completion Hooks ──────────────────────────────────────────────────
@@ -2564,6 +2724,20 @@ function detectNonTerminalResultSummary(_resultSummary, structuredCompletion, co
 }
 function deferNonTerminalCompletion(meta, detection) {
+  // Nonterminal summaries are retried against _retryCount, capped at maxRetries.
+  const { maxRetries } = ENGINE_DEFAULTS;
+  return _deferRetryWithCounter(meta, detection, '_retryCount', maxRetries, 'nonterminal_completion');
+}
+// Phantom-completion counterpart for runs that detectNonTerminalResultSummary
+// flags as phantom. Counts on _phantomRetryCount, capped by
+// ENGINE_DEFAULTS.maxPhantomRetries, so its budget is separate from _retryCount.
+function deferPhantomCompletion(meta, detection) {
+  const { maxPhantomRetries } = ENGINE_DEFAULTS;
+  return _deferRetryWithCounter(meta, detection, '_phantomRetryCount', maxPhantomRetries, 'phantom_completion');
+}
+function _deferRetryWithCounter(meta, detection, counterField, maxCount, pendingReason) {
   const itemId = meta?.item?.id;
   const reason = detection?.reason || 'Nonterminal completion summary';
   if (!itemId) return reason;
@@ -2576,35 +2750,49 @@ function deferNonTerminalCompletion(meta, detection) {
       if (!Array.isArray(data)) return data;
       const w = data.find(i => i.id === itemId);
       if (!w) return data;
-      const retries = w._retryCount || 0;
-      if (retries < ENGINE_DEFAULTS.maxRetries) {
+      const retries = w[counterField] || 0;
+      if (retries < maxCount) {
         w.status = WI_STATUS.PENDING;
-        w._retryCount = retries + 1;
+        w[counterField] = retries + 1;
         w._lastRetryAt = ts();
         w._lastRetryReason = reason;
-        w._pendingReason = 'nonterminal_completion';
+        w._pendingReason = pendingReason;
+        // P-e0b4f7a5 — phantom-retry path stamps _phantomCompletion +
+        // _phantomBranch so cleanup.js can preserve the worktree across the
+        // re-dispatch window. Only set for the phantom counter; nonterminal
+        // retries don't share this protection.
+        if (counterField === '_phantomRetryCount') {
+          w._phantomCompletion = true;
+          if (meta?.branch) w._phantomBranch = meta.branch;
+        }
         delete w.completedAt;
         delete w.dispatched_at;
         delete w.dispatched_to;
         delete w.failedAt;
         finalStatus = WI_STATUS.PENDING;
-        log('warn', `Work item ${itemId} reported nonterminal success — retry ${retries + 1}/${ENGINE_DEFAULTS.maxRetries}: ${reason}`);
+        log('warn', `Work item ${itemId} reported ${pendingReason} — retry ${retries + 1}/${maxCount} (${counterField}): ${reason}`);
       } else {
         w.status = WI_STATUS.FAILED;
-        w.failReason = `${reason} after ${ENGINE_DEFAULTS.maxRetries} attempts`;
+        w.failReason = `${reason} after ${maxCount} attempts`;
         w.failedAt = ts();
         delete w.completedAt;
         delete w.dispatched_at;
         delete w.dispatched_to;
         delete w._pendingReason;
+        // Exhausted phantom retries: clear the in-flight markers so cleanup
+        // can reap the worktree on the next sweep.
+        if (counterField === '_phantomRetryCount') {
+          delete w._phantomCompletion;
+          delete w._phantomBranch;
+        }
         finalStatus = WI_STATUS.FAILED;
-        log('warn', `Work item ${itemId} failed — repeated nonterminal completion summaries after ${ENGINE_DEFAULTS.maxRetries} attempts`);
+        log('warn', `Work item ${itemId} failed — repeated ${pendingReason} after ${maxCount} attempts`);
       }
       return data;
     }, { defaultValue: [], skipWriteIfUnchanged: true });
     syncPrdItemStatus(itemId, finalStatus, meta.item?.sourcePlan);
   } catch (err) {
-    log('warn', `nonterminal completion gate: ${err.message}`);
+    log('warn', `${pendingReason} gate: ${err.message}`);
   }
   return reason;
 }
@@ -2814,8 +3002,9 @@ function handleDecompositionResult(stdout, meta, config, runtimeName) {
   return 0;
 }
-async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, config) {
+async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, config, opts) {
+  const detectPhantom = !!(opts && opts.detectPhantom);
   const type = dispatchItem.type;
   const meta = dispatchItem.meta;
   const isSuccess = code === 0;
@@ -3055,13 +3244,27 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
   let completionContractFailure = null;
   if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
-    const nonTerminalCompletion = detectNonTerminalResultSummary(completionGateSummary, structuredCompletion, reportCompletion);
+    const nonTerminalCompletion = detectNonTerminalResultSummary(completionGateSummary, structuredCompletion, reportCompletion, { detectPhantom });
     if (nonTerminalCompletion) {
-      skipDoneStatus = true;
-      const reason = deferNonTerminalCompletion(meta, nonTerminalCompletion);
-      completionContractFailure = { reason, itemId: meta.item.id, nonTerminal: true, processWorkItemFailure: false };
-      if (!nonCleanReportWritten) {
-        writeNonCleanAgentReport(dispatchItem, agentId, 'partial', structuredCompletion, completionGateSummary, code);
+      const isPhantomDetection = nonTerminalCompletion.phrase === 'phantom-completion';
+      // P-e0b4f7a5 — before deferring a phantom retry, attempt to recover
+      // the agent's work via the ls-remote + canonical-attach probe. If the
+      // agent had pushed its branch (and possibly opened the PR) seconds
+      // before the runtime crashed, link the PR and treat the WI as a
+      // normal successful completion. This preserves work that would
+      // otherwise be lost and avoids burning a phantom retry on something
+      // that already shipped.
+      if (isPhantomDetection && await _attemptPhantomPrRecovery(meta, agentId, resultSummary, config)) {
+        log('info', `Phantom-completion recovered for ${meta.item.id} via ls-remote + PR auto-link — no retry needed`);
+      } else {
+        skipDoneStatus = true;
+        const reason = isPhantomDetection
+          ? deferPhantomCompletion(meta, nonTerminalCompletion)
+          : deferNonTerminalCompletion(meta, nonTerminalCompletion);
+        completionContractFailure = { reason, itemId: meta.item.id, nonTerminal: true, processWorkItemFailure: false, phantom: isPhantomDetection };
+        if (!nonCleanReportWritten) {
+          writeNonCleanAgentReport(dispatchItem, agentId, 'partial', structuredCompletion, completionGateSummary, code);
+        }
       }
     }
   }
@@ -3077,7 +3280,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
   }
   if (effectiveSuccess && meta?.item?.id && !skipDoneStatus && !noopRationale) {
-    completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout);
+    completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout, { detectPhantom });
     if (completionContractFailure?.severity === 'hard' || completionContractFailure?.nonTerminal) {
       skipDoneStatus = true;
     }
@@ -3460,6 +3663,10 @@ module.exports = {
   parseCompletionFieldSummary,
   parseCompletionNoop,
   detectNonTerminalResultSummary,
+  deferNonTerminalCompletion,
+  deferPhantomCompletion,
+  enforcePrAttachmentContract,
+  markMissingPrAttachment,
   parseCompletionReportFile,
   persistCompletionReport,
   runPostCompletionHooks,

package/engine/shared.js CHANGED Viewed

@@ -1078,6 +1078,7 @@ const ENGINE_DEFAULTS = {
   evalMaxIterations: 3, // legacy UI/config field; engine discovery no longer enforces review→fix cycle caps
   evalMaxCost: null, // USD ceiling per work item across all eval iterations; null = no limit (gather baseline data first)
   maxRetries: 3, // max dispatch retries before marking work item as failed
+  maxPhantomRetries: 3, // max retries for "phantom completion" (runtime crashed before emitting type:"result"); tracked separately from _retryCount so phantom retries don't pollute the normal PR-attachment retry budget. See engine/lifecycle.markMissingPrAttachment + detectNonTerminalResultSummary.
   minRetryGapMs: 120000, // 2min — minimum gap between retry dispatches for the same work item; prevents tight retry loops when an idempotent agent (e.g. review bailing out on a duplicate) cannot produce the expected output (#1770)
   pipelineApiRetries: 2, // max attempts for pipeline API calls
   pipelineApiRetryDelay: 2000, // ms delay between pipeline API retries

package/engine/timeout.js CHANGED Viewed

@@ -318,7 +318,13 @@ function checkTimeouts(config) {
     // Run post-completion hooks via shared helper (async — fire and forget in timeout context).
     // Pass the actual exit code so autoRecovery (PR-created-but-failed) still works correctly.
-    runPostCompletionHooks(item, item.agent, processExitCode, fullLogForHooks, config).catch(e => log('warn', 'post-completion hooks: ' + e.message));
+    // detectPhantom: true mirrors the line 310 detectNonTerminalResultSummary call —
+    // when the timeout path completes a dispatch via the [process-exit] sentinel,
+    // we have no guarantee the runtime emitted a result event. Propagating
+    // detectPhantom downstream lets enforcePrAttachmentContract route phantom
+    // hard-fails through the _phantomRetryCount budget instead of bypassing
+    // the retry counter entirely (P-d9a3e6f4).
+    runPostCompletionHooks(item, item.agent, processExitCode, fullLogForHooks, config, { detectPhantom: true }).catch(e => log('warn', 'post-completion hooks: ' + e.message));
     if (hasProcess) {
       shared.killImmediate(activeProcesses.get(item.id)?.proc);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yemi33/minions",
-  "version": "0.1.1872",
+  "version": "0.1.1873",
   "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
   "bin": {
     "minions": "bin/minions.js"