@yemi33/minions 0.1.1649 → 0.1.1651

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ const { safeRead, safeJson, safeWrite, mutateJsonFileLocked, mutateWorkItems, ex
11
11
  log, ts, dateStamp, WI_STATUS, DONE_STATUSES, PLAN_TERMINAL_STATUSES, WORK_TYPE, PLAN_STATUS, PRD_ITEM_STATUS, PR_STATUS, DISPATCH_RESULT,
12
12
  ENGINE_DEFAULTS, DEFAULT_AGENT_METRICS, FAILURE_CLASS } = shared;
13
13
  const { trackEngineUsage } = require('./llm');
14
+ const { resolveRuntime } = require('./runtimes');
14
15
  const queries = require('./queries');
15
16
  const { isBranchActive } = require('./cooldown');
16
17
  const { worktreeDirMatchesBranch } = require('./cleanup');
@@ -980,36 +981,72 @@ async function findOpenPrForBranch(meta, config) {
980
981
  return null;
981
982
  }
982
983
 
983
- function markMissingPrAttachment(meta, agentId, reason, resultSummary) {
984
/**
 * Yes/no probe: does the agent's raw output mention any pull-request URL?
 *
 * The PR-attachment contract uses the answer to tell a silent failure (no URL
 * anywhere) apart from an auto-link miss (a URL is present but the engine could
 * not attach it canonically). Keep the pattern roughly in sync with the gated
 * detection in syncPrsFromOutput; only a boolean is needed, so nothing is captured.
 *
 * @param {*} output - Raw agent output; anything that is not a non-empty string yields false.
 * @returns {boolean} True when a GitHub `/pull/N` or Azure DevOps `pullrequest/N` URL is found.
 */
function _outputContainsPrUrl(output) {
  const isNonEmptyText = typeof output === 'string' && output.length > 0;
  return isNonEmptyText
    ? /https?:\/\/(?:github\.com\/[^\s"'\\)\]]+\/[^\s"'\\)\]]+\/pull\/\d+|(?:dev\.azure\.com|[^/\s"'\\)\]]+\.visualstudio\.com)[^\s"'\\)\]]*?pullrequest\/\d+)/i.test(output)
    : false;
}
994
+
995
/**
 * Record that a PR-producing work item finished without a canonically attached PR
 * and notify the engine inbox.
 *
 * Hard severity (anything other than the literal 'soft') moves the work item to
 * NEEDS_REVIEW, stores the reason as its failure reason and clears completion /
 * no-PR markers. Soft severity leaves status and failReason untouched and only
 * sets `_unverifiedPrAttachment` plus `_lastReviewReason`, so the dashboard can
 * show a "verify" badge.
 *
 * @param {object} meta - Dispatch metadata; `meta.item` and `meta.branch` are read.
 * @param {string} agentId - Agent that ran the work item.
 * @param {string} reason - Human-readable explanation stored on the item and in the inbox note.
 * @param {string} [resultSummary] - Agent summary appended to the inbox note when truthy.
 * @param {string} [severity] - 'soft' for an unverified auto-link; any other value is hard.
 */
function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity) {
  const workItemsPath = resolveWorkItemPath(meta);
  const soft = severity === 'soft';

  if (workItemsPath) {
    mutateJsonFileLocked(workItemsPath, (items) => {
      if (!Array.isArray(items)) return items;
      const target = items.find((entry) => entry.id === meta.item.id);
      if (!target) return items;

      if (soft) {
        // The agent did the work; only the automatic attachment is unverified.
        target._unverifiedPrAttachment = true;
        target._lastReviewReason = reason;
        return items;
      }

      target.status = WI_STATUS.NEEDS_REVIEW;
      target._missingPrAttachment = true;
      target.failReason = reason;
      target._lastReviewReason = reason;
      delete target.completedAt;
      delete target._noPr;
      delete target._noPrReason;
      return items;
    }, { skipWriteIfUnchanged: true });
  }

  // Both inbox notes share the same identifying header and optional summary tail.
  const itemId = meta.item.id;
  const header =
    `**Agent:** ${agentId}\n` +
    `**Work item:** \`${itemId}\` — ${meta.item.title || ''}\n` +
    `**Type:** ${meta.item.type || 'unknown'}\n` +
    `**Branch:** ${meta.branch || '(none)'}\n\n`;
  const summarySection = resultSummary ? `\n## Agent summary\n${resultSummary}\n` : '';

  if (soft) {
    shared.writeToInbox('engine', `pr-auto-link-unverified-${itemId}`,
      `# PR auto-link unverified for ${itemId}\n\n` +
      header +
      `${reason}\n\n` +
      `The agent's output mentioned a PR URL but the engine couldn't canonically attach it ` +
      `(URL detection regex miss, branch lookup race, untrusted tool_use signature, etc.). ` +
      `The work likely succeeded — verify against the project's PR list.\n` +
      summarySection,
      null,
      { sourceItem: itemId, reason: 'pr-auto-link-unverified' });
    return;
  }

  shared.writeToInbox('engine', `missing-pr-attachment-${itemId}`,
    `# PR attachment missing for ${itemId}\n\n` +
    header +
    `${reason}\n` +
    summarySection,
    null,
    { sourceItem: itemId, reason: 'missing-pr-attachment' });
}
1011
1048
 
1012
- async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary) {
1049
+ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output) {
1013
1050
  if (!isPrAttachmentRequired(type, meta?.item, meta)) return null;
1014
1051
  if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
1015
1052
 
@@ -1037,10 +1074,16 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
1037
1074
  if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
1038
1075
  }
1039
1076
 
1040
- const reason = `PR-producing work item ${meta.item.id} completed without a canonically attached PR record. Successful completion requires PR.prdItems/pr-links.json to include the work item; branch names, note URLs, and _context.workItemId metadata are not sufficient.`;
1041
- markMissingPrAttachment(meta, agentId, reason, resultSummary);
1042
- log('warn', reason);
1043
- return { reason, itemId: meta.item.id };
1077
+ // Distinguish "agent never claimed a PR" (hard silent failure the contract
1078
+ // was designed to catch) from "agent claimed a PR but engine couldn't attach
1079
+ // it canonically" (soft — verification gap, not a failure).
1080
+ const severity = _outputContainsPrUrl(output) ? 'soft' : 'hard';
1081
+ const reason = severity === 'hard'
1082
+ ? `${meta.item.id} completed but no PR URL was detected in the agent's output. Expected a PR — verify the agent didn't fail silently. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`
1083
+ : `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
1084
+ markMissingPrAttachment(meta, agentId, reason, resultSummary, severity);
1085
+ log(severity === 'hard' ? 'warn' : 'info', reason);
1086
+ return { reason, itemId: meta.item.id, severity };
1044
1087
  }
1045
1088
 
1046
1089
  // ─── Post-Completion Hooks ──────────────────────────────────────────────────
@@ -1059,9 +1102,7 @@ function parseReviewVerdict(text) {
1059
1102
  // Match "VERDICT: APPROVE" or "VERDICT: REQUEST_CHANGES" (case-insensitive, optional markdown bold)
1060
1103
  const verdictMatch = text.match(/VERDICT[:\s]+\*{0,2}(APPROVE|REQUEST[_\s-]?CHANGES)\*{0,2}/i);
1061
1104
  if (verdictMatch) {
1062
- const v = verdictMatch[1].toUpperCase().replace(/[\s-]/g, '_');
1063
- if (v === 'APPROVE') return 'approved';
1064
- if (v.includes('CHANGES')) return 'changes-requested';
1105
+ return normalizeReviewVerdict(verdictMatch[1]);
1065
1106
  }
1066
1107
  return null;
1067
1108
  }
@@ -1083,7 +1124,7 @@ function isReviewBailout(text) {
1083
1124
  return /bail(ing)?\s+out/i.test(text) || /already\s+posted/i.test(text);
1084
1125
  }
1085
1126
 
1086
- async function updatePrAfterReview(agentId, pr, project, config, resultSummary) {
1127
+ async function updatePrAfterReview(agentId, pr, project, config, resultSummary, structuredCompletion = null) {
1087
1128
 
1088
1129
  if (!pr?.id) return;
1089
1130
 
@@ -1108,12 +1149,12 @@ async function updatePrAfterReview(agentId, pr, project, config, resultSummary)
1108
1149
  }
1109
1150
  } catch (e) { log('warn', `Post-review status check for ${pr.id}: ${e.message}`); }
1110
1151
 
1111
- // Fallback: if live check returned pending (e.g., GitHub self-approval blocked), parse verdict from agent output
1152
+ // Fallback: if live check returned pending (e.g., GitHub self-approval blocked), use the agent's completion report.
1112
1153
  if (!postReviewStatus) {
1113
- const verdict = parseReviewVerdict(resultSummary);
1154
+ const verdict = reviewVerdictFromCompletion(structuredCompletion) || parseReviewVerdict(resultSummary);
1114
1155
  if (verdict) {
1115
1156
  postReviewStatus = verdict;
1116
- log('info', `Parsed review verdict from agent output for ${pr.id}: ${verdict}`);
1157
+ log('info', `Read review verdict from agent completion for ${pr.id}: ${verdict}`);
1117
1158
  }
1118
1159
  }
1119
1160
 
@@ -1700,10 +1741,163 @@ function parseStructuredCompletion(stdout, runtimeName) {
1700
1741
  return result;
1701
1742
  }
1702
1743
 
1744
/**
 * Load the sidecar completion report written for a dispatch, if one exists.
 *
 * The path comes from `dispatchItem.meta.completionReportPath`, falling back to
 * `shared.dispatchCompletionReportPath(dispatchItem.id)`. A report is accepted
 * only when it parses to a plain (non-array) object that carries a `status`, or
 * an `outcome` that is then copied into `status`.
 *
 * @param {object} [dispatchItem] - Dispatch record; `id` and `meta.completionReportPath` are read.
 * @returns {object|null} The report, tagged with `_source: 'report-file'` and `_path`, or null
 *   when the file is missing, malformed, or has no status.
 */
function parseCompletionReportFile(dispatchItem) {
  const dispatchId = dispatchItem?.id;
  const reportPath = dispatchItem?.meta?.completionReportPath || shared.dispatchCompletionReportPath(dispatchId);
  const reportExists = Boolean(reportPath) && fs.existsSync(reportPath);
  if (!reportExists) return null;

  const report = safeJson(reportPath);
  const isPlainObject = Boolean(report) && typeof report === 'object' && !Array.isArray(report);
  if (!isPlainObject) {
    log('warn', `Ignoring malformed completion report for ${dispatchId || 'unknown'}: ${reportPath}`);
    return null;
  }

  // `outcome` is accepted as an alias when `status` is absent or empty.
  const effectiveStatus = report.status || report.outcome;
  if (!effectiveStatus) {
    log('warn', `Ignoring completion report without status for ${dispatchId || 'unknown'}: ${reportPath}`);
    return null;
  }
  if (!report.status) report.status = effectiveStatus;

  report._source = 'report-file';
  report._path = reportPath;
  return report;
}
1761
+
1703
1762
  function normalizeCompletionStatus(status) {
1704
1763
  return String(status || '').trim().toLowerCase().replace(/[\s_]+/g, '-');
1705
1764
  }
1706
1765
 
1766
/**
 * Decide whether a structured `pending` field actually means "nothing is left".
 *
 * Empty or falsy values count as terminal, as do placeholders such as "none",
 * "n/a" or "-", phrases starting with "no pending/remaining/outstanding", and
 * statements like "all pending items resolved".
 *
 * @param {*} value - The `pending` value from a completion report.
 * @returns {boolean} True when no work remains according to the value.
 */
function isTerminalPendingValue(value) {
  const text = String(value || '').trim().toLowerCase();
  if (text === '') return true;

  const terminalForms = [
    /^(?:none|n\/a|na|no|nothing|not-applicable|not applicable|-)$/,
    /^no\s+(?:pending|remaining|outstanding)\b/,
    /^(?:all\s+)?(?:pending|remaining|outstanding)\s+(?:work|items?|tasks?)?\s*(?:resolved|complete|completed|done|closed)$/,
  ];
  return terminalForms.some((form) => form.test(text));
}
1773
+
1774
/**
 * Decide whether a summary line that mentions "pending" is really saying that
 * nothing is pending (e.g. "No pending work", "Pending: none", "Pending: -",
 * "pending items resolved").
 *
 * @param {*} line - One line of the agent's free-text summary.
 * @returns {boolean} True when the line reports that no pending work remains.
 */
function isTerminalPendingLine(line) {
  const text = String(line || '').trim().toLowerCase();
  return /\bno\s+pending\b/.test(text)
    // The bare-dash placeholder is matched separately from the word placeholders:
    // a dash is not a word character, so a trailing \b after it only matches when
    // a word follows ("pending: -fix tests") and never for "pending: -" itself.
    // The dash must therefore be the whole value, which `text` being trimmed allows.
    || /\bpending\s*[:=-]\s*(?:(?:none|n\/a|na|no|nothing|not applicable)\b|-\s*$)/.test(text)
    || /\bpending\s+(?:work|items?|tasks?)?\s*(?:resolved|complete|completed|done|closed)\b/.test(text);
}
1780
+
1781
/**
 * Detect a "successful" completion whose report or summary says the work is not
 * actually finished.
 *
 * Checks run in this order and the first hit wins:
 *   1. the structured status starts with a partial/in-progress style word, or with a failure word;
 *   2. the structured `pending` field names remaining work;
 *   3. the summary text matches one of the unfinished-work phrases;
 *   4. a summary line mentions "pending" without saying nothing is pending.
 *
 * @param {*} resultSummary - Free-text summary of the agent run.
 * @param {object} [structuredCompletion] - Parsed completion report; `status` and `pending` are read.
 * @returns {{phrase: string, reason: string}|null} The matched trigger, or null when the summary looks terminal.
 */
function detectNonTerminalResultSummary(resultSummary, structuredCompletion) {
  const normalizedStatus = normalizeCompletionStatus(structuredCompletion?.status);
  if (normalizedStatus) {
    const unfinished = /^(?:partial|partially-complete|in-progress|pending|deferred|blocked|incomplete|to-be-continued)/;
    const failed = /^(?:fail|failed|failure|error)/;
    if (unfinished.test(normalizedStatus)) {
      return {
        phrase: `status:${structuredCompletion.status}`,
        reason: `Nonterminal completion summary: structured status is '${structuredCompletion.status}'`,
      };
    }
    if (failed.test(normalizedStatus)) {
      return {
        phrase: `status:${structuredCompletion.status}`,
        reason: `Nonterminal completion summary: structured status is '${structuredCompletion.status}', not a successful terminal state`,
      };
    }
  }

  const pendingField = structuredCompletion?.pending;
  if (pendingField && !isTerminalPendingValue(pendingField)) {
    return {
      phrase: 'pending',
      reason: `Nonterminal completion summary: pending work remains (${String(pendingField).slice(0, 160)})`,
    };
  }

  const text = String(resultSummary || '').replace(/\r/g, '').trim();
  if (!text) return null;

  // Ordered: the first matching entry decides the reported phrase.
  const patterns = [
    { phrase: 'still running', re: /\b(?:still|currently|continues?\s+to\s+be)\s+(?:running|ongoing|in\s+progress)\b/i },
    { phrase: 'will check later', re: /\b(?:i(?:'|’)ll|i\s+will|we(?:'|’)ll|we\s+will|will)\s+(?:check|verify|review|follow\s+up|revisit)\s+(?:again\s+)?(?:later|soon|in\b|after\b|when\b)/i },
    { phrase: 'wake up', re: /\bwake(?:\s|-)?up\b|\bwake\b.*\b(?:check|verify|review)\b/i },
    { phrase: 'not yet complete', re: /\b(?:not\s+yet|isn(?:'|’)t|not|incomplete|not\s+fully|not\s+completely)\s+(?:complete|completed|done|finished|validated|verified)\b/i },
    { phrase: 'partial', re: /\bpartial(?:ly)?\b/i },
    { phrase: 'to be continued', re: /\bto\s+be\s+continued\b|\btbc\b/i },
    { phrase: 'in progress', re: /\bin\s+progress\b|\bongoing\b|\bincomplete\b/i },
  ];
  const hit = patterns.find(({ re }) => re.test(text));
  if (hit) {
    return { phrase: hit.phrase, reason: `Nonterminal completion summary: matched '${hit.phrase}'` };
  }

  const hasOpenPendingLine = text
    .split('\n')
    .some((line) => /\bpending\b/i.test(line) && !isTerminalPendingLine(line));
  if (hasOpenPendingLine) {
    return { phrase: 'pending', reason: `Nonterminal completion summary: matched 'pending'` };
  }

  return null;
}
1832
+
1833
/**
 * Handle a work item whose agent reported success with a non-terminal summary.
 *
 * While the item's retry count is below ENGINE_DEFAULTS.maxRetries it is put
 * back to PENDING with retry bookkeeping; otherwise it is marked FAILED. The
 * resulting status is then passed to syncPrdItemStatus. Errors are logged as
 * warnings and never thrown to the caller.
 *
 * @param {object} meta - Dispatch metadata; `meta.item.id` and `meta.item.sourcePlan` are read.
 * @param {{reason?: string}} [detection] - Result of the non-terminal detection.
 * @returns {string} The reason text (a default is used when the detection has none).
 */
function deferNonTerminalCompletion(meta, detection) {
  const reason = detection?.reason || 'Nonterminal completion summary';
  const itemId = meta?.item?.id;
  if (!itemId) return reason;
  const workItemsPath = resolveWorkItemPath(meta);
  if (!workItemsPath) return reason;

  let outcomeStatus = WI_STATUS.PENDING;
  try {
    mutateJsonFileLocked(workItemsPath, (items) => {
      if (!Array.isArray(items)) return items;
      const workItem = items.find((candidate) => candidate.id === itemId);
      if (!workItem) return items;

      const priorRetries = workItem._retryCount || 0;
      const retriesExhausted = !(priorRetries < ENGINE_DEFAULTS.maxRetries);

      // Either way the item is no longer completed nor assigned to an agent.
      delete workItem.completedAt;
      delete workItem.dispatched_at;
      delete workItem.dispatched_to;

      if (retriesExhausted) {
        workItem.status = WI_STATUS.FAILED;
        workItem.failReason = `${reason} after ${ENGINE_DEFAULTS.maxRetries} attempts`;
        workItem.failedAt = ts();
        delete workItem._pendingReason;
        outcomeStatus = WI_STATUS.FAILED;
        log('warn', `Work item ${itemId} failed — repeated nonterminal completion summaries after ${ENGINE_DEFAULTS.maxRetries} attempts`);
        return items;
      }

      workItem.status = WI_STATUS.PENDING;
      workItem._retryCount = priorRetries + 1;
      workItem._lastRetryAt = ts();
      workItem._lastRetryReason = reason;
      workItem._pendingReason = 'nonterminal_completion';
      delete workItem.failedAt;
      outcomeStatus = WI_STATUS.PENDING;
      log('warn', `Work item ${itemId} reported nonterminal success — retry ${priorRetries + 1}/${ENGINE_DEFAULTS.maxRetries}: ${reason}`);
      return items;
    }, { defaultValue: [], skipWriteIfUnchanged: true });
    syncPrdItemStatus(itemId, outcomeStatus, meta.item?.sourcePlan);
  } catch (err) {
    log('warn', `nonterminal completion gate: ${err.message}`);
  }
  return reason;
}
1878
+
1879
/**
 * Interpret a loosely typed boolean from an agent completion report.
 *
 * Accepts real booleans, the strings true/yes/1 and false/no/0 (case and
 * surrounding whitespace ignored), and the numbers 1 and 0. The numeric forms
 * are accepted so a JSON report carrying `1`/`0` is treated the same as one
 * carrying the strings `"1"`/`"0"`.
 *
 * @param {*} value - Raw field value.
 * @returns {boolean|undefined} The parsed boolean, or undefined when the value is not recognisable.
 */
function parseCompletionBoolean(value) {
  if (typeof value === 'boolean') return value;
  if (typeof value === 'number') {
    if (value === 1) return true;
    if (value === 0) return false;
    return undefined;
  }
  if (typeof value === 'string') {
    const normalized = value.trim().toLowerCase();
    if (['true', 'yes', '1'].includes(normalized)) return true;
    if (['false', 'no', '0'].includes(normalized)) return false;
  }
  return undefined;
}
1888
+
1889
/**
 * Map a free-form review verdict onto the canonical values used for PR review state.
 *
 * Whitespace, hyphens and underscores between words are treated as equivalent
 * separators, and the separator is optional. The no-separator form matters
 * because the verdict regex in parseReviewVerdict accepts "REQUESTCHANGES",
 * which must still resolve to 'changes-requested'.
 *
 * @param {*} verdict - Verdict text, e.g. "APPROVE", "Request Changes", "changes-requested".
 * @returns {'approved'|'changes-requested'|null} Canonical verdict, or null when unrecognised.
 */
function normalizeReviewVerdict(verdict) {
  const value = String(verdict || '').trim().toLowerCase().replace(/[\s_-]+/g, '_');
  if (value === 'approve' || value === 'approved') return 'approved';
  if (/^(?:request_?changes|changes_?requested)$/.test(value)) return 'changes-requested';
  return null;
}
1895
+
1896
/**
 * Read the review verdict from a structured completion report.
 *
 * The first truthy value among `verdict`, `review_verdict` and `reviewVerdict`
 * is handed to normalizeReviewVerdict.
 *
 * @param {*} completion - Parsed completion report.
 * @returns {string|null} Whatever normalizeReviewVerdict returns for the chosen field,
 *   or null when `completion` is not an object.
 */
function reviewVerdictFromCompletion(completion) {
  const isObject = Boolean(completion) && typeof completion === 'object';
  if (!isObject) return null;
  const { verdict, review_verdict: snakeCaseVerdict, reviewVerdict: camelCaseVerdict } = completion;
  const rawVerdict = verdict || snakeCaseVerdict || camelCaseVerdict;
  return normalizeReviewVerdict(rawVerdict);
}
1900
+
1707
1901
  function writeNonCleanAgentReport(dispatchItem, agentId, outcome, structuredCompletion, resultSummary, exitCode) {
1708
1902
  if (!dispatchItem?.id || !outcome) {
1709
1903
  log('warn', 'Cannot write non-clean agent report without dispatch id and outcome');
@@ -1839,21 +2033,31 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
1839
2033
  // and for the foundation-only state of this plan item; downstream items
1840
2034
  // (P-2a6d9c4f, P-9c4f2d6a) populate dispatchItem.meta.runtimeName at spawn time.
1841
2035
  const runtimeName = dispatchItem.meta?.runtimeName || dispatchItem.runtimeName || 'claude';
1842
- const { resultSummary, taskUsage, sessionId, model } = parseAgentOutput(stdout, runtimeName);
2036
+ let { resultSummary, taskUsage, sessionId, model } = parseAgentOutput(stdout, runtimeName);
1843
2037
 
1844
- // Try structured completion protocol first (```completion block from agent output)
1845
- const structuredCompletion = parseStructuredCompletion(stdout, runtimeName);
2038
+ // Prefer the sidecar completion report; keep fenced output as a compatibility fallback.
2039
+ const reportCompletion = parseCompletionReportFile(dispatchItem);
2040
+ const structuredCompletion = reportCompletion || parseStructuredCompletion(stdout, runtimeName);
1846
2041
  if (structuredCompletion) {
1847
- log('info', `Structured completion from ${agentId}: status=${structuredCompletion.status}, pr=${structuredCompletion.pr || 'N/A'}`);
2042
+ if (structuredCompletion.summary) resultSummary = String(structuredCompletion.summary);
2043
+ log('info', `Structured completion from ${agentId}: status=${structuredCompletion.status}, pr=${structuredCompletion.pr || 'N/A'}${structuredCompletion._source ? ` (${structuredCompletion._source})` : ''}`);
1848
2044
  }
2045
+ const completionGateSummary = resultSummary || (typeof stdout === 'string' && !stdout.includes('"type":') ? stdout : '');
1849
2046
 
1850
2047
  // Save session for potential resume on next dispatch
1851
2048
  if (isSuccess && sessionId && agentId && !agentId.startsWith('temp-')) {
1852
2049
  try {
1853
- shared.safeWrite(path.join(AGENTS_DIR, agentId, 'session.json'), {
1854
- sessionId, dispatchId: dispatchItem.id, savedAt: ts(),
1855
- branch: dispatchItem.meta?.branch || null,
1856
- });
2050
+ const runtime = resolveRuntime(runtimeName);
2051
+ if (runtime && typeof runtime.saveSession === 'function') {
2052
+ runtime.saveSession({
2053
+ agentId,
2054
+ dispatchId: dispatchItem.id,
2055
+ branch: dispatchItem.meta?.branch || null,
2056
+ sessionId,
2057
+ agentsDir: AGENTS_DIR,
2058
+ logger: { warn: (msg) => log('warn', msg) },
2059
+ });
2060
+ }
1857
2061
  } catch (err) { log('warn', `Session save: ${err.message}`); }
1858
2062
  }
1859
2063
 
@@ -1869,18 +2073,24 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
1869
2073
  log('info', `Structured completion reports PR (${structuredCompletion.pr}) but regex sync found none — PR may already be tracked`);
1870
2074
  }
1871
2075
 
2076
+ const completionStatus = normalizeCompletionStatus(structuredCompletion?.status);
2077
+ const agentNeedsRerun = parseCompletionBoolean(structuredCompletion?.needs_rerun ?? structuredCompletion?.needsRerun) === true;
2078
+ const agentReportedFailure = completionStatus.startsWith('fail') || agentNeedsRerun;
2079
+ const agentRetryable = parseCompletionBoolean(structuredCompletion?.retryable);
2080
+
1872
2081
  // Auto-recover: if a failed implement/fix/test agent created PRs, it likely succeeded before the failure surfaced.
1873
2082
  const prCreatingType = type === WORK_TYPE.IMPLEMENT || type === WORK_TYPE.IMPLEMENT_LARGE || type === WORK_TYPE.FIX || type === WORK_TYPE.TEST;
1874
- const autoRecovered = !isSuccess && prsCreatedCount > 0 && prCreatingType && !!meta?.item?.id;
2083
+ const autoRecovered = !agentReportedFailure && !isSuccess && prsCreatedCount > 0 && prCreatingType && !!meta?.item?.id;
1875
2084
  if (autoRecovered) {
1876
2085
  log('info', `Auto-recovery: agent failed but created ${prsCreatedCount} PR(s) — upgrading ${meta.item.id} to done`);
1877
2086
  }
1878
- const effectiveSuccess = isSuccess || autoRecovered;
2087
+ const effectiveSuccess = (isSuccess && !agentReportedFailure) || autoRecovered;
1879
2088
 
1880
- const completionStatus = normalizeCompletionStatus(structuredCompletion?.status);
2089
+ let nonCleanReportWritten = false;
1881
2090
  if (completionStatus.startsWith('partial') || autoRecovered || (completionStatus.startsWith('fail') && isSuccess)) {
1882
2091
  const outcome = completionStatus.startsWith('fail') ? 'failure' : 'partial';
1883
- writeNonCleanAgentReport(dispatchItem, agentId, outcome, structuredCompletion, resultSummary, code);
2092
+ writeNonCleanAgentReport(dispatchItem, agentId, outcome, structuredCompletion, completionGateSummary, code);
2093
+ nonCleanReportWritten = true;
1884
2094
  }
1885
2095
 
1886
2096
  // Handle decomposition results — create sub-items from decompose agent output
@@ -1903,7 +2113,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
1903
2113
  // and after 3 such bailouts the WI flips to status=failed even though the
1904
2114
  // original review was posted on the first run.
1905
2115
  if (effectiveSuccess && type === WORK_TYPE.REVIEW && meta?.item?.id) {
1906
- const verdict = parseReviewVerdict(resultSummary);
2116
+ const verdict = reviewVerdictFromCompletion(structuredCompletion) || parseReviewVerdict(resultSummary);
1907
2117
  if (!verdict && isReviewBailout(resultSummary)) {
1908
2118
  log('info', `Review ${meta.item.id} bailed out (review already posted) — treating as DONE without retry`);
1909
2119
  } else if (!verdict) {
@@ -1988,8 +2198,22 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
1988
2198
 
1989
2199
  let completionContractFailure = null;
1990
2200
  if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
1991
- completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary);
1992
- if (completionContractFailure) skipDoneStatus = true;
2201
+ const nonTerminalCompletion = detectNonTerminalResultSummary(completionGateSummary, structuredCompletion);
2202
+ if (nonTerminalCompletion) {
2203
+ skipDoneStatus = true;
2204
+ const reason = deferNonTerminalCompletion(meta, nonTerminalCompletion);
2205
+ completionContractFailure = { reason, itemId: meta.item.id, nonTerminal: true };
2206
+ if (!nonCleanReportWritten) {
2207
+ writeNonCleanAgentReport(dispatchItem, agentId, 'partial', structuredCompletion, completionGateSummary, code);
2208
+ }
2209
+ }
2210
+ }
2211
+
2212
+ if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
2213
+ completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout);
2214
+ if (completionContractFailure?.severity === 'hard' || completionContractFailure?.nonTerminal) {
2215
+ skipDoneStatus = true;
2216
+ }
1993
2217
  }
1994
2218
 
1995
2219
  if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
@@ -2095,7 +2319,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
2095
2319
  // (retryCount was being deleted by done-marking before the check could read it)
2096
2320
  // Review verdict check similarly moved before updateWorkItemStatus(DONE) — same root cause.
2097
2321
 
2098
- if (type === WORK_TYPE.REVIEW) await updatePrAfterReview(agentId, meta?.pr, meta?.project, config, resultSummary);
2322
+ if (type === WORK_TYPE.REVIEW) await updatePrAfterReview(agentId, meta?.pr, meta?.project, config, resultSummary, structuredCompletion);
2099
2323
  if (type === WORK_TYPE.FIX) {
2100
2324
  updatePrAfterFix(meta?.pr, meta?.project, meta?.source);
2101
2325
  // (#984) Sync PRD status for PR-linked features: fix work items have a different ID
@@ -2114,7 +2338,9 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
2114
2338
  }
2115
2339
  }
2116
2340
  checkForLearnings(agentId, config.agents[agentId], dispatchItem.task);
2117
- const finalResult = completionContractFailure ? DISPATCH_RESULT.ERROR : (effectiveSuccess ? DISPATCH_RESULT.SUCCESS : DISPATCH_RESULT.ERROR);
2341
+ const hardContractFail = completionContractFailure?.severity === 'hard'
2342
+ || completionContractFailure?.nonTerminal === true;
2343
+ const finalResult = hardContractFail ? DISPATCH_RESULT.ERROR : (effectiveSuccess ? DISPATCH_RESULT.SUCCESS : DISPATCH_RESULT.ERROR);
2118
2344
  if (finalResult === DISPATCH_RESULT.SUCCESS) {
2119
2345
  extractSkillsFromOutput(stdout, agentId, dispatchItem, config);
2120
2346
  // Also scan inbox notes for skill blocks — agents often write skills to inbox, not stdout
@@ -2142,7 +2368,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
2142
2368
  teams.teamsNotifyCompletion(dispatchItem, finalResult, agentId).catch(() => {});
2143
2369
  } catch {}
2144
2370
 
2145
- return { resultSummary, taskUsage, autoRecovered, structuredCompletion, completionContractFailure };
2371
+ return { resultSummary, taskUsage, autoRecovered, structuredCompletion, completionContractFailure, agentReportedFailure, agentRetryable };
2146
2372
  }
2147
2373
 
2148
2374
  // ─── PR → PRD Status Sync ─────────────────────────────────────────────────────
@@ -2322,6 +2548,8 @@ module.exports = {
2322
2548
  parseReviewVerdict,
2323
2549
  isReviewBailout,
2324
2550
  parseStructuredCompletion,
2551
+ detectNonTerminalResultSummary,
2552
+ parseCompletionReportFile,
2325
2553
  runPostCompletionHooks,
2326
2554
  syncPrdFromPrs,
2327
2555
  resolveWorkItemPath,
@@ -25,6 +25,7 @@
25
25
  const fs = require('fs');
26
26
  const os = require('os');
27
27
  const path = require('path');
28
+ const { FAILURE_CLASS, safeWrite, ts } = require('../shared');
28
29
 
29
30
  const ENGINE_DIR = __dirname.replace(/[\\/]runtimes$/, '');
30
31
  const MINIONS_DIR = path.resolve(ENGINE_DIR, '..');
@@ -229,6 +230,88 @@ function buildArgs(opts = {}) {
229
230
  return args;
230
231
  }
231
232
 
233
/**
 * Translate spawn options into the flag list passed to the spawn script.
 *
 * The list always starts with `--runtime claude`; every other flag is appended
 * only when its option is set, in a fixed order. Valued options are stringified.
 *
 * @param {object} [opts] - Spawn options (maxTurns, model, allowedTools, effort, sessionId,
 *   maxBudget, bare, fallbackModel, stream, disableBuiltinMcps, suppressAgentsMd, reasoningSummaries).
 * @returns {string[]} Flags and their values, in emission order.
 */
function buildSpawnFlags(opts = {}) {
  // Each row: [include?, flag, optional value]. Row order is the emission order.
  const plan = [
    [opts.maxTurns != null, '--max-turns', opts.maxTurns],
    [Boolean(opts.model), '--model', opts.model],
    [Boolean(opts.allowedTools), '--allowedTools', opts.allowedTools],
    [Boolean(opts.effort), '--effort', opts.effort],
    [Boolean(opts.sessionId), '--resume', opts.sessionId],
    [opts.maxBudget != null, '--max-budget-usd', opts.maxBudget],
    [opts.bare === true, '--bare'],
    [Boolean(opts.fallbackModel), '--fallback-model', opts.fallbackModel],
    [opts.stream != null && opts.stream !== '', '--stream', opts.stream],
    [opts.disableBuiltinMcps === true, '--disable-builtin-mcps'],
    [opts.suppressAgentsMd === true, '--no-custom-instructions'],
    [opts.reasoningSummaries === true, '--enable-reasoning-summaries'],
  ];

  const flags = ['--runtime', 'claude'];
  for (const [enabled, flag, ...value] of plan) {
    if (!enabled) continue;
    flags.push(flag, ...value.map((v) => String(v)));
  }
  return flags;
}
249
+
250
/**
 * Look up a saved session id that may be resumed for an agent.
 *
 * A session is returned only when `<agentsDir>/<agentId>/session.json` has a
 * sessionId and savedAt, is younger than `maxAgeMs`, and was saved for the same
 * branch. Agents whose id starts with "temp-" never resume. Lookup errors are
 * reported through `logger.warn` (when available) and result in null.
 *
 * @param {object} [options]
 * @param {string} options.agentId - Agent identifier.
 * @param {string} options.branchName - Branch the new dispatch will work on.
 * @param {string} options.agentsDir - Directory holding per-agent folders.
 * @param {number} [options.maxAgeMs] - Maximum session age in milliseconds (default two hours).
 * @param {object} [options.logger] - Object with optional `info`/`warn` methods (default console).
 * @returns {string|null} The session id to resume, or null.
 */
function getResumeSessionId({ agentId, branchName, agentsDir, maxAgeMs = 2 * 60 * 60 * 1000, logger = console } = {}) {
  const ineligible = !agentId || agentId.startsWith('temp-') || !agentsDir;
  if (ineligible) return null;

  const canLog = (level) => Boolean(logger) && typeof logger[level] === 'function';
  try {
    const saved = _safeJson(path.join(agentsDir, agentId, 'session.json'));
    if (!saved?.sessionId || !saved.savedAt) return null;

    const ageMs = Date.now() - new Date(saved.savedAt).getTime();
    const branchMatches = branchName && saved.branch && saved.branch === branchName;
    // An unparseable savedAt gives NaN, which fails the age comparison.
    if (!(ageMs < maxAgeMs) || !branchMatches) return null;

    if (canLog('info')) {
      logger.info(`Resuming session ${saved.sessionId} for ${agentId} on branch ${branchName} (age: ${Math.round(ageMs / 60000)}min)`);
    }
    return saved.sessionId;
  } catch (e) {
    if (canLog('warn')) logger.warn('session resume lookup: ' + e.message);
    return null;
  }
}
269
+
270
/**
 * Persist the session id of a finished dispatch to `<agentsDir>/<agentId>/session.json`.
 *
 * Nothing is written when the session id, agent id or agents directory is
 * missing, or when the agent id starts with "temp-". Write errors are reported
 * through `logger.warn` (when available) instead of being thrown.
 *
 * @param {object} [options]
 * @param {string} options.agentId - Agent identifier.
 * @param {string} options.dispatchId - Dispatch that produced the session.
 * @param {string} [options.branch] - Branch the session worked on; stored as null when falsy.
 * @param {string} options.sessionId - Session id to store.
 * @param {string} options.agentsDir - Directory holding per-agent folders.
 * @param {Function} [options.now] - Timestamp provider; when not a function the current ISO time is used.
 * @param {Function} [options.writeJson] - Writer called with (filePath, data).
 * @param {object} [options.logger] - Object with an optional `warn` method (default console).
 * @returns {boolean} True when the write call completed, false otherwise.
 */
function saveSession({ agentId, dispatchId, branch, sessionId, agentsDir, now = ts, writeJson = safeWrite, logger = console } = {}) {
  const persistable = Boolean(sessionId) && Boolean(agentId) && !agentId.startsWith('temp-') && Boolean(agentsDir);
  if (!persistable) return false;

  let saved = true;
  try {
    const sessionFile = path.join(agentsDir, agentId, 'session.json');
    const savedAt = typeof now === 'function' ? now() : new Date().toISOString();
    writeJson(sessionFile, { sessionId, dispatchId, savedAt, branch: branch || null });
  } catch (err) {
    if (logger && typeof logger.warn === 'function') logger.warn(`Session save: ${err.message}`);
    saved = false;
  }
  return saved;
}
285
+
286
/**
 * Check whether a chunk of CLI output contains one of the known
 * permission / trust prompt phrases (matched case-insensitively on whole words).
 *
 * @param {*} outputChunk - Output text; falsy values are treated as empty.
 * @returns {boolean} True when a prompt phrase is present.
 */
function detectPermissionGate(outputChunk) {
  const gatePhrases = /\b(trust this|do you trust|allow access|grant permission|approve tools?|permission prompt)\b/;
  const haystack = String(outputChunk || '').toLowerCase();
  return haystack.search(gatePhrases) !== -1;
}
290
+
291
/**
 * Report how the prompt is delivered to this runtime.
 *
 * @returns {string} Always 'stdin'.
 */
function getPromptDeliveryMode() {
  const deliveryMode = 'stdin';
  return deliveryMode;
}
294
+
295
/**
 * Tell whether a system prompt file is used for this spawn: yes, unless the
 * spawn resumes an existing session.
 *
 * @param {object} [options]
 * @param {*} [options.isResume] - Truthy when the spawn resumes a session.
 * @returns {boolean} False for a resume, true otherwise.
 */
function usesSystemPromptFile({ isResume } = {}) {
  if (isResume) return false;
  return true;
}
298
+
299
/**
 * Map a runtime error code onto an engine failure class.
 *
 * @param {string} code - Error code ('auth-failure', 'budget-exceeded', 'context-limit', 'crash', ...).
 * @returns {*} The matching FAILURE_CLASS member, or null for any other code.
 */
function _runtimeFailureClass(code) {
  switch (code) {
    case 'auth-failure':
    case 'budget-exceeded':
      return FAILURE_CLASS.PERMISSION_BLOCKED;
    case 'context-limit':
      return FAILURE_CLASS.OUT_OF_CONTEXT;
    case 'crash':
      return FAILURE_CLASS.SPAWN_ERROR;
    default:
      return null;
  }
}
305
+
306
/**
 * Classify a failed run of this runtime.
 *
 * Exit code 78 is reported as a non-retryable configuration error. Otherwise
 * stdout and stderr are joined and passed to parseError; a recognised runtime
 * error code decides the class, else the `fallback` callback (or
 * FAILURE_CLASS.UNKNOWN when no callback is given) does.
 *
 * @param {object} [options]
 * @param {*} options.code - Process exit code.
 * @param {string} [options.stdout] - Captured standard output.
 * @param {string} [options.stderr] - Captured standard error.
 * @param {Function} [options.fallback] - Called as fallback(code, stdout, stderr) when no runtime class applies.
 * @returns {{failureClass: *, retryable: boolean, message: string}} Classification; `retryable`
 *   is false only for exit code 78 or when the parsed error sets `retriable` to false.
 */
function classifyFailure({ code, stdout = '', stderr = '', fallback } = {}) {
  if (code === 78) {
    return { failureClass: FAILURE_CLASS.CONFIG_ERROR, retryable: false, message: 'Claude configuration error' };
  }

  const parsed = parseError(`${stdout || ''}\n${stderr || ''}`);
  let failureClass = parsed.code ? _runtimeFailureClass(parsed.code) : null;
  if (!failureClass) {
    failureClass = typeof fallback === 'function' ? fallback(code, stdout, stderr) : FAILURE_CLASS.UNKNOWN;
  }
  const retryable = parsed.retriable !== false;
  const message = parsed.message || '';
  return { failureClass, retryable, message };
}
314
+
232
315
  /**
233
316
  * Build the final prompt text delivered to the Claude CLI. Claude takes the
234
317
  * system prompt via `--system-prompt-file` and the user prompt via stdin, so
@@ -536,8 +619,15 @@ module.exports = {
536
619
  modelsCache: MODELS_CACHE,
537
620
  spawnScript: path.join(ENGINE_DIR, 'spawn-agent.js'),
538
621
  installHint: INSTALL_HINT,
622
+ buildSpawnFlags,
539
623
  buildArgs,
540
624
  buildPrompt,
625
+ getResumeSessionId,
626
+ saveSession,
627
+ detectPermissionGate,
628
+ getPromptDeliveryMode,
629
+ usesSystemPromptFile,
630
+ classifyFailure,
541
631
  resolveModel,
542
632
  parseOutput,
543
633
  parseStreamChunk,