@yemi33/minions 0.1.1650 → 0.1.1652

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ const { safeRead, safeJson, safeWrite, mutateJsonFileLocked, mutateWorkItems, ex
11
11
  log, ts, dateStamp, WI_STATUS, DONE_STATUSES, PLAN_TERMINAL_STATUSES, WORK_TYPE, PLAN_STATUS, PRD_ITEM_STATUS, PR_STATUS, DISPATCH_RESULT,
12
12
  ENGINE_DEFAULTS, DEFAULT_AGENT_METRICS, FAILURE_CLASS } = shared;
13
13
  const { trackEngineUsage } = require('./llm');
14
+ const { resolveRuntime } = require('./runtimes');
14
15
  const queries = require('./queries');
15
16
  const { isBranchActive } = require('./cooldown');
16
17
  const { worktreeDirMatchesBranch } = require('./cleanup');
@@ -980,36 +981,72 @@ async function findOpenPrForBranch(meta, config) {
980
981
  return null;
981
982
  }
982
983
 
983
- function markMissingPrAttachment(meta, agentId, reason, resultSummary) {
984
/**
 * Yes/no probe: does the agent's raw output mention a pull-request URL?
 *
 * The PR-attachment contract uses this to tell a silent failure (no URL
 * anywhere) apart from an auto-link miss (a URL is present but the engine
 * could not canonically attach it). Only a boolean is needed, so the pattern
 * has no capture groups. Keep it roughly in sync with the gated detection in
 * syncPrsFromOutput.
 *
 * @param {*} output - Raw agent output; anything other than a non-empty string yields false.
 * @returns {boolean} True when a GitHub `/pull/<n>` or Azure DevOps / visualstudio.com `pullrequest/<n>` URL is found.
 */
function _outputContainsPrUrl(output) {
  const isSearchableText = typeof output === 'string' && output.length > 0;
  if (!isSearchableText) {
    return false;
  }
  return /https?:\/\/(?:github\.com\/[^\s"'\\)\]]+\/[^\s"'\\)\]]+\/pull\/\d+|(?:dev\.azure\.com|[^/\s"'\\)\]]+\.visualstudio\.com)[^\s"'\\)\]]*?pullrequest\/\d+)/i.test(output);
}
994
+
995
+ function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity) {
984
996
  const noPrWiPath = resolveWorkItemPath(meta);
997
+ const isHard = severity !== 'soft';
985
998
  if (noPrWiPath) {
986
999
  mutateJsonFileLocked(noPrWiPath, data => {
987
1000
  if (!Array.isArray(data)) return data;
988
1001
  const w = data.find(i => i.id === meta.item.id);
989
1002
  if (!w) return data;
990
- w.status = WI_STATUS.NEEDS_REVIEW;
991
- w._missingPrAttachment = true;
992
- w.failReason = reason;
993
- w._lastReviewReason = reason;
994
- delete w.completedAt;
995
- delete w._noPr;
996
- delete w._noPrReason;
1003
+ if (isHard) {
1004
+ w.status = WI_STATUS.NEEDS_REVIEW;
1005
+ w._missingPrAttachment = true;
1006
+ w.failReason = reason;
1007
+ w._lastReviewReason = reason;
1008
+ delete w.completedAt;
1009
+ delete w._noPr;
1010
+ delete w._noPrReason;
1011
+ } else {
1012
+ // Soft: don't change status or failReason — the agent did the work,
1013
+ // we just couldn't auto-attach the PR. Surface a flag for the dashboard
1014
+ // so the dispatch row can render a yellow "verify" badge.
1015
+ w._unverifiedPrAttachment = true;
1016
+ w._lastReviewReason = reason;
1017
+ }
997
1018
  return data;
998
1019
  }, { skipWriteIfUnchanged: true });
999
1020
  }
1000
- shared.writeToInbox('engine', `missing-pr-attachment-${meta.item.id}`,
1001
- `# PR attachment missing for ${meta.item.id}\n\n` +
1002
- `**Agent:** ${agentId}\n` +
1003
- `**Work item:** \`${meta.item.id}\` — ${meta.item.title || ''}\n` +
1004
- `**Type:** ${meta.item.type || 'unknown'}\n` +
1005
- `**Branch:** ${meta.branch || '(none)'}\n\n` +
1006
- `${reason}\n` +
1007
- (resultSummary ? `\n## Agent summary\n${resultSummary}\n` : ''),
1008
- null,
1009
- { sourceItem: meta.item.id, reason: 'missing-pr-attachment' });
1021
+ if (isHard) {
1022
+ shared.writeToInbox('engine', `missing-pr-attachment-${meta.item.id}`,
1023
+ `# PR attachment missing for ${meta.item.id}\n\n` +
1024
+ `**Agent:** ${agentId}\n` +
1025
+ `**Work item:** \`${meta.item.id}\` — ${meta.item.title || ''}\n` +
1026
+ `**Type:** ${meta.item.type || 'unknown'}\n` +
1027
+ `**Branch:** ${meta.branch || '(none)'}\n\n` +
1028
+ `${reason}\n` +
1029
+ (resultSummary ? `\n## Agent summary\n${resultSummary}\n` : ''),
1030
+ null,
1031
+ { sourceItem: meta.item.id, reason: 'missing-pr-attachment' });
1032
+ } else {
1033
+ shared.writeToInbox('engine', `pr-auto-link-unverified-${meta.item.id}`,
1034
+ `# PR auto-link unverified for ${meta.item.id}\n\n` +
1035
+ `**Agent:** ${agentId}\n` +
1036
+ `**Work item:** \`${meta.item.id}\` — ${meta.item.title || ''}\n` +
1037
+ `**Type:** ${meta.item.type || 'unknown'}\n` +
1038
+ `**Branch:** ${meta.branch || '(none)'}\n\n` +
1039
+ `${reason}\n\n` +
1040
+ `The agent's output mentioned a PR URL but the engine couldn't canonically attach it ` +
1041
+ `(URL detection regex miss, branch lookup race, untrusted tool_use signature, etc.). ` +
1042
+ `The work likely succeeded — verify against the project's PR list.\n` +
1043
+ (resultSummary ? `\n## Agent summary\n${resultSummary}\n` : ''),
1044
+ null,
1045
+ { sourceItem: meta.item.id, reason: 'pr-auto-link-unverified' });
1046
+ }
1010
1047
  }
1011
1048
 
1012
- async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary) {
1049
+ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output) {
1013
1050
  if (!isPrAttachmentRequired(type, meta?.item, meta)) return null;
1014
1051
  if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
1015
1052
 
@@ -1037,10 +1074,16 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
1037
1074
  if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
1038
1075
  }
1039
1076
 
1040
- const reason = `PR-producing work item ${meta.item.id} completed without a canonically attached PR record. Successful completion requires PR.prdItems/pr-links.json to include the work item; branch names, note URLs, and _context.workItemId metadata are not sufficient.`;
1041
- markMissingPrAttachment(meta, agentId, reason, resultSummary);
1042
- log('warn', reason);
1043
- return { reason, itemId: meta.item.id };
1077
+ // Distinguish "agent never claimed a PR" (hard silent failure the contract
1078
+ // was designed to catch) from "agent claimed a PR but engine couldn't attach
1079
+ // it canonically" (soft — verification gap, not a failure).
1080
+ const severity = _outputContainsPrUrl(output) ? 'soft' : 'hard';
1081
+ const reason = severity === 'hard'
1082
+ ? `${meta.item.id} completed but no PR URL was detected in the agent's output. Expected a PR — verify the agent didn't fail silently. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`
1083
+ : `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
1084
+ markMissingPrAttachment(meta, agentId, reason, resultSummary, severity);
1085
+ log(severity === 'hard' ? 'warn' : 'info', reason);
1086
+ return { reason, itemId: meta.item.id, severity };
1044
1087
  }
1045
1088
 
1046
1089
  // ─── Post-Completion Hooks ──────────────────────────────────────────────────
@@ -1059,9 +1102,7 @@ function parseReviewVerdict(text) {
1059
1102
  // Match "VERDICT: APPROVE" or "VERDICT: REQUEST_CHANGES" (case-insensitive, optional markdown bold)
1060
1103
  const verdictMatch = text.match(/VERDICT[:\s]+\*{0,2}(APPROVE|REQUEST[_\s-]?CHANGES)\*{0,2}/i);
1061
1104
  if (verdictMatch) {
1062
- const v = verdictMatch[1].toUpperCase().replace(/[\s-]/g, '_');
1063
- if (v === 'APPROVE') return 'approved';
1064
- if (v.includes('CHANGES')) return 'changes-requested';
1105
+ return normalizeReviewVerdict(verdictMatch[1]);
1065
1106
  }
1066
1107
  return null;
1067
1108
  }
@@ -1083,7 +1124,7 @@ function isReviewBailout(text) {
1083
1124
  return /bail(ing)?\s+out/i.test(text) || /already\s+posted/i.test(text);
1084
1125
  }
1085
1126
 
1086
- async function updatePrAfterReview(agentId, pr, project, config, resultSummary) {
1127
+ async function updatePrAfterReview(agentId, pr, project, config, resultSummary, structuredCompletion = null) {
1087
1128
 
1088
1129
  if (!pr?.id) return;
1089
1130
 
@@ -1108,12 +1149,12 @@ async function updatePrAfterReview(agentId, pr, project, config, resultSummary)
1108
1149
  }
1109
1150
  } catch (e) { log('warn', `Post-review status check for ${pr.id}: ${e.message}`); }
1110
1151
 
1111
- // Fallback: if live check returned pending (e.g., GitHub self-approval blocked), parse verdict from agent output
1152
+ // Fallback: if live check returned pending (e.g., GitHub self-approval blocked), use the agent's completion report.
1112
1153
  if (!postReviewStatus) {
1113
- const verdict = parseReviewVerdict(resultSummary);
1154
+ const verdict = reviewVerdictFromCompletion(structuredCompletion) || parseReviewVerdict(resultSummary);
1114
1155
  if (verdict) {
1115
1156
  postReviewStatus = verdict;
1116
- log('info', `Parsed review verdict from agent output for ${pr.id}: ${verdict}`);
1157
+ log('info', `Read review verdict from agent completion for ${pr.id}: ${verdict}`);
1117
1158
  }
1118
1159
  }
1119
1160
 
@@ -1700,6 +1741,24 @@ function parseStructuredCompletion(stdout, runtimeName) {
1700
1741
  return result;
1701
1742
  }
1702
1743
 
1744
/**
 * Load the sidecar completion report for a dispatch, if one exists.
 *
 * The path comes from `dispatchItem.meta.completionReportPath`, falling back
 * to `shared.dispatchCompletionReportPath(dispatchItem.id)`. A report is only
 * accepted when it parses to a plain (non-array) object that carries a
 * `status`; a legacy `outcome` field is promoted to `status` when `status`
 * is absent. Accepted reports are tagged with `_source` and `_path`.
 *
 * @param {object} dispatchItem - Dispatch record (may be null/undefined).
 * @returns {object|null} The tagged report, or null when missing or unusable.
 */
function parseCompletionReportFile(dispatchItem) {
  const reportPath = dispatchItem?.meta?.completionReportPath || shared.dispatchCompletionReportPath(dispatchItem?.id);
  if (!reportPath) return null;
  if (!fs.existsSync(reportPath)) return null;

  const report = safeJson(reportPath);
  const isPlainObject = Boolean(report) && typeof report === 'object' && !Array.isArray(report);
  if (!isPlainObject) {
    log('warn', `Ignoring malformed completion report for ${dispatchItem?.id || 'unknown'}: ${reportPath}`);
    return null;
  }

  // Older reports may use `outcome` instead of `status`.
  if (!report.status && report.outcome) {
    report.status = report.outcome;
  }
  if (!report.status) {
    log('warn', `Ignoring completion report without status for ${dispatchItem?.id || 'unknown'}: ${reportPath}`);
    return null;
  }

  return Object.assign(report, { _source: 'report-file', _path: reportPath });
}
1761
+
1703
1762
  function normalizeCompletionStatus(status) {
1704
1763
  return String(status || '').trim().toLowerCase().replace(/[\s_]+/g, '-');
1705
1764
  }
@@ -1817,6 +1876,28 @@ function deferNonTerminalCompletion(meta, detection) {
1817
1876
  return reason;
1818
1877
  }
1819
1878
 
1879
/**
 * Coerce a loosely-typed completion field into a strict boolean.
 *
 * Accepts real booleans, the numbers 1 / 0 (a JSON report can carry numeric
 * flags), and the case-insensitive strings true/yes/1 and false/no/0.
 * Anything else is "not specified" and yields undefined, so callers can
 * distinguish an explicit false from an absent or unrecognised value.
 *
 * @param {*} value - Raw field value from a structured completion.
 * @returns {boolean|undefined} The parsed flag, or undefined when unrecognised.
 */
function parseCompletionBoolean(value) {
  if (typeof value === 'boolean') return value;
  if (typeof value === 'number') {
    // Mirror the '1' / '0' string forms; other numbers stay unrecognised.
    if (value === 1) return true;
    if (value === 0) return false;
    return undefined;
  }
  if (typeof value === 'string') {
    const normalized = value.trim().toLowerCase();
    if (['true', 'yes', '1'].includes(normalized)) return true;
    if (['false', 'no', '0'].includes(normalized)) return false;
  }
  return undefined;
}
1888
+
1889
/**
 * Map a free-form review verdict onto the engine's canonical values.
 *
 * Whitespace, hyphens and underscores are treated as interchangeable
 * separators, and the separator is optional: "REQUESTCHANGES" is accepted
 * because the verdict regex used by parseReviewVerdict allows it
 * (`REQUEST[_\s-]?CHANGES`).
 *
 * @param {*} verdict - Raw verdict text (e.g. "APPROVE", "request changes", "changes-requested").
 * @returns {'approved'|'changes-requested'|null} Canonical verdict, or null when unrecognised.
 */
function normalizeReviewVerdict(verdict) {
  // Collapse every run of separators to a single underscore before matching.
  const value = String(verdict || '').trim().toLowerCase().replace(/[\s_-]+/g, '_');
  switch (value) {
    case 'approve':
    case 'approved':
      return 'approved';
    case 'request_changes':
    case 'requestchanges':
    case 'changes_requested':
    case 'changesrequested':
      return 'changes-requested';
    default:
      return null;
  }
}
1895
+
1896
/**
 * Pull a review verdict out of a structured completion object.
 *
 * Looks at `verdict`, then `review_verdict`, then `reviewVerdict`, takes the
 * first truthy one, and normalises it via normalizeReviewVerdict.
 *
 * @param {object|null} completion - Structured completion (report file or fenced block).
 * @returns {string|null} Whatever normalizeReviewVerdict returns, or null when `completion` is not an object.
 */
function reviewVerdictFromCompletion(completion) {
  const isObjectLike = Boolean(completion) && typeof completion === 'object';
  if (!isObjectLike) {
    return null;
  }
  const rawVerdict = completion.verdict || completion.review_verdict || completion.reviewVerdict;
  return normalizeReviewVerdict(rawVerdict);
}
1900
+
1820
1901
  function writeNonCleanAgentReport(dispatchItem, agentId, outcome, structuredCompletion, resultSummary, exitCode) {
1821
1902
  if (!dispatchItem?.id || !outcome) {
1822
1903
  log('warn', 'Cannot write non-clean agent report without dispatch id and outcome');
@@ -1952,22 +2033,31 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
1952
2033
  // and for the foundation-only state of this plan item; downstream items
1953
2034
  // (P-2a6d9c4f, P-9c4f2d6a) populate dispatchItem.meta.runtimeName at spawn time.
1954
2035
  const runtimeName = dispatchItem.meta?.runtimeName || dispatchItem.runtimeName || 'claude';
1955
- const { resultSummary, taskUsage, sessionId, model } = parseAgentOutput(stdout, runtimeName);
1956
- const completionGateSummary = resultSummary || (typeof stdout === 'string' && !stdout.includes('"type":') ? stdout : '');
2036
+ let { resultSummary, taskUsage, sessionId, model } = parseAgentOutput(stdout, runtimeName);
1957
2037
 
1958
- // Try structured completion protocol first (```completion block from agent output)
1959
- const structuredCompletion = parseStructuredCompletion(stdout, runtimeName);
2038
+ // Prefer the sidecar completion report; keep fenced output as a compatibility fallback.
2039
+ const reportCompletion = parseCompletionReportFile(dispatchItem);
2040
+ const structuredCompletion = reportCompletion || parseStructuredCompletion(stdout, runtimeName);
1960
2041
  if (structuredCompletion) {
1961
- log('info', `Structured completion from ${agentId}: status=${structuredCompletion.status}, pr=${structuredCompletion.pr || 'N/A'}`);
2042
+ if (structuredCompletion.summary) resultSummary = String(structuredCompletion.summary);
2043
+ log('info', `Structured completion from ${agentId}: status=${structuredCompletion.status}, pr=${structuredCompletion.pr || 'N/A'}${structuredCompletion._source ? ` (${structuredCompletion._source})` : ''}`);
1962
2044
  }
2045
+ const completionGateSummary = resultSummary || (typeof stdout === 'string' && !stdout.includes('"type":') ? stdout : '');
1963
2046
 
1964
2047
  // Save session for potential resume on next dispatch
1965
2048
  if (isSuccess && sessionId && agentId && !agentId.startsWith('temp-')) {
1966
2049
  try {
1967
- shared.safeWrite(path.join(AGENTS_DIR, agentId, 'session.json'), {
1968
- sessionId, dispatchId: dispatchItem.id, savedAt: ts(),
1969
- branch: dispatchItem.meta?.branch || null,
1970
- });
2050
+ const runtime = resolveRuntime(runtimeName);
2051
+ if (runtime && typeof runtime.saveSession === 'function') {
2052
+ runtime.saveSession({
2053
+ agentId,
2054
+ dispatchId: dispatchItem.id,
2055
+ branch: dispatchItem.meta?.branch || null,
2056
+ sessionId,
2057
+ agentsDir: AGENTS_DIR,
2058
+ logger: { warn: (msg) => log('warn', msg) },
2059
+ });
2060
+ }
1971
2061
  } catch (err) { log('warn', `Session save: ${err.message}`); }
1972
2062
  }
1973
2063
 
@@ -1983,15 +2073,19 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
1983
2073
  log('info', `Structured completion reports PR (${structuredCompletion.pr}) but regex sync found none — PR may already be tracked`);
1984
2074
  }
1985
2075
 
2076
+ const completionStatus = normalizeCompletionStatus(structuredCompletion?.status);
2077
+ const agentNeedsRerun = parseCompletionBoolean(structuredCompletion?.needs_rerun ?? structuredCompletion?.needsRerun) === true;
2078
+ const agentReportedFailure = completionStatus.startsWith('fail') || agentNeedsRerun;
2079
+ const agentRetryable = parseCompletionBoolean(structuredCompletion?.retryable);
2080
+
1986
2081
  // Auto-recover: if a failed implement/fix/test agent created PRs, it likely succeeded before the failure surfaced.
1987
2082
  const prCreatingType = type === WORK_TYPE.IMPLEMENT || type === WORK_TYPE.IMPLEMENT_LARGE || type === WORK_TYPE.FIX || type === WORK_TYPE.TEST;
1988
- const autoRecovered = !isSuccess && prsCreatedCount > 0 && prCreatingType && !!meta?.item?.id;
2083
+ const autoRecovered = !agentReportedFailure && !isSuccess && prsCreatedCount > 0 && prCreatingType && !!meta?.item?.id;
1989
2084
  if (autoRecovered) {
1990
2085
  log('info', `Auto-recovery: agent failed but created ${prsCreatedCount} PR(s) — upgrading ${meta.item.id} to done`);
1991
2086
  }
1992
- const effectiveSuccess = isSuccess || autoRecovered;
2087
+ const effectiveSuccess = (isSuccess && !agentReportedFailure) || autoRecovered;
1993
2088
 
1994
- const completionStatus = normalizeCompletionStatus(structuredCompletion?.status);
1995
2089
  let nonCleanReportWritten = false;
1996
2090
  if (completionStatus.startsWith('partial') || autoRecovered || (completionStatus.startsWith('fail') && isSuccess)) {
1997
2091
  const outcome = completionStatus.startsWith('fail') ? 'failure' : 'partial';
@@ -2019,7 +2113,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
2019
2113
  // and after 3 such bailouts the WI flips to status=failed even though the
2020
2114
  // original review was posted on the first run.
2021
2115
  if (effectiveSuccess && type === WORK_TYPE.REVIEW && meta?.item?.id) {
2022
- const verdict = parseReviewVerdict(resultSummary);
2116
+ const verdict = reviewVerdictFromCompletion(structuredCompletion) || parseReviewVerdict(resultSummary);
2023
2117
  if (!verdict && isReviewBailout(resultSummary)) {
2024
2118
  log('info', `Review ${meta.item.id} bailed out (review already posted) — treating as DONE without retry`);
2025
2119
  } else if (!verdict) {
@@ -2116,8 +2210,10 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
2116
2210
  }
2117
2211
 
2118
2212
  if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
2119
- completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary);
2120
- if (completionContractFailure) skipDoneStatus = true;
2213
+ completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout);
2214
+ if (completionContractFailure?.severity === 'hard' || completionContractFailure?.nonTerminal) {
2215
+ skipDoneStatus = true;
2216
+ }
2121
2217
  }
2122
2218
 
2123
2219
  if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
@@ -2223,7 +2319,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
2223
2319
  // (retryCount was being deleted by done-marking before the check could read it)
2224
2320
  // Review verdict check similarly moved before updateWorkItemStatus(DONE) — same root cause.
2225
2321
 
2226
- if (type === WORK_TYPE.REVIEW) await updatePrAfterReview(agentId, meta?.pr, meta?.project, config, resultSummary);
2322
+ if (type === WORK_TYPE.REVIEW) await updatePrAfterReview(agentId, meta?.pr, meta?.project, config, resultSummary, structuredCompletion);
2227
2323
  if (type === WORK_TYPE.FIX) {
2228
2324
  updatePrAfterFix(meta?.pr, meta?.project, meta?.source);
2229
2325
  // (#984) Sync PRD status for PR-linked features: fix work items have a different ID
@@ -2242,7 +2338,9 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
2242
2338
  }
2243
2339
  }
2244
2340
  checkForLearnings(agentId, config.agents[agentId], dispatchItem.task);
2245
- const finalResult = completionContractFailure ? DISPATCH_RESULT.ERROR : (effectiveSuccess ? DISPATCH_RESULT.SUCCESS : DISPATCH_RESULT.ERROR);
2341
+ const hardContractFail = completionContractFailure?.severity === 'hard'
2342
+ || completionContractFailure?.nonTerminal === true;
2343
+ const finalResult = hardContractFail ? DISPATCH_RESULT.ERROR : (effectiveSuccess ? DISPATCH_RESULT.SUCCESS : DISPATCH_RESULT.ERROR);
2246
2344
  if (finalResult === DISPATCH_RESULT.SUCCESS) {
2247
2345
  extractSkillsFromOutput(stdout, agentId, dispatchItem, config);
2248
2346
  // Also scan inbox notes for skill blocks — agents often write skills to inbox, not stdout
@@ -2270,7 +2368,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
2270
2368
  teams.teamsNotifyCompletion(dispatchItem, finalResult, agentId).catch(() => {});
2271
2369
  } catch {}
2272
2370
 
2273
- return { resultSummary, taskUsage, autoRecovered, structuredCompletion, completionContractFailure };
2371
+ return { resultSummary, taskUsage, autoRecovered, structuredCompletion, completionContractFailure, agentReportedFailure, agentRetryable };
2274
2372
  }
2275
2373
 
2276
2374
  // ─── PR → PRD Status Sync ─────────────────────────────────────────────────────
@@ -2451,6 +2549,7 @@ module.exports = {
2451
2549
  isReviewBailout,
2452
2550
  parseStructuredCompletion,
2453
2551
  detectNonTerminalResultSummary,
2552
+ parseCompletionReportFile,
2454
2553
  runPostCompletionHooks,
2455
2554
  syncPrdFromPrs,
2456
2555
  resolveWorkItemPath,
@@ -25,6 +25,7 @@
25
25
  const fs = require('fs');
26
26
  const os = require('os');
27
27
  const path = require('path');
28
+ const { FAILURE_CLASS, safeWrite, ts } = require('../shared');
28
29
 
29
30
  const ENGINE_DIR = __dirname.replace(/[\\/]runtimes$/, '');
30
31
  const MINIONS_DIR = path.resolve(ENGINE_DIR, '..');
@@ -229,6 +230,88 @@ function buildArgs(opts = {}) {
229
230
  return args;
230
231
  }
231
232
 
233
/**
 * Translate dispatch options into the flag list passed to the spawn wrapper
 * for the Claude runtime. Flag order is fixed and always starts with
 * `--runtime claude`.
 *
 * @param {object} [opts] - Spawn options (maxTurns, model, allowedTools, effort,
 *   sessionId, maxBudget, bare, fallbackModel, stream, disableBuiltinMcps,
 *   suppressAgentsMd, reasoningSummaries).
 * @returns {string[]} Flat argv-style list of flags and their string values.
 */
function buildSpawnFlags(opts = {}) {
  // Emit `[flag, value]` when enabled, otherwise contribute nothing.
  const valued = (enabled, flag, value) => (enabled ? [flag, String(value)] : []);
  // Emit a bare switch only for a strict `true`.
  const toggle = (setting, flag) => (setting === true ? [flag] : []);

  return [
    '--runtime', 'claude',
    ...valued(opts.maxTurns != null, '--max-turns', opts.maxTurns),
    ...valued(opts.model, '--model', opts.model),
    ...valued(opts.allowedTools, '--allowedTools', opts.allowedTools),
    ...valued(opts.effort, '--effort', opts.effort),
    ...valued(opts.sessionId, '--resume', opts.sessionId),
    ...valued(opts.maxBudget != null, '--max-budget-usd', opts.maxBudget),
    ...toggle(opts.bare, '--bare'),
    ...valued(opts.fallbackModel, '--fallback-model', opts.fallbackModel),
    ...valued(opts.stream != null && opts.stream !== '', '--stream', opts.stream),
    ...toggle(opts.disableBuiltinMcps, '--disable-builtin-mcps'),
    ...toggle(opts.suppressAgentsMd, '--no-custom-instructions'),
    ...toggle(opts.reasoningSummaries, '--enable-reasoning-summaries'),
  ];
}
249
+
250
/**
 * Look up a resumable session id for an agent.
 *
 * Reads `<agentsDir>/<agentId>/session.json` and returns its `sessionId` only
 * when the record is younger than `maxAgeMs` and was saved for the same
 * branch. Temp agents (`temp-` prefix) never resume. Any error during lookup
 * is reported through `logger.warn` and treated as "no session".
 *
 * @param {object} [params]
 * @param {string} params.agentId - Agent identifier.
 * @param {string} params.branchName - Branch the new dispatch will run on.
 * @param {string} params.agentsDir - Root directory holding per-agent folders.
 * @param {number} [params.maxAgeMs=7200000] - Maximum session age in milliseconds.
 * @param {object} [params.logger=console] - Optional `info` / `warn` sink.
 * @returns {string|null} Session id to resume, or null.
 */
function getResumeSessionId({ agentId, branchName, agentsDir, maxAgeMs = 2 * 60 * 60 * 1000, logger = console } = {}) {
  const isEligibleAgent = Boolean(agentId) && !agentId.startsWith('temp-') && Boolean(agentsDir);
  if (!isEligibleAgent) return null;

  try {
    const saved = _safeJson(path.join(agentsDir, agentId, 'session.json'));
    if (!saved?.sessionId || !saved.savedAt) return null;

    const ageMs = Date.now() - new Date(saved.savedAt).getTime();
    const isFresh = ageMs < maxAgeMs;
    const onSameBranch = Boolean(branchName) && Boolean(saved.branch) && saved.branch === branchName;
    if (!isFresh || !onSameBranch) return null;

    if (logger && typeof logger.info === 'function') {
      logger.info(`Resuming session ${saved.sessionId} for ${agentId} on branch ${branchName} (age: ${Math.round(ageMs / 60000)}min)`);
    }
    return saved.sessionId;
  } catch (e) {
    if (logger && typeof logger.warn === 'function') logger.warn('session resume lookup: ' + e.message);
  }
  return null;
}
269
+
270
/**
 * Persist an agent's session record so a later dispatch on the same branch
 * can resume it. Writes `<agentsDir>/<agentId>/session.json`.
 *
 * @param {object} [params]
 * @param {string} params.agentId - Agent identifier; `temp-` agents are skipped.
 * @param {string} params.dispatchId - Dispatch that produced the session.
 * @param {string} [params.branch] - Branch the session ran on (stored as null when falsy).
 * @param {string} params.sessionId - Runtime session id to store.
 * @param {string} params.agentsDir - Root directory holding per-agent folders.
 * @param {Function} [params.now=ts] - Timestamp provider; a non-function falls back to the current ISO time.
 * @param {Function} [params.writeJson=safeWrite] - `(path, data)` writer.
 * @param {object} [params.logger=console] - Optional `warn` sink.
 * @returns {boolean} True when the record was written, false when skipped or the write threw.
 */
function saveSession({ agentId, dispatchId, branch, sessionId, agentsDir, now = ts, writeJson = safeWrite, logger = console } = {}) {
  const canPersist = Boolean(sessionId) && Boolean(agentId) && !agentId.startsWith('temp-') && Boolean(agentsDir);
  if (!canPersist) return false;

  try {
    const sessionFilePath = path.join(agentsDir, agentId, 'session.json');
    const savedAt = typeof now === 'function' ? now() : new Date().toISOString();
    writeJson(sessionFilePath, { sessionId, dispatchId, savedAt, branch: branch || null });
    return true;
  } catch (err) {
    if (logger && typeof logger.warn === 'function') logger.warn(`Session save: ${err.message}`);
    return false;
  }
}
285
+
286
/**
 * Heuristically detect an interactive permission/trust prompt in a chunk of
 * runtime output. Matching is done on the lower-cased text against a fixed
 * list of whole-word phrases.
 *
 * @param {*} outputChunk - Output fragment; falsy values are treated as empty text.
 * @returns {boolean} True when a known permission-prompt phrase is present.
 */
function detectPermissionGate(outputChunk) {
  const gatePhrases = /\b(trust this|do you trust|allow access|grant permission|approve tools?|permission prompt)\b/;
  const haystack = String(outputChunk || '').toLowerCase();
  return gatePhrases.test(haystack);
}
290
+
291
/**
 * Report how the prompt is handed to this runtime.
 *
 * @returns {string} Always `'stdin'` for this adapter.
 */
function getPromptDeliveryMode() {
  const deliveryMode = 'stdin';
  return deliveryMode;
}
294
+
295
/**
 * Decide whether a system-prompt file is used for this spawn: it is used for
 * every spawn that is not a resume.
 *
 * @param {object} [params]
 * @param {*} [params.isResume] - Truthy when the spawn resumes an existing session.
 * @returns {boolean} False for resumes, true otherwise.
 */
function usesSystemPromptFile({ isResume } = {}) {
  if (isResume) {
    return false;
  }
  return true;
}
298
+
299
/**
 * Map a parsed runtime error code onto an engine FAILURE_CLASS value.
 *
 * @param {string} code - Error code string to classify.
 * @returns {*} The matching FAILURE_CLASS member, or null when the code has no specific mapping.
 */
function _runtimeFailureClass(code) {
  switch (code) {
    case 'auth-failure':
    case 'budget-exceeded':
      return FAILURE_CLASS.PERMISSION_BLOCKED;
    case 'context-limit':
      return FAILURE_CLASS.OUT_OF_CONTEXT;
    case 'crash':
      return FAILURE_CLASS.SPAWN_ERROR;
    default:
      return null;
  }
}
305
+
306
/**
 * Classify a failed Claude run.
 *
 * Exit code 78 is always a non-retryable configuration error. Otherwise the
 * combined stdout/stderr is run through parseError: a recognised runtime
 * error code decides the failure class, and when none is recognised the
 * caller-supplied `fallback(code, stdout, stderr)` (or FAILURE_CLASS.UNKNOWN)
 * is used. `retryable` is true unless parseError explicitly reports
 * `retriable === false`.
 *
 * @param {object} [params]
 * @param {number} params.code - Process exit code.
 * @param {string} [params.stdout=''] - Captured standard output.
 * @param {string} [params.stderr=''] - Captured standard error.
 * @param {Function} [params.fallback] - Generic classifier used when no runtime-specific class applies.
 * @returns {{failureClass: *, retryable: boolean, message: string}}
 */
function classifyFailure({ code, stdout = '', stderr = '', fallback } = {}) {
  if (code === 78) {
    return { failureClass: FAILURE_CLASS.CONFIG_ERROR, retryable: false, message: 'Claude configuration error' };
  }

  const parsed = parseError(`${stdout || ''}\n${stderr || ''}`);
  const retryable = parsed.retriable !== false;
  const message = parsed.message || '';

  let failureClass = parsed.code ? _runtimeFailureClass(parsed.code) : null;
  if (!failureClass) {
    failureClass = typeof fallback === 'function' ? fallback(code, stdout, stderr) : FAILURE_CLASS.UNKNOWN;
  }
  return { failureClass, retryable, message };
}
314
+
232
315
  /**
233
316
  * Build the final prompt text delivered to the Claude CLI. Claude takes the
234
317
  * system prompt via `--system-prompt-file` and the user prompt via stdin, so
@@ -536,8 +619,15 @@ module.exports = {
536
619
  modelsCache: MODELS_CACHE,
537
620
  spawnScript: path.join(ENGINE_DIR, 'spawn-agent.js'),
538
621
  installHint: INSTALL_HINT,
622
+ buildSpawnFlags,
539
623
  buildArgs,
540
624
  buildPrompt,
625
+ getResumeSessionId,
626
+ saveSession,
627
+ detectPermissionGate,
628
+ getPromptDeliveryMode,
629
+ usesSystemPromptFile,
630
+ classifyFailure,
541
631
  resolveModel,
542
632
  parseOutput,
543
633
  parseStreamChunk,
@@ -31,6 +31,7 @@ const fs = require('fs');
31
31
  const https = require('https');
32
32
  const path = require('path');
33
33
  const { execSync } = require('child_process');
34
+ const { FAILURE_CLASS, safeWrite, ts } = require('../shared');
34
35
 
35
36
  const ENGINE_DIR = __dirname.replace(/[\\/]runtimes$/, '');
36
37
  const isWin = process.platform === 'win32';
@@ -254,6 +255,88 @@ function buildArgs(opts = {}) {
254
255
  return args;
255
256
  }
256
257
 
258
/**
 * Translate dispatch options into the flag list passed to the spawn wrapper
 * for the Copilot runtime. Flags for effort, resume, budget cap, bare mode
 * and fallback model are only emitted when the matching entry in this
 * module's exported `capabilities` is truthy. Flag order is fixed and always
 * starts with `--runtime copilot`.
 *
 * @param {object} [opts] - Spawn options (maxTurns, model, allowedTools, effort,
 *   sessionId, maxBudget, bare, fallbackModel, stream, disableBuiltinMcps,
 *   suppressAgentsMd, reasoningSummaries).
 * @returns {string[]} Flat argv-style list of flags and their string values.
 */
function buildSpawnFlags(opts = {}) {
  // Emit `[flag, value]` when enabled, otherwise contribute nothing.
  const valued = (enabled, flag, value) => (enabled ? [flag, String(value)] : []);
  // Emit a bare switch when enabled.
  const toggle = (enabled, flag) => (enabled ? [flag] : []);

  return [
    '--runtime', 'copilot',
    ...valued(opts.maxTurns != null, '--max-turns', opts.maxTurns),
    ...valued(opts.model, '--model', opts.model),
    ...valued(opts.allowedTools, '--allowedTools', opts.allowedTools),
    ...valued(module.exports.capabilities.effortLevels && opts.effort, '--effort', opts.effort),
    ...valued(module.exports.capabilities.sessionResume && opts.sessionId, '--resume', opts.sessionId),
    ...valued(module.exports.capabilities.budgetCap && opts.maxBudget != null, '--max-budget-usd', opts.maxBudget),
    ...toggle(module.exports.capabilities.bareMode && opts.bare === true, '--bare'),
    ...valued(module.exports.capabilities.fallbackModel && opts.fallbackModel, '--fallback-model', opts.fallbackModel),
    ...valued(opts.stream != null && opts.stream !== '', '--stream', opts.stream),
    ...toggle(opts.disableBuiltinMcps === true, '--disable-builtin-mcps'),
    ...toggle(opts.suppressAgentsMd === true, '--no-custom-instructions'),
    ...toggle(opts.reasoningSummaries === true, '--enable-reasoning-summaries'),
  ];
}
274
+
275
/**
 * Look up a resumable session id for an agent.
 *
 * Reads `<agentsDir>/<agentId>/session.json` and returns its `sessionId` only
 * when the record is younger than `maxAgeMs` and was saved for the same
 * branch. Temp agents (`temp-` prefix) never resume. Any error during lookup
 * is reported through `logger.warn` and treated as "no session".
 *
 * @param {object} [params]
 * @param {string} params.agentId - Agent identifier.
 * @param {string} params.branchName - Branch the new dispatch will run on.
 * @param {string} params.agentsDir - Root directory holding per-agent folders.
 * @param {number} [params.maxAgeMs=7200000] - Maximum session age in milliseconds.
 * @param {object} [params.logger=console] - Optional `info` / `warn` sink.
 * @returns {string|null} Session id to resume, or null.
 */
function getResumeSessionId({ agentId, branchName, agentsDir, maxAgeMs = 2 * 60 * 60 * 1000, logger = console } = {}) {
  const isEligibleAgent = Boolean(agentId) && !agentId.startsWith('temp-') && Boolean(agentsDir);
  if (!isEligibleAgent) return null;

  try {
    const saved = _safeJson(path.join(agentsDir, agentId, 'session.json'));
    if (!saved?.sessionId || !saved.savedAt) return null;

    const ageMs = Date.now() - new Date(saved.savedAt).getTime();
    const isFresh = ageMs < maxAgeMs;
    const onSameBranch = Boolean(branchName) && Boolean(saved.branch) && saved.branch === branchName;
    if (!isFresh || !onSameBranch) return null;

    if (logger && typeof logger.info === 'function') {
      logger.info(`Resuming session ${saved.sessionId} for ${agentId} on branch ${branchName} (age: ${Math.round(ageMs / 60000)}min)`);
    }
    return saved.sessionId;
  } catch (e) {
    if (logger && typeof logger.warn === 'function') logger.warn('session resume lookup: ' + e.message);
  }
  return null;
}
294
+
295
/**
 * Persist an agent's session record so a later dispatch on the same branch
 * can resume it. Writes `<agentsDir>/<agentId>/session.json`.
 *
 * @param {object} [params]
 * @param {string} params.agentId - Agent identifier; `temp-` agents are skipped.
 * @param {string} params.dispatchId - Dispatch that produced the session.
 * @param {string} [params.branch] - Branch the session ran on (stored as null when falsy).
 * @param {string} params.sessionId - Runtime session id to store.
 * @param {string} params.agentsDir - Root directory holding per-agent folders.
 * @param {Function} [params.now=ts] - Timestamp provider; a non-function falls back to the current ISO time.
 * @param {Function} [params.writeJson=safeWrite] - `(path, data)` writer.
 * @param {object} [params.logger=console] - Optional `warn` sink.
 * @returns {boolean} True when the record was written, false when skipped or the write threw.
 */
function saveSession({ agentId, dispatchId, branch, sessionId, agentsDir, now = ts, writeJson = safeWrite, logger = console } = {}) {
  const canPersist = Boolean(sessionId) && Boolean(agentId) && !agentId.startsWith('temp-') && Boolean(agentsDir);
  if (!canPersist) return false;

  try {
    const sessionFilePath = path.join(agentsDir, agentId, 'session.json');
    const savedAt = typeof now === 'function' ? now() : new Date().toISOString();
    writeJson(sessionFilePath, { sessionId, dispatchId, savedAt, branch: branch || null });
    return true;
  } catch (err) {
    if (logger && typeof logger.warn === 'function') logger.warn(`Session save: ${err.message}`);
    return false;
  }
}
310
+
311
/**
 * Permission-prompt detection hook for the Copilot adapter. This
 * implementation never reports a gate, whatever it is called with.
 *
 * @returns {boolean} Always false.
 */
function detectPermissionGate() {
  const gateDetected = false;
  return gateDetected;
}
314
+
315
/**
 * Report how the prompt is handed to this runtime.
 *
 * @returns {string} Always `'stdin'` for this adapter.
 */
function getPromptDeliveryMode() {
  const deliveryMode = 'stdin';
  return deliveryMode;
}
318
+
319
/**
 * Report whether this adapter uses a system-prompt file. This implementation
 * always answers no, regardless of arguments.
 *
 * @returns {boolean} Always false.
 */
function usesSystemPromptFile() {
  const needsPromptFile = false;
  return needsPromptFile;
}
322
+
323
/**
 * Map a parsed runtime error code onto an engine FAILURE_CLASS value.
 *
 * @param {string} code - Error code string to classify.
 * @returns {*} The matching FAILURE_CLASS member, or null when the code has no specific mapping.
 */
function _runtimeFailureClass(code) {
  switch (code) {
    case 'auth-failure':
    case 'budget-exceeded':
      return FAILURE_CLASS.PERMISSION_BLOCKED;
    case 'unknown-model':
      return FAILURE_CLASS.CONFIG_ERROR;
    case 'rate-limit':
      return FAILURE_CLASS.NETWORK_ERROR;
    case 'crash':
      return FAILURE_CLASS.SPAWN_ERROR;
    default:
      return null;
  }
}
330
+
331
/**
 * Classify a failed Copilot run.
 *
 * Exit code 78 is always a non-retryable configuration error. Otherwise the
 * combined stdout/stderr is run through parseError: a recognised runtime
 * error code decides the failure class, and when none is recognised the
 * caller-supplied `fallback(code, stdout, stderr)` (or FAILURE_CLASS.UNKNOWN)
 * is used. `retryable` is true unless parseError explicitly reports
 * `retriable === false`.
 *
 * @param {object} [params]
 * @param {number} params.code - Process exit code.
 * @param {string} [params.stdout=''] - Captured standard output.
 * @param {string} [params.stderr=''] - Captured standard error.
 * @param {Function} [params.fallback] - Generic classifier used when no runtime-specific class applies.
 * @returns {{failureClass: *, retryable: boolean, message: string}}
 */
function classifyFailure({ code, stdout = '', stderr = '', fallback } = {}) {
  if (code === 78) {
    return { failureClass: FAILURE_CLASS.CONFIG_ERROR, retryable: false, message: 'Copilot configuration error' };
  }

  const parsed = parseError(`${stdout || ''}\n${stderr || ''}`);
  const retryable = parsed.retriable !== false;
  const message = parsed.message || '';

  let failureClass = parsed.code ? _runtimeFailureClass(parsed.code) : null;
  if (!failureClass) {
    failureClass = typeof fallback === 'function' ? fallback(code, stdout, stderr) : FAILURE_CLASS.UNKNOWN;
  }
  return { failureClass, retryable, message };
}
339
+
257
340
  // ── Prompt Construction ─────────────────────────────────────────────────────
258
341
  //
259
342
  // Copilot has no --system-prompt-file flag, so we deliver the system prompt
@@ -681,8 +764,15 @@ module.exports = {
681
764
  // Use the same wrapper as Claude — spawn-agent.js is runtime-agnostic per P-9c4f2d6a
682
765
  spawnScript: path.join(ENGINE_DIR, 'spawn-agent.js'),
683
766
  installHint: INSTALL_HINT,
767
+ buildSpawnFlags,
684
768
  buildArgs,
685
769
  buildPrompt,
770
+ getResumeSessionId,
771
+ saveSession,
772
+ detectPermissionGate,
773
+ getPromptDeliveryMode,
774
+ usesSystemPromptFile,
775
+ classifyFailure,
686
776
  resolveModel,
687
777
  parseOutput,
688
778
  parseStreamChunk,
package/engine/shared.js CHANGED
@@ -230,6 +230,12 @@ function dispatchPromptSidecarPath(dispatchId) {
230
230
  return path.join(_promptContextsDir(), `${safeId}.md`);
231
231
  }
232
232
 
233
/**
 * Compute where a dispatch's sidecar completion report lives:
 * `<MINIONS_DIR>/engine/completions/<safeId>.json`. Every character of the id
 * outside `[a-zA-Z0-9._-]` is replaced with `-` so the id cannot introduce
 * path separators.
 *
 * @param {*} dispatchId - Dispatch identifier.
 * @returns {string|null} Report path, or null when `dispatchId` is falsy.
 */
function dispatchCompletionReportPath(dispatchId) {
  if (!dispatchId) {
    return null;
  }
  const sanitizedId = String(dispatchId).replace(/[^a-zA-Z0-9._-]/g, '-');
  const reportFileName = `${sanitizedId}.json`;
  return path.join(MINIONS_DIR, 'engine', 'completions', reportFileName);
}
238
+
233
239
  /**
234
240
  * If the dispatch item's prompt exceeds thresholdBytes, write the full prompt
235
241
  * to engine/contexts/<id>.md and replace `item.prompt` with a short stub
@@ -716,7 +722,7 @@ const ENGINE_DEFAULTS = {
716
722
  autoFixBuilds: true, // auto-dispatch fix agents when a PR build fails
717
723
  meetingRoundTimeout: 900000, // 15min per meeting round before auto-advance
718
724
  evalLoop: true, // enable review→fix loop after implementation completes
719
- evalMaxIterations: 3, // max review→fix cycles before escalating to human
725
+ evalMaxIterations: 3, // legacy UI/config field; engine discovery no longer enforces review→fix cycle caps
720
726
  evalMaxCost: null, // USD ceiling per work item across all eval iterations; null = no limit (gather baseline data first)
721
727
  maxRetries: 3, // max dispatch retries before marking work item as failed
722
728
  minRetryGapMs: 120000, // 2min — minimum gap between retry dispatches for the same work item; prevents tight retry loops when an idempotent agent (e.g. review bailing out on a duplicate) cannot produce the expected output (#1770)
@@ -727,7 +733,7 @@ const ENGINE_DEFAULTS = {
727
733
  logBufferSize: 50, // flush immediately when buffer exceeds this many entries
728
734
  lockRetries: 0, // no retries — single 5s timeout window with 25ms polling (200 attempts) is sufficient; stale lock recovery at 60s handles crashes
729
735
  lockRetryBackoffMs: 500, // base backoff between lock retries (doubles each attempt: 500ms, 1s, 2s, ...)
730
- maxBuildFixAttempts: 3, // max consecutive auto-fix dispatch cycles per PR before escalation to human
736
+ maxBuildFixAttempts: 3, // legacy UI/config field; engine discovery no longer enforces build-fix attempt caps
731
737
  buildFixGracePeriod: 600000, // 10min — wait for CI to run after build fix before re-dispatching
732
738
  adoPollEnabled: true, // poll ADO PR status, comments, and reconciliation on each tick cycle
733
739
  ghPollEnabled: true, // poll GitHub PR status, comments, and reconciliation on each tick cycle
@@ -1171,7 +1177,7 @@ const ESCALATION_POLICY = {
1171
1177
  };
1172
1178
 
1173
1179
  // Structured completion protocol — fields agents must produce in ```completion blocks
1174
- const COMPLETION_FIELDS = ['status', 'files_changed', 'tests', 'pr', 'pending', 'failure_class'];
1180
+ const COMPLETION_FIELDS = ['status', 'summary', 'files_changed', 'tests', 'pr', 'pending', 'failure_class', 'retryable', 'needs_rerun', 'verdict'];
1175
1181
 
1176
1182
  const DEFAULT_AGENT_METRICS = {
1177
1183
  tasksCompleted: 0, tasksErrored: 0,
@@ -1778,6 +1784,39 @@ function findPrRecord(prs, prRef, project = null) {
1778
1784
  return numberMatches.length === 1 ? numberMatches[0] : null;
1779
1785
  }
1780
1786
 
1787
/**
 * Take a detached, JSON-safe deep copy of a PR record (or any JSON value).
 *
 * The copy goes through a JSON round trip, so it keeps only what JSON can
 * represent: `undefined` members and functions are dropped, and nested
 * objects/arrays are fresh instances that share nothing with the input.
 *
 * @param {*} pr - Value to copy.
 * @returns {*} The deep copy, or `undefined` when `pr` has no JSON
 *   representation (`undefined`, a function, a symbol).
 */
function snapshotPrRecord(pr) {
  if (pr === undefined) return undefined;
  const serialized = JSON.stringify(pr);
  // JSON.stringify yields undefined (not a string) for functions and symbols;
  // feeding that to JSON.parse would throw a SyntaxError.
  if (serialized === undefined) return undefined;
  return JSON.parse(serialized);
}
1791
+
1792
/**
 * Compare two values by their JSON serialization.
 *
 * The comparison is on the serialized text, so object key order matters and
 * values without a JSON form (e.g. `undefined`) compare equal to each other.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {boolean} True when both serialize to the same JSON text.
 */
function _jsonEqual(a, b) {
  const left = JSON.stringify(a);
  const right = JSON.stringify(b);
  return left === right;
}
1795
+
1796
/**
 * Tell whether a value is a non-null, non-array object.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} True for any truthy value whose `typeof` is `'object'`
 *   and which is not an array.
 */
function _isPlainObject(value) {
  if (!value) return false;
  if (typeof value !== 'object') return false;
  return !Array.isArray(value);
}
1799
+
1800
/**
 * Replay onto `target` the field-level changes that turned `before` into `after`.
 *
 * Only keys whose value differs between `before` and `after` (by JSON
 * serialization) are touched, so fields of `target` that the before→after
 * edit did not change keep whatever value `target` already has.
 *
 * - Key absent from `after`: deleted from `target`.
 * - Key is a non-array object in `before`, `after` and `target`: merged recursively.
 * - Otherwise: `target[key]` is replaced with a detached copy of the `after` value.
 *
 * @param {object} target - Record to mutate in place.
 * @param {object} [before] - Snapshot prior to the edit; a non-object is treated as `{}`.
 * @param {object} after - Snapshot following the edit.
 * @returns {object} `target` (returned unchanged when `target` or `after` is not an object).
 */
function applyPrFieldDelta(target, before, after) {
  if (!target || typeof target !== 'object' || !after || typeof after !== 'object') return target;
  const base = before && typeof before === 'object' ? before : {};
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const keys = new Set([...Object.keys(base), ...Object.keys(after)]);
  for (const key of keys) {
    // JSON.parse can create an own "__proto__" key. Reading base[key] or
    // target[key] for it resolves to Object.prototype, so recursing (or
    // assigning) would pollute or swap prototypes. Never merge through it.
    if (key === '__proto__') continue;
    const beforeValue = base[key];
    const afterHas = hasOwn(after, key);
    const afterValue = after[key];
    if (_jsonEqual(beforeValue, afterValue)) continue;
    if (!afterHas) {
      delete target[key];
    } else if (
      _isPlainObject(beforeValue) &&
      _isPlainObject(afterValue) &&
      // Only descend into objects the target itself owns, never inherited ones.
      hasOwn(target, key) &&
      _isPlainObject(target[key])
    ) {
      applyPrFieldDelta(target[key], beforeValue, afterValue);
    } else {
      // Store a detached copy so later edits to `after` cannot leak into `target`.
      target[key] = snapshotPrRecord(afterValue);
    }
  }
  return target;
}
1819
+
1781
1820
  function normalizePrRecord(pr, project = null) {
1782
1821
  if (!pr || typeof pr !== 'object') return false;
1783
1822
  let changed = false;
@@ -2271,6 +2310,7 @@ module.exports = {
2271
2310
  safeUnlink,
2272
2311
  PROMPT_CONTEXTS_DIR,
2273
2312
  dispatchPromptSidecarPath,
2313
+ dispatchCompletionReportPath,
2274
2314
  sidecarDispatchPrompt,
2275
2315
  resolveDispatchPrompt,
2276
2316
  deleteDispatchPromptSidecar,
@@ -2325,6 +2365,8 @@ module.exports = {
2325
2365
  isPrCompatibleWithProject,
2326
2366
  getCanonicalPrId,
2327
2367
  findPrRecord,
2368
+ snapshotPrRecord,
2369
+ applyPrFieldDelta,
2328
2370
  normalizePrRecord,
2329
2371
  normalizePrRecords,
2330
2372
  upsertPullRequestRecord,