@yemi33/minions 0.1.1649 → 0.1.1651
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/engine/ado.js +17 -30
- package/engine/copilot-models.json +1 -1
- package/engine/dispatch.js +14 -6
- package/engine/github.js +24 -22
- package/engine/lifecycle.js +276 -48
- package/engine/runtimes/claude.js +90 -0
- package/engine/runtimes/copilot.js +90 -0
- package/engine/shared.js +45 -3
- package/engine/spawn-agent.js +9 -6
- package/engine/timeout.js +17 -4
- package/engine.js +108 -139
- package/package.json +1 -1
- package/playbooks/fix.md +2 -2
- package/playbooks/implement-shared.md +2 -2
- package/playbooks/review.md +2 -3
- package/playbooks/shared-rules.md +12 -2
package/engine/lifecycle.js
CHANGED
|
@@ -11,6 +11,7 @@ const { safeRead, safeJson, safeWrite, mutateJsonFileLocked, mutateWorkItems, ex
|
|
|
11
11
|
log, ts, dateStamp, WI_STATUS, DONE_STATUSES, PLAN_TERMINAL_STATUSES, WORK_TYPE, PLAN_STATUS, PRD_ITEM_STATUS, PR_STATUS, DISPATCH_RESULT,
|
|
12
12
|
ENGINE_DEFAULTS, DEFAULT_AGENT_METRICS, FAILURE_CLASS } = shared;
|
|
13
13
|
const { trackEngineUsage } = require('./llm');
|
|
14
|
+
const { resolveRuntime } = require('./runtimes');
|
|
14
15
|
const queries = require('./queries');
|
|
15
16
|
const { isBranchActive } = require('./cooldown');
|
|
16
17
|
const { worktreeDirMatchesBranch } = require('./cleanup');
|
|
@@ -980,36 +981,72 @@ async function findOpenPrForBranch(meta, config) {
|
|
|
980
981
|
return null;
|
|
981
982
|
}
|
|
982
983
|
|
|
983
|
-
|
|
984
|
+
// Lightweight probe for "did the agent's output contain ANY PR URL?". Used by
// the PR-attachment contract to distinguish silent-failure (no URL anywhere)
// from auto-link-miss (URL present but engine couldn't canonically attach it).
// Keep this regex roughly in sync with the gated detection in syncPrsFromOutput
// — this is yes/no only; no capture groups required.
/**
 * @param {unknown} output - Raw agent output; anything other than a non-empty string yields false.
 * @returns {boolean} True when a GitHub `/pull/<n>` or Azure DevOps `pullrequest/<n>` URL appears.
 */
function _outputContainsPrUrl(output) {
  if (typeof output !== 'string' || output.length === 0) return false;
  // Non-global regex: .test() carries no lastIndex state between calls.
  return /https?:\/\/(?:github\.com\/[^\s"'\\)\]]+\/[^\s"'\\)\]]+\/pull\/\d+|(?:dev\.azure\.com|[^/\s"'\\)\]]+\.visualstudio\.com)[^\s"'\\)\]]*?pullrequest\/\d+)/i.test(output);
}
|
|
994
|
+
|
|
995
|
+
/**
 * Record that a completed work item has no canonically attached PR and notify
 * the engine inbox.
 *
 * Hard severity (anything other than 'soft') moves the work item to
 * NEEDS_REVIEW, records the failure reason and clears completion/no-PR
 * markers. Soft severity leaves status and failReason alone and only sets an
 * "unverified" flag plus the last review reason.
 *
 * @param {object} meta - Dispatch meta; reads `meta.item.{id,title,type}` and `meta.branch`.
 * @param {string} agentId - Agent that produced the result.
 * @param {string} reason - Human-readable explanation stored on the item and in the inbox note.
 * @param {string} [resultSummary] - Optional agent summary appended to the inbox note.
 * @param {string} [severity] - 'soft' for an auto-link miss; any other value is treated as hard.
 */
function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity) {
  const workItemsPath = resolveWorkItemPath(meta);
  const hard = severity !== 'soft';

  // Apply the severity-specific flags to the matching work item record.
  const flagItem = (item) => {
    if (!hard) {
      // Soft: don't change status or failReason — the agent did the work,
      // we just couldn't auto-attach the PR. Surface a flag for the dashboard
      // so the dispatch row can render a yellow "verify" badge.
      item._unverifiedPrAttachment = true;
      item._lastReviewReason = reason;
      return;
    }
    item.status = WI_STATUS.NEEDS_REVIEW;
    item._missingPrAttachment = true;
    item.failReason = reason;
    item._lastReviewReason = reason;
    delete item.completedAt;
    delete item._noPr;
    delete item._noPrReason;
  };

  if (workItemsPath) {
    mutateJsonFileLocked(workItemsPath, (records) => {
      if (Array.isArray(records)) {
        const match = records.find((entry) => entry.id === meta.item.id);
        if (match) flagItem(match);
      }
      return records;
    }, { skipWriteIfUnchanged: true });
  }

  // Header lines and summary section are identical for both inbox notes.
  const details =
    `**Agent:** ${agentId}\n` +
    `**Work item:** \`${meta.item.id}\` — ${meta.item.title || ''}\n` +
    `**Type:** ${meta.item.type || 'unknown'}\n` +
    `**Branch:** ${meta.branch || '(none)'}\n\n`;
  const summarySection = resultSummary ? `\n## Agent summary\n${resultSummary}\n` : '';

  if (hard) {
    shared.writeToInbox('engine', `missing-pr-attachment-${meta.item.id}`,
      `# PR attachment missing for ${meta.item.id}\n\n` +
      details +
      `${reason}\n` +
      summarySection,
      null,
      { sourceItem: meta.item.id, reason: 'missing-pr-attachment' });
    return;
  }

  shared.writeToInbox('engine', `pr-auto-link-unverified-${meta.item.id}`,
    `# PR auto-link unverified for ${meta.item.id}\n\n` +
    details +
    `${reason}\n\n` +
    `The agent's output mentioned a PR URL but the engine couldn't canonically attach it ` +
    `(URL detection regex miss, branch lookup race, untrusted tool_use signature, etc.). ` +
    `The work likely succeeded — verify against the project's PR list.\n` +
    summarySection,
    null,
    { sourceItem: meta.item.id, reason: 'pr-auto-link-unverified' });
}
|
|
1011
1048
|
|
|
1012
|
-
async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary) {
|
|
1049
|
+
async function enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, output) {
|
|
1013
1050
|
if (!isPrAttachmentRequired(type, meta?.item, meta)) return null;
|
|
1014
1051
|
if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
|
|
1015
1052
|
|
|
@@ -1037,10 +1074,16 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
|
|
|
1037
1074
|
if (hasCanonicalPrAttachment(meta.item.id, config)) return null;
|
|
1038
1075
|
}
|
|
1039
1076
|
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1077
|
+
// Distinguish "agent never claimed a PR" (hard — silent failure the contract
|
|
1078
|
+
// was designed to catch) from "agent claimed a PR but engine couldn't attach
|
|
1079
|
+
// it canonically" (soft — verification gap, not a failure).
|
|
1080
|
+
const severity = _outputContainsPrUrl(output) ? 'soft' : 'hard';
|
|
1081
|
+
const reason = severity === 'hard'
|
|
1082
|
+
? `${meta.item.id} completed but no PR URL was detected in the agent's output. Expected a PR — verify the agent didn't fail silently. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`
|
|
1083
|
+
: `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
|
|
1084
|
+
markMissingPrAttachment(meta, agentId, reason, resultSummary, severity);
|
|
1085
|
+
log(severity === 'hard' ? 'warn' : 'info', reason);
|
|
1086
|
+
return { reason, itemId: meta.item.id, severity };
|
|
1044
1087
|
}
|
|
1045
1088
|
|
|
1046
1089
|
// ─── Post-Completion Hooks ──────────────────────────────────────────────────
|
|
@@ -1059,9 +1102,7 @@ function parseReviewVerdict(text) {
|
|
|
1059
1102
|
// Match "VERDICT: APPROVE" or "VERDICT: REQUEST_CHANGES" (case-insensitive, optional markdown bold)
|
|
1060
1103
|
const verdictMatch = text.match(/VERDICT[:\s]+\*{0,2}(APPROVE|REQUEST[_\s-]?CHANGES)\*{0,2}/i);
|
|
1061
1104
|
if (verdictMatch) {
|
|
1062
|
-
|
|
1063
|
-
if (v === 'APPROVE') return 'approved';
|
|
1064
|
-
if (v.includes('CHANGES')) return 'changes-requested';
|
|
1105
|
+
return normalizeReviewVerdict(verdictMatch[1]);
|
|
1065
1106
|
}
|
|
1066
1107
|
return null;
|
|
1067
1108
|
}
|
|
@@ -1083,7 +1124,7 @@ function isReviewBailout(text) {
|
|
|
1083
1124
|
return /bail(ing)?\s+out/i.test(text) || /already\s+posted/i.test(text);
|
|
1084
1125
|
}
|
|
1085
1126
|
|
|
1086
|
-
async function updatePrAfterReview(agentId, pr, project, config, resultSummary) {
|
|
1127
|
+
async function updatePrAfterReview(agentId, pr, project, config, resultSummary, structuredCompletion = null) {
|
|
1087
1128
|
|
|
1088
1129
|
if (!pr?.id) return;
|
|
1089
1130
|
|
|
@@ -1108,12 +1149,12 @@ async function updatePrAfterReview(agentId, pr, project, config, resultSummary)
|
|
|
1108
1149
|
}
|
|
1109
1150
|
} catch (e) { log('warn', `Post-review status check for ${pr.id}: ${e.message}`); }
|
|
1110
1151
|
|
|
1111
|
-
// Fallback: if live check returned pending (e.g., GitHub self-approval blocked),
|
|
1152
|
+
// Fallback: if live check returned pending (e.g., GitHub self-approval blocked), use the agent's completion report.
|
|
1112
1153
|
if (!postReviewStatus) {
|
|
1113
|
-
const verdict = parseReviewVerdict(resultSummary);
|
|
1154
|
+
const verdict = reviewVerdictFromCompletion(structuredCompletion) || parseReviewVerdict(resultSummary);
|
|
1114
1155
|
if (verdict) {
|
|
1115
1156
|
postReviewStatus = verdict;
|
|
1116
|
-
log('info', `
|
|
1157
|
+
log('info', `Read review verdict from agent completion for ${pr.id}: ${verdict}`);
|
|
1117
1158
|
}
|
|
1118
1159
|
}
|
|
1119
1160
|
|
|
@@ -1700,10 +1741,163 @@ function parseStructuredCompletion(stdout, runtimeName) {
|
|
|
1700
1741
|
return result;
|
|
1701
1742
|
}
|
|
1702
1743
|
|
|
1744
|
+
/**
 * Load the sidecar completion report written for a dispatch, if one exists.
 *
 * The path comes from `dispatchItem.meta.completionReportPath`, falling back
 * to `shared.dispatchCompletionReportPath(dispatchItem.id)`. A report is only
 * accepted when it parses to a plain (non-array) object carrying a `status`
 * (or an `outcome`, which is copied into `status`).
 *
 * @param {object} dispatchItem - Dispatch record; only `id` and `meta.completionReportPath` are read.
 * @returns {object|null} The parsed report tagged with `_source` and `_path`, or null.
 */
function parseCompletionReportFile(dispatchItem) {
  const reportPath = dispatchItem?.meta?.completionReportPath || shared.dispatchCompletionReportPath(dispatchItem?.id);
  if (!reportPath) return null;
  if (!fs.existsSync(reportPath)) return null;

  const dispatchLabel = dispatchItem?.id || 'unknown';
  const report = safeJson(reportPath);
  const isPlainRecord = Boolean(report) && typeof report === 'object' && !Array.isArray(report);
  if (!isPlainRecord) {
    log('warn', `Ignoring malformed completion report for ${dispatchLabel}: ${reportPath}`);
    return null;
  }

  // Older reports may use `outcome` in place of `status`.
  const effectiveStatus = report.status || report.outcome;
  if (!effectiveStatus) {
    log('warn', `Ignoring completion report without status for ${dispatchLabel}: ${reportPath}`);
    return null;
  }
  if (!report.status) report.status = effectiveStatus;

  report._source = 'report-file';
  report._path = reportPath;
  return report;
}
|
|
1761
|
+
|
|
1703
1762
|
/**
 * Canonicalize a completion status: trimmed, lower-cased, with every run of
 * whitespace/underscores collapsed to a single hyphen.
 *
 * @param {unknown} status - Raw status; falsy values become the empty string.
 * @returns {string} Normalized status (possibly empty).
 */
function normalizeCompletionStatus(status) {
  const raw = String(status || '');
  const lowered = raw.trim().toLowerCase();
  return lowered.replace(/[\s_]+/g, '-');
}
|
|
1706
1765
|
|
|
1766
|
+
/**
 * Decide whether a structured `pending` value actually means "nothing is pending".
 *
 * Empty values, "none"-style placeholders, "no pending ..." phrases and
 * "(all) pending work resolved"-style phrases count as terminal. Surrounding
 * markdown emphasis (`*`, `_`, backticks) and trailing sentence punctuation
 * are ignored, so values such as "None." or "**None**" are not mistaken for
 * outstanding work (which would trigger a needless retry).
 *
 * @param {unknown} value - The `pending` field of a completion report.
 * @returns {boolean} True when the value describes no remaining work.
 */
function isTerminalPendingValue(value) {
  const text = String(value || '')
    .trim()
    .toLowerCase()
    // Drop leading emphasis markers and trailing emphasis/punctuation only;
    // a lone "-" placeholder is left intact.
    .replace(/^[\s*_`]+|[\s*_`.!]+$/g, '');
  if (!text) return true;
  return /^(?:none|n\/a|na|no|nothing|not-applicable|not applicable|[-–—])$/.test(text)
    || /^no\s+(?:pending|remaining|outstanding)\b/.test(text)
    || /^(?:all\s+)?(?:pending|remaining|outstanding)\s+(?:work|items?|tasks?)?\s*(?:resolved|complete|completed|done|closed)$/.test(text);
}
|
|
1773
|
+
|
|
1774
|
+
/**
 * Decide whether a free-text line that mentions "pending" is really saying
 * that nothing is pending (e.g. "No pending work", "Pending: none",
 * "pending items resolved").
 *
 * Markdown emphasis markers (`*`, backticks) are removed first so labels such
 * as "**Pending:** none" are recognised. A bare dash placeholder
 * ("Pending: -") is accepted only when it ends the line: a `\b` after "-"
 * can never match at end of input, and would instead match "-word", which is
 * the start of real content rather than a placeholder.
 *
 * @param {unknown} line - A single line of the agent's summary.
 * @returns {boolean} True when the line reports no outstanding work.
 */
function isTerminalPendingLine(line) {
  const text = String(line || '').replace(/[*`]+/g, '').trim().toLowerCase();
  return /\bno\s+pending\b/.test(text)
    || /\bpending\s*[:=-]\s*(?:(?:none|n\/a|na|no|nothing|not applicable)\b|[-–—]+\s*$)/.test(text)
    || /\bpending\s+(?:work|items?|tasks?)?\s*(?:resolved|complete|completed|done|closed)\b/.test(text);
}
|
|
1780
|
+
|
|
1781
|
+
/**
 * Inspect an agent's completion data for signs that the work is not actually
 * finished, even though the run was reported as successful.
 *
 * Checks, in order:
 *   1. the structured status (partial/in-progress/... or fail/error prefixes),
 *   2. a structured `pending` value that is not a "nothing pending" placeholder,
 *   3. tell-tale phrases in the free-text summary,
 *   4. summary lines mentioning "pending" that are not "nothing pending" lines.
 *
 * @param {string} resultSummary - Free-text summary produced by the agent.
 * @param {object|null} structuredCompletion - Parsed completion report; reads `status` and `pending`.
 * @returns {{phrase: string, reason: string}|null} The first detection, or null when the summary looks terminal.
 */
function detectNonTerminalResultSummary(resultSummary, structuredCompletion) {
  const normalizedStatus = normalizeCompletionStatus(structuredCompletion?.status);
  if (normalizedStatus) {
    // Each rule: status prefix pattern plus the tail appended to the reason text.
    const statusRules = [
      { re: /^(?:partial|partially-complete|in-progress|pending|deferred|blocked|incomplete|to-be-continued)/, tail: '' },
      { re: /^(?:fail|failed|failure|error)/, tail: ', not a successful terminal state' },
    ];
    const hit = statusRules.find((rule) => rule.re.test(normalizedStatus));
    if (hit) {
      return {
        phrase: `status:${structuredCompletion.status}`,
        reason: `Nonterminal completion summary: structured status is '${structuredCompletion.status}'` + hit.tail,
      };
    }
  }

  const pendingValue = structuredCompletion?.pending;
  if (pendingValue && !isTerminalPendingValue(pendingValue)) {
    return {
      phrase: 'pending',
      reason: `Nonterminal completion summary: pending work remains (${String(structuredCompletion.pending).slice(0, 160)})`,
    };
  }

  const text = String(resultSummary || '').replace(/\r/g, '').trim();
  if (!text) return null;

  // Ordered: the first matching phrase is the one reported.
  const phraseRules = [
    ['still running', /\b(?:still|currently|continues?\s+to\s+be)\s+(?:running|ongoing|in\s+progress)\b/i],
    ['will check later', /\b(?:i(?:'|’)ll|i\s+will|we(?:'|’)ll|we\s+will|will)\s+(?:check|verify|review|follow\s+up|revisit)\s+(?:again\s+)?(?:later|soon|in\b|after\b|when\b)/i],
    ['wake up', /\bwake(?:\s|-)?up\b|\bwake\b.*\b(?:check|verify|review)\b/i],
    ['not yet complete', /\b(?:not\s+yet|isn(?:'|’)t|not|incomplete|not\s+fully|not\s+completely)\s+(?:complete|completed|done|finished|validated|verified)\b/i],
    ['partial', /\bpartial(?:ly)?\b/i],
    ['to be continued', /\bto\s+be\s+continued\b|\btbc\b/i],
    ['in progress', /\bin\s+progress\b|\bongoing\b|\bincomplete\b/i],
  ];
  const matched = phraseRules.find(([, re]) => re.test(text));
  if (matched) {
    const phrase = matched[0];
    return { phrase, reason: `Nonterminal completion summary: matched '${phrase}'` };
  }

  // Any "pending" line that is not an explicit "nothing pending" statement counts.
  const hasOpenPendingLine = text
    .split('\n')
    .some((line) => /\bpending\b/i.test(line) && !isTerminalPendingLine(line));
  if (hasOpenPendingLine) {
    return { phrase: 'pending', reason: `Nonterminal completion summary: matched 'pending'` };
  }

  return null;
}
|
|
1832
|
+
|
|
1833
|
+
/**
 * Handle a "successful" run whose summary says the work is not finished.
 *
 * While the item's retry count is below `ENGINE_DEFAULTS.maxRetries` it is
 * put back to PENDING with retry bookkeeping; otherwise it is marked FAILED.
 * The resulting status is then passed to `syncPrdItemStatus`. Errors are
 * logged and swallowed so the caller always receives the reason text.
 *
 * @param {object} meta - Dispatch meta; reads `meta.item.id` and `meta.item.sourcePlan`.
 * @param {{reason?: string}|null} detection - Result of the nonterminal-summary detection.
 * @returns {string} The reason recorded for the deferral.
 */
function deferNonTerminalCompletion(meta, detection) {
  const reason = detection?.reason || 'Nonterminal completion summary';
  const itemId = meta?.item?.id;
  if (!itemId) return reason;

  const wiPath = resolveWorkItemPath(meta);
  if (!wiPath) return reason;

  // Stays PENDING unless the retry budget turns out to be exhausted.
  let finalStatus = WI_STATUS.PENDING;
  try {
    mutateJsonFileLocked(wiPath, (items) => {
      if (!Array.isArray(items)) return items;
      const item = items.find((entry) => entry.id === itemId);
      if (!item) return items;

      const attempts = item._retryCount || 0;
      const maxRetries = ENGINE_DEFAULTS.maxRetries;

      if (attempts >= maxRetries) {
        item.status = WI_STATUS.FAILED;
        item.failReason = `${reason} after ${ENGINE_DEFAULTS.maxRetries} attempts`;
        item.failedAt = ts();
        for (const key of ['completedAt', 'dispatched_at', 'dispatched_to', '_pendingReason']) {
          delete item[key];
        }
        finalStatus = WI_STATUS.FAILED;
        log('warn', `Work item ${itemId} failed — repeated nonterminal completion summaries after ${ENGINE_DEFAULTS.maxRetries} attempts`);
        return items;
      }

      item.status = WI_STATUS.PENDING;
      item._retryCount = attempts + 1;
      item._lastRetryAt = ts();
      item._lastRetryReason = reason;
      item._pendingReason = 'nonterminal_completion';
      for (const key of ['completedAt', 'dispatched_at', 'dispatched_to', 'failedAt']) {
        delete item[key];
      }
      log('warn', `Work item ${itemId} reported nonterminal success — retry ${attempts + 1}/${ENGINE_DEFAULTS.maxRetries}: ${reason}`);
      return items;
    }, { defaultValue: [], skipWriteIfUnchanged: true });
    syncPrdItemStatus(itemId, finalStatus, meta.item?.sourcePlan);
  } catch (err) {
    log('warn', `nonterminal completion gate: ${err.message}`);
  }
  return reason;
}
|
|
1878
|
+
|
|
1879
|
+
/**
 * Interpret a loosely-typed boolean from a completion report.
 *
 * Accepts real booleans, the strings true/yes/1 and false/no/0 (case- and
 * whitespace-insensitive), and the numbers 1 and 0. Numbers are accepted
 * because a JSON report is as likely to carry `1` as `"1"`; treating the two
 * differently would silently drop the agent's signal.
 *
 * @param {unknown} value - Raw field value.
 * @returns {boolean|undefined} The parsed boolean, or undefined when the value is not recognisable.
 */
function parseCompletionBoolean(value) {
  if (typeof value === 'boolean') return value;
  if (typeof value === 'number') {
    if (value === 1) return true;
    if (value === 0) return false;
    return undefined;
  }
  if (typeof value === 'string') {
    const normalized = value.trim().toLowerCase();
    if (['true', 'yes', '1'].includes(normalized)) return true;
    if (['false', 'no', '0'].includes(normalized)) return false;
  }
  return undefined;
}
|
|
1888
|
+
|
|
1889
|
+
/**
 * Map a raw review verdict onto the engine's canonical values.
 *
 * Whitespace, hyphens and underscores between words are treated as
 * interchangeable and optional, so "REQUEST_CHANGES", "request changes",
 * "changes-requested" and the separator-less "REQUESTCHANGES" all normalize
 * to 'changes-requested'. The separator-less form matters because the
 * verdict regex used for free-text summaries allows it
 * (`REQUEST[_\s-]?CHANGES`).
 *
 * @param {unknown} verdict - Raw verdict text.
 * @returns {'approved'|'changes-requested'|null} Canonical verdict, or null when unrecognised.
 */
function normalizeReviewVerdict(verdict) {
  // Collapse every separator run to nothing so all spellings compare equal.
  const key = String(verdict || '').trim().toLowerCase().replace(/[\s_-]+/g, '');
  if (key === 'approve' || key === 'approved') return 'approved';
  if (key === 'requestchanges' || key === 'changesrequested') return 'changes-requested';
  return null;
}
|
|
1895
|
+
|
|
1896
|
+
/**
 * Extract a canonical review verdict from a structured completion report.
 *
 * Looks at `verdict`, `review_verdict` and `reviewVerdict` in that order and
 * returns the first one that normalizes to a known verdict. Each field is
 * normalized on its own so that an unrecognised value in an earlier field
 * does not hide a valid verdict in a later one.
 *
 * @param {object|null} completion - Parsed completion report.
 * @returns {'approved'|'changes-requested'|null} Canonical verdict, or null when none is present.
 */
function reviewVerdictFromCompletion(completion) {
  if (!completion || typeof completion !== 'object') return null;
  const candidates = [completion.verdict, completion.review_verdict, completion.reviewVerdict];
  for (const candidate of candidates) {
    const normalized = normalizeReviewVerdict(candidate);
    if (normalized) return normalized;
  }
  return null;
}
|
|
1900
|
+
|
|
1707
1901
|
function writeNonCleanAgentReport(dispatchItem, agentId, outcome, structuredCompletion, resultSummary, exitCode) {
|
|
1708
1902
|
if (!dispatchItem?.id || !outcome) {
|
|
1709
1903
|
log('warn', 'Cannot write non-clean agent report without dispatch id and outcome');
|
|
@@ -1839,21 +2033,31 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
1839
2033
|
// and for the foundation-only state of this plan item; downstream items
|
|
1840
2034
|
// (P-2a6d9c4f, P-9c4f2d6a) populate dispatchItem.meta.runtimeName at spawn time.
|
|
1841
2035
|
const runtimeName = dispatchItem.meta?.runtimeName || dispatchItem.runtimeName || 'claude';
|
|
1842
|
-
|
|
2036
|
+
let { resultSummary, taskUsage, sessionId, model } = parseAgentOutput(stdout, runtimeName);
|
|
1843
2037
|
|
|
1844
|
-
//
|
|
1845
|
-
const
|
|
2038
|
+
// Prefer the sidecar completion report; keep fenced output as a compatibility fallback.
|
|
2039
|
+
const reportCompletion = parseCompletionReportFile(dispatchItem);
|
|
2040
|
+
const structuredCompletion = reportCompletion || parseStructuredCompletion(stdout, runtimeName);
|
|
1846
2041
|
if (structuredCompletion) {
|
|
1847
|
-
|
|
2042
|
+
if (structuredCompletion.summary) resultSummary = String(structuredCompletion.summary);
|
|
2043
|
+
log('info', `Structured completion from ${agentId}: status=${structuredCompletion.status}, pr=${structuredCompletion.pr || 'N/A'}${structuredCompletion._source ? ` (${structuredCompletion._source})` : ''}`);
|
|
1848
2044
|
}
|
|
2045
|
+
const completionGateSummary = resultSummary || (typeof stdout === 'string' && !stdout.includes('"type":') ? stdout : '');
|
|
1849
2046
|
|
|
1850
2047
|
// Save session for potential resume on next dispatch
|
|
1851
2048
|
if (isSuccess && sessionId && agentId && !agentId.startsWith('temp-')) {
|
|
1852
2049
|
try {
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
2050
|
+
const runtime = resolveRuntime(runtimeName);
|
|
2051
|
+
if (runtime && typeof runtime.saveSession === 'function') {
|
|
2052
|
+
runtime.saveSession({
|
|
2053
|
+
agentId,
|
|
2054
|
+
dispatchId: dispatchItem.id,
|
|
2055
|
+
branch: dispatchItem.meta?.branch || null,
|
|
2056
|
+
sessionId,
|
|
2057
|
+
agentsDir: AGENTS_DIR,
|
|
2058
|
+
logger: { warn: (msg) => log('warn', msg) },
|
|
2059
|
+
});
|
|
2060
|
+
}
|
|
1857
2061
|
} catch (err) { log('warn', `Session save: ${err.message}`); }
|
|
1858
2062
|
}
|
|
1859
2063
|
|
|
@@ -1869,18 +2073,24 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
1869
2073
|
log('info', `Structured completion reports PR (${structuredCompletion.pr}) but regex sync found none — PR may already be tracked`);
|
|
1870
2074
|
}
|
|
1871
2075
|
|
|
2076
|
+
const completionStatus = normalizeCompletionStatus(structuredCompletion?.status);
|
|
2077
|
+
const agentNeedsRerun = parseCompletionBoolean(structuredCompletion?.needs_rerun ?? structuredCompletion?.needsRerun) === true;
|
|
2078
|
+
const agentReportedFailure = completionStatus.startsWith('fail') || agentNeedsRerun;
|
|
2079
|
+
const agentRetryable = parseCompletionBoolean(structuredCompletion?.retryable);
|
|
2080
|
+
|
|
1872
2081
|
// Auto-recover: if a failed implement/fix/test agent created PRs, it likely succeeded before the failure surfaced.
|
|
1873
2082
|
const prCreatingType = type === WORK_TYPE.IMPLEMENT || type === WORK_TYPE.IMPLEMENT_LARGE || type === WORK_TYPE.FIX || type === WORK_TYPE.TEST;
|
|
1874
|
-
const autoRecovered = !isSuccess && prsCreatedCount > 0 && prCreatingType && !!meta?.item?.id;
|
|
2083
|
+
const autoRecovered = !agentReportedFailure && !isSuccess && prsCreatedCount > 0 && prCreatingType && !!meta?.item?.id;
|
|
1875
2084
|
if (autoRecovered) {
|
|
1876
2085
|
log('info', `Auto-recovery: agent failed but created ${prsCreatedCount} PR(s) — upgrading ${meta.item.id} to done`);
|
|
1877
2086
|
}
|
|
1878
|
-
const effectiveSuccess = isSuccess || autoRecovered;
|
|
2087
|
+
const effectiveSuccess = (isSuccess && !agentReportedFailure) || autoRecovered;
|
|
1879
2088
|
|
|
1880
|
-
|
|
2089
|
+
let nonCleanReportWritten = false;
|
|
1881
2090
|
if (completionStatus.startsWith('partial') || autoRecovered || (completionStatus.startsWith('fail') && isSuccess)) {
|
|
1882
2091
|
const outcome = completionStatus.startsWith('fail') ? 'failure' : 'partial';
|
|
1883
|
-
writeNonCleanAgentReport(dispatchItem, agentId, outcome, structuredCompletion,
|
|
2092
|
+
writeNonCleanAgentReport(dispatchItem, agentId, outcome, structuredCompletion, completionGateSummary, code);
|
|
2093
|
+
nonCleanReportWritten = true;
|
|
1884
2094
|
}
|
|
1885
2095
|
|
|
1886
2096
|
// Handle decomposition results — create sub-items from decompose agent output
|
|
@@ -1903,7 +2113,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
1903
2113
|
// and after 3 such bailouts the WI flips to status=failed even though the
|
|
1904
2114
|
// original review was posted on the first run.
|
|
1905
2115
|
if (effectiveSuccess && type === WORK_TYPE.REVIEW && meta?.item?.id) {
|
|
1906
|
-
const verdict = parseReviewVerdict(resultSummary);
|
|
2116
|
+
const verdict = reviewVerdictFromCompletion(structuredCompletion) || parseReviewVerdict(resultSummary);
|
|
1907
2117
|
if (!verdict && isReviewBailout(resultSummary)) {
|
|
1908
2118
|
log('info', `Review ${meta.item.id} bailed out (review already posted) — treating as DONE without retry`);
|
|
1909
2119
|
} else if (!verdict) {
|
|
@@ -1988,8 +2198,22 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
1988
2198
|
|
|
1989
2199
|
let completionContractFailure = null;
|
|
1990
2200
|
if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
|
|
1991
|
-
|
|
1992
|
-
if (
|
|
2201
|
+
const nonTerminalCompletion = detectNonTerminalResultSummary(completionGateSummary, structuredCompletion);
|
|
2202
|
+
if (nonTerminalCompletion) {
|
|
2203
|
+
skipDoneStatus = true;
|
|
2204
|
+
const reason = deferNonTerminalCompletion(meta, nonTerminalCompletion);
|
|
2205
|
+
completionContractFailure = { reason, itemId: meta.item.id, nonTerminal: true };
|
|
2206
|
+
if (!nonCleanReportWritten) {
|
|
2207
|
+
writeNonCleanAgentReport(dispatchItem, agentId, 'partial', structuredCompletion, completionGateSummary, code);
|
|
2208
|
+
}
|
|
2209
|
+
}
|
|
2210
|
+
}
|
|
2211
|
+
|
|
2212
|
+
if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
|
|
2213
|
+
completionContractFailure = await enforcePrAttachmentContract(type, meta, agentId, config, resultSummary, stdout);
|
|
2214
|
+
if (completionContractFailure?.severity === 'hard' || completionContractFailure?.nonTerminal) {
|
|
2215
|
+
skipDoneStatus = true;
|
|
2216
|
+
}
|
|
1993
2217
|
}
|
|
1994
2218
|
|
|
1995
2219
|
if (effectiveSuccess && meta?.item?.id && !skipDoneStatus) {
|
|
@@ -2095,7 +2319,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
2095
2319
|
// (retryCount was being deleted by done-marking before the check could read it)
|
|
2096
2320
|
// Review verdict check similarly moved before updateWorkItemStatus(DONE) — same root cause.
|
|
2097
2321
|
|
|
2098
|
-
if (type === WORK_TYPE.REVIEW) await updatePrAfterReview(agentId, meta?.pr, meta?.project, config, resultSummary);
|
|
2322
|
+
if (type === WORK_TYPE.REVIEW) await updatePrAfterReview(agentId, meta?.pr, meta?.project, config, resultSummary, structuredCompletion);
|
|
2099
2323
|
if (type === WORK_TYPE.FIX) {
|
|
2100
2324
|
updatePrAfterFix(meta?.pr, meta?.project, meta?.source);
|
|
2101
2325
|
// (#984) Sync PRD status for PR-linked features: fix work items have a different ID
|
|
@@ -2114,7 +2338,9 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
2114
2338
|
}
|
|
2115
2339
|
}
|
|
2116
2340
|
checkForLearnings(agentId, config.agents[agentId], dispatchItem.task);
|
|
2117
|
-
const
|
|
2341
|
+
const hardContractFail = completionContractFailure?.severity === 'hard'
|
|
2342
|
+
|| completionContractFailure?.nonTerminal === true;
|
|
2343
|
+
const finalResult = hardContractFail ? DISPATCH_RESULT.ERROR : (effectiveSuccess ? DISPATCH_RESULT.SUCCESS : DISPATCH_RESULT.ERROR);
|
|
2118
2344
|
if (finalResult === DISPATCH_RESULT.SUCCESS) {
|
|
2119
2345
|
extractSkillsFromOutput(stdout, agentId, dispatchItem, config);
|
|
2120
2346
|
// Also scan inbox notes for skill blocks — agents often write skills to inbox, not stdout
|
|
@@ -2142,7 +2368,7 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
2142
2368
|
teams.teamsNotifyCompletion(dispatchItem, finalResult, agentId).catch(() => {});
|
|
2143
2369
|
} catch {}
|
|
2144
2370
|
|
|
2145
|
-
return { resultSummary, taskUsage, autoRecovered, structuredCompletion, completionContractFailure };
|
|
2371
|
+
return { resultSummary, taskUsage, autoRecovered, structuredCompletion, completionContractFailure, agentReportedFailure, agentRetryable };
|
|
2146
2372
|
}
|
|
2147
2373
|
|
|
2148
2374
|
// ─── PR → PRD Status Sync ─────────────────────────────────────────────────────
|
|
@@ -2322,6 +2548,8 @@ module.exports = {
|
|
|
2322
2548
|
parseReviewVerdict,
|
|
2323
2549
|
isReviewBailout,
|
|
2324
2550
|
parseStructuredCompletion,
|
|
2551
|
+
detectNonTerminalResultSummary,
|
|
2552
|
+
parseCompletionReportFile,
|
|
2325
2553
|
runPostCompletionHooks,
|
|
2326
2554
|
syncPrdFromPrs,
|
|
2327
2555
|
resolveWorkItemPath,
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
const fs = require('fs');
|
|
26
26
|
const os = require('os');
|
|
27
27
|
const path = require('path');
|
|
28
|
+
const { FAILURE_CLASS, safeWrite, ts } = require('../shared');
|
|
28
29
|
|
|
29
30
|
const ENGINE_DIR = __dirname.replace(/[\\/]runtimes$/, '');
|
|
30
31
|
const MINIONS_DIR = path.resolve(ENGINE_DIR, '..');
|
|
@@ -229,6 +230,88 @@ function buildArgs(opts = {}) {
|
|
|
229
230
|
return args;
|
|
230
231
|
}
|
|
231
232
|
|
|
233
|
+
/**
 * Translate dispatch options into the flag list for the spawn script,
 * always starting with `--runtime claude`.
 *
 * Value flags are stringified. `maxTurns`/`maxBudget` are emitted whenever
 * they are not null/undefined (so 0 is kept); `stream` is emitted unless it
 * is null/undefined/empty string; boolean switches require a strict `true`.
 *
 * @param {object} [opts] - Spawn options.
 * @returns {string[]} Flags in a fixed order.
 */
function buildSpawnFlags(opts = {}) {
  const flags = ['--runtime', 'claude'];
  const withValue = (enabled, flag, value) => {
    if (enabled) flags.push(flag, String(value));
  };
  const asSwitch = (enabled, flag) => {
    if (enabled) flags.push(flag);
  };

  withValue(opts.maxTurns != null, '--max-turns', opts.maxTurns);
  withValue(opts.model, '--model', opts.model);
  withValue(opts.allowedTools, '--allowedTools', opts.allowedTools);
  withValue(opts.effort, '--effort', opts.effort);
  withValue(opts.sessionId, '--resume', opts.sessionId);
  withValue(opts.maxBudget != null, '--max-budget-usd', opts.maxBudget);
  asSwitch(opts.bare === true, '--bare');
  withValue(opts.fallbackModel, '--fallback-model', opts.fallbackModel);
  withValue(opts.stream != null && opts.stream !== '', '--stream', opts.stream);
  asSwitch(opts.disableBuiltinMcps === true, '--disable-builtin-mcps');
  asSwitch(opts.suppressAgentsMd === true, '--no-custom-instructions');
  asSwitch(opts.reasoningSummaries === true, '--enable-reasoning-summaries');
  return flags;
}
|
|
249
|
+
|
|
250
|
+
/**
 * Look up a previously saved session id that is safe to resume.
 *
 * A session is resumable only when `<agentsDir>/<agentId>/session.json` has
 * both `sessionId` and `savedAt`, is younger than `maxAgeMs`, and was saved
 * for the same branch as `branchName`. Agents whose id starts with "temp-"
 * never resume. Lookup errors are reported via `logger.warn` and yield null.
 *
 * @param {object} [options]
 * @param {string} [options.agentId] - Agent whose session file is consulted.
 * @param {string} [options.branchName] - Branch of the upcoming dispatch.
 * @param {string} [options.agentsDir] - Directory holding per-agent folders.
 * @param {number} [options.maxAgeMs] - Maximum session age in ms (default: two hours).
 * @param {object} [options.logger] - Optional logger exposing `info`/`warn`.
 * @returns {string|null} The session id to resume, or null.
 */
function getResumeSessionId({ agentId, branchName, agentsDir, maxAgeMs = 2 * 60 * 60 * 1000, logger = console } = {}) {
  const eligible = agentId && !agentId.startsWith('temp-') && agentsDir;
  if (!eligible) return null;

  try {
    const saved = _safeJson(path.join(agentsDir, agentId, 'session.json'));
    if (!saved?.sessionId || !saved.savedAt) return null;

    // An unparseable savedAt gives NaN, which fails the freshness comparison.
    const ageMs = Date.now() - new Date(saved.savedAt).getTime();
    const branchMatches = branchName && saved.branch && saved.branch === branchName;
    if (!(ageMs < maxAgeMs && branchMatches)) return null;

    if (logger && typeof logger.info === 'function') {
      logger.info(`Resuming session ${saved.sessionId} for ${agentId} on branch ${branchName} (age: ${Math.round(ageMs / 60000)}min)`);
    }
    return saved.sessionId;
  } catch (e) {
    if (logger && typeof logger.warn === 'function') logger.warn('session resume lookup: ' + e.message);
    return null;
  }
}
|
|
269
|
+
|
|
270
|
+
/**
 * Persist the session id of a finished run to
 * `<agentsDir>/<agentId>/session.json` so a later dispatch can resume it.
 *
 * Nothing is written when `sessionId`, `agentId` or `agentsDir` is missing,
 * or when the agent id starts with "temp-". Write errors are reported via
 * `logger.warn` and turned into a `false` return.
 *
 * @param {object} [options]
 * @param {string} [options.agentId] - Agent that owns the session.
 * @param {string} [options.dispatchId] - Dispatch that produced the session.
 * @param {string} [options.branch] - Branch of the run; stored as null when falsy.
 * @param {string} [options.sessionId] - Session id to store.
 * @param {string} [options.agentsDir] - Directory holding per-agent folders.
 * @param {Function} [options.now] - Timestamp provider; a non-function falls back to an ISO timestamp.
 * @param {Function} [options.writeJson] - Writer invoked as `(filePath, payload)`.
 * @param {object} [options.logger] - Optional logger exposing `warn`.
 * @returns {boolean} True when the session was written.
 */
function saveSession({ agentId, dispatchId, branch, sessionId, agentsDir, now = ts, writeJson = safeWrite, logger = console } = {}) {
  const persistable = sessionId && agentId && !agentId.startsWith('temp-') && agentsDir;
  if (!persistable) return false;

  try {
    const target = path.join(agentsDir, agentId, 'session.json');
    const savedAt = typeof now === 'function' ? now() : new Date().toISOString();
    writeJson(target, { sessionId, dispatchId, savedAt, branch: branch || null });
    return true;
  } catch (err) {
    if (logger && typeof logger.warn === 'function') logger.warn(`Session save: ${err.message}`);
    return false;
  }
}
|
|
285
|
+
|
|
286
|
+
/**
 * Report whether a chunk of CLI output contains one of the known
 * trust/permission prompt phrases (matched case-insensitively by lowering
 * the text first).
 *
 * @param {unknown} outputChunk - Output text; falsy values are treated as empty.
 * @returns {boolean} True when a permission-gate phrase is present.
 */
function detectPermissionGate(outputChunk) {
  const gatePhrases = /\b(trust this|do you trust|allow access|grant permission|approve tools?|permission prompt)\b/;
  const haystack = String(outputChunk || '').toLowerCase();
  return gatePhrases.test(haystack);
}
|
|
290
|
+
|
|
291
|
+
/**
 * Name the channel this runtime uses to receive the prompt.
 *
 * @returns {string} Always 'stdin'.
 */
function getPromptDeliveryMode() {
  const deliveryMode = 'stdin';
  return deliveryMode;
}
|
|
294
|
+
|
|
295
|
+
/**
 * Tell whether a system-prompt file is used for this spawn: yes for fresh
 * runs, no when resuming (any truthy `isResume`).
 *
 * @param {object} [options]
 * @param {boolean} [options.isResume] - Whether the spawn resumes a session.
 * @returns {boolean} The logical negation of `isResume`.
 */
function usesSystemPromptFile({ isResume } = {}) {
  if (isResume) return false;
  return true;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Map a parsed runtime error code onto an engine failure class.
 *
 * @param {string} code - Error code from the parsed CLI error.
 * @returns {*} The matching `FAILURE_CLASS` member, or null for codes with no mapping.
 */
function _runtimeFailureClass(code) {
  switch (code) {
    case 'auth-failure':
    case 'budget-exceeded':
      return FAILURE_CLASS.PERMISSION_BLOCKED;
    case 'context-limit':
      return FAILURE_CLASS.OUT_OF_CONTEXT;
    case 'crash':
      return FAILURE_CLASS.SPAWN_ERROR;
    default:
      return null;
  }
}
|
|
305
|
+
|
|
306
|
+
/**
 * Classify a failed Claude run.
 *
 * Exit code 78 is always a non-retryable configuration error. Otherwise the
 * combined stdout/stderr is passed to `parseError`; a parsed code with a
 * known runtime mapping decides the class, and anything else defers to the
 * caller's `fallback(code, stdout, stderr)` (or `FAILURE_CLASS.UNKNOWN` when
 * no fallback function is given). The run is retryable unless the parsed
 * error explicitly says `retriable: false`.
 *
 * @param {object} [options]
 * @param {number} [options.code] - Process exit code.
 * @param {string} [options.stdout] - Captured stdout.
 * @param {string} [options.stderr] - Captured stderr.
 * @param {Function} [options.fallback] - Classifier used when the runtime has no specific mapping.
 * @returns {{failureClass: *, retryable: boolean, message: string}}
 */
function classifyFailure({ code, stdout = '', stderr = '', fallback } = {}) {
  if (code === 78) {
    return { failureClass: FAILURE_CLASS.CONFIG_ERROR, retryable: false, message: 'Claude configuration error' };
  }

  const parsed = parseError(`${stdout || ''}\n${stderr || ''}`);
  const mappedClass = parsed.code ? _runtimeFailureClass(parsed.code) : null;
  const failureClass = mappedClass
    || (typeof fallback === 'function' ? fallback(code, stdout, stderr) : FAILURE_CLASS.UNKNOWN);

  return {
    failureClass,
    retryable: parsed.retriable !== false,
    message: parsed.message || '',
  };
}
|
|
314
|
+
|
|
232
315
|
/**
|
|
233
316
|
* Build the final prompt text delivered to the Claude CLI. Claude takes the
|
|
234
317
|
* system prompt via `--system-prompt-file` and the user prompt via stdin, so
|
|
@@ -536,8 +619,15 @@ module.exports = {
|
|
|
536
619
|
modelsCache: MODELS_CACHE,
|
|
537
620
|
spawnScript: path.join(ENGINE_DIR, 'spawn-agent.js'),
|
|
538
621
|
installHint: INSTALL_HINT,
|
|
622
|
+
buildSpawnFlags,
|
|
539
623
|
buildArgs,
|
|
540
624
|
buildPrompt,
|
|
625
|
+
getResumeSessionId,
|
|
626
|
+
saveSession,
|
|
627
|
+
detectPermissionGate,
|
|
628
|
+
getPromptDeliveryMode,
|
|
629
|
+
usesSystemPromptFile,
|
|
630
|
+
classifyFailure,
|
|
541
631
|
resolveModel,
|
|
542
632
|
parseOutput,
|
|
543
633
|
parseStreamChunk,
|