@yemi33/minions 0.1.2054 → 0.1.2055

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2067,6 +2067,90 @@ function updatePrAfterFix(pr, project, source, options = {}, legacyDispatchId =
2067
2067
  delete next.fixedAt;
2068
2068
  target.minionsReview = next;
2069
2069
  };
2070
+ // W-mpoeirqx0007712a — Build-fix push verification. The agent may report
2071
+ // SUCCESS while the git push silently failed to advance the remote head
2072
+ // (stale-worktree push rejected non-fast-forward, agent ignores non-zero
2073
+ // `git push` exit, etc). detectPrFixBranchChange falls back to
2074
+ // local-head / worktree-diff evidence in those scenarios and returns
2075
+ // `changed: true` even though origin/<branch> never moved. Without a
2076
+ // guard here, the optimistic stamp + 10-min buildFixGracePeriod
2077
+ // suppresses re-dispatch against a still-failing build that was never
2078
+ // actually fixed (live repro: opg-microsoft/minions PR #57).
2079
+ //
2080
+ // Only `evidence: 'remote-head'` proves the push landed. For
2081
+ // BUILD_FAILURE with changed=true AND evidence explicitly set to one of
2082
+ // the unverified types, increment `_buildFixPushFailedCount`, write an
2083
+ // inbox alert, route through recordPrNoOpFixAttempt so the cause stays
2084
+ // unhandled, and never write `_buildFixPushedAt`. When the counter
2085
+ // reaches `engine.maxBuildFixRetries`, flip `_buildFixNeedsHumanRebase`
2086
+ // so the engine stops retrying.
2087
+ //
2088
+ // Note: callers that omit `branchChange.evidence` (legacy / tests
2089
+ // predating evidence plumbing) still hit the trusted-push path below to
2090
+ // preserve backward compatibility — only the explicitly unverified
2091
+ // evidence kinds trigger this guard.
2092
+ const _unverifiedPushEvidence = new Set(['local-head', 'worktree-diff']);
2093
+ if (cause === shared.PR_FIX_CAUSE.BUILD_FAILURE
2094
+ && explicitlyChangedBranch
2095
+ && options.branchChange?.changed === true
2096
+ && _unverifiedPushEvidence.has(options.branchChange?.evidence)) {
2097
+ const maxRetries = options.config?.engine?.maxBuildFixRetries
2098
+ ?? ENGINE_DEFAULTS.maxBuildFixRetries;
2099
+ target._buildFixPushFailedCount = (Number(target._buildFixPushFailedCount) || 0) + 1;
2100
+ const reachedCap = target._buildFixPushFailedCount >= maxRetries;
2101
+ if (reachedCap) {
2102
+ target._buildFixNeedsHumanRebase = ts();
2103
+ }
2104
+ const beforeHeadStr = String(options.branchChange?.beforeHead || '').slice(0, 40);
2105
+ const afterHeadStr = String(options.branchChange?.afterHead || '').slice(0, 40);
2106
+ const evidenceStr = String(options.branchChange?.evidence || 'unknown');
2107
+ try {
2108
+ const wiId = options.dispatchItem?.meta?.item?.id || null;
2109
+ const noteBody = `# Build-fix push not verified for ${pr.id}\n\n`
2110
+ + `**PR:** ${pr.url || pr.id}\n`
2111
+ + `**Branch:** ${pr.branch || '(unknown)'}\n`
2112
+ + `**Cause:** build-failure\n`
2113
+ + `**Pre-dispatch head:** ${beforeHeadStr || '(unknown)'}\n`
2114
+ + `**Post-completion head (live):** ${afterHeadStr || '(unknown)'}\n`
2115
+ + `**Branch-change evidence:** ${evidenceStr}\n`
2116
+ + `**Attempt:** ${target._buildFixPushFailedCount}/${maxRetries}\n\n`
2117
+ + (reachedCap
2118
+ ? `⚠️ **Reached \`engine.maxBuildFixRetries\` (${maxRetries}).** PR flagged \`_buildFixNeedsHumanRebase\` — engine will stop auto-retrying. Likely root cause: worktree stale vs origin/master, push rejected non-fast-forward, or branch protection blocks the engine identity.\n`
2119
+ : `_Engine will re-dispatch on the next \`discoverFromPrs\` pass (counter < cap)._\n`)
2120
+ + `\nThe agent reported SUCCESS but the remote head did not advance — the optimistic \`_buildFixPushedAt\` stamp was suppressed to avoid the ${(ENGINE_DEFAULTS.buildFixGracePeriod / 60000) | 0}-minute grace-period blackout.\n`;
2121
+ shared.writeToInbox(
2122
+ 'engine',
2123
+ `build-fix-push-unverified-${pr.prNumber || pr.id}`,
2124
+ noteBody,
2125
+ null,
2126
+ { wi: wiId, pr: pr.id, cause: shared.PR_FIX_CAUSE.BUILD_FAILURE }
2127
+ );
2128
+ } catch (err) {
2129
+ log('warn', `build-fix push-verify inbox alert for ${pr.id}: ${err.message}`);
2130
+ }
2131
+ // Route through the noop path so the cause stays unhandled, the noop
2132
+ // counter advances symmetrically with the genuine-noop case, and the
2133
+ // existing `delete target._buildFixPushedAt` cleanup (line ~2016) runs.
2134
+ const verifyBranchChange = {
2135
+ changed: false,
2136
+ beforeHead: options.branchChange?.beforeHead,
2137
+ afterHead: options.branchChange?.afterHead,
2138
+ evidence: 'push-unverified',
2139
+ };
2140
+ const noopReason = `build-fix push unverified (evidence: ${evidenceStr}); attempt ${target._buildFixPushFailedCount}/${maxRetries}${reachedCap ? ' — needs-human-rebase' : ''}`;
2141
+ const record = recordPrNoOpFixAttempt(target, cause, source, options.dispatchItem, verifyBranchChange, options.config, noopReason);
2142
+ result = {
2143
+ noOp: true,
2144
+ cause,
2145
+ paused: !!record.paused,
2146
+ count: record.count,
2147
+ pushUnverified: true,
2148
+ pushFailedCount: target._buildFixPushFailedCount,
2149
+ needsHumanRebase: reachedCap,
2150
+ };
2151
+ log('warn', `Updated ${pr.id} → build-fix push unverified (${target._buildFixPushFailedCount}/${maxRetries}, evidence=${evidenceStr})${reachedCap ? ' [needs-human-rebase]' : ''}; remote head ${beforeHeadStr.slice(0, 8)} did not advance — inbox alert written, cause left unhandled for re-dispatch`);
2152
+ return prs;
2153
+ }
2070
2154
  if (explicitlyChangedBranch && options.branchChange?.changed === false) {
2071
2155
  const record = recordPrNoOpFixAttempt(target, cause, source, options.dispatchItem, options.branchChange, options.config, options.noopReason);
2072
2156
  result = { noOp: true, cause, paused: !!record.paused, count: record.count };
@@ -2086,6 +2170,19 @@ function updatePrAfterFix(pr, project, source, options = {}, legacyDispatchId =
2086
2170
  return prs;
2087
2171
  }
2088
2172
  clearPrNoOpFixAttempt(target, cause);
2173
+ // W-mpoeirqx0007712a — verified-push stamping for BUILD_FAILURE. Reaching
2174
+ // this point with explicitlyChangedBranch=true means the unverified-push
2175
+ // guard above did NOT trigger, so either evidence === 'remote-head'
2176
+ // (live remote refs prove the branch advanced) OR no branchChange info
2177
+ // was supplied (legacy callers that didn't pass branchChange — keep
2178
+ // existing behavior of trusting the agent's branchChanged claim).
2179
+ // Clear the push-failure counter on confirmed success so future
2180
+ // regressions start fresh.
2181
+ if (cause === shared.PR_FIX_CAUSE.BUILD_FAILURE && explicitlyChangedBranch) {
2182
+ target._buildFixPushedAt = ts();
2183
+ delete target._buildFixPushFailedCount;
2184
+ delete target._buildFixNeedsHumanRebase;
2185
+ }
2089
2186
  if (source === 'pr-human-feedback') {
2090
2187
  const clearPendingFix = shouldClearHumanFeedbackPendingFix(target, pr, automationCauseKey);
2091
2188
  if (target.humanFeedback && clearPendingFix) target.humanFeedback.pendingFix = false;
package/engine/shared.js CHANGED
@@ -1800,7 +1800,15 @@ const ENGINE_DEFAULTS = {
1800
1800
  logBufferSize: 50, // flush immediately when buffer exceeds this many entries
1801
1801
  lockRetries: 0, // no retries — single 5s timeout window with 25ms polling (200 attempts) is sufficient; stale lock recovery at 60s handles crashes
1802
1802
  lockRetryBackoffMs: 500, // base backoff between lock retries (doubles each attempt: 500ms, 1s, 2s, ...)
1803
- buildFixGracePeriod: 600000, // 10min — wait for CI to run after build fix before re-dispatching
1803
+ buildFixGracePeriod: 600000, // 10min — wait for CI to run after a verified build-fix push before re-dispatching
1804
+ // W-mpoeirqx0007712a: cap re-dispatch attempts when build-fix pushes
1805
+ // silently fail to advance the remote head (stale-worktree push rejected,
1806
+ // agent ignores non-zero git push exit and reports SUCCESS, etc).
1807
+ // updatePrAfterFix increments `_buildFixPushFailedCount` whenever the
1808
+ // post-completion branchChange reports changed=true with explicitly unverified evidence ('local-head' or 'worktree-diff'); when the
1809
+ // counter reaches this cap, the PR is flagged `_buildFixNeedsHumanRebase`
1810
+ // so the dispatcher stops auto-retrying and a human can rescue the branch.
1811
+ maxBuildFixRetries: 3,
1804
1812
  adoPollEnabled: true, // poll ADO PR status, comments, and reconciliation on each tick cycle
1805
1813
  ghPollEnabled: true, // poll GitHub PR status, comments, and reconciliation on each tick cycle
1806
1814
  prPollStatusEvery: 12, // poll PR build/review/merge status every N ticks for both ADO and GitHub (~12 min at default interval)
package/engine.js CHANGED
@@ -4929,15 +4929,16 @@ async function discoverFromPrs(config, project) {
4929
4929
  }, `Fix build failure on ${pr.id}: ${pr.title || ''}`, { dispatchKey: key, cooldownKey: key, automationCauseKey: buildCauseKey, source: 'pr', pr, branch: prBranch, project: projMeta });
4930
4930
  if (item) {
4931
4931
  newWork.push(item); fixDispatched = true;
4932
- try {
4933
- const prPath = projectPrPath(project);
4934
- mutatePullRequests(prPath, prs => {
4935
- const target = shared.findPrRecord(prs, pr, project);
4936
- if (target) {
4937
- target._buildFixPushedAt = ts();
4938
- }
4939
- });
4940
- } catch (e) { log('warn', 'mark build fix dispatched: ' + e.message); }
4932
+ // W-mpoeirqx0007712a — DO NOT stamp `_buildFixPushedAt` at dispatch
4933
+ // time. The optimistic stamp here used to suppress re-dispatch for
4934
+ // the buildFixGracePeriod window even when the agent never pushed
4935
+ // (stale-worktree push silently rejected, agent reported SUCCESS
4936
+ // anyway). `_buildFixPushedAt` is now written only by
4937
+ // lifecycle.updatePrAfterFix after the post-completion branchChange
4938
+ // confirms the remote head actually advanced (evidence ===
4939
+ // 'remote-head'; legacy callers that omit evidence are still trusted). In-flight dispatches are already deduplicated by
4940
+ // `isPrAutomationCausePending` + `isAlreadyDispatched` above, so no
4941
+ // race window opens by removing the optimistic stamp.
4941
4942
  }
4942
4943
 
4943
4944
  if (pr.agent && !pr._buildFailNotified) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.2054",
3
+ "version": "0.1.2055",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"