@yemi33/minions 0.1.1947 → 0.1.1949
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/engine/abandoned-pr-reconciliation.js +143 -0
- package/engine/ado.js +165 -0
- package/engine/cli.js +26 -0
- package/engine/copilot-models.json +1 -1
- package/engine/github.js +256 -15
- package/engine/shared.js +41 -0
- package/package.json +1 -1
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* engine/abandoned-pr-reconciliation.js — One-shot startup reconciliation pass
|
|
3
|
+
* for `abandoned` PRs. Pairs with W-mp5trwh60008386d's forward-looking 404
|
|
4
|
+
* hardening in engine/github.js: that fix prevents *future* false-flips, but
|
|
5
|
+
* does nothing for PRs already wrongly marked `abandoned` before it shipped
|
|
6
|
+
* (e.g. the 16 PRs hit by the 2026-05-14 gh-auth-flip incident).
|
|
7
|
+
*
|
|
8
|
+
* `pollPrStatus` only iterates PRs in `PR_POLLABLE_STATUSES = {active, linked}`
|
|
9
|
+
* (engine/shared.js), so once a PR is `abandoned` the regular poll loop never
|
|
10
|
+
* visits it again. This module is the explicit one-time re-probe.
|
|
11
|
+
*
|
|
12
|
+
* ## Why one-shot, not periodic?
|
|
13
|
+
*
|
|
14
|
+
* Periodic re-probing of terminal-status PRs has real costs:
|
|
15
|
+
* - GitHub/ADO API quota burn on every restart (50+ PRs × 2 hosts).
|
|
16
|
+
* - Race risk against late-arriving merge events (a PR closed 30s before
|
|
17
|
+
* the pass would mis-classify based on stale local state).
|
|
18
|
+
* - Boot latency that grows linearly with abandoned-PR count.
|
|
19
|
+
*
|
|
20
|
+
* The version-gated one-shot model gives us exactly what we need: re-probe
|
|
21
|
+
* once per release that bumps `ENGINE_DEFAULTS.abandonedReconciliationVersion`,
|
|
22
|
+
* then never again until the next bump. First boot after this lands cleans up
|
|
23
|
+
* the historical false-flips; subsequent boots are a single state.json read +
|
|
24
|
+
* version compare → no-op.
|
|
25
|
+
*
|
|
26
|
+
* ## Why a separate WI from W-mp5trwh60008386d?
|
|
27
|
+
*
|
|
28
|
+
* Forward-fix (don't false-flip new PRs) and retroactive-heal (un-flip already
|
|
29
|
+
* damaged PRs) are different concerns with different risk profiles. The 404
|
|
30
|
+
* hardening fix is per-tick and ships independently; this reconciliation is
|
|
31
|
+
* boot-only and depends on the hardening fix existing in master so we don't
|
|
32
|
+
* fork the abandonment-confirmation logic.
|
|
33
|
+
*
|
|
34
|
+
* ## Boot wiring
|
|
35
|
+
*
|
|
36
|
+
* Called from engine/cli.js between the recovery sweep and the initial tick.
|
|
37
|
+
* Errors here must NOT block boot — the catch in cli.js logs and continues.
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
const path = require('path');
|
|
41
|
+
const shared = require('./shared');
|
|
42
|
+
|
|
43
|
+
const { ENGINE_DEFAULTS, log, mutateEngineState, readEngineState } = shared;
|
|
44
|
+
|
|
45
|
+
/**
 * Run the version-gated startup reconciliation pass for `abandoned` PRs.
 *
 * Gate: when `state.lastAbandonedReconciliationVersion` (engine state) is
 * already >= `ENGINE_DEFAULTS.abandonedReconciliationVersion`, the call is a
 * silent no-op and returns `{ skipped: true }` with zeroed totals.
 *
 * The version marker is persisted only after a COMPLETE pass: neither host
 * pass threw, and no PR was reported as `skipped` or `errored`. The host
 * reconcilers log "retry next startup" for transient failures (missing token,
 * failed base-repo probe, empty response); persisting the marker in those
 * cases would close the gate and make that retry impossible until the next
 * version bump. Re-running is cheap: PRs that were flipped are no longer
 * `abandoned`, and confirmed ones carry `_reconciliation404Confirmed`.
 *
 * @param {object} config — engine config (used to enumerate projects)
 * @param {object} [options]
 * @param {boolean} [options.skipVersionCheck] — bypass the version gate and skip the marker write (tests only)
 * @param {object} [options.githubModule] — injectable for tests (default: require './github')
 * @param {object} [options.adoModule] — injectable for tests (default: require './ado')
 * @returns {Promise<{ skipped: boolean, totals: { flipped: number, confirmedDeleted: number, skipped: number, errored: number } }>}
 */
async function runStartupReconciliation(config, options = {}) {
  const targetVersion = Number(ENGINE_DEFAULTS.abandonedReconciliationVersion) || 0;
  const skipVersionCheck = options.skipVersionCheck === true;

  const TOTAL_KEYS = ['flipped', 'confirmedDeleted', 'skipped', 'errored'];
  const zeroTotals = () => ({ flipped: 0, confirmedDeleted: 0, skipped: 0, errored: 0 });
  const describeError = (err) => err?.message || String(err);

  if (!skipVersionCheck) {
    const state = readEngineState();
    const lastVersion = Number(state?.lastAbandonedReconciliationVersion) || 0;
    if (lastVersion >= targetVersion) {
      // Already reconciled at this version — no-op. Deliberately not logged:
      // this runs every boot and the silent path is the common one.
      return { skipped: true, totals: zeroTotals() };
    }
  }

  const github = options.githubModule || require('./github');
  const ado = options.adoModule || require('./ado');

  // Pre-count abandoned PRs that still lack the confirmed marker so the
  // start-of-pass log line is meaningful. Best-effort: the pre-count is
  // observability, not correctness, so a failure here is ignored on purpose.
  let preCount = 0;
  let projectCount = 0;
  try {
    const projects = shared.getProjects(config);
    projectCount = projects.length;
    for (const project of projects) {
      const prs = shared.safeJsonArr(shared.projectPrPath(project));
      preCount += prs.filter(pr =>
        pr.status === shared.PR_STATUS.ABANDONED && !pr._reconciliation404Confirmed
      ).length;
    }
  } catch { /* observability only */ }

  log('info', `Abandoned PR reconciliation pass: scanning ${preCount} PR${preCount === 1 ? '' : 's'} across ${projectCount} project${projectCount === 1 ? '' : 's'} (version ${targetVersion})`);

  // Runs one host reconciler and normalises its totals. A throw is reported
  // to the caller (`threw`) instead of being folded into zeroed totals, so
  // the marker logic below can tell "nothing to do" from "pass failed".
  const runPass = async (label, hostModule) => {
    const totals = zeroTotals();
    try {
      const reported = await hostModule.reconcileAbandonedPrs(config);
      for (const key of TOTAL_KEYS) totals[key] = Number(reported?.[key]) || 0;
      return { totals, threw: false };
    } catch (err) {
      log('warn', `Abandoned PR reconciliation: ${label} pass threw: ${describeError(err)}`);
      return { totals, threw: true };
    }
  };

  // Serial on purpose (no Promise.all/allSettled): keeps log ordering
  // deterministic and avoids interleaved throttle messages.
  const ghPass = await runPass('GitHub', github);
  const adoPass = await runPass('ADO', ado);

  const totals = zeroTotals();
  for (const key of TOTAL_KEYS) totals[key] = ghPass.totals[key] + adoPass.totals[key];
  log('info', `Abandoned PR reconciliation: flipped ${totals.flipped}, confirmed-deleted ${totals.confirmedDeleted}, skipped ${totals.skipped}, errored ${totals.errored}`);

  if (!skipVersionCheck) {
    const passComplete = !ghPass.threw && !adoPass.threw
      && totals.skipped === 0 && totals.errored === 0;

    if (passComplete) {
      // Persist the marker even when the pass found nothing (preCount=0):
      // it means "we ran reconciliation at this code version", not "we found
      // something to fix". Without the write every restart would re-run the
      // no-op pass forever.
      try {
        mutateEngineState((state) => {
          state.lastAbandonedReconciliationVersion = targetVersion;
          state.lastAbandonedReconciliationAt = new Date().toISOString();
          return state;
        });
      } catch (err) {
        // A failed state write is non-fatal: the next boot re-runs the pass,
        // which is idempotent. Log and continue.
        log('warn', `Abandoned PR reconciliation: failed to persist version marker: ${describeError(err)}`);
      }
    } else {
      // Leave the gate open so the PRs we could not resolve are re-probed.
      log('warn', `Abandoned PR reconciliation: pass incomplete (skipped ${totals.skipped}, errored ${totals.errored}) — version marker not persisted, will retry next startup`);
    }
  }

  return { skipped: false, totals };
}
|
|
140
|
+
|
|
141
|
+
module.exports = {
|
|
142
|
+
runStartupReconciliation,
|
|
143
|
+
};
|
package/engine/ado.js
CHANGED
|
@@ -690,6 +690,14 @@ async function pollPrStatus(config) {
|
|
|
690
690
|
if (applyAdoPrMetadata(pr, prData)) updated = true;
|
|
691
691
|
|
|
692
692
|
let newStatus = pr.status;
|
|
693
|
+
// W-mp5trwh60008386d: ADO does NOT need the `prAbandonConfirmCount` confirmation logic
|
|
694
|
+
// that engine/github.js uses. ADO only flips a PR to `abandoned` when a *successful*
|
|
695
|
+
// adoFetch returns `prData.status === 'abandoned'` (below). On 404/auth failure adoFetch
|
|
696
|
+
// throws, the throw propagates to forEachActivePr's `Promise.allSettled`, and the PR
|
|
697
|
+
// record is left untouched — equivalent to "infinite confirmations required". The shared
|
|
698
|
+
// ENGINE_DEFAULTS.prAbandonConfirmCount constant lives in engine/shared.js so a future
|
|
699
|
+
// ADO change can adopt the same semantics if the failure model ever maps 404 to
|
|
700
|
+
// abandonment directly. See engine/github.js pollPrStatus for the GitHub implementation.
|
|
693
701
|
if (prData.status === 'completed') newStatus = PR_STATUS.MERGED;
|
|
694
702
|
else if (prData.status === 'abandoned') newStatus = PR_STATUS.ABANDONED;
|
|
695
703
|
else if (prData.status === 'active') newStatus = PR_STATUS.ACTIVE;
|
|
@@ -1594,12 +1602,169 @@ function _setAdoTokenForTest(token) {
|
|
|
1594
1602
|
}
|
|
1595
1603
|
}
|
|
1596
1604
|
|
|
1605
|
+
// ─── One-Shot Startup Reconciliation for Abandoned PRs (W-mp60tw0u000j3931) ───
|
|
1606
|
+
//
|
|
1607
|
+
// ADO equivalent of engine/github.js reconcileAbandonedPrs. Same shape:
|
|
1608
|
+
// per-(org, project, repo) base probe, cached for the duration of the pass;
|
|
1609
|
+
// then per-PR re-probe via adoFetch. ADO PRs use `prData.status` directly
|
|
1610
|
+
// ('active' | 'abandoned' | 'completed'), which we map back to PR_STATUS.
|
|
1611
|
+
//
|
|
1612
|
+
// Note: ADO's adoFetch throws on failure rather than returning a sentinel,
|
|
1613
|
+
// so the per-PR catch needs to distinguish 404 (PR truly deleted) from other
|
|
1614
|
+
// errors (network, throttle). isAdoAuthError covers 401/403 — for 404 we
|
|
1615
|
+
// inspect the error message directly.
|
|
1616
|
+
/**
 * One-shot re-probe of locally `abandoned` PRs for every non-GitHub project.
 *
 * For each project: probe the base repository once (cached per
 * org/project/repo for this pass), then fetch each abandoned PR that does not
 * yet carry `_reconciliation404Confirmed`. Remote `completed` → merged,
 * `active` → active, `abandoned` or a 404 → stays abandoned and is marked
 * `_reconciliation404Confirmed` so it is not probed again.
 *
 * Counters reflect what was actually written: `flipped`/`confirmedDeleted`
 * are taken from the write-back, a failed write counts its updates as
 * `errored`, and PRs behind a failed base probe (or a missing token) count as
 * `skipped`.
 *
 * @param {object} config — engine config (used to enumerate projects)
 * @returns {Promise<{ flipped: number, confirmedDeleted: number, skipped: number, errored: number }>}
 */
async function reconcileAbandonedPrs(config) {
  const totals = { flipped: 0, confirmedDeleted: 0, skipped: 0, errored: 0 };
  const { ABANDONED, ACTIVE, MERGED } = shared.PR_STATUS;

  // Remote ADO status → local status for the two states that mean the PR was
  // wrongly marked abandoned. A Map avoids prototype keys matching by accident.
  const FLIP_TARGETS = new Map([
    ['completed', { status: MERGED, reason: 'was merged' }],
    ['active', { status: ACTIVE, reason: 'was active' }],
  ]);
  const needsProbe = (pr) => pr.status === ABANDONED && !pr._reconciliation404Confirmed;
  const describeError = (err) => String(err?.message || err || '');

  const projects = shared.getProjects(config).filter(p => !isGitHubProject(p));
  if (projects.length === 0) return totals;

  const token = await getAdoToken();
  if (!token) {
    log('warn', 'Abandoned PR reconciliation: no ADO token — skipping ADO projects');
    // Count what we left behind so the orchestrator's summary line is honest.
    for (const project of projects) {
      totals.skipped += shared.safeJsonArr(shared.projectPrPath(project)).filter(needsProbe).length;
    }
    return totals;
  }

  // Base-repo probe results for THIS pass, keyed by (org, project, repoId)
  // because two minions projects can point at the same ADO repo.
  const probeCache = new Map();

  for (const project of projects) {
    repairAdoProjectConfig(project, 'abandoned PR reconciliation');
    if (!project.adoOrg || !project.adoProject) continue;
    const adoRepositoryId = getAdoRepositoryId(project);
    if (!adoRepositoryId) {
      logMissingAdoRepository(project, 'abandoned PR reconciliation');
      continue;
    }

    const prPath = shared.projectPrPath(project);
    const abandonedPrs = shared.safeJsonArr(prPath).filter(pr =>
      needsProbe(pr) && shared.isPrCompatibleWithProject(project, pr, pr.url || '')
    );
    if (abandonedPrs.length === 0) continue;

    const orgBase = getAdoOrgBase(project);
    const repoApiBase = `${orgBase}/${project.adoProject}/_apis/git/repositories/${encodeURIComponent(adoRepositoryId)}`;
    const probeKey = `${project.adoOrg}/${project.adoProject}/${adoRepositoryId}`;

    let probeResult = probeCache.get(probeKey);
    if (probeResult === undefined) {
      try {
        const repoData = await adoFetch(`${repoApiBase}?api-version=7.1`, token);
        probeResult = repoData ? 'ok' : 'fail';
      } catch (err) {
        log('warn', `Abandoned PR reconciliation: ADO base-repo probe for ${probeKey} threw: ${describeError(err)}`);
        probeResult = 'fail';
      }
      probeCache.set(probeKey, probeResult);
    }
    if (probeResult === 'fail') {
      log('warn', `Abandoned PR reconciliation: skipping ${probeKey} (${abandonedPrs.length} PR${abandonedPrs.length === 1 ? '' : 's'}) — base-repo probe failed, retry next startup`);
      totals.skipped += abandonedPrs.length;
      continue;
    }

    const updates = []; // { prNumber, action: 'flip' | 'confirm404', newStatus?, mergedAt? }
    for (const pr of abandonedPrs) {
      const prNum = shared.getPrNumber(pr);
      if (!prNum) continue;
      try {
        const prData = await adoFetch(`${repoApiBase}/pullrequests/${prNum}?api-version=7.1`, token);
        if (!prData) {
          log('warn', `Skipped ADO PR #${prNum} (${probeKey}): empty response, retry next startup`);
          totals.errored++;
          continue;
        }
        if (prData.status === 'abandoned') {
          // Genuinely abandoned on ADO too — mark so we don't re-probe.
          updates.push({ prNumber: prNum, action: 'confirm404' });
          log('info', `Confirmed ADO PR #${prNum} (${probeKey}): truly abandoned, leaving abandoned`);
          continue;
        }
        const target = FLIP_TARGETS.get(prData.status);
        if (!target) {
          log('warn', `Skipped ADO PR #${prNum} (${probeKey}): unknown status ${JSON.stringify(prData.status)}`);
          totals.errored++;
          continue;
        }
        updates.push({
          prNumber: prNum,
          action: 'flip',
          newStatus: target.status,
          // closedDate is only a merge timestamp when the PR was completed;
          // never stamp `mergedAt` onto a PR we are flipping back to active.
          mergedAt: target.status === MERGED ? (prData.closedDate || null) : null,
        });
        log('info', `Reconciled ADO PR #${prNum} (${probeKey}): abandoned → ${target.status} (${target.reason})`);
      } catch (err) {
        const msg = String(err?.message || '');
        if (/\b404\b|Not Found/i.test(msg)) {
          // 404 on a specific PR with base-probe OK → genuinely deleted.
          updates.push({ prNumber: prNum, action: 'confirm404' });
          log('info', `Confirmed ADO PR #${prNum} (${probeKey}): truly deleted (404), leaving abandoned`);
        } else {
          log('warn', `Abandoned PR reconciliation error on ADO PR #${prNum} (${probeKey}): ${msg}`);
          totals.errored++;
        }
      }
    }

    if (updates.length === 0) continue;

    const reconciledAt = ts();
    let appliedFlips = 0;
    let appliedConfirms = 0;
    try {
      // `await` is harmless if mutatePullRequests is synchronous and required
      // if it returns a promise — NOTE(review): confirm which it is.
      await shared.mutatePullRequests(prPath, (currentPrs) => {
        // Reset in case the mutator re-invokes the callback.
        appliedFlips = 0;
        appliedConfirms = 0;
        for (const upd of updates) {
          const pr = currentPrs.find(p => shared.getPrNumber(p) === upd.prNumber);
          // Only touch records that are STILL abandoned. The probes above
          // awaited network calls, so another writer may have moved the
          // record on (including to merged, which must never be downgraded).
          if (!pr || pr.status !== ABANDONED) continue;
          if (upd.action === 'flip') {
            pr.status = upd.newStatus;
            if (upd.mergedAt && !pr.mergedAt) pr.mergedAt = upd.mergedAt;
            delete pr._consecutive404s;
            delete pr._404Count;
            delete pr._404FirstAt;
            pr._reconciledAt = reconciledAt;
            pr._reconciledFrom = 'startup-pass';
            appliedFlips++;
          } else if (upd.action === 'confirm404') {
            pr._reconciledAt = reconciledAt;
            pr._reconciliation404Confirmed = true;
            appliedConfirms++;
          }
        }
        return currentPrs;
      });
      totals.flipped += appliedFlips;
      totals.confirmedDeleted += appliedConfirms;
    } catch (err) {
      // A failed write must not abort the remaining projects, and must not be
      // reported as success: nothing was persisted for this project.
      log('warn', `Abandoned PR reconciliation: failed to persist ${updates.length} update${updates.length === 1 ? '' : 's'} for ${probeKey}: ${describeError(err)}`);
      totals.errored += updates.length;
    }
  }

  return totals;
}
|
|
1760
|
+
|
|
1597
1761
|
module.exports = {
|
|
1598
1762
|
getAdoToken,
|
|
1599
1763
|
adoFetch,
|
|
1600
1764
|
pollPrStatus,
|
|
1601
1765
|
pollPrHumanComments,
|
|
1602
1766
|
reconcilePrs,
|
|
1767
|
+
reconcileAbandonedPrs, // W-mp60tw0u000j3931 — one-shot startup re-probe of abandoned PRs
|
|
1603
1768
|
checkLiveReviewStatus,
|
|
1604
1769
|
checkLiveBuildAndConflict,
|
|
1605
1770
|
needsAdoPollRetry,
|
package/engine/cli.js
CHANGED
|
@@ -709,6 +709,32 @@ const commands = {
|
|
|
709
709
|
}
|
|
710
710
|
})();
|
|
711
711
|
|
|
712
|
+
// W-mp60tw0u000j3931: One-shot startup reconciliation for `abandoned` PRs.
|
|
713
|
+
// Pairs with W-mp5trwh60008386d's forward-looking 404 hardening — that
|
|
714
|
+
// fix prevents future false-flips, this pass un-flips PRs that were
|
|
715
|
+
// already wrongly marked `abandoned` before the hardening shipped (e.g.
|
|
716
|
+
// the 16 PRs hit by the 2026-05-14 gh-auth-flip incident). Version-gated
|
|
717
|
+
// via engine/state.json so it runs once per ENGINE_DEFAULTS.abandonedReconciliationVersion
|
|
718
|
+
// bump and is otherwise a no-op. Fire-and-forget — boot must not block on
|
|
719
|
+
// network calls; the pass operates on `abandoned` PRs while pollPrStatus
|
|
720
|
+
// operates on `active`/`linked`, so there is no per-record race.
|
|
721
|
+
(function startupReconcileAbandonedPrs() {
  // Phase 1 (synchronous): load the reconciler and kick off the pass. Any
  // synchronous failure here is logged as a "failed to start" warning.
  let pendingPass;
  try {
    const reconciler = require('./abandoned-pr-reconciliation');
    pendingPass = Promise.resolve(reconciler.runStartupReconciliation(config));
  } catch (err) {
    e.log('warn', `Abandoned PR reconciliation failed to start at boot: ${err.message}`);
    return;
  }

  // Phase 2 (fire-and-forget): the promise is intentionally not awaited so
  // boot never blocks on network calls. Only a pass that actually flipped
  // PRs is announced on the console; rejections are logged as warnings.
  pendingPass
    .then((result) => {
      if (result.skipped) return;
      if (result.totals.flipped > 0) {
        console.log(` Reconciled ${result.totals.flipped} abandoned PR(s) → active/merged/closed`);
      }
    })
    .catch((err) => {
      e.log('warn', `Abandoned PR reconciliation failed at boot: ${err.message}`);
    });
})();
|
|
737
|
+
|
|
712
738
|
// Initial tick
|
|
713
739
|
e.tick();
|
|
714
740
|
|
package/engine/github.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
const shared = require('./shared');
|
|
8
|
-
const { exec, execAsync, getProjects, projectPrPath, projectWorkItemsPath, safeJson, safeJsonArr, safeWrite, mutateJsonFileLocked, MINIONS_DIR, getPrLinks, backfillPrPrdItems, log, ts, dateStamp, PR_STATUS, PR_POLLABLE_STATUSES, createThrottleTracker, getProjectOrg } = shared;
|
|
8
|
+
const { exec, execAsync, getProjects, projectPrPath, projectWorkItemsPath, safeJson, safeJsonArr, safeWrite, mutateJsonFileLocked, mutatePullRequests, MINIONS_DIR, getPrLinks, backfillPrPrdItems, log, ts, dateStamp, PR_STATUS, PR_POLLABLE_STATUSES, ENGINE_DEFAULTS, createThrottleTracker, getProjectOrg } = shared;
|
|
9
9
|
const { getPrs } = require('./queries');
|
|
10
10
|
const { MINIONS_COMMENT_MARKER_RE } = require('./gh-comment');
|
|
11
11
|
const path = require('path');
|
|
@@ -21,6 +21,19 @@ function engine() {
|
|
|
21
21
|
let _dispatch = null;
|
|
22
22
|
function dispatchModule() { if (!_dispatch) _dispatch = require('./dispatch'); return _dispatch; }
|
|
23
23
|
|
|
24
|
+
// W-mp5trwh60008386d: test seam so unit tests can mock `gh api` shell-outs
|
|
25
|
+
// without spawning the real CLI. Production code goes through the real
|
|
26
|
+
// `execAsync` from shared; tests call `_setExecAsyncForTest(fn)` to swap in
|
|
27
|
+
// a stub. Pass `null` to restore. Mirrors the `_setAdoTokenForTest` pattern
|
|
28
|
+
// in engine/ado.js.
|
|
29
|
+
let _execAsyncOverride = null;
|
|
30
|
+
/**
 * Run a shell command through the active exec implementation: the test
 * override when one is installed, otherwise the real `execAsync`.
 *
 * @param {string} cmd — command line to execute
 * @param {object} [opts] — options forwarded unchanged to the exec implementation
 * @returns {*} whatever the selected exec implementation returns
 */
function _runExec(cmd, opts) {
  const activeExec = _execAsyncOverride || execAsync;
  return activeExec(cmd, opts);
}
|
|
33
|
+
/**
 * Test seam: install a stub used in place of `execAsync`, or restore the
 * real implementation.
 *
 * @param {Function|null} fn — stub to install; any non-function value clears the override
 */
function _setExecAsyncForTest(fn) {
  if (typeof fn === 'function') {
    _execAsyncOverride = fn;
    return;
  }
  _execAsyncOverride = null;
}
|
|
36
|
+
|
|
24
37
|
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
25
38
|
|
|
26
39
|
// 10 MB — prevents maxBuffer exceeded errors on repos with many open PRs.
|
|
@@ -144,7 +157,7 @@ let _cachedViewerLogin = null;
|
|
|
144
157
|
async function _resolveViewerLogin() {
|
|
145
158
|
if (_cachedViewerLogin) return _cachedViewerLogin;
|
|
146
159
|
try {
|
|
147
|
-
const result = await
|
|
160
|
+
const result = await _runExec('gh api user', { timeout: 10000, encoding: 'utf-8', maxBuffer: GH_MAX_BUFFER });
|
|
148
161
|
const parsed = JSON.parse(String(result || ''));
|
|
149
162
|
const login = parsed?.login ? String(parsed.login).toLowerCase() : null;
|
|
150
163
|
if (login) _cachedViewerLogin = login;
|
|
@@ -241,7 +254,7 @@ async function ghApi(endpoint, slug, opts = {}) {
|
|
|
241
254
|
try {
|
|
242
255
|
const paginateFlag = opts.paginate ? ' --paginate' : '';
|
|
243
256
|
const cmd = `gh api${paginateFlag} "repos/${slug}${endpoint}"`;
|
|
244
|
-
const result = await
|
|
257
|
+
const result = await _runExec(cmd, { timeout: opts.timeout || 30000, encoding: 'utf-8', maxBuffer: GH_MAX_BUFFER });
|
|
245
258
|
const parsed = JSON.parse(result);
|
|
246
259
|
_ghThrottle.recordSuccess();
|
|
247
260
|
return parsed;
|
|
@@ -310,7 +323,7 @@ async function fetchGhBuildErrorLog(slug, failedRuns) {
|
|
|
310
323
|
// Always fetch job log — annotations alone often lack test failure details
|
|
311
324
|
try {
|
|
312
325
|
const cmd = `gh api "repos/${slug}/actions/jobs/${run.id}/logs" 2>&1`;
|
|
313
|
-
const result = await
|
|
326
|
+
const result = await _runExec(cmd, { timeout: 15000, encoding: 'utf-8', maxBuffer: GH_MAX_BUFFER });
|
|
314
327
|
if (result && !result.includes('Not Found')) {
|
|
315
328
|
logParts.push(`--- ${run.name || 'Check'} (log) ---\n${result}`);
|
|
316
329
|
}
|
|
@@ -351,9 +364,18 @@ async function forEachActiveGhPr(config, callback) {
|
|
|
351
364
|
&& shared.isPrCompatibleWithProject(project, pr, pr.url || ''));
|
|
352
365
|
if (activePrs.length === 0) continue;
|
|
353
366
|
|
|
354
|
-
// Probe repo accessibility before iterating PRs — avoids N warnings per inaccessible repo
|
|
367
|
+
// Probe repo accessibility before iterating PRs — avoids N warnings per inaccessible repo.
|
|
368
|
+
// W-mp5trwh60008386d: ghApi returns the GH_NOT_FOUND sentinel on 404 (a frozen object,
|
|
369
|
+
// *not* null). The pre-fix gate only matched `null`, so a 404 on the base repo (caused by
|
|
370
|
+
// a multi-account `gh auth` switch, network blip, or token rotation) fell through and every
|
|
371
|
+
// per-PR call below 404'd, permanently flipping all active PRs to `abandoned`. We now treat
|
|
372
|
+
// both null and GH_NOT_FOUND as "skip the project for this tick" and explicitly do NOT
|
|
373
|
+
// increment per-PR `_consecutive404s` counters since no per-PR call was made.
|
|
355
374
|
const probe = await ghApi('', slug);
|
|
356
|
-
if (probe === null) {
|
|
375
|
+
if (probe === null || probe === GH_NOT_FOUND) {
|
|
376
|
+
if (probe === GH_NOT_FOUND) {
|
|
377
|
+
log('warn', `GitHub repo probe for ${slug} returned 404 — skipping all per-PR polls for this project this tick (avoids spurious abandonments). Per-PR 404 counters NOT incremented.`);
|
|
378
|
+
}
|
|
357
379
|
recordSlugFailure(slug);
|
|
358
380
|
continue;
|
|
359
381
|
}
|
|
@@ -388,6 +410,13 @@ async function forEachActiveGhPr(config, callback) {
|
|
|
388
410
|
if (currentPrs[idx].reviewStatus === 'approved' && after.reviewStatus !== 'approved') {
|
|
389
411
|
after.reviewStatus = 'approved';
|
|
390
412
|
}
|
|
413
|
+
// W-mp5trwh60008386d: never downgrade `status: merged` — terminal state. A stale
|
|
414
|
+
// 404 reaching `prAbandonConfirmCount` could otherwise overwrite a concurrent
|
|
415
|
+
// success/merge poll. Keeps the central pull-requests.json status invariants
|
|
416
|
+
// intact even under multi-writer races.
|
|
417
|
+
if (currentPrs[idx].status === PR_STATUS.MERGED && after.status !== PR_STATUS.MERGED) {
|
|
418
|
+
after.status = PR_STATUS.MERGED;
|
|
419
|
+
}
|
|
391
420
|
shared.applyPrFieldDelta(currentPrs[idx], before, after);
|
|
392
421
|
}
|
|
393
422
|
}
|
|
@@ -410,13 +439,40 @@ async function forEachActiveGhPr(config, callback) {
|
|
|
410
439
|
const centralPath = path.join(MINIONS_DIR, 'pull-requests.json');
|
|
411
440
|
const centralPrs = safeJsonArr(centralPath);
|
|
412
441
|
const activeCentral = centralPrs.filter(pr => PR_POLLABLE_STATUSES.has(pr.status) && pr.url);
|
|
442
|
+
|
|
443
|
+
// W-mp5trwh60008386d: probe each unique slug in the central list ONCE before iterating PRs.
|
|
444
|
+
// Without this gate, central PRs would inherit the same per-PR 404 trapdoor that project-local
|
|
445
|
+
// PRs had pre-fix — a multi-account `gh auth` switch or token rotation would let every
|
|
446
|
+
// central PR for an inaccessible slug accumulate `_consecutive404s` even though the failure
|
|
447
|
+
// is at the auth/repo layer, not the PR. Probe results live for this tick only (Map cleared
|
|
448
|
+
// each invocation).
|
|
449
|
+
const centralSlugProbes = new Map(); // slug → 'ok' | 'fail'
|
|
450
|
+
for (const pr of activeCentral) {
|
|
451
|
+
const ghMatch = pr.url.match(/github\.com\/([^/]+\/[^/]+)\/pull\/(\d+)/);
|
|
452
|
+
if (!ghMatch) continue;
|
|
453
|
+
const slug = ghMatch[1];
|
|
454
|
+
if (centralSlugProbes.has(slug)) continue;
|
|
455
|
+
if (isSlugInBackoff(slug)) { centralSlugProbes.set(slug, 'fail'); continue; }
|
|
456
|
+
const probe = await ghApi('', slug);
|
|
457
|
+
if (probe === null || probe === GH_NOT_FOUND) {
|
|
458
|
+
if (probe === GH_NOT_FOUND) {
|
|
459
|
+
log('warn', `GitHub repo probe for ${slug} returned 404 (central PR poll) — skipping all central PRs for this slug this tick. Per-PR 404 counters NOT incremented.`);
|
|
460
|
+
}
|
|
461
|
+
recordSlugFailure(slug);
|
|
462
|
+
centralSlugProbes.set(slug, 'fail');
|
|
463
|
+
} else {
|
|
464
|
+
resetSlugBackoff(slug);
|
|
465
|
+
centralSlugProbes.set(slug, 'ok');
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
|
|
413
469
|
let centralUpdated = 0;
|
|
414
470
|
const updatedCentralRecords = [];
|
|
415
471
|
for (const pr of activeCentral) {
|
|
416
472
|
const ghMatch = pr.url.match(/github\.com\/([^/]+\/[^/]+)\/pull\/(\d+)/);
|
|
417
473
|
if (!ghMatch) continue;
|
|
418
474
|
const slug = ghMatch[1];
|
|
419
|
-
if (
|
|
475
|
+
if (centralSlugProbes.get(slug) !== 'ok') continue; // probe failed → skip per-PR call
|
|
420
476
|
const prNum = ghMatch[2];
|
|
421
477
|
try {
|
|
422
478
|
const before = shared.snapshotPrRecord(pr);
|
|
@@ -452,7 +508,13 @@ async function forEachActiveGhPr(config, callback) {
|
|
|
452
508
|
// Only merge back central PRs that the callback actually modified
|
|
453
509
|
for (const { before, after } of updatedCentralRecords) {
|
|
454
510
|
const idx = currentPrs.findIndex(p => p.id === after.id);
|
|
455
|
-
if (idx >= 0)
|
|
511
|
+
if (idx >= 0) {
|
|
512
|
+
// W-mp5trwh60008386d: same merged-status guard as project-local PRs.
|
|
513
|
+
if (currentPrs[idx].status === PR_STATUS.MERGED && after.status !== PR_STATUS.MERGED) {
|
|
514
|
+
after.status = PR_STATUS.MERGED;
|
|
515
|
+
}
|
|
516
|
+
shared.applyPrFieldDelta(currentPrs[idx], before, after);
|
|
517
|
+
}
|
|
456
518
|
}
|
|
457
519
|
return currentPrs;
|
|
458
520
|
}, { defaultValue: [] });
|
|
@@ -465,21 +527,49 @@ async function forEachActiveGhPr(config, callback) {
|
|
|
465
527
|
// ─── PR Status Polling ──────────────────────────────────────────────────────
|
|
466
528
|
|
|
467
529
|
async function pollPrStatus(config) {
|
|
530
|
+
// W-mp5trwh60008386d: per-PR 404 must be confirmed N times before flipping to abandoned.
|
|
531
|
+
// Single 404s are routinely transient (multi-account `gh auth` race, network blip, token
|
|
532
|
+
// rotation). Counter is per-PR (`pr._consecutive404s`) and reset on any successful response.
|
|
533
|
+
// Default N from ENGINE_DEFAULTS; opt-in override via config.engine.prAbandonConfirmCount.
|
|
534
|
+
const confirmCount = Math.max(1, Number(config?.engine?.prAbandonConfirmCount) || ENGINE_DEFAULTS.prAbandonConfirmCount);
|
|
535
|
+
|
|
468
536
|
const totalUpdated = await forEachActiveGhPr(config, async (project, pr, prNum, slug) => {
|
|
469
537
|
const prData = await ghApi(`/pulls/${prNum}`, slug);
|
|
470
538
|
if (!prData) return false;
|
|
471
539
|
if (prData === GH_NOT_FOUND) {
|
|
472
|
-
// PR
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
540
|
+
// Per-PR 404. Base-repo probe in forEachActiveGhPr already passed for this tick, so the
|
|
541
|
+
// 404 is specific to this PR — but we still require N consecutive confirmations across
|
|
542
|
+
// separate poll ticks before flipping. Worst-case delay for a genuinely deleted PR is
|
|
543
|
+
// N × prPollStatusEvery × tickInterval (~36 min at defaults), which is acceptable for an
|
|
544
|
+
// irreversible status change.
|
|
545
|
+
const next = (Number(pr._consecutive404s) || 0) + 1;
|
|
546
|
+
if (next >= confirmCount) {
|
|
547
|
+
if (pr.status !== PR_STATUS.ABANDONED) {
|
|
548
|
+
pr.status = PR_STATUS.ABANDONED;
|
|
549
|
+
delete pr._consecutive404s;
|
|
550
|
+
log('info', `PR ${pr.id} returned 404 ${next} consecutive polls — marking abandoned`);
|
|
551
|
+
return true;
|
|
552
|
+
}
|
|
553
|
+
// Already abandoned; clear the counter if it lingered.
|
|
554
|
+
if (pr._consecutive404s) {
|
|
555
|
+
delete pr._consecutive404s;
|
|
556
|
+
return true;
|
|
557
|
+
}
|
|
558
|
+
return false;
|
|
477
559
|
}
|
|
478
|
-
|
|
560
|
+
pr._consecutive404s = next;
|
|
561
|
+
log('warn', `PR ${pr.id} returned 404 (${next}/${confirmCount}) — deferring abandonment until threshold`);
|
|
562
|
+
return true; // counter increment is a state change worth persisting
|
|
479
563
|
}
|
|
480
564
|
|
|
481
565
|
let updated = false;
|
|
482
566
|
|
|
567
|
+
// Successful response — clear any pending 404 confirmation streak.
|
|
568
|
+
if (pr._consecutive404s) {
|
|
569
|
+
delete pr._consecutive404s;
|
|
570
|
+
updated = true;
|
|
571
|
+
}
|
|
572
|
+
|
|
483
573
|
const headBranch = prData.head?.ref ? String(prData.head.ref).trim() : '';
|
|
484
574
|
if (headBranch && pr.branch !== headBranch) {
|
|
485
575
|
pr.branch = headBranch;
|
|
@@ -742,7 +832,7 @@ async function pollPrStatus(config) {
|
|
|
742
832
|
if (autoComplete) {
|
|
743
833
|
try {
|
|
744
834
|
const mergeMethod = ['squash', 'merge', 'rebase'].includes(config.engine?.prMergeMethod) ? config.engine.prMergeMethod : 'squash';
|
|
745
|
-
await
|
|
835
|
+
await _runExec(`gh pr merge ${prNum} --${mergeMethod} --repo ${slug} --delete-branch`, { timeout: 30000, encoding: 'utf-8', maxBuffer: GH_MAX_BUFFER });
|
|
746
836
|
pr._autoCompleted = true;
|
|
747
837
|
log('info', `Auto-completed PR ${pr.id}: builds green + review approved → merged (${mergeMethod})`);
|
|
748
838
|
updated = true;
|
|
@@ -1111,10 +1201,159 @@ async function checkLiveBuildAndConflict(pr, project) {
|
|
|
1111
1201
|
}
|
|
1112
1202
|
}
|
|
1113
1203
|
|
|
1204
|
+
// ─── One-Shot Startup Reconciliation for Abandoned PRs (W-mp60tw0u000j3931) ───
|
|
1205
|
+
//
|
|
1206
|
+
// Pairs with the W-mp5trwh60008386d 404-hardening fix in pollPrStatus.
|
|
1207
|
+
// pollPrStatus only iterates PR_POLLABLE_STATUSES = {active, linked} — once a
|
|
1208
|
+
// PR has status: 'abandoned' it is terminal for the regular poll loop. That
|
|
1209
|
+
// behavior is correct (no per-tick re-probing of dead PRs), but it leaves any
|
|
1210
|
+
// historical false-flips (e.g. the 16 PRs marked abandoned by the 2026-05-14
|
|
1211
|
+
// gh-auth-flip incident) permanently stuck unless something explicitly
|
|
1212
|
+
// re-probes the abandoned set.
|
|
1213
|
+
//
|
|
1214
|
+
// This function is the explicit re-probe. Same base-repo probe gate as
|
|
1215
|
+
// pollPrStatus (cached per slug for the duration of the pass), then a single
|
|
1216
|
+
// `repos/{slug}/pulls/{n}` per abandoned PR. Open → active, merged → merged,
|
|
1217
|
+
// closed (not merged) → closed, confirmed-404 → leave abandoned + mark
|
|
1218
|
+
// `_reconciliation404Confirmed: true` so we don't re-probe deleted PRs on
|
|
1219
|
+
// every future restart.
|
|
1220
|
+
//
|
|
1221
|
+
// Caller: engine/abandoned-pr-reconciliation.js (version-gated).
|
|
1222
|
+
async function reconcileAbandonedPrs(config) {
|
|
1223
|
+
const projects = shared.getProjects(config).filter(isGitHub);
|
|
1224
|
+
let flipped = 0, confirmedDeleted = 0, skipped = 0, errored = 0;
|
|
1225
|
+
// Cache base-repo probe results for the duration of THIS pass — multiple
|
|
1226
|
+
// projects can share the same slug (rare) and multiple PRs definitely share
|
|
1227
|
+
// it. Cleared when the function returns.
|
|
1228
|
+
const slugProbeCache = new Map(); // slug → 'ok' | 'fail'
|
|
1229
|
+
|
|
1230
|
+
for (const project of projects) {
|
|
1231
|
+
const slug = getRepoSlug(project);
|
|
1232
|
+
if (!slug) continue;
|
|
1233
|
+
|
|
1234
|
+
const prPath = projectPrPath(project);
|
|
1235
|
+
const prs = safeJsonArr(prPath);
|
|
1236
|
+
// Filter: only abandoned PRs that don't already have the confirmed-404
|
|
1237
|
+
// marker from a previous reconciliation pass. Marker means "we already
|
|
1238
|
+
// verified this PR is genuinely deleted on the server" — no point
|
|
1239
|
+
// re-probing it on every future pass.
|
|
1240
|
+
const abandonedPrs = prs.filter(pr =>
|
|
1241
|
+
pr.status === PR_STATUS.ABANDONED
|
|
1242
|
+
&& !pr._reconciliation404Confirmed
|
|
1243
|
+
&& shared.isPrCompatibleWithProject(project, pr, pr.url || '')
|
|
1244
|
+
);
|
|
1245
|
+
if (abandonedPrs.length === 0) continue;
|
|
1246
|
+
|
|
1247
|
+
// Probe base repo (cached). Failure → skip ALL of this slug's abandoned
|
|
1248
|
+
// PRs and increment skipped counter — auth/access issue at boot, retry
|
|
1249
|
+
// next restart.
|
|
1250
|
+
let probeResult = slugProbeCache.get(slug);
|
|
1251
|
+
if (probeResult === undefined) {
|
|
1252
|
+
const probe = await ghApi('', slug);
|
|
1253
|
+
probeResult = (probe === null || probe === GH_NOT_FOUND) ? 'fail' : 'ok';
|
|
1254
|
+
slugProbeCache.set(slug, probeResult);
|
|
1255
|
+
}
|
|
1256
|
+
if (probeResult === 'fail') {
|
|
1257
|
+
log('warn', `Abandoned PR reconciliation: skipping ${slug} (${abandonedPrs.length} PR${abandonedPrs.length === 1 ? '' : 's'}) — base-repo probe failed, retry next startup`);
|
|
1258
|
+
skipped += abandonedPrs.length;
|
|
1259
|
+
continue;
|
|
1260
|
+
}
|
|
1261
|
+
|
|
1262
|
+
// Per-PR re-probe. Collect updates first, then apply via mutatePullRequests
|
|
1263
|
+
// for atomic single-writer semantics. We match by prNumber on writeback
|
|
1264
|
+
// (not id) because mutatePullRequests calls normalizePrRecords which can
|
|
1265
|
+
// lowercase the id slug — prNumber is the stable key within a project's
|
|
1266
|
+
// pull-requests.json.
|
|
1267
|
+
const updates = []; // { prNumber, action, newStatus?, mergedAt? }
|
|
1268
|
+
for (const pr of abandonedPrs) {
|
|
1269
|
+
const prNum = shared.getPrNumber(pr);
|
|
1270
|
+
if (!prNum) continue;
|
|
1271
|
+
|
|
1272
|
+
try {
|
|
1273
|
+
const prData = await ghApi(`/pulls/${prNum}`, slug);
|
|
1274
|
+
if (prData === GH_NOT_FOUND) {
|
|
1275
|
+
// 404 with base-probe OK → genuinely deleted. Mark so we don't
|
|
1276
|
+
// re-probe this PR on future startups.
|
|
1277
|
+
updates.push({ prNumber: prNum, action: 'confirm404' });
|
|
1278
|
+
confirmedDeleted++;
|
|
1279
|
+
log('info', `Confirmed PR #${prNum} (${slug}): truly deleted, leaving abandoned`);
|
|
1280
|
+
} else if (prData) {
|
|
1281
|
+
// Successful response. Map GitHub state to minions status.
|
|
1282
|
+
let newStatus, reason;
|
|
1283
|
+
if (prData.merged) {
|
|
1284
|
+
newStatus = PR_STATUS.MERGED;
|
|
1285
|
+
reason = 'was merged';
|
|
1286
|
+
} else if (prData.state === 'open') {
|
|
1287
|
+
newStatus = PR_STATUS.ACTIVE;
|
|
1288
|
+
reason = 'was open';
|
|
1289
|
+
} else if (prData.state === 'closed') {
|
|
1290
|
+
newStatus = PR_STATUS.CLOSED;
|
|
1291
|
+
reason = 'was closed';
|
|
1292
|
+
} else {
|
|
1293
|
+
// Unknown state — be defensive, leave as-is and don't mark as
|
|
1294
|
+
// confirmed-404 either (wait for clearer signal next pass).
|
|
1295
|
+
log('warn', `Skipped PR #${prNum} (${slug}): unknown GitHub state ${JSON.stringify(prData.state)}`);
|
|
1296
|
+
errored++;
|
|
1297
|
+
continue;
|
|
1298
|
+
}
|
|
1299
|
+
updates.push({
|
|
1300
|
+
prNumber: prNum,
|
|
1301
|
+
action: 'flip',
|
|
1302
|
+
newStatus,
|
|
1303
|
+
mergedAt: prData.merged_at || null,
|
|
1304
|
+
});
|
|
1305
|
+
flipped++;
|
|
1306
|
+
log('info', `Reconciled PR #${prNum} (${slug}): abandoned → ${newStatus} (${reason})`);
|
|
1307
|
+
} else {
|
|
1308
|
+
// null = network/rate-limit/other non-404 error. Don't mark
|
|
1309
|
+
// _reconciliation404Confirmed — we want to retry next startup.
|
|
1310
|
+
log('warn', `Skipped PR #${prNum} (${slug}): API error, retry next startup`);
|
|
1311
|
+
errored++;
|
|
1312
|
+
}
|
|
1313
|
+
} catch (err) {
|
|
1314
|
+
log('warn', `Abandoned PR reconciliation error on PR #${prNum} (${slug}): ${err.message}`);
|
|
1315
|
+
errored++;
|
|
1316
|
+
}
|
|
1317
|
+
}
|
|
1318
|
+
|
|
1319
|
+
if (updates.length > 0) {
|
|
1320
|
+
const reconciledAt = ts();
|
|
1321
|
+
mutatePullRequests(prPath, (currentPrs) => {
|
|
1322
|
+
for (const upd of updates) {
|
|
1323
|
+
const pr = currentPrs.find(p => shared.getPrNumber(p) === upd.prNumber);
|
|
1324
|
+
if (!pr) continue;
|
|
1325
|
+
// Defensive: never downgrade a merged record. Should already be
|
|
1326
|
+
// filtered by the abandoned-only scan above, but a concurrent writer
|
|
1327
|
+
// could have flipped it between our read and this write.
|
|
1328
|
+
if (pr.status === PR_STATUS.MERGED && upd.action !== 'flip') continue;
|
|
1329
|
+
if (pr.status === PR_STATUS.MERGED && upd.newStatus !== PR_STATUS.MERGED) continue;
|
|
1330
|
+
if (upd.action === 'flip') {
|
|
1331
|
+
pr.status = upd.newStatus;
|
|
1332
|
+
if (upd.mergedAt && !pr.mergedAt) pr.mergedAt = upd.mergedAt;
|
|
1333
|
+
// Clear stale 404-counter state from the false-flip era.
|
|
1334
|
+
delete pr._consecutive404s;
|
|
1335
|
+
delete pr._404Count;
|
|
1336
|
+
delete pr._404FirstAt;
|
|
1337
|
+
pr._reconciledAt = reconciledAt;
|
|
1338
|
+
pr._reconciledFrom = 'startup-pass';
|
|
1339
|
+
} else if (upd.action === 'confirm404') {
|
|
1340
|
+
pr._reconciledAt = reconciledAt;
|
|
1341
|
+
pr._reconciliation404Confirmed = true;
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
return currentPrs;
|
|
1345
|
+
});
|
|
1346
|
+
}
|
|
1347
|
+
}
|
|
1348
|
+
|
|
1349
|
+
return { flipped, confirmedDeleted, skipped, errored };
|
|
1350
|
+
}
|
|
1351
|
+
|
|
1114
1352
|
module.exports = {
|
|
1115
1353
|
pollPrStatus,
|
|
1116
1354
|
pollPrHumanComments,
|
|
1117
1355
|
reconcilePrs,
|
|
1356
|
+
reconcileAbandonedPrs, // W-mp60tw0u000j3931 — one-shot startup re-probe of abandoned PRs
|
|
1118
1357
|
checkLiveReviewStatus,
|
|
1119
1358
|
checkLiveBuildAndConflict,
|
|
1120
1359
|
isGhThrottled,
|
|
@@ -1138,4 +1377,6 @@ module.exports = {
|
|
|
1138
1377
|
_resolveViewerLogin, // exported for testing (W-mp3bp0ha000997ab-b backfill)
|
|
1139
1378
|
_setCachedViewerLogin, // exported for testing (W-mp3bp0ha000997ab-b backfill)
|
|
1140
1379
|
_backfillViewerDidAuthor, // exported for testing (W-mp3bp0ha000997ab-b backfill)
|
|
1380
|
+
_setExecAsyncForTest, // W-mp5trwh60008386d: test seam to mock `gh api` shell-outs
|
|
1381
|
+
GH_NOT_FOUND, // W-mp5trwh60008386d: exported so tests can assert sentinel propagation
|
|
1141
1382
|
};
|
package/engine/shared.js
CHANGED
|
@@ -60,6 +60,11 @@ const MINIONS_DIR = process.env.MINIONS_TEST_DIR || resolveMinionsHome(false, {
|
|
|
60
60
|
const ENGINE_DIR = path.join(MINIONS_DIR, 'engine');
|
|
61
61
|
const CONTROL_PATH = path.join(ENGINE_DIR, 'control.json');
|
|
62
62
|
const COOLDOWNS_PATH = path.join(ENGINE_DIR, 'cooldowns.json');
|
|
63
|
+
// W-mp60tw0u000j3931: Persistent cross-restart engine state (migration markers,
|
|
64
|
+
// one-shot reconciliation versions, etc.). Distinct from CONTROL_PATH which is
|
|
65
|
+
// process-lifetime (state/pid/ownerToken). See ENGINE_DEFAULTS.abandonedReconciliationVersion
|
|
66
|
+
// for the first consumer.
|
|
67
|
+
const ENGINE_STATE_PATH = path.join(ENGINE_DIR, 'state.json');
|
|
63
68
|
const PR_LINKS_PATH = path.join(MINIONS_DIR, 'engine', 'pr-links.json');
|
|
64
69
|
const PINNED_ITEMS_PATH = path.join(MINIONS_DIR, 'engine', 'kb-pins.json');
|
|
65
70
|
const LOG_PATH = path.join(MINIONS_DIR, 'engine', 'log.json');
|
|
@@ -725,6 +730,22 @@ function mutateControl(mutator) {
|
|
|
725
730
|
}, { defaultValue: { state: 'stopped', pid: null }, skipWriteIfUnchanged: true });
|
|
726
731
|
}
|
|
727
732
|
|
|
733
|
+
// W-mp60tw0u000j3931: Lock-safe read-modify-write for engine/state.json — the
|
|
734
|
+
// persistent cross-restart engine-level state file (migration markers,
|
|
735
|
+
// one-shot reconciliation versions, etc.). Mirrors mutateControl's shape.
|
|
736
|
+
function mutateEngineState(mutator) {
|
|
737
|
+
return mutateJsonFileLocked(ENGINE_STATE_PATH, (data) => {
|
|
738
|
+
if (!data || typeof data !== 'object' || Array.isArray(data)) data = {};
|
|
739
|
+
return mutator(data) || data;
|
|
740
|
+
}, { defaultValue: {}, skipWriteIfUnchanged: true });
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
function readEngineState() {
|
|
744
|
+
const data = safeJson(ENGINE_STATE_PATH);
|
|
745
|
+
if (!data || typeof data !== 'object' || Array.isArray(data)) return {};
|
|
746
|
+
return data;
|
|
747
|
+
}
|
|
748
|
+
|
|
728
749
|
function mutateCooldowns(mutator) {
|
|
729
750
|
return mutateJsonFileLocked(COOLDOWNS_PATH, (data) => {
|
|
730
751
|
if (!data || typeof data !== 'object' || Array.isArray(data)) data = {};
|
|
@@ -1093,6 +1114,23 @@ const ENGINE_DEFAULTS = {
|
|
|
1093
1114
|
ghPollEnabled: true, // poll GitHub PR status, comments, and reconciliation on each tick cycle
|
|
1094
1115
|
prPollStatusEvery: 12, // poll PR build/review/merge status every N ticks for both ADO and GitHub (~12 min at default interval)
|
|
1095
1116
|
prPollCommentsEvery: 12, // poll PR human comments every N ticks for both ADO and GitHub (~12 min at default interval)
|
|
1117
|
+
// W-mp5trwh60008386d: per-PR 404 must repeat across N consecutive successful base-repo probes
|
|
1118
|
+
// before flipping a PR to `abandoned`. A single 404 on `repos/{slug}/pulls/{n}` can be a transient
|
|
1119
|
+
// multi-account `gh auth` race, network blip, or token rotation — those used to permanently
|
|
1120
|
+
// corrupt active PRs. Counter resets to 0 on any successful per-PR response. ADO doesn't have
|
|
1121
|
+
// an analogous abandon-on-404 trapdoor (`adoFetch` throws on 404 → caught by Promise.allSettled
|
|
1122
|
+
// → no flip), so the constant is GitHub-only today but lives in shared defaults so a future
|
|
1123
|
+
// ADO change can adopt the same semantics.
|
|
1124
|
+
prAbandonConfirmCount: 3,
|
|
1125
|
+
// W-mp60tw0u000j3931: One-shot startup reconciliation pass for `abandoned` PRs runs
|
|
1126
|
+
// exactly once per bump of this constant. The engine compares this value against
|
|
1127
|
+
// engine/state.json:lastAbandonedReconciliationVersion at boot; if the on-disk version
|
|
1128
|
+
// is lower, every project's pull-requests.json is scanned, every abandoned PR is
|
|
1129
|
+
// re-probed, and any false-flipped PRs (e.g. transient 404 victims from before the
|
|
1130
|
+
// hardening in W-mp5trwh60008386d shipped) are flipped back to active/merged/closed
|
|
1131
|
+
// based on their live API state. Bump this when reconciliation logic itself changes
|
|
1132
|
+
// and we want it to re-run once on every install.
|
|
1133
|
+
abandonedReconciliationVersion: 1,
|
|
1096
1134
|
watchesIncludeBehindBy: false, // opt-in: when true, GitHub PR poll calls /compare/{base}...{head} once per PR per prPollStatusEvery cadence to populate pr.behindBy (powers the `behind-master` watch predicate). Off by default to avoid the extra API call. ADO PRs always get null (no commit-graph walk yet).
|
|
1097
1135
|
autoCompletePrs: false, // auto-merge PRs when builds green + review approved (opt-in)
|
|
1098
1136
|
prMergeMethod: 'squash', // merge method: squash, merge, rebase
|
|
@@ -3656,6 +3694,7 @@ module.exports = {
|
|
|
3656
3694
|
openUrlInBrowser,
|
|
3657
3695
|
CONTROL_PATH,
|
|
3658
3696
|
COOLDOWNS_PATH,
|
|
3697
|
+
ENGINE_STATE_PATH, // W-mp60tw0u000j3931
|
|
3659
3698
|
PR_LINKS_PATH,
|
|
3660
3699
|
PINNED_ITEMS_PATH,
|
|
3661
3700
|
LOG_PATH,
|
|
@@ -3683,6 +3722,8 @@ module.exports = {
|
|
|
3683
3722
|
withFileLock,
|
|
3684
3723
|
mutateJsonFileLocked,
|
|
3685
3724
|
mutateControl,
|
|
3725
|
+
mutateEngineState, // W-mp60tw0u000j3931
|
|
3726
|
+
readEngineState, // W-mp60tw0u000j3931
|
|
3686
3727
|
mutateCooldowns,
|
|
3687
3728
|
mutateWorkItems,
|
|
3688
3729
|
reopenWorkItem,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.1947",
|
|
3
|
+
"version": "0.1.1949",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|