@yemi33/minions 0.1.1948 → 0.1.1950
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/engine/abandoned-pr-reconciliation.js +143 -0
- package/engine/ado.js +157 -0
- package/engine/cli.js +26 -0
- package/engine/copilot-models.json +1 -1
- package/engine/github.js +150 -1
- package/engine/shared.js +68 -16
- package/package.json +1 -1
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* engine/abandoned-pr-reconciliation.js — One-shot startup reconciliation pass
|
|
3
|
+
* for `abandoned` PRs. Pairs with W-mp5trwh60008386d's forward-looking 404
|
|
4
|
+
* hardening in engine/github.js: that fix prevents *future* false-flips, but
|
|
5
|
+
* does nothing for PRs already wrongly marked `abandoned` before it shipped
|
|
6
|
+
* (e.g. the 16 PRs hit by the 2026-05-14 gh-auth-flip incident).
|
|
7
|
+
*
|
|
8
|
+
* `pollPrStatus` only iterates PRs in `PR_POLLABLE_STATUSES = {active, linked}`
|
|
9
|
+
* (engine/shared.js), so once a PR is `abandoned` the regular poll loop never
|
|
10
|
+
* visits it again. This module is the explicit one-time re-probe.
|
|
11
|
+
*
|
|
12
|
+
* ## Why one-shot, not periodic?
|
|
13
|
+
*
|
|
14
|
+
* Periodic re-probing of terminal-status PRs has real costs:
|
|
15
|
+
* - GitHub/ADO API quota burn on every restart (50+ PRs × 2 hosts).
|
|
16
|
+
* - Race risk against late-arriving merge events (a PR closed 30s before
|
|
17
|
+
* the pass would mis-classify based on stale local state).
|
|
18
|
+
* - Boot latency that grows linearly with abandoned-PR count.
|
|
19
|
+
*
|
|
20
|
+
* The version-gated one-shot model gives us exactly what we need: re-probe
|
|
21
|
+
* once per release that bumps `ENGINE_DEFAULTS.abandonedReconciliationVersion`,
|
|
22
|
+
* then never again until the next bump. First boot after this lands cleans up
|
|
23
|
+
* the historical false-flips; subsequent boots are a single state.json read +
|
|
24
|
+
* version compare → no-op.
|
|
25
|
+
*
|
|
26
|
+
* ## Why a separate WI from W-mp5trwh60008386d?
|
|
27
|
+
*
|
|
28
|
+
* Forward-fix (don't false-flip new PRs) and retroactive-heal (un-flip already
|
|
29
|
+
* damaged PRs) are different concerns with different risk profiles. The 404
|
|
30
|
+
* hardening fix is per-tick and ships independently; this reconciliation is
|
|
31
|
+
* boot-only and depends on the hardening fix existing in master so we don't
|
|
32
|
+
* fork the abandonment-confirmation logic.
|
|
33
|
+
*
|
|
34
|
+
* ## Boot wiring
|
|
35
|
+
*
|
|
36
|
+
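* Launched from engine/cli.js between the recovery sweep and the initial tick.
* The call is fire-and-forget (not awaited), so the pass may still be running
* while the first ticks execute.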
|
|
37
|
+
* Errors here must NOT block boot — the catch in cli.js logs and continues.
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
const path = require('path');
|
|
41
|
+
const shared = require('./shared');
|
|
42
|
+
|
|
43
|
+
const { ENGINE_DEFAULTS, log, mutateEngineState, readEngineState } = shared;
|
|
44
|
+
|
|
45
|
+
/**
 * Run the version-gated startup reconciliation pass for `abandoned` PRs.
 *
 * Gate: unless `options.skipVersionCheck` is true, the pass is a silent no-op
 * when engine state's `lastAbandonedReconciliationVersion` is already >=
 * `ENGINE_DEFAULTS.abandonedReconciliationVersion`.
 *
 * Marker policy: the version marker is persisted only after a COMPLETE pass,
 * i.e. neither host pass threw and no PR was counted as skipped or errored.
 * The host reconcilers log "retry next startup" for probe failures and API
 * errors; persisting the marker after such a pass would close the gate and
 * the promised retry would never happen. Re-running is cheap and idempotent:
 * PRs that were flipped are no longer `abandoned`, and confirmed PRs carry
 * `_reconciliation404Confirmed`, so only the unresolved PRs are re-probed.
 *
 * @param {object} config — engine config (used to enumerate projects)
 * @param {object} [options]
 * @param {boolean} [options.skipVersionCheck] — bypass the version gate and the marker write (tests only)
 * @param {object} [options.githubModule] — injectable for tests (default: require './github')
 * @param {object} [options.adoModule] — injectable for tests (default: require './ado')
 * @returns {Promise<{ skipped: boolean, totals: { flipped: number, confirmedDeleted: number, skipped: number, errored: number } }>}
 *   `skipped: true` means the version gate short-circuited the pass.
 */
async function runStartupReconciliation(config, options = {}) {
  const targetVersion = Number(ENGINE_DEFAULTS.abandonedReconciliationVersion) || 0;
  const skipVersionCheck = options.skipVersionCheck === true;

  if (!skipVersionCheck) {
    const state = readEngineState();
    const lastVersion = Number(state.lastAbandonedReconciliationVersion) || 0;
    if (lastVersion >= targetVersion) {
      // Already reconciled at this version — no-op. Deliberately silent: this
      // is the common path and it runs on every boot.
      return {
        skipped: true,
        totals: { flipped: 0, confirmedDeleted: 0, skipped: 0, errored: 0 },
      };
    }
  }

  const github = options.githubModule || require('./github');
  const ado = options.adoModule || require('./ado');

  // Pre-count abandoned PRs (only those without the confirmed marker) so the
  // start-of-pass log line is meaningful. Observability only — a failure here
  // must not prevent the pass from running.
  let preCount = 0;
  let projectCount = 0;
  try {
    const projects = shared.getProjects(config);
    projectCount = projects.length;
    for (const p of projects) {
      const prs = shared.safeJsonArr(shared.projectPrPath(p));
      preCount += prs.filter(pr =>
        pr.status === shared.PR_STATUS.ABANDONED && !pr._reconciliation404Confirmed
      ).length;
    }
  } catch { /* observability only */ }

  log('info', `Abandoned PR reconciliation pass: scanning ${preCount} PR${preCount === 1 ? '' : 's'} across ${projectCount} project${projectCount === 1 ? '' : 's'} (version ${targetVersion})`);

  // Run the host reconcilers serially (GitHub first, then ADO) rather than in
  // parallel: this keeps log ordering deterministic and avoids interleaved
  // throttle messages. A throw in one host must not stop the other.
  const totals = { flipped: 0, confirmedDeleted: 0, skipped: 0, errored: 0 };
  let passThrew = false;
  for (const [label, hostModule] of [['GitHub', github], ['ADO', ado]]) {
    try {
      const hostTotals = await hostModule.reconcileAbandonedPrs(config);
      // Coerce defensively so a malformed return value cannot turn the
      // aggregated totals into NaN.
      for (const key of Object.keys(totals)) {
        totals[key] += Number(hostTotals?.[key]) || 0;
      }
    } catch (err) {
      passThrew = true;
      log('warn', `Abandoned PR reconciliation: ${label} pass threw: ${err.message}`);
    }
  }

  log('info', `Abandoned PR reconciliation: flipped ${totals.flipped}, confirmed-deleted ${totals.confirmedDeleted}, skipped ${totals.skipped}, errored ${totals.errored}`);

  if (!skipVersionCheck) {
    const complete = !passThrew && totals.skipped === 0 && totals.errored === 0;
    if (complete) {
      // Persist the marker even when the pass found nothing to fix — it means
      // "reconciliation ran to completion at this code version", not "we
      // found something". Without the write every restart would re-run it.
      try {
        mutateEngineState((state) => {
          state.lastAbandonedReconciliationVersion = targetVersion;
          state.lastAbandonedReconciliationAt = new Date().toISOString();
          return state;
        });
      } catch (err) {
        // A state.json write failure is non-fatal: the next boot simply
        // re-runs the (idempotent) pass. Log and continue.
        log('warn', `Abandoned PR reconciliation: failed to persist version marker: ${err.message}`);
      }
    } else {
      // Leave the gate open so the PRs we could not resolve are re-probed on
      // the next startup, as the per-PR log lines promise.
      log('warn', `Abandoned PR reconciliation: pass incomplete (${totals.skipped} skipped, ${totals.errored} errored${passThrew ? ', host pass threw' : ''}) — version marker not persisted, will retry next startup`);
    }
  }

  return { skipped: false, totals };
}
|
|
140
|
+
|
|
141
|
+
// Public surface: the boot-time entry point is the only export.
module.exports = { runStartupReconciliation };
|
package/engine/ado.js
CHANGED
|
@@ -1602,12 +1602,169 @@ function _setAdoTokenForTest(token) {
|
|
|
1602
1602
|
}
|
|
1603
1603
|
}
|
|
1604
1604
|
|
|
1605
|
+
// ─── One-Shot Startup Reconciliation for Abandoned PRs (W-mp60tw0u000j3931) ───
//
// ADO counterpart of engine/github.js reconcileAbandonedPrs, with the same
// shape: one base-repo probe per (org, project, repo), cached for the duration
// of the pass, then one pull-request fetch per locally-abandoned PR.
//
// Live ADO status → local status:
//   'completed' → PR_STATUS.MERGED
//   'active'    → PR_STATUS.ACTIVE
//   'abandoned' → left abandoned, marked `_reconciliation404Confirmed` so it
//                 is not re-probed on later passes
//   404 on the PR with a healthy base repo → treated as deleted, same marker
//
// Per the original note on this block, adoFetch throws on failure instead of
// returning a sentinel, so 404 is recognised from the error message.
// NOTE(review): the message match (`\b404\b|Not Found`) would also fire if an
// unrelated error message happens to contain "404" (e.g. a URL for PR #404) —
// confirm against adoFetch's error format.
//
// @param {object} config — engine config, used to enumerate projects
// @returns {Promise<{flipped:number, confirmedDeleted:number, skipped:number, errored:number}>}
//   Counters reflect probe outcomes; a flip that is dropped at writeback
//   because the record changed concurrently is still counted as flipped.
async function reconcileAbandonedPrs(config) {
  let flipped = 0;
  let confirmedDeleted = 0;
  let skipped = 0;
  let errored = 0;

  const projects = shared.getProjects(config).filter(p => !isGitHubProject(p));
  if (projects.length === 0) return { flipped, confirmedDeleted, skipped, errored };

  const token = await getAdoToken();
  if (!token) {
    log('warn', 'Abandoned PR reconciliation: no ADO token — skipping ADO projects');
    // Report every pending abandoned PR as skipped so the orchestrator's
    // summary line is honest about what was left behind.
    let pending = 0;
    for (const project of projects) {
      const prs = shared.safeJsonArr(shared.projectPrPath(project));
      pending += prs.filter(pr =>
        pr.status === shared.PR_STATUS.ABANDONED && !pr._reconciliation404Confirmed
      ).length;
    }
    return { flipped, confirmedDeleted, skipped: pending, errored };
  }

  // Base-repo probe results for THIS pass, keyed by org/project/repoId since
  // two minions projects can point at the same ADO repo.
  const probeCache = new Map();
  const notFoundPattern = /\b404\b|Not Found/i;

  for (const project of projects) {
    repairAdoProjectConfig(project, 'abandoned PR reconciliation');
    if (!project.adoOrg || !project.adoProject) continue;
    const adoRepositoryId = getAdoRepositoryId(project);
    if (!adoRepositoryId) {
      logMissingAdoRepository(project, 'abandoned PR reconciliation');
      continue;
    }

    const prPath = shared.projectPrPath(project);
    const prs = shared.safeJsonArr(prPath);
    const abandonedPrs = prs.filter(pr =>
      pr.status === shared.PR_STATUS.ABANDONED
      && !pr._reconciliation404Confirmed
      && shared.isPrCompatibleWithProject(project, pr, pr.url || '')
    );
    if (abandonedPrs.length === 0) continue;

    const orgBase = getAdoOrgBase(project);
    // Loop-invariant: every URL below addresses the same repository.
    const repoApiBase = `${orgBase}/${project.adoProject}/_apis/git/repositories/${encodeURIComponent(adoRepositoryId)}`;
    const probeKey = `${project.adoOrg}/${project.adoProject}/${adoRepositoryId}`;

    let probeResult = probeCache.get(probeKey);
    if (probeResult === undefined) {
      try {
        const repoData = await adoFetch(`${repoApiBase}?api-version=7.1`, token);
        probeResult = repoData ? 'ok' : 'fail';
      } catch (err) {
        log('warn', `Abandoned PR reconciliation: ADO base-repo probe for ${probeKey} threw: ${err.message}`);
        probeResult = 'fail';
      }
      probeCache.set(probeKey, probeResult);
    }
    if (probeResult === 'fail') {
      // Without a healthy base repo a per-PR 404 proves nothing — skip the lot.
      log('warn', `Abandoned PR reconciliation: skipping ${probeKey} (${abandonedPrs.length} PR${abandonedPrs.length === 1 ? '' : 's'}) — base-repo probe failed, retry next startup`);
      skipped += abandonedPrs.length;
      continue;
    }

    const updates = []; // { prNumber, action, newStatus?, mergedAt? }
    for (const pr of abandonedPrs) {
      const prNum = shared.getPrNumber(pr);
      if (!prNum) continue;
      try {
        const prData = await adoFetch(`${repoApiBase}/pullrequests/${prNum}?api-version=7.1`, token);
        if (!prData) {
          log('warn', `Skipped ADO PR #${prNum} (${probeKey}): empty response, retry next startup`);
          errored++;
          continue;
        }
        let newStatus;
        let reason;
        if (prData.status === 'completed') {
          newStatus = shared.PR_STATUS.MERGED;
          reason = 'was merged';
        } else if (prData.status === 'active') {
          newStatus = shared.PR_STATUS.ACTIVE;
          reason = 'was active';
        } else if (prData.status === 'abandoned') {
          // Genuinely abandoned on ADO too — mark so we don't re-probe.
          updates.push({ prNumber: prNum, action: 'confirm404' });
          confirmedDeleted++;
          log('info', `Confirmed ADO PR #${prNum} (${probeKey}): truly abandoned, leaving abandoned`);
          continue;
        } else {
          log('warn', `Skipped ADO PR #${prNum} (${probeKey}): unknown status ${JSON.stringify(prData.status)}`);
          errored++;
          continue;
        }
        updates.push({
          prNumber: prNum,
          action: 'flip',
          newStatus,
          // closedDate is only a merge timestamp for a completed PR; never
          // stamp mergedAt onto a record that is being flipped to active.
          mergedAt: newStatus === shared.PR_STATUS.MERGED ? (prData.closedDate || null) : null,
        });
        flipped++;
        log('info', `Reconciled ADO PR #${prNum} (${probeKey}): abandoned → ${newStatus} (${reason})`);
      } catch (err) {
        const msg = String(err?.message || '');
        if (notFoundPattern.test(msg)) {
          // 404 on a specific PR with base-probe OK → genuinely deleted.
          updates.push({ prNumber: prNum, action: 'confirm404' });
          confirmedDeleted++;
          log('info', `Confirmed ADO PR #${prNum} (${probeKey}): truly deleted (404), leaving abandoned`);
        } else {
          log('warn', `Abandoned PR reconciliation error on ADO PR #${prNum} (${probeKey}): ${msg}`);
          errored++;
        }
      }
    }

    if (updates.length > 0) {
      const reconciledAt = ts();
      shared.mutatePullRequests(prPath, (currentPrs) => {
        for (const upd of updates) {
          const pr = currentPrs.find(p => shared.getPrNumber(p) === upd.prNumber);
          if (!pr) continue;
          // Stale-write guard: our decision was made from a snapshot read
          // before the (awaited) network probes. If any other writer has
          // moved the record out of `abandoned` since then — merged or
          // otherwise — its fresher status wins and we leave the record alone.
          if (pr.status !== shared.PR_STATUS.ABANDONED) continue;
          if (upd.action === 'flip') {
            pr.status = upd.newStatus;
            if (upd.mergedAt && !pr.mergedAt) pr.mergedAt = upd.mergedAt;
            // Clear 404-counter state left over from the false flip.
            delete pr._consecutive404s;
            delete pr._404Count;
            delete pr._404FirstAt;
            pr._reconciledAt = reconciledAt;
            pr._reconciledFrom = 'startup-pass';
          } else if (upd.action === 'confirm404') {
            pr._reconciledAt = reconciledAt;
            pr._reconciliation404Confirmed = true;
          }
        }
        return currentPrs;
      });
    }
  }

  return { flipped, confirmedDeleted, skipped, errored };
}
|
|
1760
|
+
|
|
1605
1761
|
module.exports = {
|
|
1606
1762
|
getAdoToken,
|
|
1607
1763
|
adoFetch,
|
|
1608
1764
|
pollPrStatus,
|
|
1609
1765
|
pollPrHumanComments,
|
|
1610
1766
|
reconcilePrs,
|
|
1767
|
+
reconcileAbandonedPrs, // W-mp60tw0u000j3931 — one-shot startup re-probe of abandoned PRs
|
|
1611
1768
|
checkLiveReviewStatus,
|
|
1612
1769
|
checkLiveBuildAndConflict,
|
|
1613
1770
|
needsAdoPollRetry,
|
package/engine/cli.js
CHANGED
|
@@ -709,6 +709,32 @@ const commands = {
|
|
|
709
709
|
}
|
|
710
710
|
})();
|
|
711
711
|
|
|
712
|
+
// W-mp60tw0u000j3931: one-shot startup reconciliation for `abandoned` PRs.
// Companion to the forward-looking 404 hardening (W-mp5trwh60008386d): the
// hardening stops new false-flips, this pass repairs PRs that were already
// wrongly marked `abandoned` (e.g. the 16 PRs hit by the 2026-05-14
// gh-auth-flip incident). The reconciler is version-gated through
// engine/state.json, so it only does work after a bump of
// ENGINE_DEFAULTS.abandonedReconciliationVersion. The returned promise is
// intentionally not awaited — boot must not block on network calls — and
// every failure path is logged instead of propagated.
(function startupReconcileAbandonedPrs() {
  let pass;
  try {
    // Synchronous failures (module load, or a throw before a promise exists)
    // are reported separately from failures of the pass itself.
    const reconciler = require('./abandoned-pr-reconciliation');
    pass = Promise.resolve(reconciler.runStartupReconciliation(config));
  } catch (err) {
    e.log('warn', `Abandoned PR reconciliation failed to start at boot: ${err.message}`);
    return;
  }

  pass
    .then((outcome) => {
      // Only announce on the console when something was actually repaired.
      const flippedCount = outcome.skipped ? 0 : outcome.totals.flipped;
      if (flippedCount > 0) {
        console.log(` Reconciled ${flippedCount} abandoned PR(s) → active/merged/closed`);
      }
    })
    .catch((err) => {
      e.log('warn', `Abandoned PR reconciliation failed at boot: ${err.message}`);
    });
})();
|
|
737
|
+
|
|
712
738
|
// Initial tick
|
|
713
739
|
e.tick();
|
|
714
740
|
|
package/engine/github.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
const shared = require('./shared');
|
|
8
|
-
const { exec, execAsync, getProjects, projectPrPath, projectWorkItemsPath, safeJson, safeJsonArr, safeWrite, mutateJsonFileLocked, MINIONS_DIR, getPrLinks, backfillPrPrdItems, log, ts, dateStamp, PR_STATUS, PR_POLLABLE_STATUSES, ENGINE_DEFAULTS, createThrottleTracker, getProjectOrg } = shared;
|
|
8
|
+
const { exec, execAsync, getProjects, projectPrPath, projectWorkItemsPath, safeJson, safeJsonArr, safeWrite, mutateJsonFileLocked, mutatePullRequests, MINIONS_DIR, getPrLinks, backfillPrPrdItems, log, ts, dateStamp, PR_STATUS, PR_POLLABLE_STATUSES, ENGINE_DEFAULTS, createThrottleTracker, getProjectOrg } = shared;
|
|
9
9
|
const { getPrs } = require('./queries');
|
|
10
10
|
const { MINIONS_COMMENT_MARKER_RE } = require('./gh-comment');
|
|
11
11
|
const path = require('path');
|
|
@@ -1201,10 +1201,159 @@ async function checkLiveBuildAndConflict(pr, project) {
|
|
|
1201
1201
|
}
|
|
1202
1202
|
}
|
|
1203
1203
|
|
|
1204
|
+
// ─── One-Shot Startup Reconciliation for Abandoned PRs (W-mp60tw0u000j3931) ───
//
// Pairs with the W-mp5trwh60008386d 404-hardening fix in pollPrStatus.
// pollPrStatus only iterates PR_POLLABLE_STATUSES = {active, linked}, so a PR
// with status 'abandoned' is terminal for the regular poll loop. That is the
// right default (no per-tick re-probing of dead PRs), but it leaves historical
// false-flips (e.g. the 16 PRs marked abandoned by the 2026-05-14
// gh-auth-flip incident) stuck unless something explicitly re-probes them.
//
// This function is that explicit re-probe. Per GitHub project it:
//   1. collects locally-abandoned PRs without `_reconciliation404Confirmed`;
//   2. probes the base repo once per slug (cached for this pass) — a failed
//      probe skips all of that slug's PRs, since a per-PR 404 would then
//      prove nothing;
//   3. fetches `/pulls/{n}` per PR and maps the result:
//        merged → merged, open → active, closed (not merged) → closed,
//        GH_NOT_FOUND → stays abandoned + `_reconciliation404Confirmed: true`,
//        falsy/unknown → counted as errored, left for a later pass;
//   4. applies all updates for the project in one mutatePullRequests call.
//
// Caller: engine/abandoned-pr-reconciliation.js (version-gated).
//
// @param {object} config — engine config, used to enumerate projects
// @returns {Promise<{flipped:number, confirmedDeleted:number, skipped:number, errored:number}>}
//   Counters reflect probe outcomes; a flip that is dropped at writeback
//   because the record changed concurrently is still counted as flipped.
async function reconcileAbandonedPrs(config) {
  const projects = shared.getProjects(config).filter(isGitHub);
  let flipped = 0;
  let confirmedDeleted = 0;
  let skipped = 0;
  let errored = 0;
  // Base-repo probe results for THIS pass only: slug → 'ok' | 'fail'.
  // Multiple PRs (and occasionally multiple projects) share a slug.
  const slugProbeCache = new Map();

  for (const project of projects) {
    const slug = getRepoSlug(project);
    if (!slug) continue;

    const prPath = projectPrPath(project);
    const prs = safeJsonArr(prPath);
    // Skip PRs already confirmed by an earlier pass — re-probing a PR we know
    // is gone on the server would only burn API quota.
    const abandonedPrs = prs.filter(pr =>
      pr.status === PR_STATUS.ABANDONED
      && !pr._reconciliation404Confirmed
      && shared.isPrCompatibleWithProject(project, pr, pr.url || '')
    );
    if (abandonedPrs.length === 0) continue;

    let probeResult = slugProbeCache.get(slug);
    if (probeResult === undefined) {
      const probe = await ghApi('', slug);
      probeResult = (probe === null || probe === GH_NOT_FOUND) ? 'fail' : 'ok';
      slugProbeCache.set(slug, probeResult);
    }
    if (probeResult === 'fail') {
      log('warn', `Abandoned PR reconciliation: skipping ${slug} (${abandonedPrs.length} PR${abandonedPrs.length === 1 ? '' : 's'}) — base-repo probe failed, retry next startup`);
      skipped += abandonedPrs.length;
      continue;
    }

    // Collect decisions first, then write once. Records are matched by PR
    // number on writeback (not id): per the original note, mutatePullRequests
    // normalises records and may lowercase the id slug.
    const updates = []; // { prNumber, action, newStatus?, mergedAt? }
    for (const pr of abandonedPrs) {
      const prNum = shared.getPrNumber(pr);
      if (!prNum) continue;

      try {
        const prData = await ghApi(`/pulls/${prNum}`, slug);
        if (prData === GH_NOT_FOUND) {
          // 404 with base-probe OK → genuinely deleted. Mark so we don't
          // re-probe this PR on future startups.
          updates.push({ prNumber: prNum, action: 'confirm404' });
          confirmedDeleted++;
          log('info', `Confirmed PR #${prNum} (${slug}): truly deleted, leaving abandoned`);
          continue;
        }
        if (!prData) {
          // Falsy = network/rate-limit/other non-404 failure. Leave the record
          // unmarked so a later pass can retry it.
          log('warn', `Skipped PR #${prNum} (${slug}): API error, retry next startup`);
          errored++;
          continue;
        }

        let newStatus;
        let reason;
        if (prData.merged) {
          newStatus = PR_STATUS.MERGED;
          reason = 'was merged';
        } else if (prData.state === 'open') {
          newStatus = PR_STATUS.ACTIVE;
          reason = 'was open';
        } else if (prData.state === 'closed') {
          newStatus = PR_STATUS.CLOSED;
          reason = 'was closed';
        } else {
          // Unknown state — leave as-is and unmarked; wait for a clearer
          // signal on a later pass.
          log('warn', `Skipped PR #${prNum} (${slug}): unknown GitHub state ${JSON.stringify(prData.state)}`);
          errored++;
          continue;
        }
        updates.push({
          prNumber: prNum,
          action: 'flip',
          newStatus,
          // Only a merged flip carries a merge timestamp.
          mergedAt: newStatus === PR_STATUS.MERGED ? (prData.merged_at || null) : null,
        });
        flipped++;
        log('info', `Reconciled PR #${prNum} (${slug}): abandoned → ${newStatus} (${reason})`);
      } catch (err) {
        log('warn', `Abandoned PR reconciliation error on PR #${prNum} (${slug}): ${err.message}`);
        errored++;
      }
    }

    if (updates.length > 0) {
      const reconciledAt = ts();
      mutatePullRequests(prPath, (currentPrs) => {
        for (const upd of updates) {
          const pr = currentPrs.find(p => shared.getPrNumber(p) === upd.prNumber);
          if (!pr) continue;
          // Stale-write guard: our decision was made from a snapshot read
          // before the (awaited) network probes. If any other writer has
          // moved the record out of `abandoned` since then — merged or
          // otherwise — its fresher status wins and we leave the record alone.
          if (pr.status !== PR_STATUS.ABANDONED) continue;
          if (upd.action === 'flip') {
            pr.status = upd.newStatus;
            if (upd.mergedAt && !pr.mergedAt) pr.mergedAt = upd.mergedAt;
            // Clear stale 404-counter state from the false-flip era.
            delete pr._consecutive404s;
            delete pr._404Count;
            delete pr._404FirstAt;
            pr._reconciledAt = reconciledAt;
            pr._reconciledFrom = 'startup-pass';
          } else if (upd.action === 'confirm404') {
            pr._reconciledAt = reconciledAt;
            pr._reconciliation404Confirmed = true;
          }
        }
        return currentPrs;
      });
    }
  }

  return { flipped, confirmedDeleted, skipped, errored };
}
|
|
1351
|
+
|
|
1204
1352
|
module.exports = {
|
|
1205
1353
|
pollPrStatus,
|
|
1206
1354
|
pollPrHumanComments,
|
|
1207
1355
|
reconcilePrs,
|
|
1356
|
+
reconcileAbandonedPrs, // W-mp60tw0u000j3931 — one-shot startup re-probe of abandoned PRs
|
|
1208
1357
|
checkLiveReviewStatus,
|
|
1209
1358
|
checkLiveBuildAndConflict,
|
|
1210
1359
|
isGhThrottled,
|
package/engine/shared.js
CHANGED
|
@@ -60,6 +60,11 @@ const MINIONS_DIR = process.env.MINIONS_TEST_DIR || resolveMinionsHome(false, {
|
|
|
60
60
|
const ENGINE_DIR = path.join(MINIONS_DIR, 'engine');
|
|
61
61
|
const CONTROL_PATH = path.join(ENGINE_DIR, 'control.json');
|
|
62
62
|
const COOLDOWNS_PATH = path.join(ENGINE_DIR, 'cooldowns.json');
|
|
63
|
+
// W-mp60tw0u000j3931: Persistent cross-restart engine state (migration markers,
|
|
64
|
+
// one-shot reconciliation versions, etc.). Distinct from CONTROL_PATH which is
|
|
65
|
+
// process-lifetime (state/pid/ownerToken). See ENGINE_DEFAULTS.abandonedReconciliationVersion
|
|
66
|
+
// for the first consumer.
|
|
67
|
+
const ENGINE_STATE_PATH = path.join(ENGINE_DIR, 'state.json');
|
|
63
68
|
const PR_LINKS_PATH = path.join(MINIONS_DIR, 'engine', 'pr-links.json');
|
|
64
69
|
const PINNED_ITEMS_PATH = path.join(MINIONS_DIR, 'engine', 'kb-pins.json');
|
|
65
70
|
const LOG_PATH = path.join(MINIONS_DIR, 'engine', 'log.json');
|
|
@@ -725,6 +730,22 @@ function mutateControl(mutator) {
|
|
|
725
730
|
}, { defaultValue: { state: 'stopped', pid: null }, skipWriteIfUnchanged: true });
|
|
726
731
|
}
|
|
727
732
|
|
|
733
|
+
// W-mp60tw0u000j3931: read-modify-write helper for engine/state.json, the
// persistent cross-restart engine state file (migration markers, one-shot
// reconciliation versions, ...). Delegates to mutateJsonFileLocked, same shape
// as mutateControl.
//
// `mutator` receives the current state — replaced by a fresh `{}` when the
// stored value is missing, a non-object, or an array — and may either mutate
// it in place or return a replacement; a falsy return keeps the object that
// was passed in. Returns whatever mutateJsonFileLocked returns.
function mutateEngineState(mutator) {
  const lockOptions = { defaultValue: {}, skipWriteIfUnchanged: true };
  const applyMutator = (raw) => {
    const isPlainRecord = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
    const current = isPlainRecord ? raw : {};
    const replacement = mutator(current);
    return replacement || current;
  };
  return mutateJsonFileLocked(ENGINE_STATE_PATH, applyMutator, lockOptions);
}
|
|
742
|
+
|
|
743
|
+
// Read engine/state.json via safeJson. Always yields a plain object: anything
// that is falsy, not an object, or an array is replaced by a fresh `{}`.
function readEngineState() {
  const parsed = safeJson(ENGINE_STATE_PATH);
  const usable = Boolean(parsed) && typeof parsed === 'object' && !Array.isArray(parsed);
  return usable ? parsed : {};
}
|
|
748
|
+
|
|
728
749
|
function mutateCooldowns(mutator) {
|
|
729
750
|
return mutateJsonFileLocked(COOLDOWNS_PATH, (data) => {
|
|
730
751
|
if (!data || typeof data !== 'object' || Array.isArray(data)) data = {};
|
|
@@ -1101,6 +1122,15 @@ const ENGINE_DEFAULTS = {
|
|
|
1101
1122
|
// → no flip), so the constant is GitHub-only today but lives in shared defaults so a future
|
|
1102
1123
|
// ADO change can adopt the same semantics.
|
|
1103
1124
|
prAbandonConfirmCount: 3,
|
|
1125
|
+
// W-mp60tw0u000j3931: One-shot startup reconciliation pass for `abandoned` PRs runs
|
|
1126
|
+
// exactly once per bump of this constant. The engine compares this value against
|
|
1127
|
+
// engine/state.json:lastAbandonedReconciliationVersion at boot; if the on-disk version
|
|
1128
|
+
// is lower, every project's pull-requests.json is scanned, every abandoned PR is
|
|
1129
|
+
// re-probed, and any false-flipped PRs (e.g. transient 404 victims from before the
|
|
1130
|
+
// hardening in W-mp5trwh60008386d shipped) are flipped back to active/merged/closed
|
|
1131
|
+
// based on their live API state. Bump this when reconciliation logic itself changes
|
|
1132
|
+
// and we want it to re-run once on every install.
|
|
1133
|
+
abandonedReconciliationVersion: 1,
|
|
1104
1134
|
watchesIncludeBehindBy: false, // opt-in: when true, GitHub PR poll calls /compare/{base}...{head} once per pr per pollPrStatusEvery cadence to populate pr.behindBy (powers the `behind-master` watch predicate). Off by default to avoid the extra API call. ADO PRs always get null (no commit-graph walk yet).
|
|
1105
1135
|
autoCompletePrs: false, // auto-merge PRs when builds green + review approved (opt-in)
|
|
1106
1136
|
prMergeMethod: 'squash', // merge method: squash, merge, rebase
|
|
@@ -2305,6 +2335,11 @@ function sanitizePath(file, baseDir) {
|
|
|
2305
2335
|
|
|
2306
2336
|
const _DANGEROUS_KEYS = new Set(['__proto__', 'constructor', 'prototype']);
|
|
2307
2337
|
|
|
2338
|
+
// DoS caps for the recursive walk. Reaching either limit is treated as
|
|
2339
|
+
// "dangerous" — see hasDangerousKey() for rationale (P-e8b1d3a6 / F6).
|
|
2340
|
+
const HAS_DANGEROUS_KEY_MAX_DEPTH = 64;
|
|
2341
|
+
const HAS_DANGEROUS_KEY_MAX_NODES = 10000;
|
|
2342
|
+
|
|
2308
2343
|
/**
|
|
2309
2344
|
* Detect the presence of prototype-pollution attack keys in a JSON-decoded payload.
|
|
2310
2345
|
*
|
|
@@ -2315,43 +2350,55 @@ const _DANGEROUS_KEYS = new Set(['__proto__', 'constructor', 'prototype']);
|
|
|
2315
2350
|
* but downstream code that shallow-merges the payload into a target object
|
|
2316
2351
|
* CAN elevate it into a prototype write.
|
|
2317
2352
|
*
|
|
2318
|
-
* Contract is **rejection, not sanitization**: we
|
|
2319
|
-
*
|
|
2320
|
-
*
|
|
2353
|
+
* Contract is **rejection, not sanitization**: we walk the full tree and
|
|
2354
|
+
* return a boolean. To avoid DoS via deeply-nested or pathologically-wide
|
|
2355
|
+
* inputs (stack exhaustion, unbounded CPU), the walk is capped:
|
|
2356
|
+
*
|
|
2357
|
+
* - Recursion depth > {@link HAS_DANGEROUS_KEY_MAX_DEPTH} (64) → return true.
|
|
2358
|
+
* - Total visited nodes > {@link HAS_DANGEROUS_KEY_MAX_NODES} (10000) → return true.
|
|
2359
|
+
* Every recursive call counts (including primitive leaves and array elements),
|
|
2360
|
+
* so a pathologically wide payload trips the cap even if no key is dangerous.
|
|
2361
|
+
*
|
|
2362
|
+
* Returning `true` on overflow is the safe-by-default policy: the sole
|
|
2363
|
+
* caller (dashboard.js request-body guard) rejects on `true`, so degrading
|
|
2364
|
+
* to rejection is conservative. The caps also protect against cyclic
|
|
2365
|
+
* objects, since each visit increments the node counter.
|
|
2321
2366
|
*
|
|
2322
2367
|
* - Null / undefined / primitives → false.
|
|
2323
2368
|
* - Arrays are transparent: each element is checked at the same depth as the
|
|
2324
|
-
* array itself (an array does NOT consume a depth level)
|
|
2325
|
-
*
|
|
2326
|
-
* are intentionally NOT flagged.
|
|
2369
|
+
* array itself (an array does NOT consume a depth level), but each element
|
|
2370
|
+
* visit increments the node counter.
|
|
2327
2371
|
* - Never mutates the input.
|
|
2328
2372
|
*
|
|
2329
2373
|
* @param {*} obj - any JSON-decoded value
|
|
2330
2374
|
* @param {number} [_depth=0] - internal recursion counter; do not pass externally
|
|
2331
|
-
* @returns {boolean} true if any forbidden key is present
|
|
2375
|
+
* @param {{n:number}} [_nodeCount={n:0}] - internal mutable node counter; do not pass externally
|
|
2376
|
+
* @returns {boolean} true if any forbidden key is present, or if the depth/node cap is exceeded
|
|
2332
2377
|
*/
|
|
2333
|
-
function hasDangerousKey(obj, _depth = 0) {
|
|
2378
|
+
function hasDangerousKey(obj, _depth = 0, _nodeCount = { n: 0 }) {
|
|
2379
|
+
// DoS caps: count EVERY visited node (including primitive leaves and
|
|
2380
|
+
// array elements per F6 contract), and bail conservatively on either cap.
|
|
2381
|
+
// Returning `true` on overflow is the safe-by-default policy since the
|
|
2382
|
+
// sole caller (dashboard.js request-body guard) rejects on `true`.
|
|
2383
|
+
if (++_nodeCount.n > HAS_DANGEROUS_KEY_MAX_NODES) return true;
|
|
2384
|
+
if (_depth > HAS_DANGEROUS_KEY_MAX_DEPTH) return true;
|
|
2385
|
+
|
|
2334
2386
|
if (obj === null || obj === undefined || typeof obj !== 'object') return false;
|
|
2335
2387
|
|
|
2336
2388
|
// Arrays are transparent — preserve depth when recursing into elements.
|
|
2337
2389
|
if (Array.isArray(obj)) {
|
|
2338
2390
|
for (const elt of obj) {
|
|
2339
|
-
if (hasDangerousKey(elt, _depth)) return true;
|
|
2391
|
+
if (hasDangerousKey(elt, _depth, _nodeCount)) return true;
|
|
2340
2392
|
}
|
|
2341
2393
|
return false;
|
|
2342
2394
|
}
|
|
2343
2395
|
|
|
2344
|
-
// Object: check own keys at the current depth.
|
|
2396
|
+
// Object: check own keys at the current depth, then recurse into values.
|
|
2345
2397
|
for (const key of Object.keys(obj)) {
|
|
2346
2398
|
if (_DANGEROUS_KEYS.has(key)) return true;
|
|
2347
2399
|
}
|
|
2348
|
-
|
|
2349
|
-
// Stop after one level of object nesting. Deeper recursion is an explicit
|
|
2350
|
-
// non-goal (see DoS note in the header).
|
|
2351
|
-
if (_depth >= 1) return false;
|
|
2352
|
-
|
|
2353
2400
|
for (const v of Object.values(obj)) {
|
|
2354
|
-
if (hasDangerousKey(v, _depth + 1)) return true;
|
|
2401
|
+
if (hasDangerousKey(v, _depth + 1, _nodeCount)) return true;
|
|
2355
2402
|
}
|
|
2356
2403
|
return false;
|
|
2357
2404
|
}
|
|
@@ -3664,6 +3711,7 @@ module.exports = {
|
|
|
3664
3711
|
openUrlInBrowser,
|
|
3665
3712
|
CONTROL_PATH,
|
|
3666
3713
|
COOLDOWNS_PATH,
|
|
3714
|
+
ENGINE_STATE_PATH, // W-mp60tw0u000j3931
|
|
3667
3715
|
PR_LINKS_PATH,
|
|
3668
3716
|
PINNED_ITEMS_PATH,
|
|
3669
3717
|
LOG_PATH,
|
|
@@ -3691,6 +3739,8 @@ module.exports = {
|
|
|
3691
3739
|
withFileLock,
|
|
3692
3740
|
mutateJsonFileLocked,
|
|
3693
3741
|
mutateControl,
|
|
3742
|
+
mutateEngineState, // W-mp60tw0u000j3931
|
|
3743
|
+
readEngineState, // W-mp60tw0u000j3931
|
|
3694
3744
|
mutateCooldowns,
|
|
3695
3745
|
mutateWorkItems,
|
|
3696
3746
|
reopenWorkItem,
|
|
@@ -3789,6 +3839,8 @@ module.exports = {
|
|
|
3789
3839
|
isAllowedOrigin,
|
|
3790
3840
|
buildSecurityHeaders,
|
|
3791
3841
|
hasDangerousKey,
|
|
3842
|
+
HAS_DANGEROUS_KEY_MAX_DEPTH,
|
|
3843
|
+
HAS_DANGEROUS_KEY_MAX_NODES,
|
|
3792
3844
|
validateProjectName,
|
|
3793
3845
|
validateProjectPath,
|
|
3794
3846
|
validatePid,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.1948",
|
|
3
|
+
"version": "0.1.1950",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|