@yemi33/minions 0.1.2121 → 0.1.2123
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/settings.js +4 -0
- package/dashboard.js +3 -0
- package/docs/harness-mode.md +92 -0
- package/engine/ado.js +142 -21
- package/engine/github.js +4 -1
- package/engine/harness.js +592 -0
- package/engine/lifecycle.js +91 -0
- package/engine/scheduler.js +40 -3
- package/engine/shared.js +16 -0
- package/engine/timeout.js +286 -21
- package/engine.js +66 -15
- package/package.json +1 -1
package/dashboard/js/settings.js
CHANGED
|
@@ -274,6 +274,8 @@ async function openSettings() {
|
|
|
274
274
|
settingsField('Restart Grace Period', 'set-restartGracePeriod', e.restartGracePeriod || 1200000, 'ms', 'Grace period before orphan detection on restart') +
|
|
275
275
|
settingsField('Shutdown Timeout', 'set-shutdownTimeout', e.shutdownTimeout || 300000, 'ms', 'Max wait for agents during graceful shutdown') +
|
|
276
276
|
settingsField('Meeting Round Timeout', 'set-meetingRoundTimeout', e.meetingRoundTimeout || 900000, 'ms', 'Auto-advance meeting round after this') +
|
|
277
|
+
settingsField('Steering Deferred Max', 'set-steeringDeferredMaxMs', e.steeringDeferredMaxMs || 900000, 'ms', 'Max wait for a runtime to emit a resumable checkpoint before a deferred steering message is flagged stranded. After this, the engine warns to live-output, marks _steeringStranded on the dispatch, and (when the steering store is present) sets store status=stranded. Default 15min; range 60s–4h.') +
|
|
278
|
+
settingsField('Steering Max Kill Retries', 'set-steeringMaxKillRetries', e.steeringMaxKillRetries ?? 3, '', 'Cap on graceful+escalation kill attempts after a steering kill is issued. Ladder waits 30s → 60s → 120s between attempts (last interval reused). Attempt 1 is graceful; attempts 2..cap are platform hard kills (taskkill /F /T on Windows; descendant-tree SIGKILL + pkill on Unix). Past cap, the engine gives up with a [steering-stuck] log + inbox notice. Default 3; range 1–5.') +
|
|
277
279
|
'</div>';
|
|
278
280
|
|
|
279
281
|
const paneWorktree =
|
|
@@ -839,6 +841,8 @@ async function saveSettings() {
|
|
|
839
841
|
shutdownTimeout: document.getElementById('set-shutdownTimeout').value,
|
|
840
842
|
restartGracePeriod: document.getElementById('set-restartGracePeriod').value,
|
|
841
843
|
meetingRoundTimeout: document.getElementById('set-meetingRoundTimeout').value,
|
|
844
|
+
steeringDeferredMaxMs: document.getElementById('set-steeringDeferredMaxMs').value,
|
|
845
|
+
steeringMaxKillRetries: document.getElementById('set-steeringMaxKillRetries').value,
|
|
842
846
|
operatorLogin: (document.getElementById('set-operatorLogin')?.value ?? '').trim(),
|
|
843
847
|
autoApprovePlans: document.getElementById('set-autoApprovePlans').checked,
|
|
844
848
|
evalLoop: document.getElementById('set-evalLoop').checked,
|
package/dashboard.js
CHANGED
|
@@ -9225,6 +9225,9 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
9225
9225
|
worktreeCreateTimeout: [60000], worktreeCreateRetries: [0, 3],
|
|
9226
9226
|
idleAlertMinutes: [1], shutdownTimeout: [30000], restartGracePeriod: [60000],
|
|
9227
9227
|
meetingRoundTimeout: [60000],
|
|
9228
|
+
// W-mq066js7000fff1f-c (Gap B/C): steering safety-net knobs.
|
|
9229
|
+
steeringDeferredMaxMs: [60000, 14400000],
|
|
9230
|
+
steeringMaxKillRetries: [1, 5],
|
|
9228
9231
|
versionCheckInterval: [60000],
|
|
9229
9232
|
prPollStatusEvery: [1], prPollCommentsEvery: [1],
|
|
9230
9233
|
agentBusyReassignMs: [0],
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Tri-Agent Harness Mode
|
|
2
|
+
|
|
3
|
+
> Status: opt-in feature flag on scheduled tasks (`harness_mode: "tri_agent"`).
|
|
4
|
+
> Shipped: W-mq07a9gf000jbc2b. Module: [`engine/harness.js`](../engine/harness.js).
|
|
5
|
+
|
|
6
|
+
## What it is
|
|
7
|
+
|
|
8
|
+
A way to turn one schedule firing into a coordinated **Planner → Generator → Evaluator** trio that iterates on a shared on-disk artifact until the artifact meets a rubric or hits an iteration cap. Useful for "produce a piece of work, then improve it" loops where a single agent call would either underspecify the task or produce uneven quality.
|
|
9
|
+
|
|
10
|
+
The three roles in order:
|
|
11
|
+
|
|
12
|
+
1. **Planner** (`ask` type, read-only) — reads the rubric, writes a short plan into the mission directory.
|
|
13
|
+
2. **Generator** (defaults to `ask`, inherits `sched.type`) — produces the artifact at `<MINIONS_DIR>/engine/harness/<missionId>/artifact.md` per the plan.
|
|
14
|
+
3. **Evaluator** (`ask`, read-only) — scores the artifact against the rubric and reports a verdict.
|
|
15
|
+
|
|
16
|
+
If the evaluator's verdict score is below `harness_threshold` (and the iteration cap hasn't been hit), the engine appends a fresh `Generator → Evaluator` pair carrying the evaluator's feedback in the next generator's prompt. The loop continues until the verdict passes, the iteration cap is reached, or the verdict is inconclusive (see "Evaluator verdict protocol" below).
|
|
17
|
+
|
|
18
|
+
## Config schema (add to a schedule in `config.json`)
|
|
19
|
+
|
|
20
|
+
```json
|
|
21
|
+
{
|
|
22
|
+
"id": "weekly-design-review",
|
|
23
|
+
"title": "Tri-agent design review",
|
|
24
|
+
"cron": "0 9 * * MON",
|
|
25
|
+
"type": "ask",
|
|
26
|
+
"harness_mode": "tri_agent",
|
|
27
|
+
"harness_rubric": "Score 0-1. 1.0 = all sections complete with code examples. 0 = missing sections.",
|
|
28
|
+
"harness_threshold": 0.7,
|
|
29
|
+
"harness_max_iterations": 5
|
|
30
|
+
}
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
| Field | Required | Default | Notes |
|
|
34
|
+
|--------------------------|----------|---------|-----------------------------------------------------------------------|
|
|
35
|
+
| `harness_mode` | yes | — | Must equal `"tri_agent"` to enable. Any other value falls back to plain scheduled work. |
|
|
36
|
+
| `harness_rubric` | yes | — | Non-empty string. Injected into every role's prompt. The evaluator scores against this. |
|
|
37
|
+
| `harness_threshold` | no | `0.7` | Number in `(0, 1]`. Verdict score `>= threshold` = pass; `<` = iterate. |
|
|
38
|
+
| `harness_max_iterations` | no | `5` | Positive integer, capped at `20`. Counts generator iterations; the planner runs only once, as part of iteration 1. |
|
|
39
|
+
|
|
40
|
+
Invalid harness config logs a warning and **skips the firing without recording a schedule run**, so fixing the config and waiting for the next cron tick is enough to recover — no manual reset needed.
|
|
41
|
+
|
|
42
|
+
## Lifecycle
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
cron fires
|
|
46
|
+
└─ scheduler.discoverScheduledWork detects harness_mode === 'tri_agent'
|
|
47
|
+
└─ validateHarnessConfig (skip+warn on failure)
|
|
48
|
+
└─ createTriAgentMission → 3 work items
|
|
49
|
+
├─ Planner (iteration 1)
|
|
50
|
+
├─ Generator (iteration 1, depends on Planner)
|
|
51
|
+
└─ Evaluator (iteration 1, depends on Generator)
|
|
52
|
+
│
|
|
53
|
+
▼ (on success)
|
|
54
|
+
lifecycle.runPostCompletionHooks
|
|
55
|
+
└─ handleHarnessIterationResult
|
|
56
|
+
└─ parseEvaluatorVerdict + shouldIterateAgain
|
|
57
|
+
└─ if iterate: append Generator + Evaluator (iteration N+1)
|
|
58
|
+
└─ next tick dispatches them
|
|
59
|
+
└─ if pass / cap / inconclusive: mission terminal
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Artifact layout
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
<MINIONS_DIR>/engine/harness/<missionId>/
|
|
66
|
+
└─ artifact.md ← Generator writes here, Evaluator reads here
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Mission ID format: `<scheduleId>-<unixMs>-<rand6>`. The mission directory is the contract — agents in all 3 roles get the same path injected into their prompts.
|
|
70
|
+
|
|
71
|
+
## Evaluator verdict protocol
|
|
72
|
+
|
|
73
|
+
The evaluator can signal pass/fail/score in either of two ways:
|
|
74
|
+
|
|
75
|
+
- **Preferred (structured):** include the fields in the completion report sidecar:
|
|
76
|
+
```json
|
|
77
|
+
{ "harness_pass": true, "harness_score": 0.82, "harness_feedback": "all sections present" }
|
|
78
|
+
```
|
|
79
|
+
- **Fallback (text):** include `Score: 0.82` and `PASS` / `FAIL` in the summary. Structured fields win when both are present. `FAIL` takes precedence when both `PASS` and `FAIL` appear in the text.
|
|
80
|
+
|
|
81
|
+
If neither signal is parseable, the harness treats the verdict as inconclusive and stops iterating (`shouldIterateAgain` returns false) to avoid an infinite loop driven by a silent agent.
|
|
82
|
+
|
|
83
|
+
## Dedup behavior (engine.js)
|
|
84
|
+
|
|
85
|
+
Within a single tick the standard scheduled-work dedup is keyed by `_scheduleId`, which would collapse the harness trio to one item. The harness trio share a `_missionId`; engine.js snapshots active mission IDs **before** the dedup loop so all 3 land together, while plain scheduled items keep the original `_scheduleId` dedup.
|
|
86
|
+
|
|
87
|
+
## Operational notes
|
|
88
|
+
|
|
89
|
+
- Tri-agent items are **schedule-driven** — there's no manual "fire a harness mission" entry point. Add a schedule with `harness_mode: "tri_agent"` to opt in.
|
|
90
|
+
- Iteration pairs always reuse the original mission's artifact path, threshold, max-iterations, and rubric. The evaluator's verdict feedback is appended to the next generator's prompt.
|
|
91
|
+
- Mission state lives entirely on disk: the work-items.json trio + the artifact file. No new DB tables.
|
|
92
|
+
- Each iteration's evaluator is a separate work item, so dispatch retries, cooldowns, and steering apply normally to every role.
|
package/engine/ado.js
CHANGED
|
@@ -698,10 +698,68 @@ function _hasPendingReReviewWi(pr) {
|
|
|
698
698
|
let _adoTokenCache = { token: null, expiresAt: 0 };
|
|
699
699
|
let _adoTokenFailedUntil = 0; // backoff: skip token acquisition calls until this timestamp
|
|
700
700
|
|
|
701
|
-
// ─── ADO Throttle State
|
|
702
|
-
// Tracks rate-limiting (HTTP 429/503) from ADO API responses
|
|
703
|
-
//
|
|
704
|
-
|
|
701
|
+
// ─── ADO Throttle State (per-org) ───────────────────────────────────────────
// W-mq03l6zh0006f0a1-b — Per-org ADO throttle isolation.
// Rate-limiting (HTTP 429/503) reported by ADO API responses is tracked
// separately for each ADO org, so a throttle storm on org A doesn't stall PR
// polling for org B.
// Every entry is built by createThrottleTracker: backoffMs starts at 60s,
// doubles, caps at 32 min, with 20% jitter (silently ignored on older
// shared.js until the jitter foundation lands as W-mq03l6zh0006f0a1-a).
// Keys are canonical org keys; values are the per-org trackers.
const _adoThrottlesByOrg = new Map();
|
|
709
|
+
|
|
710
|
+
/**
 * Turn an org base (full URL or an already-canonical key) into the stable
 * string used to key the per-org throttle Map.
 *
 * @param {*} orgBaseOrUrl - org base URL or key. Any falsy value yields the
 *   `dev.azure.com/__unknown__` sentinel.
 * @returns {string} For http(s) URLs, the key derived by
 *   resolveAdoOrgBaseFromUrl (which lowercases the org segment and prefers
 *   `dev.azure.com/<org>` over the legacy `<org>.visualstudio.com` host);
 *   for anything else, the input stringified and lowercased.
 */
function canonicalAdoOrgKey(orgBaseOrUrl) {
  if (!orgBaseOrUrl) {
    return 'dev.azure.com/__unknown__';
  }
  const raw = String(orgBaseOrUrl);
  const hasHttpScheme = /^https?:\/\//i.test(raw);
  return hasHttpScheme ? resolveAdoOrgBaseFromUrl(raw) : raw.toLowerCase();
}
|
|
719
|
+
|
|
720
|
+
/**
 * Reduce an ADO API URL to a stable orgBase key.
 *
 * Examples:
 *   https://dev.azure.com/Microsoft/...                       → dev.azure.com/microsoft
 *   https://microsoft.visualstudio.com/...                    → dev.azure.com/microsoft
 *   https://microsoft.visualstudio.com/DefaultCollection/...  → dev.azure.com/microsoft
 *
 * @param {*} url - URL to parse.
 * @returns {string} The org key. Falsy input, an unparseable URL, or a
 *   recognised ADO host with no org all yield 'dev.azure.com/__unknown__', so
 *   the throttle map always has a non-null key. Hosts of any other shape yield
 *   `<host>/<first path segment>`, or just `<host>` when the path is empty.
 */
function resolveAdoOrgBaseFromUrl(url) {
  const UNKNOWN_ORG_KEY = 'dev.azure.com/__unknown__';
  const LEGACY_HOST_SUFFIX = '.visualstudio.com';

  if (!url) return UNKNOWN_ORG_KEY;

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return UNKNOWN_ORG_KEY;
  }

  const hostname = parsed.hostname.toLowerCase();
  const [firstSegment = ''] = parsed.pathname.split('/').filter(Boolean);
  const orgSegment = firstSegment.toLowerCase();

  // Modern host: the org is the first path segment.
  if (hostname === 'dev.azure.com') {
    return orgSegment ? `dev.azure.com/${orgSegment}` : UNKNOWN_ORG_KEY;
  }

  // Legacy host: the org is everything in front of `.visualstudio.com`.
  if (hostname.endsWith(LEGACY_HOST_SUFFIX)) {
    const legacyOrg = hostname.slice(0, hostname.length - LEGACY_HOST_SUFFIX.length);
    return legacyOrg ? `dev.azure.com/${legacyOrg}` : UNKNOWN_ORG_KEY;
  }

  // Unknown host shape — derive a stable key from host + first path segment.
  return orgSegment ? `${hostname}/${orgSegment}` : hostname;
}
|
|
747
|
+
|
|
748
|
+
/**
 * Return the throttle tracker for an org, building and caching it on first use.
 *
 * @param {*} orgBase - org base URL or key; normalized with canonicalAdoOrgKey
 *   before the lookup.
 * @returns {object} The tracker stored in _adoThrottlesByOrg under the
 *   canonical key.
 */
function getAdoThrottleForOrg(orgBase) {
  const orgKey = canonicalAdoOrgKey(orgBase);

  const cached = _adoThrottlesByOrg.get(orgKey);
  if (cached) return cached;

  // First request for this org: create its tracker and remember it.
  const created = createThrottleTracker({
    label: `ado:${orgKey}`,
    baseBackoffMs: 60000,
    maxBackoffMs: 32 * 60000,
    jitterRatio: 0.2,
  });
  _adoThrottlesByOrg.set(orgKey, created);
  return created;
}
|
|
705
763
|
|
|
706
764
|
// ─── Auth Failure Tracking ──────────────────────────────────────────────────
|
|
707
765
|
// Set when pollPrStatus encounters auth errors mid-loop. The engine checks this
|
|
@@ -742,6 +800,7 @@ async function adoFetch(url, token, opts = {}) {
|
|
|
742
800
|
const body = (typeof opts === 'object' && opts.body) || undefined;
|
|
743
801
|
const timeout = (typeof opts === 'object' && Number.isFinite(opts.timeout)) ? opts.timeout : 30000;
|
|
744
802
|
const MAX_RETRIES = ADO_TOKEN_REFRESH_MAX_RETRIES;
|
|
803
|
+
const throttle = getAdoThrottleForOrg(resolveAdoOrgBaseFromUrl(url));
|
|
745
804
|
const res = await fetch(url, {
|
|
746
805
|
method,
|
|
747
806
|
headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json' },
|
|
@@ -752,8 +811,8 @@ async function adoFetch(url, token, opts = {}) {
|
|
|
752
811
|
if (res.status === 429 || res.status === 503) {
|
|
753
812
|
const retryAfterSec = parseInt(res.headers.get('Retry-After'), 10);
|
|
754
813
|
const retryAfterMs = (retryAfterSec > 0) ? retryAfterSec * 1000 : 0;
|
|
755
|
-
|
|
756
|
-
const state =
|
|
814
|
+
throttle.recordThrottle(retryAfterMs);
|
|
815
|
+
const state = throttle.getState();
|
|
757
816
|
throw new Error(`ADO API throttled (${res.status}): retry after ${Math.round((state.retryAfter - Date.now()) / 1000)}s`);
|
|
758
817
|
}
|
|
759
818
|
if (!res.ok) throw new Error(`ADO API ${method} ${res.status}: ${res.statusText}`);
|
|
@@ -771,12 +830,13 @@ async function adoFetch(url, token, opts = {}) {
|
|
|
771
830
|
}
|
|
772
831
|
const json = JSON.parse(text);
|
|
773
832
|
// ── Success decay: decrement consecutiveHits, reset when fully recovered ──
|
|
774
|
-
|
|
833
|
+
throttle.recordSuccess();
|
|
775
834
|
return json;
|
|
776
835
|
}
|
|
777
836
|
|
|
778
837
|
/** Fetch raw text from ADO API (for build logs which aren't JSON). */
|
|
779
838
|
async function adoFetchText(url, token) {
|
|
839
|
+
const throttle = getAdoThrottleForOrg(resolveAdoOrgBaseFromUrl(url));
|
|
780
840
|
const res = await fetch(url, {
|
|
781
841
|
headers: { 'Authorization': `Bearer ${token}` },
|
|
782
842
|
signal: AbortSignal.timeout(30000),
|
|
@@ -785,8 +845,8 @@ async function adoFetchText(url, token) {
|
|
|
785
845
|
if (res.status === 429 || res.status === 503) {
|
|
786
846
|
const retryAfterSec = parseInt(res.headers.get('Retry-After'), 10);
|
|
787
847
|
const retryAfterMs = (retryAfterSec > 0) ? retryAfterSec * 1000 : 0;
|
|
788
|
-
|
|
789
|
-
const state =
|
|
848
|
+
throttle.recordThrottle(retryAfterMs);
|
|
849
|
+
const state = throttle.getState();
|
|
790
850
|
throw new Error(`ADO API throttled (${res.status}): retry after ${Math.round((state.retryAfter - Date.now()) / 1000)}s`);
|
|
791
851
|
}
|
|
792
852
|
if (!res.ok) throw new Error(`ADO API ${res.status}: ${res.statusText}`);
|
|
@@ -908,6 +968,21 @@ async function forEachActivePr(config, token, callback) {
|
|
|
908
968
|
let projectUpdated = 0;
|
|
909
969
|
const updatedRecords = [];
|
|
910
970
|
const orgBase = getAdoOrgBase(project);
|
|
971
|
+
// W-mq03l6zh0006f0a1-b — Per-org throttle isolation: skip just this
|
|
972
|
+
// project when its org is rate-limited, keep iterating others.
|
|
973
|
+
if (isAdoThrottled(orgBase)) {
|
|
974
|
+
log('info', `[ado] PR polling skipped for ${project.name || project.repoName || orgBase} — ${orgBase} throttled`);
|
|
975
|
+
continue;
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
// NOTE(review): stale leftover. The per-org isAdoThrottled(orgBase) check directly above
|
|
979
|
+
// already landed with W-mq03l6zh0006f0a1-b. The arg-less isAdoThrottled() probe below is
|
|
980
|
+
// true when ANY tracked org is throttled, so it still skips this project even when its own
|
|
981
|
+
// org is healthy, which undoes the per-org isolation. Confirm and remove this block.
|
|
982
|
+
if (isAdoThrottled()) {
|
|
983
|
+
log('info', `[ado] PR poll skipped for ${project.name || project.repoName || 'unknown project'} — org ${orgBase} throttled`);
|
|
984
|
+
continue;
|
|
985
|
+
}
|
|
911
986
|
|
|
912
987
|
// Parallelize PR polling within each project (max 5 concurrent to avoid rate limits)
|
|
913
988
|
const CONCURRENCY = 5;
|
|
@@ -2241,11 +2316,53 @@ async function fetchSinglePrBuildStatus(project, prNumber) {
|
|
|
2241
2316
|
|
|
2242
2317
|
// ─── ADO Throttle Queries ────────────────────────────────────────────────────
|
|
2243
2318
|
|
|
2244
|
-
/** Returns true if ADO is throttled
|
|
2245
|
-
|
|
2319
|
+
/**
 * Report whether ADO is currently throttled.
 *
 * @param {*} [orgBase] - When non-null, only that org's tracker is consulted;
 *   an org that has no tracker yet counts as not throttled. When omitted, the
 *   result is true if ANY tracked org is throttled (back-compat OR semantics
 *   for existing call sites).
 * @returns {boolean} Throttle status as reported by the tracker's
 *   isThrottled() (which presumably also auto-clears stale state; confirm
 *   against createThrottleTracker).
 */
const isAdoThrottled = (orgBase) => {
  if (orgBase == null) {
    return [..._adoThrottlesByOrg.values()].some((tracker) => tracker.isThrottled());
  }
  const orgTracker = _adoThrottlesByOrg.get(canonicalAdoOrgKey(orgBase));
  return orgTracker ? orgTracker.isThrottled() : false;
};
|
|
2332
|
+
|
|
2333
|
+
/**
 * Snapshot the throttle state.
 *
 * @param {*} [orgBase] - When non-null, selects a single org.
 * @returns {object}
 *   - With orgBase: that org's tracker state, or the zero-state default
 *     `{ throttled: false, retryAfter: 0, consecutiveHits: 0 }` for an org
 *     that has never been touched.
 *   - Without orgBase: an aggregate with back-compat fields (`throttled` = OR
 *     across orgs, `retryAfter` = max, `consecutiveHits` = sum) plus a
 *     `perOrg` map keyed by canonical orgBase.
 */
const getAdoThrottleState = (orgBase) => {
  if (orgBase != null) {
    const orgTracker = _adoThrottlesByOrg.get(canonicalAdoOrgKey(orgBase));
    if (!orgTracker) {
      return { throttled: false, retryAfter: 0, consecutiveHits: 0 };
    }
    return orgTracker.getState();
  }

  const aggregate = { throttled: false, retryAfter: 0, consecutiveHits: 0, perOrg: {} };
  _adoThrottlesByOrg.forEach((tracker, orgKey) => {
    const snapshot = tracker.getState();
    aggregate.perOrg[orgKey] = snapshot;
    if (snapshot.throttled) aggregate.throttled = true;
    if (snapshot.retryAfter > aggregate.retryAfter) aggregate.retryAfter = snapshot.retryAfter;
    aggregate.consecutiveHits += snapshot.consecutiveHits;
  });
  return aggregate;
};
|
|
2246
2357
|
|
|
2247
|
-
/** Returns
|
|
2248
|
-
const
|
|
2358
|
+
/**
 * Collect every tracked org's throttle state.
 *
 * @returns {object} Plain object mapping each canonical orgBase key to the
 *   value returned by that tracker's getState().
 */
const getAdoThrottleStateAll = () => {
  const stateByOrg = {};
  _adoThrottlesByOrg.forEach((tracker, orgKey) => {
    stateByOrg[orgKey] = tracker.getState();
  });
  return stateByOrg;
};
|
|
2249
2366
|
|
|
2250
2367
|
/**
|
|
2251
2368
|
* Query ADO for an open PR on a specific branch.
|
|
@@ -2263,13 +2380,13 @@ async function findOpenPrOnBranch(project, branch) {
|
|
|
2263
2380
|
logMissingAdoRepository(project, 'ADO branch PR lookup');
|
|
2264
2381
|
return null;
|
|
2265
2382
|
}
|
|
2266
|
-
|
|
2267
|
-
|
|
2383
|
+
const orgBase = shared.getAdoOrgBase(project);
|
|
2384
|
+
if (isAdoThrottled(orgBase)) {
|
|
2385
|
+
log('debug', `[ado] Skipping branch PR lookup for ${project.name || project.repoName || 'unknown project'}:${branch} — ${orgBase} throttled`);
|
|
2268
2386
|
return null;
|
|
2269
2387
|
}
|
|
2270
2388
|
const token = await getAdoToken();
|
|
2271
2389
|
if (!token) return null;
|
|
2272
|
-
const orgBase = shared.getAdoOrgBase(project);
|
|
2273
2390
|
const sourceRef = encodeURIComponent(`refs/heads/${branch}`);
|
|
2274
2391
|
const url = `${orgBase}/${project.adoProject}/_apis/git/repositories/${encodeURIComponent(adoRepositoryId)}/pullrequests?searchCriteria.status=active&searchCriteria.sourceRefName=${sourceRef}&api-version=7.1`;
|
|
2275
2392
|
const data = await adoFetch(url, token);
|
|
@@ -2280,14 +2397,17 @@ async function findOpenPrOnBranch(project, branch) {
|
|
|
2280
2397
|
return { prNumber, url: prUrl };
|
|
2281
2398
|
}
|
|
2282
2399
|
|
|
2283
|
-
/** Reset throttle state — exported for testing only. */
|
|
2400
|
+
/**
 * Test-only helper: drop every per-org throttle tracker by clearing the whole
 * _adoThrottlesByOrg Map.
 */
function _resetAdoThrottle() {
  _adoThrottlesByOrg.clear();
}
|
|
2287
2404
|
|
|
2288
|
-
/** Set throttle state directly — exported for testing only.
|
|
2289
|
-
|
|
2290
|
-
|
|
2405
|
+
/**
 * Test-only helper: force an org's tracker into a given state.
 *
 * @param {*} state - Passed straight to the tracker's _setForTest().
 * @param {string} [orgBase='dev.azure.com/__test__'] - Org whose tracker is
 *   set (created on demand). The default keeps arg-less callers working when
 *   they only need "some org is throttled" semantics through isAdoThrottled() /
 *   getAdoThrottleState().
 */
function _setAdoThrottleForTest(state, orgBase = 'dev.azure.com/__test__') {
  getAdoThrottleForOrg(orgBase)._setForTest(state);
}
|
|
2292
2412
|
|
|
2293
2413
|
/** Inject a token into the cache — exported for testing only.
|
|
@@ -2476,6 +2596,7 @@ module.exports = {
|
|
|
2476
2596
|
isAdoAuthError, // exported for testing
|
|
2477
2597
|
isAdoThrottled,
|
|
2478
2598
|
getAdoThrottleState,
|
|
2599
|
+
getAdoThrottleStateAll,
|
|
2479
2600
|
fetchAdoPrMetadata,
|
|
2480
2601
|
fetchSinglePrBuildStatus,
|
|
2481
2602
|
findOpenPrOnBranch,
|
package/engine/github.js
CHANGED
|
@@ -295,7 +295,10 @@ function resetSlugBackoff(slug) {
|
|
|
295
295
|
// ─── GitHub Rate-Limit Throttle ────────────────────────────────────────────
|
|
296
296
|
// Tracks rate-limiting from GitHub API (gh CLI exits non-zero with rate-limit messages).
|
|
297
297
|
// GitHub rate limits reset hourly, so cap at 60 min.
|
|
298
|
-
|
|
298
|
+
// jitterRatio: 0.2 applies ±20% random jitter to the backoff, which avoids a
// thundering herd when many concurrent gh calls race the same 1-hr reset
// window. The jitter math lives in createThrottleTracker (sub-item
// W-mq03l6zh0006f0a1-a).
const _ghThrottle = createThrottleTracker({
  label: 'gh',
  baseBackoffMs: 60000,
  maxBackoffMs: 60 * 60000,
  jitterRatio: 0.2,
});
|
|
299
302
|
|
|
300
303
|
/** Returns true if GitHub is rate-limited and retryAfter hasn't elapsed. */
|
|
301
304
|
const isGhThrottled = () => _ghThrottle.isThrottled();
|