npm - @yemi33/minions - Versions diffs - 0.1.2121 → 0.1.2123 - Mend

@yemi33/minions 0.1.2121 → 0.1.2123

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dashboard/js/settings.js CHANGED Viewed

@@ -274,6 +274,8 @@ async function openSettings() {
       settingsField('Restart Grace Period', 'set-restartGracePeriod', e.restartGracePeriod || 1200000, 'ms', 'Grace period before orphan detection on restart') +
       settingsField('Shutdown Timeout', 'set-shutdownTimeout', e.shutdownTimeout || 300000, 'ms', 'Max wait for agents during graceful shutdown') +
       settingsField('Meeting Round Timeout', 'set-meetingRoundTimeout', e.meetingRoundTimeout || 900000, 'ms', 'Auto-advance meeting round after this') +
+      settingsField('Steering Deferred Max', 'set-steeringDeferredMaxMs', e.steeringDeferredMaxMs || 900000, 'ms', 'Max wait for a runtime to emit a resumable checkpoint before a deferred steering message is flagged stranded. After this, the engine warns to live-output, marks _steeringStranded on the dispatch, and (when the steering store is present) sets store status=stranded. Default 15min; range 60s–4h.') +
+      settingsField('Steering Max Kill Retries', 'set-steeringMaxKillRetries', e.steeringMaxKillRetries ?? 3, '', 'Cap on graceful+escalation kill attempts after a steering kill is issued. Ladder waits 30s → 60s → 120s between attempts (last interval reused). Attempt 1 is graceful; attempts 2..cap are platform hard kills (taskkill /F /T on Windows; descendant-tree SIGKILL + pkill on Unix). Past cap, the engine gives up with a [steering-stuck] log + inbox notice. Default 3; range 1–5.') +
     '</div>';
   const paneWorktree =
@@ -839,6 +841,8 @@ async function saveSettings() {
       shutdownTimeout: document.getElementById('set-shutdownTimeout').value,
       restartGracePeriod: document.getElementById('set-restartGracePeriod').value,
       meetingRoundTimeout: document.getElementById('set-meetingRoundTimeout').value,
+      steeringDeferredMaxMs: document.getElementById('set-steeringDeferredMaxMs').value,
+      steeringMaxKillRetries: document.getElementById('set-steeringMaxKillRetries').value,
       operatorLogin: (document.getElementById('set-operatorLogin')?.value ?? '').trim(),
       autoApprovePlans: document.getElementById('set-autoApprovePlans').checked,
       evalLoop: document.getElementById('set-evalLoop').checked,

package/dashboard.js CHANGED Viewed

@@ -9225,6 +9225,9 @@ What would you like to discuss or change? When you're happy, say "approve" and I
           worktreeCreateTimeout: [60000], worktreeCreateRetries: [0, 3],
           idleAlertMinutes: [1], shutdownTimeout: [30000], restartGracePeriod: [60000],
           meetingRoundTimeout: [60000],
+          // W-mq066js7000fff1f-c (Gap B/C): steering safety-net knobs.
+          steeringDeferredMaxMs: [60000, 14400000],
+          steeringMaxKillRetries: [1, 5],
           versionCheckInterval: [60000],
           prPollStatusEvery: [1], prPollCommentsEvery: [1],
           agentBusyReassignMs: [0],

package/docs/harness-mode.md ADDED Viewed

@@ -0,0 +1,92 @@
+# Tri-Agent Harness Mode
+> Status: opt-in feature flag on scheduled tasks (`harness_mode: "tri_agent"`).
+> Shipped: W-mq07a9gf000jbc2b. Module: [`engine/harness.js`](../engine/harness.js).
+## What it is
+A way to turn one schedule firing into a coordinated **Planner → Generator → Evaluator** trio that iterates on a shared on-disk artifact until the artifact meets a rubric or hits an iteration cap. Useful for "produce a piece of work, then improve it" loops where a single agent call would either underspecify the task or produce uneven quality.
+The three roles in order:
+1. **Planner** (`ask` type, read-only) — reads the rubric, writes a short plan into the mission directory.
+2. **Generator** (defaults to `ask`, inherits `sched.type`) — produces the artifact at `<MINIONS_DIR>/engine/harness/<missionId>/artifact.md` per the plan.
+3. **Evaluator** (`ask`, read-only) — scores the artifact against the rubric and reports a verdict.
+If the evaluator's verdict score is below `harness_threshold` (and the iteration cap hasn't been hit), the engine appends a fresh `Generator → Evaluator` pair carrying the evaluator's feedback in the next generator's prompt. Loop continues until pass or cap.
+## Config schema (add to a schedule in `config.json`)
+```json
+{
+  "id": "weekly-design-review",
+  "title": "Tri-agent design review",
+  "cron": "0 9 * * MON",
+  "type": "ask",
+  "harness_mode": "tri_agent",
+  "harness_rubric": "Score 0-1. 1.0 = all sections complete with code examples. 0 = missing sections.",
+  "harness_threshold": 0.7,
+  "harness_max_iterations": 5
+}
+```
+| Field                    | Required | Default | Notes                                                                 |
+|--------------------------|----------|---------|-----------------------------------------------------------------------|
+| `harness_mode`           | yes      | —       | Must equal `"tri_agent"` to enable. Any other value falls back to plain scheduled work. |
+| `harness_rubric`         | yes      | —       | Non-empty string. Injected into every role's prompt. The evaluator scores against this. |
+| `harness_threshold`      | no       | `0.7`   | Number in `(0, 1]`. Verdict score `>= threshold` = pass; `<` = iterate. |
+| `harness_max_iterations` | no       | `5`     | Positive integer, capped at `20`. Counts generator iterations; the planner runs once, as part of iteration 1. |
+Invalid harness config logs a warning and **skips the firing without recording a schedule run**, so fixing the config and waiting for the next cron tick is enough to recover — no manual reset needed.
+## Lifecycle
+```
+cron fires
+  └─ scheduler.discoverScheduledWork detects harness_mode === 'tri_agent'
+       └─ validateHarnessConfig (skip+warn on failure)
+            └─ createTriAgentMission → 3 work items
+                 ├─ Planner   (iteration 1)
+                 ├─ Generator (iteration 1, depends on Planner)
+                 └─ Evaluator (iteration 1, depends on Generator)
+                      │
+                      ▼ (on success)
+                 lifecycle.runPostCompletionHooks
+                  └─ handleHarnessIterationResult
+                       └─ parseEvaluatorVerdict + shouldIterateAgain
+                            └─ if iterate: append Generator + Evaluator (iteration N+1)
+                                 └─ next tick dispatches them
+                            └─ if pass / cap / inconclusive: mission terminal
+```
+## Artifact layout
+```
+<MINIONS_DIR>/engine/harness/<missionId>/
+  └─ artifact.md            ← Generator writes here, Evaluator reads here
+```
+Mission ID format: `<scheduleId>-<unixMs>-<rand6>`. The mission directory is the contract — agents in all 3 roles get the same path injected into their prompts.
+## Evaluator verdict protocol
+The evaluator can signal pass/fail/score in either of two ways:
+- **Preferred (structured):** include the fields in the completion report sidecar:
+  ```json
+  { "harness_pass": true, "harness_score": 0.82, "harness_feedback": "all sections present" }
+  ```
+- **Fallback (text):** include `Score: 0.82` and `PASS` / `FAIL` in the summary. Structured fields win when both are present. `FAIL` takes precedence when both `PASS` and `FAIL` appear in the text.
+If neither signal is parseable, the harness treats the verdict as inconclusive and stops iterating (`shouldIterateAgain` returns false) to avoid an infinite loop driven by a silent agent.
+## Dedup behavior (engine.js)
+Within a single tick the standard scheduled-work dedup is keyed by `_scheduleId`, which would collapse the harness trio to one item. The harness trio share a `_missionId`; engine.js snapshots active mission IDs **before** the dedup loop so all 3 land together, while plain scheduled items keep the original `_scheduleId` dedup.
+## Operational notes
+- Tri-agent items are **schedule-driven** — there's no manual "fire a harness mission" entry point. Add a schedule with `harness_mode: "tri_agent"` to opt in.
+- Iteration pairs always reuse the original mission's artifact path, threshold, max-iterations, and rubric. The evaluator's verdict feedback is appended to the next generator's prompt.
+- Mission state lives entirely on disk: the work-items.json trio + the artifact file. No new DB tables.
+- Each iteration's evaluator is a separate work item, so dispatch retries, cooldowns, and steering apply normally to every role.

package/engine/ado.js CHANGED Viewed

@@ -698,10 +698,68 @@ function _hasPendingReReviewWi(pr) {
 let _adoTokenCache = { token: null, expiresAt: 0 };
 let _adoTokenFailedUntil = 0; // backoff: skip token acquisition calls until this timestamp
-// ─── ADO Throttle State ─────────────────────────────────────────────────────
-// Tracks rate-limiting (HTTP 429/503) from ADO API responses.
-// Uses shared createThrottleTracker factory: backoffMs starts at 60s, doubles, caps at 32 min.
-const _adoThrottle = createThrottleTracker({ label: 'ado', baseBackoffMs: 60000, maxBackoffMs: 32 * 60000 });
+// ─── ADO Throttle State (per-org) ───────────────────────────────────────────
+// Tracks rate-limiting (HTTP 429/503) from ADO API responses, isolated per ADO
+// org so a throttle storm on org A doesn't stall PR polling for org B.
+// Each tracker uses createThrottleTracker: backoffMs starts at 60s, doubles,
+// caps at 32 min, with 20% jitter (silently ignored on older shared.js until
+// the jitter foundation lands as W-mq03l6zh0006f0a1-a).
+// W-mq03l6zh0006f0a1-b — Per-org ADO throttle isolation.
+const _adoThrottlesByOrg = new Map();
+/** Canonicalize an orgBase URL or already-canonical key to a stable Map key.
+ *  Lowercases the org segment and prefers `dev.azure.com/<org>` even when the
+ *  source uses the legacy `<org>.visualstudio.com` host. */
+function canonicalAdoOrgKey(orgBaseOrUrl) {
+  if (!orgBaseOrUrl) return 'dev.azure.com/__unknown__';
+  const s = String(orgBaseOrUrl);
+  if (/^https?:\/\//i.test(s)) return resolveAdoOrgBaseFromUrl(s);
+  return s.toLowerCase();
+}
+/** Parse an ADO API URL down to a stable orgBase key.
+ *  Examples:
+ *    https://dev.azure.com/Microsoft/...        → dev.azure.com/microsoft
+ *    https://microsoft.visualstudio.com/...     → dev.azure.com/microsoft
+ *    https://microsoft.visualstudio.com/DefaultCollection/... → dev.azure.com/microsoft
+ *  Returns 'dev.azure.com/__unknown__' on parse failure so the throttle map
+ *  always has a non-null key. */
+function resolveAdoOrgBaseFromUrl(url) {
+  if (!url) return 'dev.azure.com/__unknown__';
+  try {
+    const u = new URL(url);
+    const host = u.hostname.toLowerCase();
+    if (host === 'dev.azure.com') {
+      const seg = (u.pathname.split('/').filter(Boolean)[0] || '').toLowerCase();
+      return seg ? `dev.azure.com/${seg}` : 'dev.azure.com/__unknown__';
+    }
+    if (host.endsWith('.visualstudio.com')) {
+      const org = host.slice(0, -'.visualstudio.com'.length);
+      return org ? `dev.azure.com/${org}` : 'dev.azure.com/__unknown__';
+    }
+    // Unknown host shape — derive a stable key from host + first path segment.
+    const seg = (u.pathname.split('/').filter(Boolean)[0] || '').toLowerCase();
+    return seg ? `${host}/${seg}` : host;
+  } catch {
+    return 'dev.azure.com/__unknown__';
+  }
+}
+/** Lazily get-or-create the per-org throttle tracker. */
+function getAdoThrottleForOrg(orgBase) {
+  const key = canonicalAdoOrgKey(orgBase);
+  let tracker = _adoThrottlesByOrg.get(key);
+  if (!tracker) {
+    tracker = createThrottleTracker({
+      label: `ado:${key}`,
+      baseBackoffMs: 60000,
+      maxBackoffMs: 32 * 60000,
+      jitterRatio: 0.2,
+    });
+    _adoThrottlesByOrg.set(key, tracker);
+  }
+  return tracker;
+}
 // ─── Auth Failure Tracking ──────────────────────────────────────────────────
 // Set when pollPrStatus encounters auth errors mid-loop. The engine checks this
@@ -742,6 +800,7 @@ async function adoFetch(url, token, opts = {}) {
   const body = (typeof opts === 'object' && opts.body) || undefined;
   const timeout = (typeof opts === 'object' && Number.isFinite(opts.timeout)) ? opts.timeout : 30000;
   const MAX_RETRIES = ADO_TOKEN_REFRESH_MAX_RETRIES;
+  const throttle = getAdoThrottleForOrg(resolveAdoOrgBaseFromUrl(url));
   const res = await fetch(url, {
     method,
     headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json' },
@@ -752,8 +811,8 @@ async function adoFetch(url, token, opts = {}) {
   if (res.status === 429 || res.status === 503) {
     const retryAfterSec = parseInt(res.headers.get('Retry-After'), 10);
     const retryAfterMs = (retryAfterSec > 0) ? retryAfterSec * 1000 : 0;
-    _adoThrottle.recordThrottle(retryAfterMs);
-    const state = _adoThrottle.getState();
+    throttle.recordThrottle(retryAfterMs);
+    const state = throttle.getState();
     throw new Error(`ADO API throttled (${res.status}): retry after ${Math.round((state.retryAfter - Date.now()) / 1000)}s`);
   }
   if (!res.ok) throw new Error(`ADO API ${method} ${res.status}: ${res.statusText}`);
@@ -771,12 +830,13 @@ async function adoFetch(url, token, opts = {}) {
   }
   const json = JSON.parse(text);
   // ── Success decay: decrement consecutiveHits, reset when fully recovered ──
-  _adoThrottle.recordSuccess();
+  throttle.recordSuccess();
   return json;
 }
 /** Fetch raw text from ADO API (for build logs which aren't JSON). */
 async function adoFetchText(url, token) {
+  const throttle = getAdoThrottleForOrg(resolveAdoOrgBaseFromUrl(url));
   const res = await fetch(url, {
     headers: { 'Authorization': `Bearer ${token}` },
     signal: AbortSignal.timeout(30000),
@@ -785,8 +845,8 @@ async function adoFetchText(url, token) {
   if (res.status === 429 || res.status === 503) {
     const retryAfterSec = parseInt(res.headers.get('Retry-After'), 10);
     const retryAfterMs = (retryAfterSec > 0) ? retryAfterSec * 1000 : 0;
-    _adoThrottle.recordThrottle(retryAfterMs);
-    const state = _adoThrottle.getState();
+    throttle.recordThrottle(retryAfterMs);
+    const state = throttle.getState();
     throw new Error(`ADO API throttled (${res.status}): retry after ${Math.round((state.retryAfter - Date.now()) / 1000)}s`);
   }
   if (!res.ok) throw new Error(`ADO API ${res.status}: ${res.statusText}`);
@@ -908,6 +968,21 @@ async function forEachActivePr(config, token, callback) {
     let projectUpdated = 0;
     const updatedRecords = [];
     const orgBase = getAdoOrgBase(project);
+    // W-mq03l6zh0006f0a1-b — Per-org throttle isolation: skip just this
+    // project when its org is rate-limited, keep iterating others.
+    if (isAdoThrottled(orgBase)) {
+      log('info', `[ado] PR polling skipped for ${project.name || project.repoName || orgBase} — ${orgBase} throttled`);
+      continue;
+    }
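+    // Per-project throttle skip — emit one log line per skipped project, then continue.
+    // NOTE(review): this comment described sub-item W-mq03l6zh0006f0a1-b as future work: it
+    // was to replace the global isAdoThrottled() probe with a per-org
+    // `isOrgBaseThrottled(orgBase)` check so a 429 on one org no longer pauses polling for
+    // healthy orgs. The argument-less probe below is true when ANY tracked org is throttled,
+    // so it still pauses healthy orgs here — confirm whether this block should remain.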
+    if (isAdoThrottled()) {
+      log('info', `[ado] PR poll skipped for ${project.name || project.repoName || 'unknown project'} — org ${orgBase} throttled`);
+      continue;
+    }
     // Parallelize PR polling within each project (max 5 concurrent to avoid rate limits)
     const CONCURRENCY = 5;
@@ -2241,11 +2316,53 @@ async function fetchSinglePrBuildStatus(project, prNumber) {
 // ─── ADO Throttle Queries ────────────────────────────────────────────────────
-/** Returns true if ADO is throttled and retryAfter hasn't elapsed. Auto-clears when retryAfter passes. */
-const isAdoThrottled = () => _adoThrottle.isThrottled();
+/** Returns true if ADO is throttled. If orgBase is provided, checks that org's
+ *  tracker only; if omitted, returns true when ANY tracked org is throttled
+ *  (back-compat OR semantics for existing call sites). Auto-clears stale state. */
+const isAdoThrottled = (orgBase) => {
+  if (orgBase != null) {
+    const tracker = _adoThrottlesByOrg.get(canonicalAdoOrgKey(orgBase));
+    return tracker ? tracker.isThrottled() : false;
+  }
+  for (const tracker of _adoThrottlesByOrg.values()) {
+    if (tracker.isThrottled()) return true;
+  }
+  return false;
+};
+/** Returns a snapshot of the throttle state.
+ *  - getAdoThrottleState(orgBase) → that org's `{ throttled, retryAfter, consecutiveHits }`.
+ *    Returns a zero-state default for orgs that have never been touched.
+ *  - getAdoThrottleState() → aggregate snapshot with back-compat fields
+ *    (`throttled` = OR, `retryAfter` = max, `consecutiveHits` = sum) plus a
+ *    `perOrg` map keyed by canonical orgBase. */
+const getAdoThrottleState = (orgBase) => {
+  if (orgBase != null) {
+    const tracker = _adoThrottlesByOrg.get(canonicalAdoOrgKey(orgBase));
+    return tracker ? tracker.getState() : { throttled: false, retryAfter: 0, consecutiveHits: 0 };
+  }
+  let throttled = false;
+  let retryAfter = 0;
+  let consecutiveHits = 0;
+  const perOrg = {};
+  for (const [key, tracker] of _adoThrottlesByOrg) {
+    const state = tracker.getState();
+    perOrg[key] = state;
+    if (state.throttled) throttled = true;
+    if (state.retryAfter > retryAfter) retryAfter = state.retryAfter;
+    consecutiveHits += state.consecutiveHits;
+  }
+  return { throttled, retryAfter, consecutiveHits, perOrg };
+};
-/** Returns a snapshot of the current throttle state. Calls isAdoThrottled() for a fresh value. */
-const getAdoThrottleState = () => _adoThrottle.getState();
+/** Returns the per-org tracker state map keyed by canonical orgBase. */
+const getAdoThrottleStateAll = () => {
+  const out = {};
+  for (const [key, tracker] of _adoThrottlesByOrg) {
+    out[key] = tracker.getState();
+  }
+  return out;
+};
 /**
  * Query ADO for an open PR on a specific branch.
@@ -2263,13 +2380,13 @@ async function findOpenPrOnBranch(project, branch) {
     logMissingAdoRepository(project, 'ADO branch PR lookup');
     return null;
   }
-  if (isAdoThrottled()) {
-    log('debug', `[ado] Skipping branch PR lookup for ${project.name || project.repoName || 'unknown project'}:${branch} — throttled`);
+  const orgBase = shared.getAdoOrgBase(project);
+  if (isAdoThrottled(orgBase)) {
+    log('debug', `[ado] Skipping branch PR lookup for ${project.name || project.repoName || 'unknown project'}:${branch} — ${orgBase} throttled`);
     return null;
   }
   const token = await getAdoToken();
   if (!token) return null;
-  const orgBase = shared.getAdoOrgBase(project);
   const sourceRef = encodeURIComponent(`refs/heads/${branch}`);
   const url = `${orgBase}/${project.adoProject}/_apis/git/repositories/${encodeURIComponent(adoRepositoryId)}/pullrequests?searchCriteria.status=active&searchCriteria.sourceRefName=${sourceRef}&api-version=7.1`;
   const data = await adoFetch(url, token);
@@ -2280,14 +2397,17 @@ async function findOpenPrOnBranch(project, branch) {
   return { prNumber, url: prUrl };
 }
-/** Reset throttle state — exported for testing only. */
+/** Reset throttle state — exported for testing only. Clears the entire per-org Map. */
 function _resetAdoThrottle() {
-  _adoThrottle._reset();
+  _adoThrottlesByOrg.clear();
 }
-/** Set throttle state directly — exported for testing only. */
-function _setAdoThrottleForTest(state) {
-  _adoThrottle._setForTest(state);
+/** Set throttle state directly — exported for testing only.
+ *  Default orgBase keeps back-compat with arg-less callers that just want
+ *  "some org is throttled" semantics through isAdoThrottled() / getAdoThrottleState(). */
+function _setAdoThrottleForTest(state, orgBase = 'dev.azure.com/__test__') {
+  const tracker = getAdoThrottleForOrg(orgBase);
+  tracker._setForTest(state);
 }
 /** Inject a token into the cache — exported for testing only.
@@ -2476,6 +2596,7 @@ module.exports = {
   isAdoAuthError, // exported for testing
   isAdoThrottled,
   getAdoThrottleState,
+  getAdoThrottleStateAll,
   fetchAdoPrMetadata,
   fetchSinglePrBuildStatus,
   findOpenPrOnBranch,

package/engine/github.js CHANGED Viewed

@@ -295,7 +295,10 @@ function resetSlugBackoff(slug) {
 // ─── GitHub Rate-Limit Throttle ────────────────────────────────────────────
 // Tracks rate-limiting from GitHub API (gh CLI exits non-zero with rate-limit messages).
 // GitHub rate limits reset hourly, so cap at 60 min.
-const _ghThrottle = createThrottleTracker({ label: 'gh', baseBackoffMs: 60000, maxBackoffMs: 60 * 60000 });
+// jitterRatio: 0.2 — apply ±20% random jitter to backoff to avoid thundering herd
+// when many concurrent gh calls race the same 1-hr reset window. See sub-item
+// W-mq03l6zh0006f0a1-a for the createThrottleTracker jitter math.
+const _ghThrottle = createThrottleTracker({ label: 'gh', baseBackoffMs: 60000, maxBackoffMs: 60 * 60000, jitterRatio: 0.2 });
 /** Returns true if GitHub is rate-limited and retryAfter hasn't elapsed. */
 const isGhThrottled = () => _ghThrottle.isThrottled();