npm - @yemi33/minions - Versions diffs - 0.1.2122 → 0.1.2124 - Mend

@yemi33/minions 0.1.2122 → 0.1.2124

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dashboard/js/refresh.js +1 -1
package/dashboard/js/render-work-items.js +100 -1
package/dashboard/styles.css +2 -0
package/docs/README.md +1 -0
package/docs/harness-mode.md +92 -0
package/docs/workspace-manifests.md +104 -0
package/engine/ado.js +9 -0
package/engine/github.js +4 -1
package/engine/harness.js +592 -0
package/engine/lifecycle.js +91 -0
package/engine/scheduler.js +40 -3
package/engine/shared.js +269 -2
package/engine.js +91 -15
package/package.json +1 -1

package/dashboard/js/refresh.js CHANGED Viewed

@@ -146,7 +146,7 @@ const RENDER_VERSIONS = {
   dispatch: 2,
   engineLog: 2,
   metrics: 1,
-  workItems: 1,
+  workItems: 2,
   skills: 1,
   mcpServers: 1,
   schedules: 1,

package/dashboard/js/render-work-items.js CHANGED Viewed

@@ -15,8 +15,71 @@ const _WI_ENRICHMENT_FIELDS = [
   '_pr', '_prUrl', '_pendingReason', '_skipReason', '_blockedBy',
   '_humanFeedback', '_reopened', '_managedSpawnPartial', '_securityFlag',
   '_artifacts', 'referencesCount', 'acceptanceCriteriaCount',
+  // W-mq08kuog001110a6 — _preDispatchEval IS persisted to disk by
+  // engine/dispatch.js#_persistInvalidWorkItem, but the slim /api/status
+  // overlay may drop it on later passes. Carry it across the overlay so
+  // the Needs-Attention badge survives /state polls.
+  '_preDispatchEval',
 ];
+// W-mq08kuog001110a6 — Needs-Attention badge for stuck pending work items.
+//
+// A pending WI is "needs attention" when the engine is intentionally refusing
+// to dispatch it and a human (or follow-up dispatch) has to unblock it.
+// Surfaces existing engine state (_preDispatchEval / _pendingReason) — no
+// new fields, no dispatcher behavior change.
+//
+// Triggers:
+//   * _preDispatchEval.valid === false (pre-dispatch acceptance gate rejected)
+//   * _pendingReason in { pr_not_found, no_agent, budget_exceeded, dependency_unmet }
+//
+// Does NOT trigger for transient reasons that auto-clear:
+//   * cooldown / retry_cooldown — wait it out
+//   * already_dispatched — engine reconciles on next tick
+//   * branch_locked — wait for the holding dispatch
+//
+// Returns { kind, short, full } when the item needs attention, else null.
+// Pure helper (no DOM / escapeHtml deps) so it can be unit-tested in Node.
+function needsAttentionInfo(item) { // item: work-item record; yields { kind, short, full } or null
+  if (!item || item.status !== 'pending') return null; // a missing item or any non-pending status never gets the badge
+  if (item._preDispatchEval && item._preDispatchEval.valid === false) { // checked before _pendingReason, so a rejected eval takes precedence
+    var rawReason = String(item._preDispatchEval.reason || '(no reason recorded)'); // placeholder text when no reason was stored
+    var firstLine = rawReason.split(/\r?\n/, 1)[0] || rawReason;
+    return {
+      kind: 'pre_dispatch_eval',
+      short: 'Pre-dispatch evaluation rejected: ' + firstLine, // single-line summary
+      full: rawReason, // complete, possibly multi-line reason text
+    };
+  }
+  var reason = item._pendingReason;
+  if (!reason) return null; // nothing recorded, so nothing to surface
+  if (reason === 'pr_not_found') {
+    var prRef = item.prNumber || item.targetPr || item.pr_id || '?'; // first truthy PR reference, else '?'
+    var prMsg = 'Target PR not tracked: PR #' + prRef + '. The PR was likely merged/closed or never linked.';
+    return { kind: 'pr_not_found', short: prMsg, full: prMsg }; // short and full are the same text for this kind
+  }
+  if (reason === 'no_agent') {
+    var noAgentMsg = 'No agent assigned and routing.md has no match for type=' + (item.type || 'unknown') + '.';
+    return { kind: 'no_agent', short: noAgentMsg, full: noAgentMsg };
+  }
+  if (reason === 'budget_exceeded') {
+    var agent = item.agent || item.dispatched_to || '<unknown>'; // falls back to dispatched_to, then a placeholder
+    var budgetMsg = 'Assigned agent ' + agent + ' is over its maxBudgetUsd cap.';
+    return { kind: 'budget_exceeded', short: budgetMsg, full: budgetMsg };
+  }
+  if (reason === 'dependency_unmet') {
+    var deps = Array.isArray(item.depends_on) && item.depends_on.length // only a non-empty array is listed
+      ? item.depends_on.join(', ')
+      : '<unknown>';
+    var depMsg = 'Waiting on dependencies: ' + deps + '. (One or more are not yet done.)';
+    return { kind: 'dependency_unmet', short: depMsg, full: depMsg };
+  }
+  return null; // every other _pendingReason value gets no badge
+}
 // Pull each project's work-items.json straight off disk through
 // /state/projects/<name>/work-items.json. The endpoint is a static-file
 // passthrough with mtime+size ETag — there is no server-side cache in the
@@ -129,6 +192,15 @@ function wiRow(item) {
     '<td>' + priBadge(item.priority) + '</td>' +
     '<td>' + statusBadge(item.status || 'pending') +
       (item._reopened ? ' <span style="font-size:var(--text-xs);color:var(--purple);margin-left:4px" title="This item was reopened from a previously completed state">reopened</span>' : '') +
+      (function() {
+        // W-mq08kuog001110a6 — Needs-Attention badge for stuck pending WIs.
+        // Click anywhere on the row to open the modal where the full reason
+        // is rendered under "Why this is blocked".
+        var info = needsAttentionInfo(item);
+        if (!info) return '';
+        var tooltip = (info.short || '').replace(/\s+/g, ' ').slice(0, 120);
+        return ' <span class="pr-badge needs-attention" style="font-size:var(--text-xs);margin-left:4px" title="' + escapeHtml(tooltip) + ' — click row for full reason">&#x26A0; Needs attention</span>';
+      })() +
       (item._pendingReason && item.status === 'pending' && item._pendingReason !== 'already_dispatched' ? ' <span style="font-size:var(--text-xs);color:var(--muted);margin-left:4px" title="Pending reason: ' + escapeHtml(item._pendingReason) + '">' + escapeHtml(item._pendingReason.replace(/_/g, ' ')) + '</span>' : '') +
       (item._pendingReason === 'already_dispatched' && item.status === 'pending' ? ' <span style="font-size:var(--text-xs);color:var(--blue);margin-left:4px" title="In dispatch queue, waiting to be assigned">queued</span>' : '') +
       (item._managedSpawnPartial && Array.isArray(item._managedSpawnPartial.failed) && item._managedSpawnPartial.failed.length
@@ -597,7 +669,28 @@ function _wiRenderDetail(item) {
     (item._reopened ? ' <span style="font-size:var(--text-xs);color:var(--purple);margin-left:4px" title="This item was reopened from a previously completed state">reopened</span>' : '') + ' ' +
     '<span class="dispatch-type ' + (item.type || 'implement') + '">' + escapeHtml(item.type || 'implement') + '</span>' +
     '<span class="prd-item-priority ' + (item.priority || '') + '">' + escapeHtml(item.priority || 'medium') + '</span>' +
+    (function() {
+      // W-mq08kuog001110a6 — Needs-Attention chip in the detail modal header.
+      var info = needsAttentionInfo(item);
+      if (!info) return '';
+      return ' <span class="pr-badge needs-attention" style="font-size:var(--text-xs);margin-left:4px" title="See &quot;Why this is blocked&quot; below">&#x26A0; Needs attention</span>';
+    })() +
     '</div>';
+  // W-mq08kuog001110a6 — "Why this is blocked" section, rendered FIRST so the
+  // human eye lands on the unblocking reason before scrolling through agent,
+  // source, dates, etc. _preDispatchEval reasons are evaluator prose and can
+  // be multi-line — render verbatim in a <pre> to preserve newlines.
+  (function() {
+    var info = needsAttentionInfo(item);
+    if (!info) return;
+    html += field(
+      'Why this is blocked',
+      '<div style="border-left:3px solid var(--yellow);padding:8px 10px;background:rgba(210,153,34,0.08);border-radius:var(--radius-sm)">' +
+        '<div style="font-size:var(--text-sm);color:var(--muted);margin-bottom:4px">reason: <code>' + escapeHtml(info.kind) + '</code></div>' +
+        '<pre style="margin:0;font-size:var(--text-sm);white-space:pre-wrap;word-break:break-word;font-family:var(--font-mono, monospace)">' + escapeHtml(info.full) + '</pre>' +
+      '</div>'
+    );
+  })();
   // Description: rendered from the FULL record once it's hydrated. The /api/status
   // slice drops `description` to keep the SPA payload <500KB; on first paint we
   // either render the title as a placeholder or, when description is already
@@ -788,4 +881,10 @@ function openInboxNote(filename) {
   switchPage('inbox');
 }
-window.MinionsWork = { wiRow, renderWorkItems, editWorkItem, submitWorkItemEdit, deleteWorkItem, archiveWorkItem, toggleWorkItemArchive, retryWorkItem, wiPrev, wiNext, feedbackWorkItem, submitFeedback, openCreateWorkItemModal, openWorkItemDetail, openAllWorkItems, viewAgentOutput, openInboxNote };
+if (typeof window !== 'undefined') {
+  window.MinionsWork = { wiRow, renderWorkItems, editWorkItem, submitWorkItemEdit, deleteWorkItem, archiveWorkItem, toggleWorkItemArchive, retryWorkItem, wiPrev, wiNext, feedbackWorkItem, submitFeedback, openCreateWorkItemModal, openWorkItemDetail, openAllWorkItems, viewAgentOutput, openInboxNote, needsAttentionInfo };
+}
+// exported for testing — pure helpers safe to require under Node (no DOM deps).
+if (typeof module !== 'undefined' && module.exports) {
+  module.exports = { needsAttentionInfo };
+}

package/dashboard/styles.css CHANGED Viewed

@@ -322,6 +322,8 @@
   .pr-badge.no-build { background: var(--surface); color: var(--muted); border: 1px solid var(--border); }
   .pr-badge.build-stale { background: rgba(210,153,34,0.15); color: var(--orange); border: 1px dashed var(--orange); }
   .pr-badge.build-escalated { background: rgba(248,81,73,0.25); color: var(--red); border: 2px solid var(--red); font-weight: 600; }
+  /* W-mq08kuog001110a6 — Needs-Attention chip for stuck pending work items. */
+  .pr-badge.needs-attention { background: rgba(210,153,34,0.15); color: var(--yellow); border: 1px solid var(--yellow); font-weight: 600; cursor: help; }
   .error-details-btn { font-size: var(--text-xs); padding: var(--space-1) var(--space-3); margin-left: var(--space-2); background: rgba(248,81,73,0.15); color: var(--red); border: 1px solid var(--red); border-radius: var(--radius-lg); cursor: pointer; font-weight: 600; text-transform: uppercase; letter-spacing: 0.3px; }
   .error-details-btn:hover { background: rgba(248,81,73,0.3); }
   .pr-empty { color: var(--muted); font-style: italic; font-size: var(--text-md); padding: var(--space-6) 0; }

package/docs/README.md CHANGED Viewed

@@ -36,6 +36,7 @@ Architecture, design proposals, and lifecycle references for people working on t
 - [slim-ux/architecture-suggestions.md](slim-ux/architecture-suggestions.md) — Slim-UX follow-up architecture suggestions paired with `concepts.md`.
 - [team-memory.md](team-memory.md) — Per-agent memory layer (`knowledge/agents/<id>.md`) and the consolidation/routing rules that populate it from `notes/inbox/`.
 - [watches.md](watches.md) — Persistent monitoring jobs (`engine/watches.json`): target-type registry, conditions, follow-up actions, and the `watches.d/` plugin folder.
+- [workspace-manifests.md](workspace-manifests.md) — Declarative per-agent permission scoping: `allowed_tools` / `allowed_repos` / `allowed_external_urls` / `memory_scope`, dispatch-time repo gate, and runtime `--allowedTools` narrowing.
 ## Operations

package/docs/harness-mode.md ADDED Viewed

@@ -0,0 +1,92 @@
+# Tri-Agent Harness Mode
+> Status: opt-in feature flag on scheduled tasks (`harness_mode: "tri_agent"`).
+> Shipped: W-mq07a9gf000jbc2b. Module: [`engine/harness.js`](../engine/harness.js).
+## What it is
+A way to turn one schedule firing into a coordinated **Planner → Generator → Evaluator** trio that iterates on a shared on-disk artifact until the artifact meets a rubric or hits an iteration cap. Useful for "produce a piece of work, then improve it" loops where a single agent call would either underspecify the task or produce uneven quality.
+The three roles in order:
+1. **Planner** (`ask` type, read-only) — reads the rubric, writes a short plan into the mission directory.
+2. **Generator** (defaults to `ask`, inherits `sched.type`) — produces the artifact at `<MINIONS_DIR>/engine/harness/<missionId>/artifact.md` per the plan.
+3. **Evaluator** (`ask`, read-only) — scores the artifact against the rubric and reports a verdict.
+If the evaluator's verdict score is below `harness_threshold` (and the iteration cap hasn't been hit), the engine appends a fresh `Generator → Evaluator` pair carrying the evaluator's feedback in the next generator's prompt. Loop continues until pass or cap.
+## Config schema (add to a schedule in `config.json`)
+```json
+{
+  "id": "weekly-design-review",
+  "title": "Tri-agent design review",
+  "cron": "0 9 * * MON",
+  "type": "ask",
+  "harness_mode": "tri_agent",
+  "harness_rubric": "Score 0-1. 1.0 = all sections complete with code examples. 0 = missing sections.",
+  "harness_threshold": 0.7,
+  "harness_max_iterations": 5
+}
+```
+| Field                    | Required | Default | Notes                                                                 |
+|--------------------------|----------|---------|-----------------------------------------------------------------------|
+| `harness_mode`           | yes      | —       | Must equal `"tri_agent"` to enable. Any other value falls back to plain scheduled work. |
+| `harness_rubric`         | yes      | —       | Non-empty string. Injected into every role's prompt. The evaluator scores against this. |
+| `harness_threshold`      | no       | `0.7`   | Number in `(0, 1]`. Verdict score `>= threshold` = pass; `<` = iterate. |
+| `harness_max_iterations` | no       | `5`     | Positive integer, capped at `20`. Counts generator iterations; the planner runs only once, as part of iteration 1. |
+Invalid harness config logs a warning and **skips the firing without recording a schedule run**, so fixing the config and waiting for the next cron tick is enough to recover — no manual reset needed.
+## Lifecycle
+```
+cron fires
+  └─ scheduler.discoverScheduledWork detects harness_mode === 'tri_agent'
+       └─ validateHarnessConfig (skip+warn on failure)
+            └─ createTriAgentMission → 3 work items
+                 ├─ Planner   (iteration 1)
+                 ├─ Generator (iteration 1, depends on Planner)
+                 └─ Evaluator (iteration 1, depends on Generator)
+                      │
+                      ▼ (on success)
+                 lifecycle.runPostCompletionHooks
+                  └─ handleHarnessIterationResult
+                       └─ parseEvaluatorVerdict + shouldIterateAgain
+                            └─ if iterate: append Generator + Evaluator (iteration N+1)
+                                 └─ next tick dispatches them
+                            └─ if pass / cap / inconclusive: mission terminal
+```
+## Artifact layout
+```
+<MINIONS_DIR>/engine/harness/<missionId>/
+  └─ artifact.md            ← Generator writes here, Evaluator reads here
+```
+Mission ID format: `<scheduleId>-<unixMs>-<rand6>`. The mission directory is the contract — agents in all 3 roles get the same path injected into their prompts.
+## Evaluator verdict protocol
+The evaluator can signal pass/fail/score in either of two ways:
+- **Preferred (structured):** include the fields in the completion report sidecar:
+  ```json
+  { "harness_pass": true, "harness_score": 0.82, "harness_feedback": "all sections present" }
+  ```
+- **Fallback (text):** include `Score: 0.82` and `PASS` / `FAIL` in the summary. Structured fields win when both signals are present. `FAIL` takes precedence when both `PASS` and `FAIL` appear in the text.
+If neither signal is parseable, the harness treats the verdict as inconclusive and stops iterating (`shouldIterateAgain` returns false) to avoid an infinite loop driven by a silent agent.
+## Dedup behavior (engine.js)
+Within a single tick the standard scheduled-work dedup is keyed by `_scheduleId`, which would collapse the harness trio to one item. The harness trio share a `_missionId`; engine.js snapshots active mission IDs **before** the dedup loop so all 3 land together, while plain scheduled items keep the original `_scheduleId` dedup.
+## Operational notes
+- Tri-agent items are **schedule-driven** — there's no manual "fire a harness mission" entry point. Add a schedule with `harness_mode: "tri_agent"` to opt in.
+- Iteration pairs always reuse the original mission's artifact path, threshold, max-iterations, and rubric. The evaluator's verdict feedback is appended to the next generator's prompt.
+- Mission state lives entirely on disk: the work-items.json trio + the artifact file. No new DB tables.
+- Each iteration's evaluator is a separate work item, so dispatch retries, cooldowns, and steering apply normally to every role.

package/docs/workspace-manifests.md ADDED Viewed

@@ -0,0 +1,104 @@
+# Workspace Manifests — Declarative Permission Scoping for Agents
+> **Status:** Implemented in W-mq07avbk000m5543. Inspired by OpenAI's AGENTS.md / Workspace Manifest primitive (April 2026, [link](https://openai.com/index/the-next-evolution-of-the-agents-sdk/)).
+Workspace manifests move per-agent tool / repo / URL / memory permissions from implicit system-prompt text to **machine-readable, runtime-enforced declarations**. Each agent's manifest lives on its config entry and is enforced at dispatch time and spawn time without requiring agents to read or honour prompt language.
+## Schema
+A manifest lives on each agent definition under `workspace_manifest`. All fields are optional; any missing field is **permissive** (no restriction), so agents without a manifest behave exactly as they did before this feature shipped.
+```json
+{
+  "agents": {
+    "ralph": {
+      "name": "Ralph",
+      "role": "Engineer",
+      "skills": ["implementation", "testing"],
+      "workspace_manifest": {
+        "allowed_tools": ["Edit", "Read", "Bash", "Grep", "Glob"],
+        "allowed_repos": ["github:yemi33/minions", "opg-microsoft/minions"],
+        "allowed_external_urls": ["github.com", "*.npmjs.com"],
+        "memory_scope": "shared"
+      }
+    }
+  }
+}
+```
+| Field | Type | Default | Semantics |
+| ----- | ---- | ------- | --------- |
+| `allowed_tools` | `string[]` \| `null` | `null` (permissive) | Canonical tool-name whitelist (e.g. `Edit`, `Read`, `Bash`). Empty array = deny all. Case-sensitive. Merged into the runtime adapter's `--allowedTools` flag at spawn time. |
+| `allowed_repos` | `string[]` \| `null` | `null` (permissive) | Canonical PR-scope (`github:owner/repo`, `ado:org/proj/repo`) or bare `owner/repo`. Empty array = deny all. Case-insensitive. Enforced at dispatch time — out-of-scope dispatch fails with `failure_class: 'workspace-manifest-repo-forbidden'`. |
+| `allowed_external_urls` | `string[]` \| `null` | `null` (permissive) | Host allow-list for `web_fetch` / `web_search`. Bare host (`github.com`) = exact match; `*.example.com` = wildcard subdomain **and** apex. Empty array = deny all. Currently advisory — see "Enforcement points" below. |
+| `memory_scope` | `'private'` \| `'shared'` \| `'read-only-shared'` | `'shared'` | What slice of team knowledge the agent can read/write. Currently exposed via `shared.agentMemoryScope(agent)` for callers that want to gate inbox writes; full enforcement at the consolidation layer is future work. |
+## Default behaviour & backward compatibility
+The defaults are deliberately permissive so the manifest can be rolled out incrementally:
+```js
+// engine/shared.js
+const WORKSPACE_MANIFEST_DEFAULTS = {
+  allowed_tools:         null,   // no tool restriction
+  allowed_repos:         null,   // no repo restriction
+  allowed_external_urls: null,   // no URL restriction
+  memory_scope:          'shared',
+};
+```
+A config that never mentions `workspace_manifest` produces the exact same dispatch and spawn behaviour as the pre-manifest engine. Built-in agents (Ripley, Dallas, Lambert, Rebecca, Ralph) ship without manifests by default — manifests are an opt-in per-agent hardening primitive, not a fleet-wide default.
+## Enforcement points
+| Phase | Location | What it does |
+| ----- | -------- | ------------ |
+| **Dispatch — repo gate** | `engine.js spawnAgent()` (right after project resolution) | Calls `shared.agentCanUseRepo(agent, project)`. Mismatch → `completeDispatch(... FAILURE_CLASS.WORKSPACE_MANIFEST_REPO, agentRetryable: false)` and `cleanupTempAgent` runs. Non-retryable because the structural answer is "widen the manifest or pick a different agent". |
+| **Spawn — tool merge** | `engine.js spawnAgent()` → `_buildAgentSpawnFlags(..., allowedTools)` | `shared.mergeManifestAllowedTools(claudeConfig.allowedTools, manifest.allowed_tools)` produces the intersection of the runtime baseline and the manifest list. Result is passed as `--allowedTools <csv>` to Claude / Copilot / Codex, so the CLI itself enforces the narrowed surface. Empty manifest list (`[]`) = deny-all. Same merge runs on the steering-resume codepath. |
+| **Agent context** | Future work (playbook.js) | Manifest can be surfaced into the agent prompt so the agent sees its declared scope. Today, `shared.resolveAgentManifest(agent)` returns the resolved struct any caller can read. |
+| **URL fetch** | Advisory today | `shared.agentCanFetchUrl(agent, url)` is available for runtime intercepts. Minions itself does not currently intercept `web_fetch` inside the spawned CLI subprocess; a future runtime adapter hook can wire this. |
+| **Memory scope** | Advisory today | `shared.agentMemoryScope(agent)` is available for consolidation / playbook / inbox callers. Semantics: `private` = agent only sees its own `knowledge/agents/<id>.md`; `shared` = full team knowledge (current default); `read-only-shared` = reads shared memory but should not write inbox/notes. |
+## Helpers (in `engine/shared.js`)
+```js
+const { MEMORY_SCOPES, WORKSPACE_MANIFEST_DEFAULTS,
+        validateWorkspaceManifest, resolveAgentManifest,
+        agentCanUseRepo, agentCanUseTool, agentCanFetchUrl, agentMemoryScope,
+        mergeManifestAllowedTools, formatManifestRejection } = require('./engine/shared');
+```
+- `validateWorkspaceManifest(manifest)` → `{ ok, errors }`. `null`/`undefined` is valid (uses defaults).
+- `resolveAgentManifest(agent, config?)` → fresh copy of `WORKSPACE_MANIFEST_DEFAULTS` overlaid with the agent's `workspace_manifest`. Malformed manifest silently falls back to defaults; surface the error via `validateWorkspaceManifest` at config-load time.
+- `agentCanUseRepo(agent, projectOrString)` → `bool`. Accepts a project object or a string identifier (`github:owner/repo`, `ado:org/proj/repo`, or bare `owner/repo`). Case-insensitive.
+- `agentCanUseTool(agent, toolName)` → `bool`. Case-sensitive.
+- `agentCanFetchUrl(agent, url)` → `bool`. Wildcard `*.example.com` matches subdomains **and** apex.
+- `agentMemoryScope(agent)` → one of `MEMORY_SCOPES`. Unknown values fall back to `'shared'`.
+- `mergeManifestAllowedTools(baselineCsv, manifestArray)` → merged CSV. Intersection semantics: `null` manifest = baseline unchanged; empty array = deny-all; empty baseline + manifest = manifest as ceiling.
+- `formatManifestRejection({ agentId, kind, target, allowed })` → structured human-readable rejection string used by the dispatch repo gate.
+## Failure classes (`shared.FAILURE_CLASS`)
+| Code | Meaning |
+| ---- | ------- |
+| `workspace-manifest-repo-forbidden` | Dispatch routed an agent to a project not in its `allowed_repos`. Non-retryable. |
+| `workspace-manifest-tool-forbidden` | Reserved for future tool-call intercepts. Non-retryable as-is. |
+| `workspace-manifest-url-forbidden`  | Reserved for future URL-fetch intercepts. Non-retryable as-is. |
+## Audit trail
+Because rejections route through the standard `completeDispatch(... ERROR ...)` path, the existing dispatch + inbox + dashboard timeline machinery already surfaces every manifest rejection. No new state file or event topic was added. Per-agent audit of what each minion actually touched can be derived from existing `engine/dispatch.json` records; filter by `failure_class === 'workspace-manifest-repo-forbidden'` to isolate the manifest rejections.
+## Rollout guidance
+1. Leave defaults (no manifests) until you have at least one agent that should be locked down (e.g. a contract agent that should only ever touch one repo, or an external collaborator's agent).
+2. Set `workspace_manifest.allowed_repos` first — it's the highest-impact gate and is fully enforced at dispatch time.
+3. Add `workspace_manifest.allowed_tools` for agents whose role doesn't need `Bash` / `WebFetch` / `WebSearch`. The runtime CLI enforces this, so the narrowing is hard-edged.
+4. Treat `allowed_external_urls` and `memory_scope` as documentation today; they become hard gates as we add runtime intercepts.
+## Related
+- `engine/shared.js` — manifest helpers and `FAILURE_CLASS`.
+- `engine.js spawnAgent()` — dispatch-time enforcement.
+- `test/unit/workspace-manifest.test.js` — full validator + gate + integration tests.
+- [`completion-reports.md`](completion-reports.md) — how the dispatch failure surfaces in the standard completion contract.

package/engine/ado.js CHANGED Viewed

@@ -975,6 +975,15 @@ async function forEachActivePr(config, token, callback) {
       continue;
     }
+    // Per-project throttle skip — emit one log line per skipped project, then continue.
+    // Sub-item W-mq03l6zh0006f0a1-b will replace the global isAdoThrottled() probe with
+    // a per-org `isOrgBaseThrottled(orgBase)` check so a 429 on one org no longer pauses
+    // polling for healthy orgs.
+    if (isAdoThrottled()) {
+      log('info', `[ado] PR poll skipped for ${project.name || project.repoName || 'unknown project'} — org ${orgBase} throttled`);
+      continue;
+    }
     // Parallelize PR polling within each project (max 5 concurrent to avoid rate limits)
     const CONCURRENCY = 5;
     for (let i = 0; i < activePrs.length; i += CONCURRENCY) {

package/engine/github.js CHANGED Viewed

@@ -295,7 +295,10 @@ function resetSlugBackoff(slug) {
 // ─── GitHub Rate-Limit Throttle ────────────────────────────────────────────
 // Tracks rate-limiting from GitHub API (gh CLI exits non-zero with rate-limit messages).
 // GitHub rate limits reset hourly, so cap at 60 min.
-const _ghThrottle = createThrottleTracker({ label: 'gh', baseBackoffMs: 60000, maxBackoffMs: 60 * 60000 });
+// jitterRatio: 0.2 — apply ±20% random jitter to backoff to avoid thundering herd
+// when many concurrent gh calls race the same 1-hr reset window. See sub-item
+// W-mq03l6zh0006f0a1-a for the createThrottleTracker jitter math.
+const _ghThrottle = createThrottleTracker({ label: 'gh', baseBackoffMs: 60000, maxBackoffMs: 60 * 60000, jitterRatio: 0.2 });
 /** Returns true if GitHub is rate-limited and retryAfter hasn't elapsed. */
 const isGhThrottled = () => _ghThrottle.isThrottled();