@adaptic/maestro 1.9.0 → 1.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -16,11 +16,29 @@
16
16
  # The agent's reasoning engines. At minimum you need Anthropic (Claude).
17
17
  #
18
18
 
19
- # REQUIRED — Primary reasoning engine (Claude Code uses this)
20
- # Get your key: https://console.anthropic.com/settings/keys
21
- # Subscription: Anthropic API plan (pay-per-token) or Max subscription
19
+ # REQUIRED — Primary reasoning engine. Two ways to authenticate:
20
+ #
21
+ # Option A — API key (pay-per-token)
22
+ # Set ANTHROPIC_API_KEY below to a valid sk-ant-api03-... key.
23
+ # Get one: https://console.anthropic.com/settings/keys
24
+ #
25
+ # Option B — Claude Code subscription (Pro/Max, OAuth via Keychain)
26
+ # LEAVE ANTHROPIC_API_KEY EMPTY *and* set MAESTRO_PREFER_SUBSCRIPTION_AUTH=1.
27
+ # This tells the cadence consumer to strip ANTHROPIC_API_KEY from every
28
+ # sub-session spawn so claude --print falls back to the keychain OAuth
29
+ # token. Most agents on a Mac mini with a Claude Code subscription
30
+ # should use this option — routine cadence ticks cost zero API credits.
31
+ #
32
+ # Doctor validates the key against api.anthropic.com on every run; an
33
+ # invalid key here will cascade 401s through every sub-session spawn.
22
34
  ANTHROPIC_API_KEY=
23
35
 
36
+ # OPTIONAL — When set to 1, the cadence consumer strips ANTHROPIC_API_KEY
37
+ # from every claude --print sub-session env so claude falls back to
38
+ # Claude Code subscription auth (Keychain OAuth). Use this when the
39
+ # agent's Mac has a Claude Code Pro/Max subscription.
40
+ MAESTRO_PREFER_SUBSCRIPTION_AUTH=
41
+
24
42
  # OPTIONAL — Supplemental model access (GPT-4, embeddings)
25
43
  # Get your key: https://platform.openai.com/api-keys
26
44
  # Subscription: OpenAI API plan (pay-per-token)
package/bin/maestro.mjs CHANGED
@@ -1462,6 +1462,43 @@ function doctor() {
1462
1462
  check("ANTHROPIC_API_KEY", true);
1463
1463
  check("SLACK_USER_TOKEN", false);
1464
1464
  check("GMAIL_APP_PASSWORD", false);
1465
+
1466
+ // Auth validity: if ANTHROPIC_API_KEY is set, ping the API to
1467
+ // verify it works. An invalid key in .env will silently be sent
1468
+ // to every `claude --print` sub-session and cause cascading 401s
1469
+ // (exactly the ravi-ai inbox-processor runaway). Better to catch
1470
+ // it here. Skips the check if the user opted out via
1471
+ // MAESTRO_PREFER_SUBSCRIPTION_AUTH=1 (subscription wins).
1472
+ const keyMatch = env.match(/^ANTHROPIC_API_KEY=(.+)$/m);
1473
+ const preferSubsMatch = env.match(/^MAESTRO_PREFER_SUBSCRIPTION_AUTH=(.+)$/m);
1474
+ const preferSubs = preferSubsMatch && /^(1|true|yes)$/i.test(preferSubsMatch[1].trim());
1475
+ if (keyMatch && !preferSubs) {
1476
+ const key = keyMatch[1].trim().replace(/^"|"$/g, "");
1477
+ try {
1478
+ const result = spawnSync("curl", [
1479
+ "-s", "-o", "/dev/null", "-w", "%{http_code}",
1480
+ "-X", "POST",
1481
+ "-H", `x-api-key: ${key}`,
1482
+ "-H", "anthropic-version: 2023-06-01",
1483
+ "-H", "content-type: application/json",
1484
+ "--max-time", "8",
1485
+ "https://api.anthropic.com/v1/messages",
1486
+ "-d", JSON.stringify({ model: "claude-haiku-4-5", max_tokens: 5, messages: [{ role: "user", content: "ping" }] }),
1487
+ ], { encoding: "utf-8" });
1488
+ const code = (result.stdout || "").trim();
1489
+ if (code === "200") ok("ANTHROPIC_API_KEY validated against api.anthropic.com");
1490
+ else if (code === "401") {
1491
+ warn(`ANTHROPIC_API_KEY is INVALID (HTTP 401 from api.anthropic.com).`);
1492
+ warn(` This will cause every sub-session spawn to fail. Either:`);
1493
+ warn(` 1. Replace the key in .env with a valid one, OR`);
1494
+ warn(` 2. Set MAESTRO_PREFER_SUBSCRIPTION_AUTH=1 in .env to use Claude Code subscription auth.`);
1495
+ issues++;
1496
+ } else if (code) warn(`ANTHROPIC_API_KEY check returned HTTP ${code} (expected 200)`);
1497
+ else warn(`ANTHROPIC_API_KEY check skipped (no network / curl missing)`);
1498
+ } catch { warn("ANTHROPIC_API_KEY check failed (curl error)"); }
1499
+ } else if (preferSubs) {
1500
+ ok("MAESTRO_PREFER_SUBSCRIPTION_AUTH=1 — using Claude Code subscription (Keychain OAuth)");
1501
+ }
1465
1502
  } else {
1466
1503
  fail(".env file not found — copy from .env.example");
1467
1504
  issues++;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adaptic/maestro",
3
- "version": "1.9.0",
3
+ "version": "1.9.2",
4
4
  "description": "Maestro — Autonomous AI agent operating system. Deploy AI employees on dedicated Mac minis.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -43,7 +43,7 @@
43
43
  * logger optional fn({ ts, level, …rest }) → void for tests.
44
44
  */
45
45
 
46
- import { existsSync, readFileSync, writeFileSync } from "node:fs";
46
+ import { existsSync, readFileSync, writeFileSync, mkdirSync, appendFileSync, openSync, closeSync, statSync, unlinkSync } from "node:fs";
47
47
  import { join } from "node:path";
48
48
  import { spawn } from "node:child_process";
49
49
  import { homedir } from "node:os";
@@ -75,6 +75,17 @@ const DEFAULT_SPAWN_TIMEOUT_MS = 30 * 60_000;
75
75
  // preferable to thrashing Claude / hitting usage limits.
76
76
  const MAX_CONCURRENT_SUB_SESSIONS = 1;
77
77
 
78
+ // Retry policy. Most cadence failures are systemic (broken prompt, bad
79
+ // auth, transient API errors) — 5 retries doesn't help, it just amplifies
80
+ // the burn. 2 retries with exponential back-off is the right balance.
81
+ const DEFAULT_MAX_ATTEMPTS = 2;
82
+ const BACKOFF_SCHEDULE_MS = [0, 30_000, 120_000]; // 1st retry +30s, 2nd retry +2m
83
+
84
+ // Circuit breaker — when 3 same-cadence failures land in a row, stop
85
+ // spawning that cadence for 30 minutes. Prevents launchd-rate runaway.
86
+ const CIRCUIT_OPEN_THRESHOLD = 3;
87
+ const CIRCUIT_OPEN_DURATION_MS = 30 * 60_000;
88
+
78
89
  // ---------------------------------------------------------------------------
79
90
  // Helpers
80
91
  // ---------------------------------------------------------------------------
@@ -129,8 +140,13 @@ function resolveClaudeBin() {
129
140
 
130
141
  /**
131
142
  * Spawn a sub-session running the cadence's trigger prompt and resolve
132
- * with { exit_code, durationMs }. Reads the prompt at call time so the
133
- * latest version (possibly upgraded between ticks) is always used.
143
+ * with { exit_code, durationMs, stderr_tail }. Reads the prompt at call
144
+ * time so the latest version (possibly upgraded between ticks) is always
145
+ * used.
146
+ *
147
+ * Robustness: stdout + stderr are tee'd to logs/cadence-bus/subsessions/
148
+ * so non-zero exits remain diagnosable after the fact. The last ~4 KB of
149
+ * stderr is also captured in-memory and surfaced on the failure event.
134
150
  */
135
151
  function realSpawnSession({ agentRoot, cadence, promptPath, timeoutMs, log }) {
136
152
  return new Promise((resolveOut) => {
@@ -167,14 +183,43 @@ function realSpawnSession({ agentRoot, cadence, promptPath, timeoutMs, log }) {
167
183
  AGENT_DIR: agentRoot,
168
184
  PATH: augmentedPath,
169
185
  };
186
+ // Auth handling. Claude Code authenticates via macOS Keychain
187
+ // (OAuth from the user's Pro/Max subscription) when no API key is
188
+ // set, OR via the ANTHROPIC_API_KEY env var when one is present.
189
+ // If the env key is present BUT looks like a placeholder / empty
190
+ // string, we strip it so claude can fall back to Keychain OAuth.
191
+ // Set MAESTRO_PREFER_SUBSCRIPTION_AUTH=1 in .env to always strip
192
+ // the API key (force subscription auth) — useful when the agent
193
+ // owns a Claude Code Pro/Max subscription and shouldn't burn API
194
+ // credits for routine ticks.
195
+ const preferSubscription = process.env.MAESTRO_PREFER_SUBSCRIPTION_AUTH === "1";
196
+ const apiKey = env.ANTHROPIC_API_KEY || "";
197
+ if (preferSubscription || !apiKey.trim() || /^(your-api-key|placeholder|xxx+|sk-ant-xxx)/i.test(apiKey)) {
198
+ delete env.ANTHROPIC_API_KEY;
199
+ }
170
200
  const started = Date.now();
171
201
 
172
- log({ level: "info", stage: "subsession_spawn", cadence, bin });
202
+ // Per-run log file. Pattern is short enough to be tail-friendly.
203
+ const logsDir = join(agentRoot, "logs", "cadence-bus", "subsessions");
204
+ mkdirSync(logsDir, { recursive: true });
205
+ const date = new Date().toISOString().slice(0, 10);
206
+ const stamp = new Date().toISOString().replace(/[:.]/g, "-");
207
+ const stdoutPath = join(logsDir, `${date}-${cadence}-${stamp}.stdout.log`);
208
+ const stderrPath = join(logsDir, `${date}-${cadence}-${stamp}.stderr.log`);
209
+ const stdoutFd = openSync(stdoutPath, "a");
210
+ const stderrFd = openSync(stderrPath, "a");
211
+
212
+ log({ level: "info", stage: "subsession_spawn", cadence, bin, stdout: stdoutPath, stderr: stderrPath });
173
213
 
174
214
  let child;
175
215
  try {
176
- child = spawn(bin, args, { cwd: agentRoot, env, stdio: "ignore" });
216
+ // stdio:
217
+ // 0 ignore (claude --print reads prompt from argv, not stdin)
218
+ // 1 → file (capture stdout for later inspection)
219
+ // 2 → file (capture stderr — critical for diagnosing exit-1)
220
+ child = spawn(bin, args, { cwd: agentRoot, env, stdio: ["ignore", stdoutFd, stderrFd] });
177
221
  } catch (err) {
222
+ try { closeSync(stdoutFd); closeSync(stderrFd); } catch { /* */ }
178
223
  resolveOut({ ok: false, exit_code: -4, error: `spawn failed: ${err.message}` });
179
224
  return;
180
225
  }
@@ -188,8 +233,22 @@ function realSpawnSession({ agentRoot, cadence, promptPath, timeoutMs, log }) {
188
233
 
189
234
  child.on("exit", (code, signal) => {
190
235
  clearTimeout(timer);
236
+ try { closeSync(stdoutFd); closeSync(stderrFd); } catch { /* */ }
191
237
  const durationMs = Date.now() - started;
192
238
  const exit_code = typeof code === "number" ? code : (signal ? -1 : -5);
239
+
240
+ // Pull tail of stderr (and stdout if stderr empty) for the failure
241
+ // surface. Best-effort; we never block on file size.
242
+ let stderrTail = "";
243
+ try {
244
+ const body = readFileSync(stderrPath, "utf-8");
245
+ stderrTail = body.slice(-4096);
246
+ if (!stderrTail.trim()) {
247
+ const so = readFileSync(stdoutPath, "utf-8");
248
+ stderrTail = so.slice(-4096);
249
+ }
250
+ } catch { /* file may not exist if spawn ENOENT before fd-redirect */ }
251
+
193
252
  // Record cost-ledger row. Token counts are 0 until we parse the
194
253
  // session's JSON output; for now exit-code + duration are enough
195
254
  // to spot pathological retry loops.
@@ -208,16 +267,29 @@ function realSpawnSession({ agentRoot, cadence, promptPath, timeoutMs, log }) {
208
267
  ], { stdio: "ignore", env: { ...env, AGENT_ROOT: agentRoot } }).unref();
209
268
  }
210
269
  } catch { /* cost tracking is best-effort */ }
270
+
271
+ // Clean up empty log files so the directory doesn't accumulate
272
+ // hundreds of zero-byte successes.
273
+ try {
274
+
275
+ if (statSync(stdoutPath).size === 0) unlinkSync(stdoutPath);
276
+ if (statSync(stderrPath).size === 0) unlinkSync(stderrPath);
277
+ } catch { /* */ }
278
+
211
279
  resolveOut({
212
280
  ok: exit_code === 0,
213
281
  exit_code,
214
282
  signal: signal || null,
215
283
  duration_ms: durationMs,
284
+ stderr_tail: stderrTail || null,
285
+ stdout_path: stdoutPath,
286
+ stderr_path: stderrPath,
216
287
  });
217
288
  });
218
289
 
219
290
  child.on("error", (err) => {
220
291
  clearTimeout(timer);
292
+ try { closeSync(stdoutFd); closeSync(stderrFd); } catch { /* */ }
221
293
  const durationMs = Date.now() - started;
222
294
  resolveOut({ ok: false, exit_code: -6, error: err.message, duration_ms: durationMs });
223
295
  });
@@ -242,6 +314,11 @@ export function startConsumer(opts = {}) {
242
314
  const maxSpawnMs = opts.maxSpawnMs ?? DEFAULT_SPAWN_TIMEOUT_MS;
243
315
  const spawnSession = opts.spawnSession || realSpawnSession;
244
316
  const userLogger = opts.logger;
317
+ // Test / tuning hooks for the reliability layer.
318
+ const backoffSchedule = opts.backoffSchedule || BACKOFF_SCHEDULE_MS;
319
+ const circuitThreshold = opts.circuitThreshold ?? CIRCUIT_OPEN_THRESHOLD;
320
+ const circuitDurationMs = opts.circuitDurationMs ?? CIRCUIT_OPEN_DURATION_MS;
321
+ const maxAttempts = opts.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
245
322
 
246
323
  const stats = {
247
324
  started_at: new Date().toISOString(),
@@ -249,6 +326,8 @@ export function startConsumer(opts = {}) {
249
326
  inline: 0,
250
327
  escalated: 0,
251
328
  skipped_emergency_stop: 0,
329
+ skipped_circuit_open: 0,
330
+ skipped_backoff: 0,
252
331
  dlq: 0,
253
332
  retries: 0,
254
333
  spawn_failures: 0,
@@ -261,6 +340,75 @@ export function startConsumer(opts = {}) {
261
340
  let timers = [];
262
341
  let activeSubSessions = 0;
263
342
 
343
+ // Per-cadence reliability state. Tracks consecutive failure count and
344
+ // the earliest moment we'll allow another spawn for that cadence.
345
+ // Persists nothing — circuit state is in-memory only. On daemon restart
346
+ // we get a fresh slate; that's intentional (operators expect a restart
347
+ // to mean "try again now").
348
+ const cadenceState = new Map(); // cadence → { failures, openUntil, nextAllowedAt }
349
+
350
+ function getCadenceState(cadence) {
351
+ let s = cadenceState.get(cadence);
352
+ if (!s) { s = { failures: 0, openUntil: 0, nextAllowedAt: 0 }; cadenceState.set(cadence, s); }
353
+ return s;
354
+ }
355
+
356
+ function recordSubsessionSuccess(cadence) {
357
+ const s = getCadenceState(cadence);
358
+ s.failures = 0;
359
+ s.openUntil = 0;
360
+ s.nextAllowedAt = 0;
361
+ }
362
+
363
+ function recordSubsessionFailure(cadence) {
364
+ const s = getCadenceState(cadence);
365
+ s.failures += 1;
366
+ // Exponential back-off honouring the (test-overridable) schedule.
367
+ const idx = Math.min(s.failures, backoffSchedule.length - 1);
368
+ s.nextAllowedAt = Date.now() + backoffSchedule[idx];
369
+ if (s.failures >= circuitThreshold) {
370
+ s.openUntil = Date.now() + circuitDurationMs;
371
+ log({ level: "error", stage: "circuit_opened", cadence, failures: s.failures, open_until: new Date(s.openUntil).toISOString() });
372
+ writeCircuitFile();
373
+ }
374
+ }
375
+
376
+ function writeCircuitFile() {
377
+ // Persist the open-circuit snapshot so doctor + the operator can see
378
+ // which cadences are currently held back without scraping logs.
379
+ const open = {};
380
+ for (const [cad, s] of cadenceState.entries()) {
381
+ if (s.openUntil > Date.now()) {
382
+ open[cad] = { failures: s.failures, open_until: new Date(s.openUntil).toISOString() };
383
+ }
384
+ }
385
+ const path = join(agentRoot, "state/cadence-bus/circuit-open.json");
386
+ try {
387
+ if (Object.keys(open).length === 0) {
388
+ // Remove the file when nothing is open.
389
+
390
+ try { unlinkSync(path); } catch { /* */ }
391
+ } else {
392
+ writeFileSync(path, JSON.stringify({ generated: new Date().toISOString(), open }, null, 2) + "\n");
393
+ }
394
+ } catch { /* best-effort */ }
395
+ }
396
+
397
+ function isCadenceAllowed(cadence) {
398
+ const s = getCadenceState(cadence);
399
+ const now = Date.now();
400
+ if (s.openUntil > now) return { allowed: false, reason: "circuit-open", retry_at: s.openUntil };
401
+ if (s.nextAllowedAt > now) return { allowed: false, reason: "backoff", retry_at: s.nextAllowedAt };
402
+ // Circuit closes automatically when openUntil passes.
403
+ if (s.openUntil && s.openUntil <= now) {
404
+ s.openUntil = 0;
405
+ s.failures = 0;
406
+ log({ level: "info", stage: "circuit_closed", cadence });
407
+ writeCircuitFile();
408
+ }
409
+ return { allowed: true };
410
+ }
411
+
264
412
  function log(entry) {
265
413
  const enriched = { ts: new Date().toISOString(), ...entry };
266
414
  logBusEvent(agentRoot, enriched);
@@ -280,6 +428,32 @@ export function startConsumer(opts = {}) {
280
428
  }
281
429
 
282
430
  async function escalate(event) {
431
+ // Circuit-breaker / back-off gate. If this cadence is currently held
432
+ // back, requeue without spawning. The event keeps its attempt count
433
+ // because the failure was upstream (not a per-event problem).
434
+ const gate = isCadenceAllowed(event.cadence);
435
+ if (!gate.allowed) {
436
+ log({
437
+ level: "warn",
438
+ stage: gate.reason === "circuit-open" ? "skipped_circuit_open" : "skipped_backoff",
439
+ id: event.id,
440
+ cadence: event.cadence,
441
+ retry_at: new Date(gate.retry_at).toISOString(),
442
+ });
443
+ if (gate.reason === "circuit-open") stats.skipped_circuit_open += 1;
444
+ else stats.skipped_backoff += 1;
445
+ // Put the event back in inbox WITHOUT bumping attempts so it doesn't
446
+ // burn its retry budget while the circuit is open.
447
+ const paths2 = getBusPaths(agentRoot);
448
+ try {
449
+ const event2 = { ...event, attempts: Math.max(0, (event.attempts || 1) - 1) };
450
+ writeFileSync(join(paths2.inbox, `${event.id}.json`), JSON.stringify(event2, null, 2) + "\n");
451
+
452
+ try { unlinkSync(join(paths2.claimed, `${event.id}.json`)); } catch { /* */ }
453
+ } catch { /* best-effort */ }
454
+ return { ok: false, decision: gate.reason };
455
+ }
456
+
283
457
  if (activeSubSessions >= MAX_CONCURRENT_SUB_SESSIONS) {
284
458
  // Re-queue and try again next tick. Single-owner cadence consumer
285
459
  // means this can only happen when a prior tick is still running —
@@ -291,7 +465,15 @@ export function startConsumer(opts = {}) {
291
465
  cadence: event.cadence,
292
466
  active_subsessions: activeSubSessions,
293
467
  });
294
- failTick(agentRoot, event.id, "deferred:concurrent-spawn", { maxAttempts: 10 });
468
+ // Re-queue without burning the retry budget — concurrent-spawn isn't
469
+ // a per-event failure.
470
+ const paths2 = getBusPaths(agentRoot);
471
+ try {
472
+ const event2 = { ...event, attempts: Math.max(0, (event.attempts || 1) - 1) };
473
+ writeFileSync(join(paths2.inbox, `${event.id}.json`), JSON.stringify(event2, null, 2) + "\n");
474
+
475
+ try { unlinkSync(join(paths2.claimed, `${event.id}.json`)); } catch { /* */ }
476
+ } catch { /* best-effort */ }
295
477
  stats.retries += 1;
296
478
  return { ok: false, decision: "deferred" };
297
479
  }
@@ -334,14 +516,31 @@ export function startConsumer(opts = {}) {
334
516
  prompt: promptPath,
335
517
  exit_code: result.exit_code,
336
518
  duration_ms: result.duration_ms,
519
+ stdout_path: result.stdout_path || null,
520
+ stderr_path: result.stderr_path || null,
337
521
  });
522
+ recordSubsessionSuccess(event.cadence);
338
523
  stats.escalated += 1;
339
524
  stats.last_decision = "escalated";
340
525
  return { ok: true, decision: "escalated", exit_code: result.exit_code };
341
526
  }
342
- log({ level: "error", stage: "subsession_failed", id: event.id, cadence: event.cadence, exit_code: result.exit_code, error: result.error || null });
527
+ // Failure path: log + cap retries low. The exact stderr tail comes
528
+ // from the spawn helper so we never DLQ "blind" again.
529
+ const stderrTail = (result.stderr_tail || "").trim().split("\n").slice(-3).join(" | ");
530
+ log({
531
+ level: "error",
532
+ stage: "subsession_failed",
533
+ id: event.id,
534
+ cadence: event.cadence,
535
+ exit_code: result.exit_code,
536
+ duration_ms: result.duration_ms,
537
+ error: result.error || stderrTail || `exit ${result.exit_code}`,
538
+ stderr_path: result.stderr_path || null,
539
+ });
343
540
  stats.spawn_failures += 1;
344
- const outcome = failTick(agentRoot, event.id, result.error || `exit ${result.exit_code}`);
541
+ recordSubsessionFailure(event.cadence);
542
+ const reason = result.error || (stderrTail ? `exit ${result.exit_code}: ${stderrTail}` : `exit ${result.exit_code}`);
543
+ const outcome = failTick(agentRoot, event.id, reason, { maxAttempts });
345
544
  if (outcome?.destination === "dlq") stats.dlq += 1;
346
545
  else stats.retries += 1;
347
546
  return { ok: false, decision: outcome?.destination || "failed" };
@@ -428,19 +627,38 @@ export function startConsumer(opts = {}) {
428
627
  recoverStaleClaims(agentRoot);
429
628
 
430
629
  let processed = 0;
431
- // Drain as much as the consumer can in one tick, but yield to the
432
- // event loop between events so heartbeats and stop signals fire.
630
+ let escalatedThisTick = 0;
631
+ // Drain inline events as much as the consumer can in one tick; cap
632
+ // sub-session escalations at 1 per tick so a fast-failing cadence
633
+ // can't burn a whole minute's worth of retries inside a single poll.
634
+ // The next poll (DEFAULT_POLL_MS later) will pick up where we left off.
433
635
  while (!stopping) {
434
636
  const claim = claimNextTick(agentRoot);
435
637
  if (!claim) break;
436
638
  const event = claim.event;
437
639
  activeTick = event.id;
640
+ let didEscalate = false;
438
641
  try {
642
+ const def = getCadenceDef(event.cadence);
643
+ const willEscalate = !def || (def.mode !== "inline" && (def.mode !== "guarded" || true));
644
+ // Roughly: if it's not a registry-inline cadence, we MAY escalate.
645
+ // We don't yet know if the guard will say inline; processEvent
646
+ // will tell us via stats. Use the escalated stats delta as the
647
+ // signal that an actual sub-session ran this iteration.
648
+ const before = stats.escalated + stats.spawn_failures + stats.skipped_circuit_open + stats.skipped_backoff;
439
649
  await processEvent(event);
650
+ const after = stats.escalated + stats.spawn_failures + stats.skipped_circuit_open + stats.skipped_backoff;
651
+ if (after > before) didEscalate = true;
652
+ // Silence unused var warning.
653
+ void willEscalate;
440
654
  } finally {
441
655
  activeTick = null;
442
656
  }
443
657
  processed += 1;
658
+ if (didEscalate) escalatedThisTick += 1;
659
+ // Hard cap: at most ONE sub-session spawn per tick. Inline ticks
660
+ // keep draining freely (they're cheap).
661
+ if (escalatedThisTick >= 1) break;
444
662
  if (processed >= 16) break; // soft batch cap
445
663
  }
446
664
  return { processed };
@@ -210,9 +210,16 @@ test("unknown cadence with no prompt file DLQ's immediately", async () => {
210
210
  test("spawn failure retries within the budget, then DLQs", async () => {
211
211
  const root = await makeAgentRoot();
212
212
  plantPrompt(root, "weekly-strategic-memo");
213
+ // Disable back-off + raise circuit threshold so the test exercises the
214
+ // retry-then-DLQ path without waiting for back-off windows. The
215
+ // real defaults (30s/2m back-off, 3-failure circuit) are exercised by
216
+ // dedicated tests below.
213
217
  const consumer = startConsumer({
214
218
  agentRoot: root,
215
219
  pollMs: 25,
220
+ backoffSchedule: [0, 0, 0],
221
+ circuitThreshold: 999,
222
+ maxAttempts: 2,
216
223
  spawnSession: async () => ({ ok: false, exit_code: 1, error: "always-fail", duration_ms: 1 }),
217
224
  });
218
225
  try {
@@ -226,6 +233,68 @@ test("spawn failure retries within the budget, then DLQs", async () => {
226
233
  }
227
234
  });
228
235
 
236
+ test("circuit breaker opens after consecutive failures and blocks further spawns", async () => {
237
+ const root = await makeAgentRoot();
238
+ plantPrompt(root, "weekly-strategic-memo");
239
+ let spawnCount = 0;
240
+ const consumer = startConsumer({
241
+ agentRoot: root,
242
+ pollMs: 20,
243
+ backoffSchedule: [0, 0, 0],
244
+ circuitThreshold: 2,
245
+ circuitDurationMs: 60_000, // 1 min — long enough for the assertion window
246
+ maxAttempts: 1, // each event DLQs on first failure so we don't conflate retry-counts
247
+ spawnSession: async () => { spawnCount++; return { ok: false, exit_code: 1, error: "fail", duration_ms: 1 }; },
248
+ });
249
+ try {
250
+ // Enqueue 5 events; circuit should open after 2 failures, blocking the rest.
251
+ for (let i = 0; i < 5; i++) {
252
+ enqueueTick({ cadence: "weekly-strategic-memo", source: "launchd", agentRoot: root });
253
+ }
254
+ const opened = await waitFor(() => consumer.getStats().skipped_circuit_open >= 1, { timeoutMs: 10_000 });
255
+ assert.ok(opened, `circuit should open; stats=${JSON.stringify(consumer.getStats())}`);
256
+ // Spawn count must NOT keep climbing once the circuit is open.
257
+ const spawnsAtOpen = spawnCount;
258
+ await new Promise((r) => setTimeout(r, 500));
259
+ assert.equal(spawnCount, spawnsAtOpen, `spawns must stop once circuit opens (was ${spawnsAtOpen}, now ${spawnCount})`);
260
+ } finally {
261
+ await consumer.stop();
262
+ await rmRoot(root);
263
+ }
264
+ });
265
+
266
+ test("back-off skips re-spawning until the cooldown elapses", async () => {
267
+ const root = await makeAgentRoot();
268
+ plantPrompt(root, "weekly-strategic-memo");
269
+ let spawnCount = 0;
270
+ const consumer = startConsumer({
271
+ agentRoot: root,
272
+ pollMs: 20,
273
+ backoffSchedule: [0, 300, 300], // 300ms cooldown after each failure
274
+ circuitThreshold: 999,
275
+ maxAttempts: 1,
276
+ spawnSession: async () => { spawnCount++; return { ok: false, exit_code: 1, error: "fail", duration_ms: 1 }; },
277
+ });
278
+ try {
279
+ // Enqueue 2 events back-to-back. The 1st triggers a spawn (fails). The
280
+ // 2nd should be held back by the 300ms back-off window.
281
+ enqueueTick({ cadence: "weekly-strategic-memo", source: "launchd", agentRoot: root });
282
+ enqueueTick({ cadence: "weekly-strategic-memo", source: "launchd", agentRoot: root });
283
+ await waitFor(() => spawnCount >= 1, { timeoutMs: 5_000 });
284
+ const spawnsBeforeWait = spawnCount;
285
+ // During the back-off window no new spawn should fire.
286
+ await new Promise((r) => setTimeout(r, 150));
287
+ assert.ok(spawnCount === spawnsBeforeWait, `spawns must wait for back-off (was ${spawnsBeforeWait}, now ${spawnCount})`);
288
+ assert.ok(consumer.getStats().skipped_backoff >= 1, "skipped_backoff should be recorded");
289
+ // After the window passes, the next event should be processed.
290
+ await waitFor(() => spawnCount > spawnsBeforeWait, { timeoutMs: 5_000 });
291
+ assert.ok(spawnCount > spawnsBeforeWait, "spawning resumes after back-off");
292
+ } finally {
293
+ await consumer.stop();
294
+ await rmRoot(root);
295
+ }
296
+ });
297
+
229
298
  // ---------------------------------------------------------------------------
230
299
  // Emergency stop
231
300
  // ---------------------------------------------------------------------------