npm - @adaptic/maestro - Versions diffs - 1.9.4 → 1.9.5 - Mend

@adaptic/maestro 1.9.4 → 1.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json +2 -2
package/scripts/daemon/agent-daemon.mjs +5 -1
package/scripts/daemon/cadence-handlers.mjs +22 -7
package/scripts/daemon/dispatcher-cooldown.test.mjs +122 -0
package/scripts/daemon/dispatcher.mjs +60 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@adaptic/maestro",
-  "version": "1.9.4",
+  "version": "1.9.5",
   "description": "Maestro — Autonomous AI agent operating system. Deploy AI employees on dedicated Mac minis.",
   "type": "module",
   "bin": {
@@ -46,7 +46,7 @@
   },
   "always-build-npm": true,
   "scripts": {
-    "test": "node --test lib/cadence-bus.test.mjs scripts/cadence/enqueue-cadence-tick.test.mjs scripts/daemon/cadence-consumer.test.mjs scripts/daemon/lib/session-router.test.mjs scripts/local-triggers/generate-plists.test.mjs bin/maestro.test.mjs",
+    "test": "node --test lib/cadence-bus.test.mjs scripts/cadence/enqueue-cadence-tick.test.mjs scripts/daemon/cadence-consumer.test.mjs scripts/daemon/dispatcher-cooldown.test.mjs scripts/daemon/lib/session-router.test.mjs scripts/local-triggers/generate-plists.test.mjs bin/maestro.test.mjs",
     "test:cadence": "node --test lib/cadence-bus.test.mjs scripts/cadence/enqueue-cadence-tick.test.mjs scripts/daemon/cadence-consumer.test.mjs",
     "test:cli": "node --test bin/maestro.test.mjs",
     "test:plists": "node --test scripts/local-triggers/generate-plists.test.mjs",

package/scripts/daemon/agent-daemon.mjs CHANGED Viewed

@@ -54,7 +54,11 @@ import { acquireLock, updateLock, scanStaleLocks, acquireThreadLock, claimReques
 // ---------------------------------------------------------------------------
 const POLL_INTERVAL = parseInt(process.env.DAEMON_POLL_INTERVAL || "60000", 10); // 60s (up from 30s to avoid Slack rate limits)
-const BACKLOG_INTERVAL = parseInt(process.env.DAEMON_BACKLOG_INTERVAL || "120000", 10); // 2 min
+// Backlog sweep cadence — bumped 2min → 10min in 1.9.5. The dispatcher
+// applies a per-item post-completion cooldown (4h success, 30min failure)
+// so we no longer need a tight sweep loop. Operators with high-throughput
+// queues can override via DAEMON_BACKLOG_INTERVAL.
+const BACKLOG_INTERVAL = parseInt(process.env.DAEMON_BACKLOG_INTERVAL || "600000", 10); // 10 min
 const HEALTH_INTERVAL = 60000; // 1 min
 // Note: dedup is now handled by file-based locks in session-lock.mjs

package/scripts/daemon/cadence-handlers.mjs CHANGED Viewed

@@ -129,16 +129,31 @@ async function guardInboxProcessor({ agentRoot }) {
 /**
  * backlog-executor guard:
- *   - Look at state/queues/*.yaml; any queue with at least one item is work.
- *   - If none, complete inline.
- *   - If any, escalate.
+ *   The reactive daemon (agent-daemon.mjs) already runs an internal
+ *   backlog sweep every BACKLOG_INTERVAL (default 10min) which dispatches
+ *   one session per top-priority queue item with proper post-completion
+ *   cooldowns. Running the cadence-bus backlog-executor on top of that
+ *   spawns DUPLICATE sessions for the same items — observed as 159
+ *   redundant spawns / day in ravi-ai's logs.
+ *
+ *   So this guard now always returns inline. The reactive daemon owns
+ *   the backlog. Operators who prefer the cadence-only mode (no reactive
+ *   daemon) can set BACKLOG_CADENCE_ESCALATE=1 in .env to flip behaviour.
  */
 async function guardBacklogExecutor({ agentRoot }) {
-  const withWork = queuesWithWork(agentRoot);
-  if (withWork.length === 0) {
-    return { ok: true, decision: "inline", reason: "all queues empty" };
+  if (process.env.BACKLOG_CADENCE_ESCALATE === "1") {
+    const withWork = queuesWithWork(agentRoot);
+    if (withWork.length === 0) {
+      return { ok: true, decision: "inline", reason: "all queues empty" };
+    }
+    return { ok: true, decision: "escalate", queues_with_work: withWork };
   }
-  return { ok: true, decision: "escalate", queues_with_work: withWork };
+  return {
+    ok: true,
+    decision: "inline",
+    reason: "reactive-daemon-owns-backlog",
+    note: "Set BACKLOG_CADENCE_ESCALATE=1 to override.",
+  };
 }
 /**

package/scripts/daemon/dispatcher-cooldown.test.mjs ADDED Viewed

@@ -0,0 +1,122 @@
+/**
+ * dispatcher-cooldown.test.mjs — Coverage for the backlog post-completion
+ * cooldown that stopped the 159-redundant-spawns-per-day loop.
+ *
+ * The dispatcher module is stateful (in-memory Sets/Maps for active
+ * sessions, retry counts, cooldowns) and reads from AGENT_DIR at import
+ * time, so each test isolates by setting AGENT_DIR before dynamic-importing
+ * a fresh module instance.
+ */
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { promises as fsp } from "fs";
+import { tmpdir } from "os";
+import { join } from "path";
+/**
+ * Build an isolated agent directory under the OS temp dir, point
+ * process.env.AGENT_DIR at it, and import a brand-new instance of
+ * ./dispatcher.mjs.
+ *
+ * @returns {Promise<{mod: object, dir: string}>} the freshly imported module
+ *   namespace and the directory the caller must remove afterwards.
+ */
+async function freshDispatcher() {
+  // pid + timestamp + random suffix keeps concurrent/repeated runs apart so
+  // the cooldown state file doesn't bleed between tests.
+  const suffix = Math.random().toString(36).slice(2, 8);
+  const dir = join(tmpdir(), `dispatcher-test-${process.pid}-${Date.now()}-${suffix}`);
+  for (const sub of ["state/sessions", "logs/daemon"]) {
+    await fsp.mkdir(join(dir, sub), { recursive: true });
+  }
+  process.env.AGENT_DIR = dir;
+  // A unique query string makes the loader treat this as a new module, so
+  // the dispatcher's in-memory state starts empty.
+  const { href } = new URL("./dispatcher.mjs", import.meta.url);
+  const mod = await import(`${href}?test=${Math.random()}`);
+  return { mod, dir };
+}
+/**
+ * Best-effort recursive removal of a test directory. Any error from the
+ * removal is swallowed so teardown never fails a test.
+ *
+ * @param {string} dir - directory to delete.
+ */
+async function cleanup(dir) {
+  await fsp.rm(dir, { recursive: true, force: true }).catch(() => { /* */ });
+}
+// With no cooldown file on disk, a never-seen item must be dispatchable.
+test("canDispatchBacklog: allowed for a fresh item", async () => {
+  const { mod, dir } = await freshDispatcher();
+  try {
+    const verdict = mod.canDispatchBacklog({ id: "TEST-1", title: "Fresh item" });
+    assert.equal(verdict.allowed, true);
+  } finally {
+    await cleanup(dir);
+  }
+});
+test("canDispatchBacklog: blocked while cooldown is active", async () => {
+  // The dispatcher loads backlog-cooldowns.json once, at module load, so the
+  // file has to be on disk BEFORE the import. That rules out
+  // freshDispatcher() here (it imports first); build the directory by hand,
+  // plant the cooldown, then import exactly once.
+  const dir = join(
+    tmpdir(),
+    `dispatcher-cooldown-test-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
+  );
+  try {
+    await fsp.mkdir(join(dir, "state/sessions"), { recursive: true });
+    await fsp.mkdir(join(dir, "logs/daemon"), { recursive: true });
+    // The cooldown file IS public API for state persistence, so writing it
+    // directly is a fair way to simulate a recently completed session.
+    const expiresAt = Date.now() + 60 * 60 * 1000; // +1h
+    await fsp.writeFile(
+      join(dir, "state/sessions/backlog-cooldowns.json"),
+      JSON.stringify({ "TEST-2": expiresAt }) + "\n"
+    );
+    process.env.AGENT_DIR = dir;
+    const url = new URL("./dispatcher.mjs", import.meta.url);
+    const mod = await import(`${url.href}?cooldown=${Math.random()}`);
+    const r = mod.canDispatchBacklog({ id: "TEST-2", title: "Cooldown item" });
+    assert.equal(r.allowed, false);
+    assert.equal(r.reason, "post_completion_cooldown");
+    // remaining_min is rounded up, so a 1h cooldown reports at most 60.
+    assert.ok(r.remaining_min >= 1 && r.remaining_min <= 60);
+  } finally { await cleanup(dir); }
+});
+// A cooldown whose timestamp is already in the past must not block dispatch.
+test("canDispatchBacklog: allowed after cooldown expires", async () => {
+  const uniq = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+  const dir = join(tmpdir(), `dispatcher-expire-test-${uniq}`);
+  try {
+    for (const sub of ["state/sessions", "logs/daemon"]) {
+      await fsp.mkdir(join(dir, sub), { recursive: true });
+    }
+    // Plant an entry that lapsed one hour ago.
+    const lapsed = { "TEST-3": Date.now() - 3_600_000 };
+    const statePath = join(dir, "state/sessions/backlog-cooldowns.json");
+    await fsp.writeFile(statePath, JSON.stringify(lapsed) + "\n");
+    // Import only after the file exists, with a unique query string so a
+    // fresh module instance is evaluated.
+    process.env.AGENT_DIR = dir;
+    const { href } = new URL("./dispatcher.mjs", import.meta.url);
+    const dispatcher = await import(`${href}?expired=${Math.random()}`);
+    const verdict = dispatcher.canDispatchBacklog({ id: "TEST-3", title: "Expired cooldown item" });
+    assert.equal(verdict.allowed, true);
+  } finally {
+    await cleanup(dir);
+  }
+});
+// Simulates a restart: a cooldown file left on disk must be honoured by a
+// newly loaded dispatcher module.
+test("backlog-cooldowns.json persists across simulated daemon restarts", async () => {
+  const uniq = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+  const dir = join(tmpdir(), `dispatcher-persist-test-${uniq}`);
+  try {
+    for (const sub of ["state/sessions", "logs/daemon"]) {
+      await fsp.mkdir(join(dir, sub), { recursive: true });
+    }
+    // Entry expiring 30 minutes from now, written before the module loads.
+    const stillActive = { "RESTART-1": Date.now() + 30 * 60_000 };
+    const statePath = join(dir, "state/sessions/backlog-cooldowns.json");
+    await fsp.writeFile(statePath, JSON.stringify(stillActive) + "\n");
+    process.env.AGENT_DIR = dir;
+    const { href } = new URL("./dispatcher.mjs", import.meta.url);
+    const dispatcher = await import(`${href}?persist=${Math.random()}`);
+    const verdict = dispatcher.canDispatchBacklog({ id: "RESTART-1", title: "After restart" });
+    assert.equal(verdict.allowed, false, "loaded cooldown should block dispatch");
+    assert.equal(verdict.reason, "post_completion_cooldown");
+  } finally {
+    await cleanup(dir);
+  }
+});

package/scripts/daemon/dispatcher.mjs CHANGED Viewed

@@ -4,7 +4,7 @@
 import { spawn } from "child_process";
 import { appendFileSync, mkdirSync, writeFileSync, readFileSync, renameSync } from "fs";
-import { join } from "path";
+import { join, dirname } from "path";
 import { releaseLock, releaseThreadLock, releaseRequestClaim, claimItem, releaseItemClaim } from "./session-lock.mjs";
 import { recordSession } from "./health.mjs";
@@ -42,6 +42,41 @@ const activeBacklogKeys = new Set();  // backlog item key -> true (while session
 const backlogRetryCount = new Map();  // backlog item key -> number of times dispatched
 const MAX_BACKLOG_RETRIES = 6;        // Max retries before skipping (was 3 — too aggressive)
+// Post-completion cooldown — once a session has run on a backlog item, don't
+// re-dispatch it until a cooldown has elapsed (4h after success, 30min after
+// failure). Without this, every backlog sweep (every 2 min before this
+// release) re-dispatched the same items because the daemon has no signal
+// that the underlying work was actually completed (sessions exit 0 even
+// when they only "looked at" the item). 53 redundant spawns/day per item
+// was the observed rate before this fix.
+const SUCCESS_COOLDOWN_MS = 4 * 60 * 60 * 1000;  // 4h after exit 0
+const FAILURE_COOLDOWN_MS = 30 * 60 * 1000;      // 30m after non-zero exit
+const backlogCooldownUntil = new Map();           // key -> epoch ms
+const COOLDOWN_STATE_PATH = join(AGENT_REPO_DIR, "state/sessions/backlog-cooldowns.json");
+// Persist cooldown state across daemon restarts so a freshly-started
+// daemon doesn't immediately re-dispatch items it just completed.
+/**
+ * Read COOLDOWN_STATE_PATH and copy every still-active entry into
+ * backlogCooldownUntil. Entries whose value is not a number, or whose
+ * timestamp is not in the future, are ignored. Any read or parse error is
+ * swallowed, leaving the map untouched.
+ */
+function loadCooldowns() {
+  try {
+    const persisted = JSON.parse(readFileSync(COOLDOWN_STATE_PATH, "utf-8")) || {};
+    const loadedAt = Date.now();
+    Object.entries(persisted)
+      .filter(([, expiry]) => typeof expiry === "number" && expiry > loadedAt)
+      .forEach(([itemKey, expiry]) => backlogCooldownUntil.set(itemKey, expiry));
+  } catch {
+    // file missing or malformed — start fresh
+  }
+}
+/**
+ * Persist the active entries of backlogCooldownUntil to COOLDOWN_STATE_PATH
+ * as pretty-printed JSON ({ key: epochMs }).
+ *
+ * Expired entries are dropped from both the map and the file so neither
+ * grows without bound; an expired entry already behaves like a missing one
+ * for dispatch checks and is skipped by loadCooldowns().
+ *
+ * The file is written to a temp path and renamed into place so a crash
+ * mid-write cannot leave truncated JSON behind (which the loader would
+ * silently discard, losing every cooldown).
+ *
+ * Best-effort: any filesystem error is swallowed.
+ */
+function saveCooldowns() {
+  try {
+    const now = Date.now();
+    const obj = {};
+    for (const [k, v] of backlogCooldownUntil) {
+      if (v > now) {
+        obj[k] = v;
+      } else {
+        // Deleting the current entry while iterating a Map is safe.
+        backlogCooldownUntil.delete(k);
+      }
+    }
+    mkdirSync(dirname(COOLDOWN_STATE_PATH), { recursive: true });
+    const tmpPath = `${COOLDOWN_STATE_PATH}.${process.pid}.tmp`;
+    writeFileSync(tmpPath, JSON.stringify(obj, null, 2) + "\n");
+    renameSync(tmpPath, COOLDOWN_STATE_PATH);
+  } catch { /* best-effort */ }
+}
+loadCooldowns();
 function logDir() {
   const dir = join(AGENT_REPO_DIR, "logs", "daemon");
   mkdirSync(dir, { recursive: true });
@@ -183,6 +218,13 @@ export function canDispatchBacklog(item) {
   if (retries >= MAX_BACKLOG_RETRIES) {
     return { allowed: false, reason: "max_retries_exceeded", retries };
   }
+  // Post-completion cooldown — prevent every-2-min re-dispatch of items
+  // that completed (success or failure) within the recent window.
+  const cooldownUntil = backlogCooldownUntil.get(key) || 0;
+  if (cooldownUntil > Date.now()) {
+    const remaining_min = Math.ceil((cooldownUntil - Date.now()) / 60000);
+    return { allowed: false, reason: "post_completion_cooldown", remaining_min };
+  }
   return { allowed: true };
 }
@@ -362,6 +404,19 @@ function spawnSession(entry) {
       activeBacklogKeys.delete(key);
       const retries = backlogRetryCount.get(key) || 0;
+      // Apply post-completion cooldown — different for success vs failure.
+      // This is the fix for the every-2-min re-dispatch loop: once a
+      // session has touched an item, we wait before touching it again.
+      const cooldownMs = code === 0 ? SUCCESS_COOLDOWN_MS : FAILURE_COOLDOWN_MS;
+      backlogCooldownUntil.set(key, Date.now() + cooldownMs);
+      saveCooldowns();
+      logSession({
+        event: "cooldown_set",
+        summary: classResult.summary,
+        exit_code: code,
+        cooldown_minutes: Math.round(cooldownMs / 60000),
+      });
       // Release file-based item claim (ib-20260407-001b)
       if (item.id) releaseItemClaim(item.id);
@@ -426,10 +481,13 @@ function spawnSession(entry) {
       claimReleased = true;
     }
-    // Release backlog tracking + item claim.
+    // Release backlog tracking + item claim. Apply failure cooldown so the
+    // same item isn't re-spawned on the next backlog sweep.
     if (source === "backlog") {
       const key = backlogKey(item);
       activeBacklogKeys.delete(key);
+      backlogCooldownUntil.set(key, Date.now() + FAILURE_COOLDOWN_MS);
+      saveCooldowns();
       // Release file-based item claim (ib-20260407-001b)
       if (item.id) releaseItemClaim(item.id);
     }