@adaptic/maestro 1.9.4 → 1.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adaptic/maestro",
|
|
3
|
-
"version": "1.9.4",
|
|
3
|
+
"version": "1.9.5",
|
|
4
4
|
"description": "Maestro — Autonomous AI agent operating system. Deploy AI employees on dedicated Mac minis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -46,7 +46,7 @@
|
|
|
46
46
|
},
|
|
47
47
|
"always-build-npm": true,
|
|
48
48
|
"scripts": {
|
|
49
|
-
"test": "node --test lib/cadence-bus.test.mjs scripts/cadence/enqueue-cadence-tick.test.mjs scripts/daemon/cadence-consumer.test.mjs scripts/daemon/lib/session-router.test.mjs scripts/local-triggers/generate-plists.test.mjs bin/maestro.test.mjs",
|
|
49
|
+
"test": "node --test lib/cadence-bus.test.mjs scripts/cadence/enqueue-cadence-tick.test.mjs scripts/daemon/cadence-consumer.test.mjs scripts/daemon/dispatcher-cooldown.test.mjs scripts/daemon/lib/session-router.test.mjs scripts/local-triggers/generate-plists.test.mjs bin/maestro.test.mjs",
|
|
50
50
|
"test:cadence": "node --test lib/cadence-bus.test.mjs scripts/cadence/enqueue-cadence-tick.test.mjs scripts/daemon/cadence-consumer.test.mjs",
|
|
51
51
|
"test:cli": "node --test bin/maestro.test.mjs",
|
|
52
52
|
"test:plists": "node --test scripts/local-triggers/generate-plists.test.mjs",
|
|
@@ -54,7 +54,11 @@ import { acquireLock, updateLock, scanStaleLocks, acquireThreadLock, claimReques
|
|
|
54
54
|
// ---------------------------------------------------------------------------
|
|
55
55
|
|
|
56
56
|
/**
 * Parse a millisecond interval from an environment variable value.
 *
 * Falls back when the value is unset, empty, non-numeric, not positive, or
 * larger than the maximum delay Node timers accept (2^31 - 1 ms).
 * NOTE(review): these constants are presumably handed to setInterval /
 * setTimeout (usage is outside this view); Node clamps NaN, < 1 and
 * > 2147483647 delays to 1ms, which would turn a typo in .env into a
 * tight loop — confirm against the callers.
 *
 * @param {string | undefined} raw - Raw environment value.
 * @param {number} fallbackMs - Interval to use when `raw` is unusable.
 * @returns {number} A positive interval in milliseconds.
 */
function parseIntervalEnvMs(raw, fallbackMs) {
  const MAX_TIMER_DELAY_MS = 2147483647;
  const parsed = Number.parseInt(raw ?? "", 10);
  const usable = Number.isFinite(parsed) && parsed > 0 && parsed <= MAX_TIMER_DELAY_MS;
  return usable ? parsed : fallbackMs;
}

const POLL_INTERVAL = parseIntervalEnvMs(process.env.DAEMON_POLL_INTERVAL, 60000); // 60s (up from 30s to avoid Slack rate limits)

// Backlog sweep cadence — bumped 2min → 10min in 1.9.5. The dispatcher
// applies a per-item post-completion cooldown (4h success, 30min failure)
// so we no longer need a tight sweep loop. Operators with high-throughput
// queues can override via DAEMON_BACKLOG_INTERVAL (milliseconds).
const BACKLOG_INTERVAL = parseIntervalEnvMs(process.env.DAEMON_BACKLOG_INTERVAL, 600000); // 10 min
const HEALTH_INTERVAL = 60000; // 1 min
|
|
59
63
|
// Note: dedup is now handled by file-based locks in session-lock.mjs
|
|
60
64
|
|
|
@@ -129,16 +129,31 @@ async function guardInboxProcessor({ agentRoot }) {
|
|
|
129
129
|
|
|
130
130
|
/**
 * backlog-executor guard.
 *
 * The reactive daemon (agent-daemon.mjs) already runs its own backlog
 * sweep every BACKLOG_INTERVAL (default 10min), dispatching one session
 * per top-priority queue item and applying post-completion cooldowns.
 * Letting the cadence-bus backlog-executor run as well spawned DUPLICATE
 * sessions for the same items — observed as 159 redundant spawns / day in
 * ravi-ai's logs.
 *
 * The guard therefore answers "inline" by default: the reactive daemon
 * owns the backlog. Operators running in cadence-only mode (no reactive
 * daemon) can set BACKLOG_CADENCE_ESCALATE=1 in .env; in that mode the
 * guard escalates whenever queuesWithWork(agentRoot) reports any queue.
 *
 * @param {{ agentRoot: string }} params - agentRoot is passed through to
 *   queuesWithWork() and is only consulted in the opt-in mode.
 * @returns {Promise<object>} Guard verdict with `ok` and `decision`
 *   ("inline" or "escalate") plus a `reason`/`note` or `queues_with_work`.
 */
async function guardBacklogExecutor({ agentRoot }) {
  const cadenceOwnsBacklog = process.env.BACKLOG_CADENCE_ESCALATE === "1";

  // Default path: never escalate, the reactive daemon handles the backlog.
  if (!cadenceOwnsBacklog) {
    return {
      ok: true,
      decision: "inline",
      reason: "reactive-daemon-owns-backlog",
      note: "Set BACKLOG_CADENCE_ESCALATE=1 to override.",
    };
  }

  // Opt-in path: escalate only when at least one queue has pending work.
  const pendingQueues = queuesWithWork(agentRoot);
  return pendingQueues.length === 0
    ? { ok: true, decision: "inline", reason: "all queues empty" }
    : { ok: true, decision: "escalate", queues_with_work: pendingQueues };
}
|
|
143
158
|
|
|
144
159
|
/**
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* dispatcher-cooldown.test.mjs — Coverage for the backlog post-completion
|
|
3
|
+
* cooldown that stopped the 159-redundant-spawns-per-day loop.
|
|
4
|
+
*
|
|
5
|
+
* The dispatcher module is stateful (in-memory Sets/Maps for active
|
|
6
|
+
* sessions, retry counts, cooldowns) and reads from AGENT_DIR at import
|
|
7
|
+
* time, so each test isolates by setting AGENT_DIR before dynamic-importing
|
|
8
|
+
* a fresh module instance.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { test } from "node:test";
|
|
12
|
+
import assert from "node:assert/strict";
|
|
13
|
+
import { promises as fsp } from "fs";
|
|
14
|
+
import { tmpdir } from "os";
|
|
15
|
+
import { join } from "path";
|
|
16
|
+
|
|
17
|
+
/**
 * Create an isolated agent directory, point AGENT_DIR at it, and load a
 * new instance of dispatcher.mjs.
 *
 * @returns {Promise<{ mod: object, dir: string }>} The freshly imported
 *   dispatcher module and the temporary directory it was pointed at. The
 *   caller is responsible for removing `dir`.
 */
async function freshDispatcher() {
  // pid + timestamp + random suffix keeps each test's cooldown state file
  // in its own directory so nothing bleeds between tests.
  const suffix = Math.random().toString(36).slice(2, 8);
  const dir = join(tmpdir(), `dispatcher-test-${process.pid}-${Date.now()}-${suffix}`);

  for (const sub of ["state/sessions", "logs/daemon"]) {
    await fsp.mkdir(join(dir, sub), { recursive: true });
  }

  process.env.AGENT_DIR = dir;

  // A unique query string makes this a distinct module specifier, so the
  // loader evaluates dispatcher.mjs again instead of reusing cached state.
  const { href } = new URL("./dispatcher.mjs", import.meta.url);
  const mod = await import(`${href}?test=${Math.random()}`);
  return { mod, dir };
}
|
|
31
|
+
|
|
32
|
+
/**
 * Best-effort recursive removal of a test directory.
 *
 * @param {string} dir - Directory to delete.
 * @returns {Promise<void>} Resolves whether or not the removal succeeded.
 */
async function cleanup(dir) {
  await fsp.rm(dir, { recursive: true, force: true }).catch(() => {
    /* removal failures are deliberately ignored */
  });
}
|
|
35
|
+
|
|
36
|
+
// An item with no recorded cooldown in a brand-new agent dir must be
// dispatchable.
test("canDispatchBacklog: allowed for a fresh item", async () => {
  const { mod: dispatcher, dir: agentDir } = await freshDispatcher();
  try {
    const verdict = dispatcher.canDispatchBacklog({ id: "TEST-1", title: "Fresh item" });
    assert.equal(verdict.allowed, true);
  } finally {
    await cleanup(agentDir);
  }
});
|
|
43
|
+
|
|
44
|
+
// An item whose persisted cooldown lies in the future must be refused with
// reason "post_completion_cooldown".
test("canDispatchBacklog: blocked while cooldown is active", async () => {
  const dir = join(
    tmpdir(),
    `dispatcher-cooldown-test-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
  );
  try {
    await fsp.mkdir(join(dir, "state/sessions"), { recursive: true });
    await fsp.mkdir(join(dir, "logs/daemon"), { recursive: true });

    // Simulate a completed session by planting the cooldown state file.
    // The dispatcher only reads this file at module load, so it has to be
    // on disk BEFORE the import below — writing it afterwards is a no-op.
    const cooldownUntil = Date.now() + 60 * 60 * 1000; // +1h
    await fsp.writeFile(
      join(dir, "state/sessions/backlog-cooldowns.json"),
      JSON.stringify({ "TEST-2": cooldownUntil }) + "\n"
    );

    process.env.AGENT_DIR = dir;
    // Unique query string => fresh module instance with empty in-memory state.
    const url = new URL("./dispatcher.mjs", import.meta.url);
    const mod = await import(`${url.href}?cooldown=${Math.random()}`);

    const r = mod.canDispatchBacklog({ id: "TEST-2", title: "Cooldown item" });
    assert.equal(r.allowed, false);
    assert.equal(r.reason, "post_completion_cooldown");
    // remaining_min is rounded up, so a 1h cooldown reports 1..60 minutes.
    assert.ok(r.remaining_min >= 1 && r.remaining_min <= 60);
  } finally {
    await cleanup(dir);
  }
});
|
|
82
|
+
|
|
83
|
+
// A persisted cooldown whose deadline is already in the past must not
// block dispatch.
test("canDispatchBacklog: allowed after cooldown expires", async () => {
  const uniqueSuffix = Math.random().toString(36).slice(2, 8);
  const agentDir = join(
    tmpdir(),
    `dispatcher-expire-test-${process.pid}-${Date.now()}-${uniqueSuffix}`
  );
  try {
    for (const sub of ["state/sessions", "logs/daemon"]) {
      await fsp.mkdir(join(agentDir, sub), { recursive: true });
    }

    // Cooldown already expired (set to 1h ago).
    const expiredAt = Date.now() - 3_600_000;
    const statePath = join(agentDir, "state/sessions/backlog-cooldowns.json");
    await fsp.writeFile(statePath, JSON.stringify({ "TEST-3": expiredAt }) + "\n");

    // The state file must exist before the module is imported below.
    process.env.AGENT_DIR = agentDir;
    const { href } = new URL("./dispatcher.mjs", import.meta.url);
    const dispatcher = await import(`${href}?expired=${Math.random()}`);

    const verdict = dispatcher.canDispatchBacklog({ id: "TEST-3", title: "Expired cooldown item" });
    assert.equal(verdict.allowed, true);
  } finally {
    await cleanup(agentDir);
  }
});
|
|
103
|
+
|
|
104
|
+
// A "restart" is simulated by importing a fresh dispatcher instance over a
// directory that already holds a cooldown state file.
test("backlog-cooldowns.json persists across simulated daemon restarts", async () => {
  const uniqueSuffix = Math.random().toString(36).slice(2, 8);
  const agentDir = join(
    tmpdir(),
    `dispatcher-persist-test-${process.pid}-${Date.now()}-${uniqueSuffix}`
  );
  try {
    for (const sub of ["state/sessions", "logs/daemon"]) {
      await fsp.mkdir(join(agentDir, sub), { recursive: true });
    }

    // Cooldown that is still 30 minutes from expiring.
    const activeUntil = Date.now() + 30 * 60_000;
    const statePath = join(agentDir, "state/sessions/backlog-cooldowns.json");
    await fsp.writeFile(statePath, JSON.stringify({ "RESTART-1": activeUntil }) + "\n");

    process.env.AGENT_DIR = agentDir;
    const { href } = new URL("./dispatcher.mjs", import.meta.url);
    const dispatcher = await import(`${href}?persist=${Math.random()}`);

    const verdict = dispatcher.canDispatchBacklog({ id: "RESTART-1", title: "After restart" });
    assert.equal(verdict.allowed, false, "loaded cooldown should block dispatch");
    assert.equal(verdict.reason, "post_completion_cooldown");
  } finally {
    await cleanup(agentDir);
  }
});
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import { spawn } from "child_process";
|
|
6
6
|
import { appendFileSync, mkdirSync, writeFileSync, readFileSync, renameSync } from "fs";
|
|
7
|
-
import { join } from "path";
|
|
7
|
+
import { join, dirname } from "path";
|
|
8
8
|
import { releaseLock, releaseThreadLock, releaseRequestClaim, claimItem, releaseItemClaim } from "./session-lock.mjs";
|
|
9
9
|
import { recordSession } from "./health.mjs";
|
|
10
10
|
|
|
@@ -42,6 +42,41 @@ const activeBacklogKeys = new Set(); // backlog item key -> true (while session
|
|
|
42
42
|
const backlogRetryCount = new Map(); // backlog item key -> number of times dispatched
|
|
43
43
|
const MAX_BACKLOG_RETRIES = 6; // Max retries before skipping (was 3 — too aggressive)
|
|
44
44
|
|
|
45
|
+
// Post-completion cooldown for backlog items.
//
// After a session has run on a backlog item, the item is not dispatched
// again until its cooldown deadline has passed. The daemon gets no signal
// that the underlying work was really finished (sessions exit 0 even when
// they only "looked at" the item), so without a cooldown every 2-min
// backlog sweep re-dispatched the same items — 53 redundant spawns/day per
// item was the observed rate before this fix.
const FAILURE_COOLDOWN_MS = 1000 * 60 * 30; // 30m after non-zero exit
const SUCCESS_COOLDOWN_MS = 1000 * 60 * 60 * 4; // 4h after exit 0
// Location of the persisted cooldown deadlines, under the agent repo dir.
const COOLDOWN_STATE_PATH = join(AGENT_REPO_DIR, "state/sessions/backlog-cooldowns.json");
const backlogCooldownUntil = new Map(); // key -> epoch ms
|
|
55
|
+
|
|
56
|
+
/**
 * Restore persisted cooldowns into backlogCooldownUntil.
 *
 * Cooldown state survives daemon restarts so that a freshly-started daemon
 * doesn't immediately re-dispatch items it just completed. Only entries
 * whose value is a number later than the current time are restored. A
 * missing or malformed state file is ignored and the map is left as-is.
 */
function loadCooldowns() {
  try {
    const persisted = JSON.parse(readFileSync(COOLDOWN_STATE_PATH, "utf-8")) || {};
    const loadedAt = Date.now();
    Object.entries(persisted)
      .filter(([, expiresAt]) => typeof expiresAt === "number" && expiresAt > loadedAt)
      .forEach(([itemKey, expiresAt]) => {
        backlogCooldownUntil.set(itemKey, expiresAt);
      });
  } catch {
    /* file missing or malformed — start fresh */
  }
}
|
|
70
|
+
/**
 * Persist the in-memory cooldown map to COOLDOWN_STATE_PATH as JSON
 * (`{ [key]: epochMs }`).
 *
 * The file is written to a sibling temp path and then renamed over the
 * real one, so a crash mid-write cannot leave truncated JSON behind
 * (loadCooldowns silently discards a malformed file, which would drop
 * every cooldown). Entries that have already expired are not written;
 * loadCooldowns ignores them on load anyway.
 *
 * Best-effort: any filesystem error is swallowed so a failed save never
 * interrupts session handling.
 */
function saveCooldowns() {
  try {
    const now = Date.now();
    const obj = {};
    for (const [k, v] of backlogCooldownUntil) {
      if (v > now) obj[k] = v;
    }
    mkdirSync(dirname(COOLDOWN_STATE_PATH), { recursive: true });
    // pid in the temp name keeps two processes from sharing a temp file.
    const tmpPath = `${COOLDOWN_STATE_PATH}.${process.pid}.tmp`;
    writeFileSync(tmpPath, JSON.stringify(obj, null, 2) + "\n");
    renameSync(tmpPath, COOLDOWN_STATE_PATH);
  } catch { /* best-effort */ }
}
|
|
78
|
+
loadCooldowns();
|
|
79
|
+
|
|
45
80
|
function logDir() {
|
|
46
81
|
const dir = join(AGENT_REPO_DIR, "logs", "daemon");
|
|
47
82
|
mkdirSync(dir, { recursive: true });
|
|
@@ -183,6 +218,13 @@ export function canDispatchBacklog(item) {
|
|
|
183
218
|
if (retries >= MAX_BACKLOG_RETRIES) {
|
|
184
219
|
return { allowed: false, reason: "max_retries_exceeded", retries };
|
|
185
220
|
}
|
|
221
|
+
// Post-completion cooldown — prevent every-2-min re-dispatch of items
|
|
222
|
+
// that completed (success or failure) within the recent window.
|
|
223
|
+
const cooldownUntil = backlogCooldownUntil.get(key) || 0;
|
|
224
|
+
if (cooldownUntil > Date.now()) {
|
|
225
|
+
const remaining_min = Math.ceil((cooldownUntil - Date.now()) / 60000);
|
|
226
|
+
return { allowed: false, reason: "post_completion_cooldown", remaining_min };
|
|
227
|
+
}
|
|
186
228
|
return { allowed: true };
|
|
187
229
|
}
|
|
188
230
|
|
|
@@ -362,6 +404,19 @@ function spawnSession(entry) {
|
|
|
362
404
|
activeBacklogKeys.delete(key);
|
|
363
405
|
const retries = backlogRetryCount.get(key) || 0;
|
|
364
406
|
|
|
407
|
+
// Apply post-completion cooldown — different for success vs failure.
|
|
408
|
+
// This is the fix for the every-2-min re-dispatch loop: once a
|
|
409
|
+
// session has touched an item, we wait before touching it again.
|
|
410
|
+
const cooldownMs = code === 0 ? SUCCESS_COOLDOWN_MS : FAILURE_COOLDOWN_MS;
|
|
411
|
+
backlogCooldownUntil.set(key, Date.now() + cooldownMs);
|
|
412
|
+
saveCooldowns();
|
|
413
|
+
logSession({
|
|
414
|
+
event: "cooldown_set",
|
|
415
|
+
summary: classResult.summary,
|
|
416
|
+
exit_code: code,
|
|
417
|
+
cooldown_minutes: Math.round(cooldownMs / 60000),
|
|
418
|
+
});
|
|
419
|
+
|
|
365
420
|
// Release file-based item claim (ib-20260407-001b)
|
|
366
421
|
if (item.id) releaseItemClaim(item.id);
|
|
367
422
|
|
|
@@ -426,10 +481,13 @@ function spawnSession(entry) {
|
|
|
426
481
|
claimReleased = true;
|
|
427
482
|
}
|
|
428
483
|
|
|
429
|
-
// Release backlog tracking + item claim.
|
|
484
|
+
// Release backlog tracking + item claim. Apply failure cooldown so the
|
|
485
|
+
// same item isn't re-spawned on the next backlog sweep.
|
|
430
486
|
if (source === "backlog") {
|
|
431
487
|
const key = backlogKey(item);
|
|
432
488
|
activeBacklogKeys.delete(key);
|
|
489
|
+
backlogCooldownUntil.set(key, Date.now() + FAILURE_COOLDOWN_MS);
|
|
490
|
+
saveCooldowns();
|
|
433
491
|
// Release file-based item claim (ib-20260407-001b)
|
|
434
492
|
if (item.id) releaseItemClaim(item.id);
|
|
435
493
|
}
|