@adaptic/maestro 1.9.3 → 1.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/claude-bin.mjs +70 -0
- package/package.json +2 -2
- package/scripts/daemon/agent-daemon.mjs +562 -0
- package/scripts/daemon/cadence-consumer.mjs +7 -47
- package/scripts/daemon/cadence-handlers.mjs +22 -7
- package/scripts/daemon/classifier.mjs +5 -3
- package/scripts/daemon/dispatcher-cooldown.test.mjs +122 -0
- package/scripts/daemon/dispatcher.mjs +66 -4
- package/scripts/daemon/maestro-daemon.mjs +12 -9
- package/scripts/daemon/responder.mjs +5 -2
- package/scripts/daemon/sophie-daemon.mjs +11 -552
|
@@ -59,6 +59,7 @@ import {
|
|
|
59
59
|
logBusEvent,
|
|
60
60
|
busDepth,
|
|
61
61
|
} from "../../lib/cadence-bus.mjs";
|
|
62
|
+
import { resolveClaudeBin as sharedResolveClaude, augmentedPath } from "../../lib/claude-bin.mjs";
|
|
62
63
|
import { getCadenceDef } from "./cadence-handlers.mjs";
|
|
63
64
|
|
|
64
65
|
// ---------------------------------------------------------------------------
|
|
@@ -104,39 +105,10 @@ function defaultLogger(entry) {
|
|
|
104
105
|
}
|
|
105
106
|
}
|
|
106
107
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
* stuck in ravi-ai's DLQ. This resolver returns the first existing
|
|
112
|
-
* candidate among:
|
|
113
|
-
*
|
|
114
|
-
* 1. $CLAUDE_BIN env var (if set + executable)
|
|
115
|
-
* 2. ~/.local/bin/claude (default Claude Code install path)
|
|
116
|
-
* 3. /opt/homebrew/bin/claude (homebrew on Apple Silicon)
|
|
117
|
-
* 4. /usr/local/bin/claude (homebrew on Intel)
|
|
118
|
-
* 5. /usr/bin/claude
|
|
119
|
-
*
|
|
120
|
-
* Falls back to bare "claude" so the spawn's own error stays informative
|
|
121
|
-
* when nothing is found.
|
|
122
|
-
*/
|
|
123
|
-
let _resolvedClaude = null;
|
|
124
|
-
function resolveClaudeBin() {
|
|
125
|
-
if (_resolvedClaude) return _resolvedClaude;
|
|
126
|
-
const envOverride = process.env.CLAUDE_BIN;
|
|
127
|
-
const candidates = [
|
|
128
|
-
envOverride,
|
|
129
|
-
join(homedir(), ".local/bin/claude"),
|
|
130
|
-
"/opt/homebrew/bin/claude",
|
|
131
|
-
"/usr/local/bin/claude",
|
|
132
|
-
"/usr/bin/claude",
|
|
133
|
-
].filter(Boolean);
|
|
134
|
-
for (const c of candidates) {
|
|
135
|
-
if (existsSync(c)) { _resolvedClaude = c; return c; }
|
|
136
|
-
}
|
|
137
|
-
_resolvedClaude = "claude"; // last-resort; spawn will report ENOENT
|
|
138
|
-
return _resolvedClaude;
|
|
139
|
-
}
|
|
108
|
+
// Claude binary resolution moved to lib/claude-bin.mjs (shared by
|
|
109
|
+
// dispatcher, responder, and this consumer). See that file for the
|
|
110
|
+
// candidate search order.
|
|
111
|
+
const resolveClaudeBin = sharedResolveClaude;
|
|
140
112
|
|
|
141
113
|
/**
|
|
142
114
|
* Spawn a sub-session running the cadence's trigger prompt and resolve
|
|
@@ -164,24 +136,12 @@ function realSpawnSession({ agentRoot, cadence, promptPath, timeoutMs, log }) {
|
|
|
164
136
|
|
|
165
137
|
const bin = resolveClaudeBin();
|
|
166
138
|
const args = ["--print", "--dangerously-skip-permissions", body];
|
|
167
|
-
//
|
|
168
|
-
// can still be found. launchd's bare env strips /opt/homebrew/bin etc.
|
|
169
|
-
const augmentedPath = [
|
|
170
|
-
process.env.PATH || "",
|
|
171
|
-
`${homedir()}/.local/bin`,
|
|
172
|
-
"/opt/homebrew/bin",
|
|
173
|
-
"/opt/homebrew/sbin",
|
|
174
|
-
"/usr/local/bin",
|
|
175
|
-
"/usr/bin",
|
|
176
|
-
"/bin",
|
|
177
|
-
"/usr/sbin",
|
|
178
|
-
"/sbin",
|
|
179
|
-
].filter(Boolean).join(":");
|
|
139
|
+
// PATH augmented via lib/claude-bin.mjs so subsession can find jq/node.
|
|
180
140
|
const env = {
|
|
181
141
|
...process.env,
|
|
182
142
|
AGENT_ROOT: agentRoot,
|
|
183
143
|
AGENT_DIR: agentRoot,
|
|
184
|
-
PATH: augmentedPath,
|
|
144
|
+
PATH: augmentedPath(),
|
|
185
145
|
};
|
|
186
146
|
// Auth handling. Claude Code authenticates via macOS Keychain
|
|
187
147
|
// (OAuth from the user's Pro/Max subscription) when no API key is
|
|
@@ -129,16 +129,31 @@ async function guardInboxProcessor({ agentRoot }) {
|
|
|
129
129
|
|
|
130
130
|
/**
|
|
131
131
|
* backlog-executor guard:
|
|
132
|
-
*
|
|
133
|
-
*
|
|
134
|
-
* -
|
|
132
|
+
* The reactive daemon (agent-daemon.mjs) already runs an internal
|
|
133
|
+
* backlog sweep every BACKLOG_INTERVAL (default 10min) which dispatches
|
|
134
|
+
* one session per top-priority queue item with proper post-completion
|
|
135
|
+
* cooldowns. Running the cadence-bus backlog-executor on top of that
|
|
136
|
+
* spawns DUPLICATE sessions for the same items — observed as 159
|
|
137
|
+
* redundant spawns / day in ravi-ai's logs.
|
|
138
|
+
*
|
|
139
|
+
* So this guard now always returns inline. The reactive daemon owns
|
|
140
|
+
* the backlog. Operators who prefer the cadence-only mode (no reactive
|
|
141
|
+
* daemon) can set BACKLOG_CADENCE_ESCALATE=1 in .env to flip behaviour.
|
|
135
142
|
*/
|
|
136
143
|
async function guardBacklogExecutor({ agentRoot }) {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
144
|
+
if (process.env.BACKLOG_CADENCE_ESCALATE === "1") {
|
|
145
|
+
const withWork = queuesWithWork(agentRoot);
|
|
146
|
+
if (withWork.length === 0) {
|
|
147
|
+
return { ok: true, decision: "inline", reason: "all queues empty" };
|
|
148
|
+
}
|
|
149
|
+
return { ok: true, decision: "escalate", queues_with_work: withWork };
|
|
140
150
|
}
|
|
141
|
-
return {
|
|
151
|
+
return {
|
|
152
|
+
ok: true,
|
|
153
|
+
decision: "inline",
|
|
154
|
+
reason: "reactive-daemon-owns-backlog",
|
|
155
|
+
note: "Set BACKLOG_CADENCE_ESCALATE=1 to override.",
|
|
156
|
+
};
|
|
142
157
|
}
|
|
143
158
|
|
|
144
159
|
/**
|
|
@@ -102,8 +102,9 @@ function loadAgentRegistry() {
|
|
|
102
102
|
|
|
103
103
|
const ANTHROPIC_MODEL = "claude-haiku-4-5-20251001";
|
|
104
104
|
const OPENAI_MODEL = "gpt-4o-mini";
|
|
105
|
-
//
|
|
106
|
-
|
|
105
|
+
// Resolve claude against the agent's PATH (not launchd's bare env).
|
|
106
|
+
import { resolveClaudeBin, augmentedPath } from "../../lib/claude-bin.mjs";
|
|
107
|
+
const CLAUDE_BIN = resolveClaudeBin();
|
|
107
108
|
const CLAUDE_CLI_TIMEOUT_MS = 30_000;
|
|
108
109
|
|
|
109
110
|
// ── System prompt shared by both LLM classifiers ────────────────────────────
|
|
@@ -321,7 +322,8 @@ async function runClaudeCLI(systemPrompt, userPrompt) {
|
|
|
321
322
|
stdio: ["pipe", "pipe", "pipe"],
|
|
322
323
|
// Force claude CLI onto keychain OAuth (Max subscription); strip any
|
|
323
324
|
// stale ANTHROPIC_API_KEY/AUTH_TOKEN inherited from the daemon env.
|
|
324
|
-
|
|
325
|
+
// Augment PATH so spawned subsessions find homebrew/nvm binaries.
|
|
326
|
+
env: { ...process.env, PATH: augmentedPath(), ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
|
|
325
327
|
});
|
|
326
328
|
|
|
327
329
|
let stdout = "";
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* dispatcher-cooldown.test.mjs — Coverage for the backlog post-completion
|
|
3
|
+
* cooldown that stopped the 159-redundant-spawns-per-day loop.
|
|
4
|
+
*
|
|
5
|
+
* The dispatcher module is stateful (in-memory Sets/Maps for active
|
|
6
|
+
* sessions, retry counts, cooldowns) and reads from AGENT_DIR at import
|
|
7
|
+
* time, so each test isolates by setting AGENT_DIR before dynamic-importing
|
|
8
|
+
* a fresh module instance.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { test } from "node:test";
|
|
12
|
+
import assert from "node:assert/strict";
|
|
13
|
+
import { promises as fsp } from "fs";
|
|
14
|
+
import { tmpdir } from "os";
|
|
15
|
+
import { join } from "path";
|
|
16
|
+
|
|
17
|
+
async function freshDispatcher() {
|
|
18
|
+
// Isolated tmpdir per test so cooldown state file doesn't bleed.
|
|
19
|
+
const dir = join(
|
|
20
|
+
tmpdir(),
|
|
21
|
+
`dispatcher-test-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
|
|
22
|
+
);
|
|
23
|
+
await fsp.mkdir(join(dir, "state/sessions"), { recursive: true });
|
|
24
|
+
await fsp.mkdir(join(dir, "logs/daemon"), { recursive: true });
|
|
25
|
+
process.env.AGENT_DIR = dir;
|
|
26
|
+
// Bust the module cache so state resets cleanly.
|
|
27
|
+
const url = new URL("./dispatcher.mjs", import.meta.url);
|
|
28
|
+
const mod = await import(`${url.href}?test=${Math.random()}`);
|
|
29
|
+
return { mod, dir };
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
async function cleanup(dir) {
|
|
33
|
+
try { await fsp.rm(dir, { recursive: true, force: true }); } catch { /* */ }
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
test("canDispatchBacklog: allowed for a fresh item", async () => {
|
|
37
|
+
const { mod, dir } = await freshDispatcher();
|
|
38
|
+
try {
|
|
39
|
+
const r = mod.canDispatchBacklog({ id: "TEST-1", title: "Fresh item" });
|
|
40
|
+
assert.equal(r.allowed, true);
|
|
41
|
+
} finally { await cleanup(dir); }
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
test("canDispatchBacklog: blocked while cooldown is active", async () => {
|
|
45
|
+
const { mod, dir } = await freshDispatcher();
|
|
46
|
+
try {
|
|
47
|
+
// Simulate a session completion by directly writing the cooldown state
|
|
48
|
+
// file the dispatcher reads on init. We can't easily call internal
|
|
49
|
+
// setters, but the cooldown file IS public API for state persistence.
|
|
50
|
+
const cooldownPath = join(dir, "state/sessions/backlog-cooldowns.json");
|
|
51
|
+
const tomorrow = Date.now() + 60 * 60 * 1000; // +1h
|
|
52
|
+
await fsp.writeFile(cooldownPath, JSON.stringify({
|
|
53
|
+
"TEST-2": tomorrow,
|
|
54
|
+
}) + "\n");
|
|
55
|
+
// Re-import to load the cooldown file.
|
|
56
|
+
const { mod: mod2, dir: dir2 } = await freshDispatcher();
|
|
57
|
+
try {
|
|
58
|
+
// Plant the same cooldown in this fresh dispatcher's dir.
|
|
59
|
+
await fsp.writeFile(
|
|
60
|
+
join(dir2, "state/sessions/backlog-cooldowns.json"),
|
|
61
|
+
JSON.stringify({ "TEST-2": tomorrow }) + "\n"
|
|
62
|
+
);
|
|
63
|
+
// Reload yet again with the cooldown file present.
|
|
64
|
+
const { mod: mod3, dir: dir3 } = await freshDispatcher();
|
|
65
|
+
try {
|
|
66
|
+
await fsp.writeFile(
|
|
67
|
+
join(dir3, "state/sessions/backlog-cooldowns.json"),
|
|
68
|
+
JSON.stringify({ "TEST-2": tomorrow }) + "\n"
|
|
69
|
+
);
|
|
70
|
+
// Final import — this one reads the cooldown file at module load.
|
|
71
|
+
process.env.AGENT_DIR = dir3;
|
|
72
|
+
const url = new URL("./dispatcher.mjs", import.meta.url);
|
|
73
|
+
const final = await import(`${url.href}?cooldown=${Math.random()}`);
|
|
74
|
+
const r = final.canDispatchBacklog({ id: "TEST-2", title: "Cooldown item" });
|
|
75
|
+
assert.equal(r.allowed, false);
|
|
76
|
+
assert.equal(r.reason, "post_completion_cooldown");
|
|
77
|
+
assert.ok(r.remaining_min >= 1 && r.remaining_min <= 60);
|
|
78
|
+
} finally { await cleanup(dir3); }
|
|
79
|
+
} finally { await cleanup(dir2); }
|
|
80
|
+
} finally { await cleanup(dir); }
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
test("canDispatchBacklog: allowed after cooldown expires", async () => {
|
|
84
|
+
const dir = join(
|
|
85
|
+
tmpdir(),
|
|
86
|
+
`dispatcher-expire-test-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
|
|
87
|
+
);
|
|
88
|
+
try {
|
|
89
|
+
await fsp.mkdir(join(dir, "state/sessions"), { recursive: true });
|
|
90
|
+
await fsp.mkdir(join(dir, "logs/daemon"), { recursive: true });
|
|
91
|
+
// Cooldown already expired (set to 1h ago).
|
|
92
|
+
await fsp.writeFile(
|
|
93
|
+
join(dir, "state/sessions/backlog-cooldowns.json"),
|
|
94
|
+
JSON.stringify({ "TEST-3": Date.now() - 3_600_000 }) + "\n"
|
|
95
|
+
);
|
|
96
|
+
process.env.AGENT_DIR = dir;
|
|
97
|
+
const url = new URL("./dispatcher.mjs", import.meta.url);
|
|
98
|
+
const mod = await import(`${url.href}?expired=${Math.random()}`);
|
|
99
|
+
const r = mod.canDispatchBacklog({ id: "TEST-3", title: "Expired cooldown item" });
|
|
100
|
+
assert.equal(r.allowed, true);
|
|
101
|
+
} finally { await cleanup(dir); }
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
test("backlog-cooldowns.json persists across simulated daemon restarts", async () => {
|
|
105
|
+
const dir = join(
|
|
106
|
+
tmpdir(),
|
|
107
|
+
`dispatcher-persist-test-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
|
|
108
|
+
);
|
|
109
|
+
try {
|
|
110
|
+
await fsp.mkdir(join(dir, "state/sessions"), { recursive: true });
|
|
111
|
+
await fsp.mkdir(join(dir, "logs/daemon"), { recursive: true });
|
|
112
|
+
const cooldownPath = join(dir, "state/sessions/backlog-cooldowns.json");
|
|
113
|
+
const future = Date.now() + 30 * 60_000;
|
|
114
|
+
await fsp.writeFile(cooldownPath, JSON.stringify({ "RESTART-1": future }) + "\n");
|
|
115
|
+
process.env.AGENT_DIR = dir;
|
|
116
|
+
const url = new URL("./dispatcher.mjs", import.meta.url);
|
|
117
|
+
const mod = await import(`${url.href}?persist=${Math.random()}`);
|
|
118
|
+
const r = mod.canDispatchBacklog({ id: "RESTART-1", title: "After restart" });
|
|
119
|
+
assert.equal(r.allowed, false, "loaded cooldown should block dispatch");
|
|
120
|
+
assert.equal(r.reason, "post_completion_cooldown");
|
|
121
|
+
} finally { await cleanup(dir); }
|
|
122
|
+
});
|
|
@@ -4,12 +4,15 @@
|
|
|
4
4
|
|
|
5
5
|
import { spawn } from "child_process";
|
|
6
6
|
import { appendFileSync, mkdirSync, writeFileSync, readFileSync, renameSync } from "fs";
|
|
7
|
-
import { join } from "path";
|
|
7
|
+
import { join, dirname } from "path";
|
|
8
8
|
import { releaseLock, releaseThreadLock, releaseRequestClaim, claimItem, releaseItemClaim } from "./session-lock.mjs";
|
|
9
9
|
import { recordSession } from "./health.mjs";
|
|
10
10
|
|
|
11
11
|
const AGENT_REPO_DIR = process.env.AGENT_DIR || join(new URL(".", import.meta.url).pathname, "../..");
|
|
12
|
-
|
|
12
|
+
// Resolve the claude binary against the agent's PATH (not launchd's bare
|
|
13
|
+
// env). Without this, every daemon-spawned `claude --print` exits ENOENT.
|
|
14
|
+
import { resolveClaudeBin, augmentedPath } from "../../lib/claude-bin.mjs";
|
|
15
|
+
const CLAUDE_BIN = resolveClaudeBin();
|
|
13
16
|
const MAX_CONCURRENT = parseInt(process.env.DAEMON_MAX_CONCURRENT || "10", 10);
|
|
14
17
|
const RESERVED_INBOX_SLOTS = 3; // Always keep 3 slots free for real-time inbox items
|
|
15
18
|
|
|
@@ -39,6 +42,41 @@ const activeBacklogKeys = new Set(); // backlog item key -> true (while session
|
|
|
39
42
|
const backlogRetryCount = new Map(); // backlog item key -> number of times dispatched
|
|
40
43
|
const MAX_BACKLOG_RETRIES = 6; // Max retries before skipping (was 3 — too aggressive)
|
|
41
44
|
|
|
45
|
+
// Post-completion cooldown — once a session has run on a backlog item, don't
|
|
46
|
+
// re-dispatch it until N hours later. Without this, every 2-min backlog
|
|
47
|
+
// sweep re-dispatches the same items because the daemon has no signal
|
|
48
|
+
// that the underlying work was actually completed (sessions exit 0 even
|
|
49
|
+
// when they only "looked at" the item). 53 redundant spawns/day per item
|
|
50
|
+
// was the observed rate before this fix.
|
|
51
|
+
const SUCCESS_COOLDOWN_MS = 4 * 60 * 60 * 1000; // 4h after exit 0
|
|
52
|
+
const FAILURE_COOLDOWN_MS = 30 * 60 * 1000; // 30m after non-zero exit
|
|
53
|
+
const backlogCooldownUntil = new Map(); // key -> epoch ms
|
|
54
|
+
const COOLDOWN_STATE_PATH = join(AGENT_REPO_DIR, "state/sessions/backlog-cooldowns.json");
|
|
55
|
+
|
|
56
|
+
// Persist cooldown state across daemon restarts so a freshly-started
|
|
57
|
+
// daemon doesn't immediately re-dispatch items it just completed.
|
|
58
|
+
function loadCooldowns() {
|
|
59
|
+
try {
|
|
60
|
+
const body = readFileSync(COOLDOWN_STATE_PATH, "utf-8");
|
|
61
|
+
const data = JSON.parse(body);
|
|
62
|
+
const now = Date.now();
|
|
63
|
+
for (const [key, until] of Object.entries(data || {})) {
|
|
64
|
+
if (typeof until === "number" && until > now) {
|
|
65
|
+
backlogCooldownUntil.set(key, until);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
} catch { /* file missing or malformed — start fresh */ }
|
|
69
|
+
}
|
|
70
|
+
function saveCooldowns() {
|
|
71
|
+
try {
|
|
72
|
+
const obj = {};
|
|
73
|
+
for (const [k, v] of backlogCooldownUntil) obj[k] = v;
|
|
74
|
+
mkdirSync(dirname(COOLDOWN_STATE_PATH), { recursive: true });
|
|
75
|
+
writeFileSync(COOLDOWN_STATE_PATH, JSON.stringify(obj, null, 2) + "\n");
|
|
76
|
+
} catch { /* best-effort */ }
|
|
77
|
+
}
|
|
78
|
+
loadCooldowns();
|
|
79
|
+
|
|
42
80
|
function logDir() {
|
|
43
81
|
const dir = join(AGENT_REPO_DIR, "logs", "daemon");
|
|
44
82
|
mkdirSync(dir, { recursive: true });
|
|
@@ -180,6 +218,13 @@ export function canDispatchBacklog(item) {
|
|
|
180
218
|
if (retries >= MAX_BACKLOG_RETRIES) {
|
|
181
219
|
return { allowed: false, reason: "max_retries_exceeded", retries };
|
|
182
220
|
}
|
|
221
|
+
// Post-completion cooldown — prevent every-2-min re-dispatch of items
|
|
222
|
+
// that completed (success or failure) within the recent window.
|
|
223
|
+
const cooldownUntil = backlogCooldownUntil.get(key) || 0;
|
|
224
|
+
if (cooldownUntil > Date.now()) {
|
|
225
|
+
const remaining_min = Math.ceil((cooldownUntil - Date.now()) / 60000);
|
|
226
|
+
return { allowed: false, reason: "post_completion_cooldown", remaining_min };
|
|
227
|
+
}
|
|
183
228
|
return { allowed: true };
|
|
184
229
|
}
|
|
185
230
|
|
|
@@ -281,9 +326,10 @@ function spawnSession(entry) {
|
|
|
281
326
|
// to the keychain OAuth (Max subscription) per CEO directive 2026-04-27.
|
|
282
327
|
// A stale ANTHROPIC_API_KEY in the daemon's inherited env will otherwise
|
|
283
328
|
// override the OAuth token and cause "Invalid API key" failures.
|
|
329
|
+
// PATH is augmented so the spawn finds homebrew/nvm tools (jq, node, etc).
|
|
284
330
|
const proc = spawn(CLAUDE_BIN, args, {
|
|
285
331
|
cwd: AGENT_REPO_DIR,
|
|
286
|
-
env: { ...process.env, ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
|
|
332
|
+
env: { ...process.env, PATH: augmentedPath(), ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
|
|
287
333
|
stdio: ["ignore", "pipe", "pipe"],
|
|
288
334
|
});
|
|
289
335
|
|
|
@@ -358,6 +404,19 @@ function spawnSession(entry) {
|
|
|
358
404
|
activeBacklogKeys.delete(key);
|
|
359
405
|
const retries = backlogRetryCount.get(key) || 0;
|
|
360
406
|
|
|
407
|
+
// Apply post-completion cooldown — different for success vs failure.
|
|
408
|
+
// This is the fix for the every-2-min re-dispatch loop: once a
|
|
409
|
+
// session has touched an item, we wait before touching it again.
|
|
410
|
+
const cooldownMs = code === 0 ? SUCCESS_COOLDOWN_MS : FAILURE_COOLDOWN_MS;
|
|
411
|
+
backlogCooldownUntil.set(key, Date.now() + cooldownMs);
|
|
412
|
+
saveCooldowns();
|
|
413
|
+
logSession({
|
|
414
|
+
event: "cooldown_set",
|
|
415
|
+
summary: classResult.summary,
|
|
416
|
+
exit_code: code,
|
|
417
|
+
cooldown_minutes: Math.round(cooldownMs / 60000),
|
|
418
|
+
});
|
|
419
|
+
|
|
361
420
|
// Release file-based item claim (ib-20260407-001b)
|
|
362
421
|
if (item.id) releaseItemClaim(item.id);
|
|
363
422
|
|
|
@@ -422,10 +481,13 @@ function spawnSession(entry) {
|
|
|
422
481
|
claimReleased = true;
|
|
423
482
|
}
|
|
424
483
|
|
|
425
|
-
// Release backlog tracking + item claim.
|
|
484
|
+
// Release backlog tracking + item claim. Apply failure cooldown so the
|
|
485
|
+
// same item isn't re-spawned on the next backlog sweep.
|
|
426
486
|
if (source === "backlog") {
|
|
427
487
|
const key = backlogKey(item);
|
|
428
488
|
activeBacklogKeys.delete(key);
|
|
489
|
+
backlogCooldownUntil.set(key, Date.now() + FAILURE_COOLDOWN_MS);
|
|
490
|
+
saveCooldowns();
|
|
429
491
|
// Release file-based item claim (ib-20260407-001b)
|
|
430
492
|
if (item.id) releaseItemClaim(item.id);
|
|
431
493
|
}
|
|
@@ -8,11 +8,12 @@
|
|
|
8
8
|
//
|
|
9
9
|
// Lifecycle:
|
|
10
10
|
// 1. Honour .emergency-stop BEFORE doing anything (don't acquire singleton
|
|
11
|
-
// lock, don't start consumer, don't import
|
|
11
|
+
// lock, don't start consumer, don't import the core daemon). Stops the
|
|
12
12
|
// launchd restart treadmill cold.
|
|
13
13
|
// 2. Acquire the daemon singleton lock so only one instance runs.
|
|
14
14
|
// 3. Start the cadence consumer (state/cadence-bus/ drain loop).
|
|
15
|
-
// 4. Import the core daemon (
|
|
15
|
+
// 4. Import the core daemon (agent-daemon.mjs canonical, with fallbacks
|
|
16
|
+
// to legacy sophie-daemon.mjs or <firstName>-daemon.mjs for back-compat).
|
|
16
17
|
//
|
|
17
18
|
// Run: node scripts/daemon/maestro-daemon.mjs
|
|
18
19
|
// Install: launchd plist with KeepAlive.SuccessfulExit: false (clean exits
|
|
@@ -82,13 +83,15 @@ try {
|
|
|
82
83
|
// 4. Core daemon import
|
|
83
84
|
// ---------------------------------------------------------------------------
|
|
84
85
|
// Resolve the core daemon module. Try, in order:
|
|
85
|
-
// 1. ./
|
|
86
|
-
// 2.
|
|
87
|
-
// 3.
|
|
86
|
+
// 1. ./agent-daemon.mjs — canonical filename (1.9.4+)
|
|
87
|
+
// 2. ./sophie-daemon.mjs — legacy canonical (pre-1.9.4)
|
|
88
|
+
// 3. ./<firstName>-daemon.mjs — even older per-agent rename
|
|
89
|
+
// 4. The first scripts/daemon/*-daemon.mjs that isn't this file
|
|
88
90
|
function resolveCoreDaemon() {
|
|
89
|
-
const localCandidates = [
|
|
90
|
-
|
|
91
|
-
|
|
91
|
+
const localCandidates = [
|
|
92
|
+
resolve(__dirname, "agent-daemon.mjs"),
|
|
93
|
+
resolve(__dirname, "sophie-daemon.mjs"),
|
|
94
|
+
];
|
|
92
95
|
|
|
93
96
|
try {
|
|
94
97
|
const agentJson = join(AGENT_DIR, "config", "agent.json");
|
|
@@ -116,7 +119,7 @@ function resolveCoreDaemon() {
|
|
|
116
119
|
|
|
117
120
|
const coreDaemon = resolveCoreDaemon();
|
|
118
121
|
if (!coreDaemon) {
|
|
119
|
-
console.error("[DAEMON] could not locate a core daemon module under scripts/daemon/. Expected
|
|
122
|
+
console.error("[DAEMON] could not locate a core daemon module under scripts/daemon/. Expected agent-daemon.mjs (canonical) or sophie-daemon.mjs / <firstName>-daemon.mjs (legacy).");
|
|
120
123
|
process.exit(78);
|
|
121
124
|
}
|
|
122
125
|
// Import and run the daemon (handles its own .env loading).
|
|
@@ -25,7 +25,9 @@ import { routingKey as deriveRoutingKey, createRouter } from "./lib/session-rout
|
|
|
25
25
|
|
|
26
26
|
const AGENT_REPO_DIR = process.env.AGENT_DIR || join(new URL(".", import.meta.url).pathname, "../..");
|
|
27
27
|
const SONNET_MODEL = "claude-sonnet-4-6";
|
|
28
|
-
|
|
28
|
+
// Resolve claude against the agent's PATH (not launchd's bare env).
|
|
29
|
+
import { resolveClaudeBin, augmentedPath } from "../../lib/claude-bin.mjs";
|
|
30
|
+
const CLAUDE_BIN = resolveClaudeBin();
|
|
29
31
|
const CLAUDE_CLI_TIMEOUT_MS = 60_000;
|
|
30
32
|
const SESSION_REGISTRY_PATH = join(AGENT_REPO_DIR, "state", "daemon", "session-router-registry.json");
|
|
31
33
|
|
|
@@ -140,7 +142,8 @@ function runClaudeCLI(systemPrompt, userPrompt, model, opts = {}) {
|
|
|
140
142
|
stdio: ["pipe", "pipe", "pipe"],
|
|
141
143
|
// Force claude CLI onto keychain OAuth (Max subscription); strip any
|
|
142
144
|
// stale ANTHROPIC_API_KEY/AUTH_TOKEN inherited from the daemon env.
|
|
143
|
-
|
|
145
|
+
// Augment PATH so the subsession finds homebrew/nvm tools.
|
|
146
|
+
env: { ...process.env, PATH: augmentedPath(), ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
|
|
144
147
|
});
|
|
145
148
|
|
|
146
149
|
let stdout = "";
|