@adaptic/maestro 1.7.3 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/.claude/commands/init-maestro.md +15 -2
  2. package/.gitignore +7 -0
  3. package/README.md +62 -11
  4. package/bin/maestro.mjs +338 -2
  5. package/bin/maestro.test.mjs +299 -0
  6. package/docs/guides/poller-daemon-setup.md +21 -8
  7. package/docs/runbooks/perpetual-operations.md +19 -15
  8. package/docs/runbooks/recovery-and-failover.md +42 -0
  9. package/lib/cadence-bus.mjs +625 -0
  10. package/lib/cadence-bus.test.mjs +354 -0
  11. package/package.json +6 -1
  12. package/scaffold/CLAUDE.md +11 -7
  13. package/scripts/cadence/cadence-status.mjs +36 -0
  14. package/scripts/cadence/enqueue-cadence-tick.mjs +158 -0
  15. package/scripts/cadence/enqueue-cadence-tick.test.mjs +154 -0
  16. package/scripts/cadence/launchd-cadence-wrapper.sh +85 -0
  17. package/scripts/daemon/cadence-consumer.mjs +439 -0
  18. package/scripts/daemon/cadence-consumer.test.mjs +397 -0
  19. package/scripts/daemon/cadence-handlers.mjs +263 -0
  20. package/scripts/daemon/maestro-daemon.mjs +20 -0
  21. package/scripts/local-triggers/generate-plists.sh +33 -12
  22. package/scripts/local-triggers/generate-plists.test.mjs +185 -0
  23. package/scripts/local-triggers/plists/.gitkeep +0 -0
  24. package/scripts/local-triggers/run-trigger.sh +22 -3
  25. package/scripts/local-triggers/plists/ai.adaptic.sophie-backlog-executor.plist +0 -21
  26. package/scripts/local-triggers/plists/ai.adaptic.sophie-daemon.plist +0 -32
  27. package/scripts/local-triggers/plists/ai.adaptic.sophie-inbox-processor.plist +0 -21
  28. package/scripts/local-triggers/plists/ai.adaptic.sophie-meeting-action-capture.plist +0 -21
  29. package/scripts/local-triggers/plists/ai.adaptic.sophie-meeting-prep.plist +0 -21
  30. package/scripts/local-triggers/plists/ai.adaptic.sophie-midday-sweep.plist +0 -26
  31. package/scripts/local-triggers/plists/ai.adaptic.sophie-quarterly-self-assessment.plist +0 -62
  32. package/scripts/local-triggers/plists/ai.adaptic.sophie-weekly-engineering-health.plist +0 -28
  33. package/scripts/local-triggers/plists/ai.adaptic.sophie-weekly-execution.plist +0 -28
  34. package/scripts/local-triggers/plists/ai.adaptic.sophie-weekly-hiring.plist +0 -28
  35. package/scripts/local-triggers/plists/ai.adaptic.sophie-weekly-priorities.plist +0 -28
  36. package/scripts/local-triggers/plists/ai.adaptic.sophie-weekly-strategic-memo.plist +0 -28
@@ -0,0 +1,154 @@
1
+ /**
2
+ * enqueue-cadence-tick.test.mjs — node:test coverage for the CLI enqueue
3
+ * script (the entry point used by launchd plists).
4
+ */
5
+
6
+ import { test } from "node:test";
7
+ import assert from "node:assert/strict";
8
+ import { promises as fsp } from "fs";
9
+ import { writeFileSync, readFileSync, existsSync } from "node:fs";
10
+ import { tmpdir } from "os";
11
+ import { join, resolve, dirname } from "path";
12
+ import { fileURLToPath } from "node:url";
13
+ import { spawnSync } from "node:child_process";
14
+
15
+ const __dirname = dirname(fileURLToPath(import.meta.url));
16
+ const ENQUEUE_SCRIPT = resolve(__dirname, "enqueue-cadence-tick.mjs");
17
+
18
+ async function makeAgentRoot() {
19
+ const path = join(
20
+ tmpdir(),
21
+ `enqueue-cli-test-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
22
+ );
23
+ await fsp.mkdir(path, { recursive: true });
24
+ return path;
25
+ }
26
+
27
+ async function rmRoot(path) {
28
+ try { await fsp.rm(path, { recursive: true, force: true }); } catch { /* */ }
29
+ }
30
+
31
+ function run(cwd, args, env = {}) {
32
+ return spawnSync(process.execPath, [ENQUEUE_SCRIPT, ...args], {
33
+ cwd,
34
+ env: { ...process.env, AGENT_ROOT: cwd, ...env },
35
+ encoding: "utf-8",
36
+ });
37
+ }
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Happy path
41
+ // ---------------------------------------------------------------------------
42
+
43
+ test("CLI enqueues a tick and writes inbox JSON", async () => {
44
+ const root = await makeAgentRoot();
45
+ try {
46
+ const r = run(root, ["inbox-processor", "--source=launchd"]);
47
+ assert.equal(r.status, 0, r.stderr);
48
+ const out = JSON.parse(r.stdout.trim());
49
+ assert.equal(out.ok, true);
50
+ assert.equal(out.cadence, "inbox-processor");
51
+ assert.equal(out.fallback_only, false);
52
+ assert.ok(out.path);
53
+ assert.ok(existsSync(out.path));
54
+ const event = JSON.parse(readFileSync(out.path, "utf-8"));
55
+ assert.equal(event.source, "launchd");
56
+ assert.equal(event.cadence, "inbox-processor");
57
+ } finally { await rmRoot(root); }
58
+ });
59
+
60
+ test("CLI accepts metadata pairs (repeated and comma-separated)", async () => {
61
+ const root = await makeAgentRoot();
62
+ try {
63
+ const r = run(root, [
64
+ "weekly-strategic-memo",
65
+ "--source=manual",
66
+ "--metadata=note=launch",
67
+ "--metadata=k1=v1,k2=v2",
68
+ ]);
69
+ assert.equal(r.status, 0);
70
+ const out = JSON.parse(r.stdout.trim());
71
+ const event = JSON.parse(readFileSync(out.path, "utf-8"));
72
+ assert.equal(event.metadata.note, "launch");
73
+ assert.equal(event.metadata.k1, "v1");
74
+ assert.equal(event.metadata.k2, "v2");
75
+ } finally { await rmRoot(root); }
76
+ });
77
+
78
+ test("CLI --quiet suppresses stdout JSON", async () => {
79
+ const root = await makeAgentRoot();
80
+ try {
81
+ const r = run(root, ["inbox-processor", "--source=launchd", "--quiet"]);
82
+ assert.equal(r.status, 0);
83
+ assert.equal(r.stdout.trim(), "");
84
+ } finally { await rmRoot(root); }
85
+ });
86
+
87
+ test("CLI --version prints bus schema version", async () => {
88
+ const r = spawnSync(process.execPath, [ENQUEUE_SCRIPT, "--version"], { encoding: "utf-8" });
89
+ assert.equal(r.status, 0);
90
+ assert.match(r.stdout, /^\d+$/m);
91
+ });
92
+
93
+ test("CLI --help prints usage and exits 0", async () => {
94
+ const r = spawnSync(process.execPath, [ENQUEUE_SCRIPT, "--help"], { encoding: "utf-8" });
95
+ assert.equal(r.status, 0);
96
+ assert.match(r.stdout, /enqueue-cadence-tick/);
97
+ });
98
+
99
+ // ---------------------------------------------------------------------------
100
+ // Error paths
101
+ // ---------------------------------------------------------------------------
102
+
103
+ test("CLI exits 2 with no cadence", async () => {
104
+ const root = await makeAgentRoot();
105
+ try {
106
+ const r = run(root, []);
107
+ assert.equal(r.status, 2);
108
+ assert.match(r.stderr, /required/);
109
+ } finally { await rmRoot(root); }
110
+ });
111
+
112
+ test("CLI exits 2 on unknown flag", async () => {
113
+ const root = await makeAgentRoot();
114
+ try {
115
+ const r = run(root, ["x", "--what-is-this"]);
116
+ assert.equal(r.status, 2);
117
+ assert.match(r.stderr, /unknown flag/);
118
+ } finally { await rmRoot(root); }
119
+ });
120
+
121
+ // ---------------------------------------------------------------------------
122
+ // Emergency stop
123
+ // ---------------------------------------------------------------------------
124
+
125
+ test("CLI returns 0 with skipped=emergency-stop when stop flag exists", async () => {
126
+ const root = await makeAgentRoot();
127
+ try {
128
+ writeFileSync(join(root, ".emergency-stop"), "now");
129
+ const r = run(root, ["weekly-strategic-memo", "--source=launchd"]);
130
+ assert.equal(r.status, 0);
131
+ const out = JSON.parse(r.stdout.trim());
132
+ assert.equal(out.skipped, "emergency-stop");
133
+ assert.equal(out.fallback_only, true);
134
+ assert.equal(out.path, null);
135
+ // No file landed in inbox/.
136
+ assert.ok(!existsSync(join(root, "state/cadence-bus/inbox", `${out.id}.json`)));
137
+ } finally { await rmRoot(root); }
138
+ });
139
+
140
+ // ---------------------------------------------------------------------------
141
+ // Bus-handoff sanity — the CLI must finish in well under a second so launchd
142
+ // never piles up; we don't pin a number, just a generous ceiling.
143
+ // ---------------------------------------------------------------------------
144
+
145
+ test("CLI completes in well under 2 seconds", async () => {
146
+ const root = await makeAgentRoot();
147
+ try {
148
+ const t0 = Date.now();
149
+ const r = run(root, ["inbox-processor", "--source=launchd", "--quiet"]);
150
+ const dt = Date.now() - t0;
151
+ assert.equal(r.status, 0);
152
+ assert.ok(dt < 2000, `enqueue took ${dt}ms`);
153
+ } finally { await rmRoot(root); }
154
+ });
@@ -0,0 +1,85 @@
1
+ #!/bin/bash
2
+ # launchd-cadence-wrapper.sh — Bootstraps env and execs the Node cadence
3
+ # enqueue script under launchd.
4
+ #
5
+ # Usage in a plist:
6
+ # <key>ProgramArguments</key>
7
+ # <array>
8
+ # <string>/path/to/scripts/cadence/launchd-cadence-wrapper.sh</string>
9
+ # <string>/path/to/scripts/cadence/enqueue-cadence-tick.mjs</string>
10
+ # <string>cadence-name</string>
11
+ # <string>--source=launchd</string>
12
+ # </array>
13
+ #
14
+ # What it does:
15
+ # 1. Sets HOME, PATH, USER, AGENT_ROOT (launchd's bare env doesn't include them).
16
+ # 2. Picks a node binary (nvm, homebrew, system fallback).
17
+ # 3. Redirects stdout/stderr to logs/cadence-bus/launchd-YYYY-MM-DD.log.
18
+ # 4. Exec's `node <enqueue-script> "$@"`.
19
+ #
20
+ # This is a *pure enqueue* wrapper — it must NOT spawn Claude Code. The
21
+ # heavyweight cadence work happens inside the persistent daemon's cadence
22
+ # consumer, not here.
23
+
24
+ set -e
25
+
26
+ if [ $# -lt 1 ]; then
27
+ echo "[cadence-wrapper] FATAL: enqueue script path required" >&2
28
+ exit 64
29
+ fi
30
+
31
+ ENQUEUE_SCRIPT="$1"
32
+ shift
33
+
34
+ # Walk up from the enqueue script to find the agent root. AGENT_ROOT is the
35
+ # nearest ancestor with both package.json and config/.
36
+ TARGET_DIR="$(cd "$(dirname "$ENQUEUE_SCRIPT")" && pwd -P)"
37
+ CANDIDATE="$TARGET_DIR"
38
+ AGENT_ROOT=""
39
+ while [ "$CANDIDATE" != "/" ]; do
40
+ if [ -f "$CANDIDATE/package.json" ] && [ -d "$CANDIDATE/config" ]; then
41
+ AGENT_ROOT="$CANDIDATE"
42
+ break
43
+ fi
44
+ CANDIDATE="$(dirname "$CANDIDATE")"
45
+ done
46
+ AGENT_ROOT="${AGENT_ROOT:-$TARGET_DIR}"
47
+
48
+ export AGENT_ROOT
49
+ export AGENT_DIR="$AGENT_ROOT"
50
+ export HOME="${HOME:-/Users/$(whoami)}"
51
+ export USER="${USER:-$(whoami)}"
52
+ export PATH="/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH"
53
+
54
+ cd "$AGENT_ROOT"
55
+
56
+ # Resolve node binary — prefer nvm, fall back to homebrew, then system.
57
+ NODE_BIN=""
58
+ for candidate in \
59
+ "$HOME/.nvm/versions/node/v24.11.1/bin/node" \
60
+ "$HOME/.nvm/versions/node/v24/bin/node" \
61
+ "$HOME/.nvm/versions/node/v22/bin/node" \
62
+ "$HOME/.nvm/versions/node/v20/bin/node" \
63
+ /opt/homebrew/bin/node \
64
+ /usr/local/bin/node \
65
+ /usr/bin/node; do
66
+ if [ -x "$candidate" ]; then
67
+ NODE_BIN="$candidate"
68
+ break
69
+ fi
70
+ done
71
+ if [ -z "$NODE_BIN" ] && [ -d "$HOME/.nvm/versions/node" ]; then
72
+ NODE_BIN=$(ls -1d "$HOME/.nvm/versions/node"/v*/bin/node 2>/dev/null | sort -V | tail -1)
73
+ fi
74
+ if [ -z "$NODE_BIN" ] || [ ! -x "$NODE_BIN" ]; then
75
+ echo "[cadence-wrapper] FATAL: could not find node binary" >&2
76
+ exit 127
77
+ fi
78
+
79
+ LOG_DATE="$(date +%Y-%m-%d)"
80
+ LOG_DIR="$AGENT_ROOT/logs/cadence-bus"
81
+ mkdir -p "$LOG_DIR" 2>/dev/null || true
82
+ LOG_FILE="$LOG_DIR/launchd-${LOG_DATE}.log"
83
+
84
+ # Exec the enqueue script. stdin is closed; stdout/stderr go to the log.
85
+ exec "$NODE_BIN" "$ENQUEUE_SCRIPT" "$@" >> "$LOG_FILE" 2>&1
@@ -0,0 +1,439 @@
1
+ /**
2
+ * Maestro — Cadence Consumer
3
+ *
4
+ * The persistent main session's drain loop for the cadence bus.
5
+ *
6
+ * Lifecycle: launched once per agent process (inside maestro-daemon.mjs).
7
+ * On every tick it:
8
+ *
9
+ * 1. Recovers any stale claims (events whose handler crashed mid-flight).
10
+ * 2. Drains inbox/, atomically claiming each event.
11
+ * 3. Routes the event through cadence-handlers.mjs:
12
+ * - inline → run handler in-process, complete.
13
+ * - guarded → run cheap pre-check; if "no work", complete inline,
14
+ * else escalate to a sub-session.
15
+ * - escalate → spawn a sub-session running the cadence's trigger
16
+ * prompt under schedules/triggers/<name>.md.
17
+ * Unknown cadences with a prompt on disk default to escalate; without
18
+ * a prompt they go straight to dlq with a clear error.
19
+ * 4. Respects .emergency-stop: while present, the loop logs a heartbeat
20
+ * but never spawns a sub-session and never processes events. Existing
21
+ * claims remain on disk so they can be resumed once the stop is lifted.
22
+ * 5. Writes a heartbeat to state/cadence-bus/health.json on every cycle
23
+ * so doctor / healthcheck can confirm liveness.
24
+ *
25
+ * The consumer never spawns more than ONE sub-session at a time on its own.
26
+ * That bound exists because the parent process is the single owner of
27
+ * cadence housekeeping; if your workflow needs higher parallelism, lean on
28
+ * the existing daemon dispatcher (which is purpose-built for inbox items),
29
+ * not on multiplying cadence consumers.
30
+ *
31
+ * Public API:
32
+ * startConsumer(opts) → { stop(), getStats(), tickOnce() }
33
+ *
34
+ * Options:
35
+ * agentRoot override AGENT_ROOT (tests).
36
+ * pollMs drain interval (default 2_000).
37
+ * heartbeatMs health.json refresh interval (default 15_000).
38
+ * recoveryMs stale-claim sweep interval (default 5 * 60_000).
39
+ * spawnSession injected spawner for tests; defaults to a real
40
+ * `claude --print --dangerously-skip-permissions <prompt>`
41
+ * child_process.spawn.
42
+ * maxSpawnMs hard timeout per sub-session (default 30 * 60_000).
43
+ * logger optional fn({ ts, level, …rest }) → void for tests.
44
+ */
45
+
46
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { spawn } from "node:child_process";
import { pathToFileURL } from "node:url";

import {
  ensureBusDirs,
  claimNextTick,
  completeTick,
  failTick,
  recoverStaleClaims,
  writeHealth,
  getBusPaths,
  logBusEvent,
  busDepth,
} from "../../lib/cadence-bus.mjs";
import { getCadenceDef } from "./cadence-handlers.mjs";
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Defaults
65
+ // ---------------------------------------------------------------------------
66
+
67
+ const DEFAULT_POLL_MS = 2_000;
68
+ const DEFAULT_HEARTBEAT_MS = 15_000;
69
+ const DEFAULT_RECOVERY_MS = 5 * 60_000;
70
+ const DEFAULT_SPAWN_TIMEOUT_MS = 30 * 60_000;
71
+
72
+ // Concurrency: at most one sub-session at a time per cadence consumer.
73
+ // Cadence events are not realtime — queueing one tick behind another is
74
+ // preferable to thrashing Claude / hitting usage limits.
75
+ const MAX_CONCURRENT_SUB_SESSIONS = 1;
76
+
77
+ // ---------------------------------------------------------------------------
78
+ // Helpers
79
+ // ---------------------------------------------------------------------------
80
+
81
+ function isEmergencyStop(paths) {
82
+ return existsSync(paths.emergencyStop);
83
+ }
84
+
85
+ function defaultLogger(entry) {
86
+ // The default logger ALSO writes to logs/cadence-bus/<date>.jsonl via
87
+ // logBusEvent, but mirrors important events to stderr so launchd's
88
+ // StandardErrorPath captures them.
89
+ if (entry.level === "error" || entry.level === "warn") {
90
+ try { process.stderr.write(`[cadence-consumer] ${JSON.stringify(entry)}\n`); }
91
+ catch { /* ignore */ }
92
+ }
93
+ }
94
+
95
+ /**
96
+ * Spawn a sub-session running the cadence's trigger prompt and resolve
97
+ * with { exit_code, durationMs }. Reads the prompt at call time so the
98
+ * latest version (possibly upgraded between ticks) is always used.
99
+ */
100
+ function realSpawnSession({ agentRoot, cadence, promptPath, timeoutMs, log }) {
101
+ return new Promise((resolveOut) => {
102
+ const fullPrompt = join(agentRoot, promptPath);
103
+ if (!existsSync(fullPrompt)) {
104
+ resolveOut({ ok: false, exit_code: -2, error: `prompt not found: ${promptPath}` });
105
+ return;
106
+ }
107
+ let body;
108
+ try { body = readFileSync(fullPrompt, "utf-8"); }
109
+ catch (err) {
110
+ resolveOut({ ok: false, exit_code: -3, error: `prompt read failed: ${err.message}` });
111
+ return;
112
+ }
113
+
114
+ const bin = process.env.CLAUDE_BIN || "claude";
115
+ const args = ["--print", "--dangerously-skip-permissions", body];
116
+ const env = { ...process.env, AGENT_ROOT: agentRoot, AGENT_DIR: agentRoot };
117
+ const started = Date.now();
118
+
119
+ log({ level: "info", stage: "subsession_spawn", cadence, bin });
120
+
121
+ let child;
122
+ try {
123
+ child = spawn(bin, args, { cwd: agentRoot, env, stdio: "ignore" });
124
+ } catch (err) {
125
+ resolveOut({ ok: false, exit_code: -4, error: `spawn failed: ${err.message}` });
126
+ return;
127
+ }
128
+
129
+ const timer = setTimeout(() => {
130
+ log({ level: "warn", stage: "subsession_timeout", cadence, timeout_ms: timeoutMs });
131
+ try { child.kill("SIGTERM"); } catch { /* ignore */ }
132
+ // Give it a beat to die before SIGKILL.
133
+ setTimeout(() => { try { child.kill("SIGKILL"); } catch { /* ignore */ } }, 5_000);
134
+ }, timeoutMs);
135
+
136
+ child.on("exit", (code, signal) => {
137
+ clearTimeout(timer);
138
+ const durationMs = Date.now() - started;
139
+ const exit_code = typeof code === "number" ? code : (signal ? -1 : -5);
140
+ resolveOut({
141
+ ok: exit_code === 0,
142
+ exit_code,
143
+ signal: signal || null,
144
+ duration_ms: durationMs,
145
+ });
146
+ });
147
+
148
+ child.on("error", (err) => {
149
+ clearTimeout(timer);
150
+ const durationMs = Date.now() - started;
151
+ resolveOut({ ok: false, exit_code: -6, error: err.message, duration_ms: durationMs });
152
+ });
153
+ });
154
+ }
155
+
156
+ // ---------------------------------------------------------------------------
157
+ // Public API
158
+ // ---------------------------------------------------------------------------
159
+
160
+ /**
161
+ * Start the consumer. Returns control handles (stop, getStats, tickOnce).
162
+ * Caller must `await stop()` to flush state on shutdown.
163
+ */
164
+ export function startConsumer(opts = {}) {
165
+ const agentRoot = opts.agentRoot || process.env.AGENT_ROOT || process.env.AGENT_DIR || process.cwd();
166
+ const paths = ensureBusDirs(agentRoot);
167
+
168
+ const pollMs = opts.pollMs ?? DEFAULT_POLL_MS;
169
+ const heartbeatMs = opts.heartbeatMs ?? DEFAULT_HEARTBEAT_MS;
170
+ const recoveryMs = opts.recoveryMs ?? DEFAULT_RECOVERY_MS;
171
+ const maxSpawnMs = opts.maxSpawnMs ?? DEFAULT_SPAWN_TIMEOUT_MS;
172
+ const spawnSession = opts.spawnSession || realSpawnSession;
173
+ const userLogger = opts.logger;
174
+
175
+ const stats = {
176
+ started_at: new Date().toISOString(),
177
+ received: 0,
178
+ inline: 0,
179
+ escalated: 0,
180
+ skipped_emergency_stop: 0,
181
+ dlq: 0,
182
+ retries: 0,
183
+ spawn_failures: 0,
184
+ last_event_id: null,
185
+ last_decision: null,
186
+ };
187
+
188
+ let stopping = false;
189
+ let activeTick = null;
190
+ let timers = [];
191
+ let activeSubSessions = 0;
192
+
193
+ function log(entry) {
194
+ const enriched = { ts: new Date().toISOString(), ...entry };
195
+ logBusEvent(agentRoot, enriched);
196
+ if (userLogger) {
197
+ try { userLogger(enriched); } catch { /* never crash on logging */ }
198
+ } else {
199
+ defaultLogger(enriched);
200
+ }
201
+ }
202
+
203
+ function heartbeat() {
204
+ writeHealth(agentRoot, {
205
+ stats: { ...stats, depth: busDepth(agentRoot) },
206
+ active_subsessions: activeSubSessions,
207
+ stopping,
208
+ });
209
+ }
210
+
211
+ async function escalate(event) {
212
+ if (activeSubSessions >= MAX_CONCURRENT_SUB_SESSIONS) {
213
+ // Re-queue and try again next tick. Single-owner cadence consumer
214
+ // means this can only happen when a prior tick is still running —
215
+ // queue depth is the right back-pressure signal.
216
+ log({
217
+ level: "info",
218
+ stage: "escalate_deferred",
219
+ id: event.id,
220
+ cadence: event.cadence,
221
+ active_subsessions: activeSubSessions,
222
+ });
223
+ failTick(agentRoot, event.id, "deferred:concurrent-spawn", { maxAttempts: 10 });
224
+ stats.retries += 1;
225
+ return { ok: false, decision: "deferred" };
226
+ }
227
+
228
+ const def = getCadenceDef(event.cadence);
229
+ let promptPath = def?.prompt;
230
+ if (!promptPath) {
231
+ // Unknown cadence — try the conventional location.
232
+ const conventional = `schedules/triggers/${event.cadence}.md`;
233
+ if (existsSync(join(agentRoot, conventional))) {
234
+ promptPath = conventional;
235
+ log({ level: "warn", stage: "escalate_unknown_cadence", id: event.id, cadence: event.cadence, prompt: conventional });
236
+ } else {
237
+ log({ level: "error", stage: "escalate_no_prompt", id: event.id, cadence: event.cadence });
238
+ failTick(agentRoot, event.id, `no handler and no prompt at ${conventional}`, { terminal: true });
239
+ stats.dlq += 1;
240
+ return { ok: false, decision: "dlq-no-prompt" };
241
+ }
242
+ }
243
+
244
+ activeSubSessions += 1;
245
+ let result;
246
+ try {
247
+ log({ level: "info", stage: "escalating", id: event.id, cadence: event.cadence, prompt: promptPath, reason: event.metadata?.reason || "registry policy" });
248
+ result = await spawnSession({
249
+ agentRoot,
250
+ cadence: event.cadence,
251
+ promptPath,
252
+ timeoutMs: maxSpawnMs,
253
+ log,
254
+ });
255
+ } finally {
256
+ activeSubSessions -= 1;
257
+ }
258
+
259
+ if (result.ok) {
260
+ completeTick(agentRoot, event.id, {
261
+ decision: "escalated",
262
+ cadence: event.cadence,
263
+ prompt: promptPath,
264
+ exit_code: result.exit_code,
265
+ duration_ms: result.duration_ms,
266
+ });
267
+ stats.escalated += 1;
268
+ stats.last_decision = "escalated";
269
+ return { ok: true, decision: "escalated", exit_code: result.exit_code };
270
+ }
271
+ log({ level: "error", stage: "subsession_failed", id: event.id, cadence: event.cadence, exit_code: result.exit_code, error: result.error || null });
272
+ stats.spawn_failures += 1;
273
+ const outcome = failTick(agentRoot, event.id, result.error || `exit ${result.exit_code}`);
274
+ if (outcome?.destination === "dlq") stats.dlq += 1;
275
+ else stats.retries += 1;
276
+ return { ok: false, decision: outcome?.destination || "failed" };
277
+ }
278
+
279
+ // NOTE: do not name this `process` — function declarations are hoisted
280
+ // and would shadow the global `process` object inside startConsumer.
281
+ async function processEvent(event) {
282
+ stats.received += 1;
283
+ stats.last_event_id = event.id;
284
+
285
+ const def = getCadenceDef(event.cadence);
286
+ if (def?.mode === "inline" && typeof def.handler === "function") {
287
+ try {
288
+ const out = await def.handler({ event, agentRoot, log });
289
+ completeTick(agentRoot, event.id, {
290
+ decision: "inline",
291
+ cadence: event.cadence,
292
+ handler_result: out,
293
+ });
294
+ stats.inline += 1;
295
+ stats.last_decision = "inline";
296
+ return { decision: "inline", result: out };
297
+ } catch (err) {
298
+ log({ level: "error", stage: "inline_handler_threw", id: event.id, cadence: event.cadence, error: err.message });
299
+ const outcome = failTick(agentRoot, event.id, `inline-handler-threw: ${err.message}`);
300
+ if (outcome?.destination === "dlq") stats.dlq += 1;
301
+ return { decision: "failed", error: err.message };
302
+ }
303
+ }
304
+
305
+ if (def?.mode === "guarded" && typeof def.guard === "function") {
306
+ let guardOut;
307
+ try {
308
+ guardOut = await def.guard({ event, agentRoot, log });
309
+ } catch (err) {
310
+ log({ level: "error", stage: "guard_threw", id: event.id, cadence: event.cadence, error: err.message });
311
+ failTick(agentRoot, event.id, `guard-threw: ${err.message}`);
312
+ stats.retries += 1;
313
+ return { decision: "failed", error: err.message };
314
+ }
315
+ if (guardOut?.decision === "inline") {
316
+ completeTick(agentRoot, event.id, {
317
+ decision: "inline_via_guard",
318
+ cadence: event.cadence,
319
+ guard_result: guardOut,
320
+ });
321
+ stats.inline += 1;
322
+ stats.last_decision = "inline_via_guard";
323
+ return { decision: "inline_via_guard", result: guardOut };
324
+ }
325
+ // Guard says escalate — record WHY so the archived event shows the
326
+ // substantive pre-check that justified spawning a sub-session. Persist
327
+ // the annotated metadata to claimed/<id>.json so completeTick picks it
328
+ // up when it archives to processed/.
329
+ event.metadata = {
330
+ ...(event.metadata || {}),
331
+ reason: guardOut?.reason || "guard:escalate",
332
+ guard: guardOut,
333
+ };
334
+ try {
335
+ const claimedPath = join(paths.claimed, `${event.id}.json`);
336
+ writeFileSync(claimedPath, JSON.stringify(event, null, 2) + "\n");
337
+ } catch (err) {
338
+ log({ level: "warn", stage: "persist_metadata_failed", id: event.id, error: err.message });
339
+ }
340
+ return escalate(event);
341
+ }
342
+
343
+ // No registry entry, or registry says escalate.
344
+ return escalate(event);
345
+ }
346
+
347
+ async function tickOnce() {
348
+ if (stopping) return { processed: 0 };
349
+ if (isEmergencyStop(paths)) {
350
+ stats.skipped_emergency_stop += 1;
351
+ heartbeat();
352
+ log({ level: "warn", stage: "emergency_stop_active" });
353
+ return { processed: 0, emergency_stop: true };
354
+ }
355
+ // Recover stale claims occasionally — done by the periodic timer too,
356
+ // but every tick is cheap because it short-circuits on empty claimed/.
357
+ recoverStaleClaims(agentRoot);
358
+
359
+ let processed = 0;
360
+ // Drain as much as the consumer can in one tick, but yield to the
361
+ // event loop between events so heartbeats and stop signals fire.
362
+ while (!stopping) {
363
+ const claim = claimNextTick(agentRoot);
364
+ if (!claim) break;
365
+ const event = claim.event;
366
+ activeTick = event.id;
367
+ try {
368
+ await processEvent(event);
369
+ } finally {
370
+ activeTick = null;
371
+ }
372
+ processed += 1;
373
+ if (processed >= 16) break; // soft batch cap
374
+ }
375
+ return { processed };
376
+ }
377
+
378
+ // Background loops
379
+ const pollTimer = setInterval(() => {
380
+ tickOnce().catch((err) => {
381
+ log({ level: "error", stage: "tick_threw", error: err?.message || String(err) });
382
+ });
383
+ }, pollMs);
384
+ pollTimer.unref?.();
385
+ timers.push(pollTimer);
386
+
387
+ const hbTimer = setInterval(() => heartbeat(), heartbeatMs);
388
+ hbTimer.unref?.();
389
+ timers.push(hbTimer);
390
+
391
+ const recoveryTimer = setInterval(() => {
392
+ try {
393
+ const out = recoverStaleClaims(agentRoot);
394
+ if (out.scanned > 0) {
395
+ log({ level: "info", stage: "periodic_recovery", ...out });
396
+ }
397
+ } catch (err) {
398
+ log({ level: "error", stage: "periodic_recovery_failed", error: err.message });
399
+ }
400
+ }, recoveryMs);
401
+ recoveryTimer.unref?.();
402
+ timers.push(recoveryTimer);
403
+
404
+ // Initial sweep + heartbeat
405
+ recoverStaleClaims(agentRoot);
406
+ heartbeat();
407
+ log({ level: "info", stage: "consumer_started", pollMs, heartbeatMs, recoveryMs });
408
+
409
+ async function stop() {
410
+ if (stopping) return;
411
+ stopping = true;
412
+ for (const t of timers) clearInterval(t);
413
+ timers = [];
414
+ log({ level: "info", stage: "consumer_stopping", active_tick: activeTick });
415
+ heartbeat();
416
+ }
417
+
418
+ function getStats() {
419
+ return { ...stats, depth: busDepth(agentRoot), agent_root: agentRoot };
420
+ }
421
+
422
+ return { stop, getStats, tickOnce, _paths: getBusPaths(agentRoot) };
423
+ }
424
+
425
+ // Allow `node scripts/daemon/cadence-consumer.mjs` to run the consumer
426
+ // standalone (useful for development and for migrations that need to
427
+ // drain the bus without starting the full daemon).
428
+ if (import.meta.url === `file://${process.argv[1]}`) {
429
+ const consumer = startConsumer({});
430
+ const shutdown = async (sig) => {
431
+ process.stderr.write(`[cadence-consumer] caught ${sig}, stopping…\n`);
432
+ await consumer.stop();
433
+ process.exit(0);
434
+ };
435
+ process.on("SIGTERM", () => shutdown("SIGTERM"));
436
+ process.on("SIGINT", () => shutdown("SIGINT"));
437
+ // Keep the process alive — unref'd timers won't otherwise.
438
+ setInterval(() => {}, 60_000);
439
+ }