@adaptic/maestro 1.1.8 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/init-maestro.md +304 -8
- package/README.md +28 -0
- package/bin/maestro.mjs +1 -1
- package/docs/guides/agents-observe-setup.md +64 -0
- package/docs/guides/ccxray-diagnostics.md +65 -0
- package/docs/guides/claude-mem-setup.md +79 -0
- package/docs/guides/claude-pace-setup.md +56 -0
- package/docs/guides/claudraband-sessions.md +98 -0
- package/docs/guides/clawteam-swarm.md +116 -0
- package/docs/guides/code-review-graph-setup.md +86 -0
- package/docs/guides/self-optimization-pattern.md +82 -0
- package/docs/guides/slack-setup.md +4 -2
- package/docs/guides/twilio-subaccounts-setup.md +223 -0
- package/docs/guides/webhook-relay-setup.md +349 -0
- package/package.json +2 -1
- package/plugins/maestro-skills/plugin.json +16 -0
- package/plugins/maestro-skills/skills/agents-observe.md +110 -0
- package/plugins/maestro-skills/skills/ccxray-diagnostics.md +91 -0
- package/plugins/maestro-skills/skills/claude-pace.md +61 -0
- package/plugins/maestro-skills/skills/code-review-graph.md +99 -0
- package/scaffold/CLAUDE.md +64 -0
- package/scaffold/config/agent.ts.example +2 -1
- package/scaffold/config/known-agents.json +35 -0
- package/scripts/daemon/classifier.mjs +264 -50
- package/scripts/daemon/dispatcher.mjs +109 -5
- package/scripts/daemon/launchd-wrapper-generic.sh +96 -0
- package/scripts/daemon/launchd-wrapper-slack-events.sh +37 -0
- package/scripts/daemon/launchd-wrapper.sh +91 -0
- package/scripts/daemon/lib/session-router.mjs +274 -0
- package/scripts/daemon/lib/session-router.test.mjs +295 -0
- package/scripts/daemon/prompt-builder.mjs +51 -11
- package/scripts/daemon/responder.mjs +234 -19
- package/scripts/daemon/session-lock.mjs +194 -0
- package/scripts/daemon/sophie-daemon.mjs +16 -2
- package/scripts/email-signature.html +20 -4
- package/scripts/local-triggers/generate-plists.sh +62 -10
- package/scripts/poller/imap-client.mjs +4 -2
- package/scripts/poller/slack-poller.mjs +104 -52
- package/scripts/setup/init-agent.sh +91 -1
- package/scripts/setup/install-dev-tools.sh +150 -0
- package/scripts/spawn-session.sh +21 -6
- package/workflows/continuous/backlog-executor.yaml +141 -0
- package/workflows/daily/evening-wrap.yaml +41 -1
- package/workflows/daily/morning-brief.yaml +17 -0
- package/workflows/event-driven/agent-failure-investigation.yaml +137 -0
- package/workflows/event-driven/pr-review.yaml +104 -0
- package/workflows/weekly/engineering-health.yaml +154 -0
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
import { spawn } from "child_process";
|
|
6
6
|
import { appendFileSync, mkdirSync, writeFileSync, readFileSync, renameSync } from "fs";
|
|
7
7
|
import { join } from "path";
|
|
8
|
-
import { releaseLock, releaseThreadLock, releaseRequestClaim } from "./session-lock.mjs";
|
|
8
|
+
import { releaseLock, releaseThreadLock, releaseRequestClaim, claimItem, releaseItemClaim } from "./session-lock.mjs";
|
|
9
|
+
import { recordSession } from "./health.mjs";
|
|
9
10
|
|
|
10
11
|
const SOPHIE_AI_DIR = join(new URL(".", import.meta.url).pathname, "../..");
|
|
11
12
|
const CLAUDE_BIN = process.env.CLAUDE_BIN || "/Users/sophie/.local/bin/claude";
|
|
@@ -27,6 +28,12 @@ const priorityQueue = []; // critical/high items
|
|
|
27
28
|
const normalQueue = []; // normal items
|
|
28
29
|
let sessionCounter = 0;
|
|
29
30
|
|
|
31
|
+
// Tracks sessions whose proc.on("error") handler has already fired.
|
|
32
|
+
// Prevents double-counting + double-cleanup when a spawn failure (ENOENT,
|
|
33
|
+
// EACCES, ETIMEDOUT) triggers both "error" and a trailing "close" event.
|
|
34
|
+
// See ib-20260416-daemon-etimedout-failed-event + cycle 135 memo.
|
|
35
|
+
const spawnErrorHandled = new Set();
|
|
36
|
+
|
|
30
37
|
// Backlog dedup: track which items have active sessions to prevent retry storms
|
|
31
38
|
const activeBacklogKeys = new Set(); // backlog item key -> true (while session is running)
|
|
32
39
|
const backlogRetryCount = new Map(); // backlog item key -> number of times dispatched
|
|
@@ -196,6 +203,26 @@ export function dispatch(prompt, item, classResult, source = "inbox") {
|
|
|
196
203
|
}
|
|
197
204
|
}
|
|
198
205
|
|
|
206
|
+
// Item-claim acquisition: file-based claim visible across daemon restarts
|
|
207
|
+
// and concurrent launchd triggers. Complements the in-memory activeBacklogKeys.
|
|
208
|
+
// (ib-20260407-001b: concurrent session coordination)
|
|
209
|
+
if (source === "backlog" && item.id) {
|
|
210
|
+
const sessionId = `s-${Date.now()}-${sessionCounter + 1}`;
|
|
211
|
+
const claim = claimItem(item.id, {
|
|
212
|
+
session_id: sessionId,
|
|
213
|
+
agent_description: classResult.summary || item.title || "",
|
|
214
|
+
ttl_minutes: classResult.model === "opus" ? 120 : 30,
|
|
215
|
+
source: "backlog",
|
|
216
|
+
queue_file: item.source_file || "",
|
|
217
|
+
pid: process.pid, // daemon PID; child PID not yet known
|
|
218
|
+
});
|
|
219
|
+
if (!claim.claimed) {
|
|
220
|
+
console.log(`[dispatcher] Item claim denied for ${item.id}: ${claim.reason} (holder: ${claim.holder || "unknown"})`);
|
|
221
|
+
logSession({ event: "skipped", reason: `item_claim_denied: ${claim.reason}`, summary: classResult.summary, holder: claim.holder });
|
|
222
|
+
return;
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
199
226
|
// Backlog items respect the reserved slot cap
|
|
200
227
|
if (source === "backlog") {
|
|
201
228
|
const backlogCount = countBySource("backlog");
|
|
@@ -250,9 +277,13 @@ function spawnSession(entry) {
|
|
|
250
277
|
prompt,
|
|
251
278
|
];
|
|
252
279
|
|
|
280
|
+
// Strip Anthropic API credentials from spawn env so claude CLI falls through
|
|
281
|
+
// to the keychain OAuth (Max subscription) per CEO directive 2026-04-27.
|
|
282
|
+
// A stale ANTHROPIC_API_KEY in the daemon's inherited env will otherwise
|
|
283
|
+
// override the OAuth token and cause "Invalid API key" failures.
|
|
253
284
|
const proc = spawn(CLAUDE_BIN, args, {
|
|
254
285
|
cwd: SOPHIE_AI_DIR,
|
|
255
|
-
env: { ...process.env },
|
|
286
|
+
env: { ...process.env, ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
|
|
256
287
|
stdio: ["ignore", "pipe", "pipe"],
|
|
257
288
|
});
|
|
258
289
|
|
|
@@ -287,8 +318,17 @@ function spawnSession(entry) {
|
|
|
287
318
|
|
|
288
319
|
proc.on("close", (code) => {
|
|
289
320
|
clearTimeout(timer);
|
|
321
|
+
// If proc.on("error") already fired for this session (spawn failure path
|
|
322
|
+
// — ENOENT, EACCES, ETIMEDOUT), cleanup + metric + lock release already
|
|
323
|
+
// happened. Skip to avoid double-count and double-release.
|
|
324
|
+
if (spawnErrorHandled.has(sessionId)) {
|
|
325
|
+
spawnErrorHandled.delete(sessionId);
|
|
326
|
+
drainQueue();
|
|
327
|
+
return;
|
|
328
|
+
}
|
|
290
329
|
activeSessions.delete(sessionId);
|
|
291
330
|
removeActiveSession(sessionId);
|
|
331
|
+
recordSession(true, code === 0);
|
|
292
332
|
const duration = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
293
333
|
|
|
294
334
|
// Release item lock — MUST use same key order as acquireLock in daemon
|
|
@@ -312,12 +352,15 @@ function spawnSession(entry) {
|
|
|
312
352
|
});
|
|
313
353
|
}
|
|
314
354
|
|
|
315
|
-
// Release backlog tracking
|
|
355
|
+
// Release backlog tracking + item claim
|
|
316
356
|
if (source === "backlog") {
|
|
317
357
|
const key = backlogKey(item);
|
|
318
358
|
activeBacklogKeys.delete(key);
|
|
319
359
|
const retries = backlogRetryCount.get(key) || 0;
|
|
320
360
|
|
|
361
|
+
// Release file-based item claim (ib-20260407-001b)
|
|
362
|
+
if (item.id) releaseItemClaim(item.id);
|
|
363
|
+
|
|
321
364
|
// If session timed out (143=SIGTERM) and hit retry limit, log it
|
|
322
365
|
if (code === 143 && retries >= MAX_BACKLOG_RETRIES) {
|
|
323
366
|
console.warn(`[dispatcher] Backlog item "${classResult.summary}" exhausted ${MAX_BACKLOG_RETRIES} retries — will not retry`);
|
|
@@ -347,9 +390,70 @@ function spawnSession(entry) {
|
|
|
347
390
|
|
|
348
391
|
proc.on("error", (err) => {
|
|
349
392
|
clearTimeout(timer);
|
|
393
|
+
// Mark so the trailing proc.on("close") doesn't double-process.
|
|
394
|
+
spawnErrorHandled.add(sessionId);
|
|
350
395
|
activeSessions.delete(sessionId);
|
|
351
|
-
|
|
352
|
-
|
|
396
|
+
removeActiveSession(sessionId);
|
|
397
|
+
recordSession(true, false);
|
|
398
|
+
const duration = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
399
|
+
const errorCode = err.code || "unknown";
|
|
400
|
+
|
|
401
|
+
// Release item lock — mirror of close-handler logic.
|
|
402
|
+
const itemId = item.raw_ref || item.id || item.title;
|
|
403
|
+
if (itemId) releaseLock(itemId);
|
|
404
|
+
|
|
405
|
+
// Release thread lock so new messages in this thread can be processed.
|
|
406
|
+
if (item.thread_id) {
|
|
407
|
+
const channel = item.channel_id || (item.raw_ref ? (item.raw_ref.match(/slack:([^:]+):/) || [])[1] : null) || item.channel;
|
|
408
|
+
if (channel) releaseThreadLock(channel, item.thread_id);
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
// Release request claim + emit explicit claim_released event so
|
|
412
|
+
// reconciliation audits (cycle 124 Agent C pattern) can distinguish
|
|
413
|
+
// genuine in-flight sessions from silent-exit ETIMEDOUT failures
|
|
414
|
+
// without cross-referencing logs/daemon/responses.jsonl.
|
|
415
|
+
let claimReleased = false;
|
|
416
|
+
if (classResult && classResult.summary) {
|
|
417
|
+
releaseRequestClaim({
|
|
418
|
+
recipient: item.channel_id || item.channel || item.sender || "unknown",
|
|
419
|
+
subject: classResult.summary || item.subject || "",
|
|
420
|
+
action_type: classResult.action || "respond",
|
|
421
|
+
});
|
|
422
|
+
claimReleased = true;
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
// Release backlog tracking + item claim.
|
|
426
|
+
if (source === "backlog") {
|
|
427
|
+
const key = backlogKey(item);
|
|
428
|
+
activeBacklogKeys.delete(key);
|
|
429
|
+
// Release file-based item claim (ib-20260407-001b)
|
|
430
|
+
if (item.id) releaseItemClaim(item.id);
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
console.error(`[dispatcher] Session ${sessionId} failed: ${errorCode} (${err.message})`);
|
|
434
|
+
|
|
435
|
+
// Rich "failed" event — see ib-20260416-daemon-etimedout-failed-event.
|
|
436
|
+
logSession({
|
|
437
|
+
event: "failed",
|
|
438
|
+
sessionId,
|
|
439
|
+
error: errorCode,
|
|
440
|
+
error_message: err.message,
|
|
441
|
+
model,
|
|
442
|
+
source,
|
|
443
|
+
priority: classResult?.priority,
|
|
444
|
+
summary: classResult?.summary,
|
|
445
|
+
duration_s: parseFloat(duration),
|
|
446
|
+
active_count: activeSessions.size,
|
|
447
|
+
});
|
|
448
|
+
|
|
449
|
+
if (claimReleased) {
|
|
450
|
+
logSession({
|
|
451
|
+
event: "claim_released",
|
|
452
|
+
sessionId,
|
|
453
|
+
reason: `spawn_failed_${errorCode}`,
|
|
454
|
+
});
|
|
455
|
+
}
|
|
456
|
+
|
|
353
457
|
drainQueue();
|
|
354
458
|
});
|
|
355
459
|
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/bin/bash
# launchd-wrapper-generic.sh — Universal env bootstrap for ANY maestro
# script spawned under launchd.
#
# Usage in a plist:
#   <key>ProgramArguments</key>
#   <array>
#     <string>/path/to/scripts/daemon/launchd-wrapper-generic.sh</string>
#     <string>/path/to/script-to-run.sh</string>
#     <string>arg1</string>
#     <string>arg2</string>
#   </array>
#
# What it does:
#   1. Sets HOME, PATH, USER, AGENT_ROOT (launchd's bare env doesn't include them)
#   2. Detects external SSD and sets CLAUDE_CODE_TMPDIR + creates SSD_AGENT_ROOT
#   3. Redirects stdout/stderr to a date-stamped log file on the SSD
#   4. Exec's the target script with all remaining args
#   NOTE: unlike launchd-wrapper.sh, this wrapper does NOT resolve a node
#   binary — the target is run with bash and must locate node itself.
#
# Why a wrapper instead of a plist EnvironmentVariables block?
# Putting these env vars directly in the plist has been observed to cause
# EX_CONFIG (78) failures on macOS Sequoia + when symlinks are involved.
# A wrapper script is more portable and easier to debug.

set -e

# Exit 64 (EX_USAGE) when the plist forgot to pass a target script.
if [ $# -lt 1 ]; then
  echo "[wrapper] FATAL: no target script provided" >&2
  exit 64
fi

TARGET="$1"
shift

# AGENT_ROOT is the parent of the script being run, going up two levels.
# E.g. /Users/lucas/lucas-ai/scripts/local-triggers/run-trigger.sh → /Users/lucas/lucas-ai
TARGET_DIR="$(cd "$(dirname "$TARGET")" && pwd -P)"
# Walk up until we find a directory holding both package.json AND a config/ dir.
CANDIDATE="$TARGET_DIR"
while [ "$CANDIDATE" != "/" ]; do
  if [ -f "$CANDIDATE/package.json" ] && [ -d "$CANDIDATE/config" ]; then
    AGENT_ROOT="$CANDIDATE"
    break
  fi
  CANDIDATE="$(dirname "$CANDIDATE")"
done
# If the walk found nothing, fall back to the target's own directory.
# NOTE(review): ":-" also preserves an AGENT_ROOT inherited from the caller's
# env when detection fails — confirm that precedence is intended.
AGENT_ROOT="${AGENT_ROOT:-$TARGET_DIR}"

export AGENT_ROOT
export HOME="${HOME:-/Users/$(whoami)}"
export USER="${USER:-$(whoami)}"
export PATH="/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH"

# ── SSD detection ───────────────────────────────────────────────────────────
# Explicit MAESTRO_SSD_VOLUME wins; otherwise probe common volume names.
SSD_VOLUME="${MAESTRO_SSD_VOLUME:-}"
if [ -z "$SSD_VOLUME" ]; then
  for v in /Volumes/*-SSD /Volumes/*SSD* /Volumes/maestro-data; do
    if [ -d "$v" ] && [ "$v" != "/Volumes/Macintosh HD" ]; then
      SSD_VOLUME="$v"
      break
    fi
  done
fi

AGENT_NAME="$(basename "$AGENT_ROOT" | sed 's/-ai$//')"
SSD_AGENT_ROOT=""
SSD_WRITABLE=0
if [ -n "$SSD_VOLUME" ] && [ -d "$SSD_VOLUME" ]; then
  SSD_AGENT_ROOT="$SSD_VOLUME/maestro/$AGENT_NAME"
  # Probe writability — macOS may deny launchd write access to external
  # volumes until the user grants it in System Settings, so a mounted volume
  # is not necessarily a usable one.
  if mkdir -p "$SSD_AGENT_ROOT/claude-tmp" "$SSD_AGENT_ROOT/logs/launchd" 2>/dev/null && \
     touch "$SSD_AGENT_ROOT/.write-test-$$" 2>/dev/null; then
    rm -f "$SSD_AGENT_ROOT/.write-test-$$"
    SSD_WRITABLE=1
    export CLAUDE_CODE_TMPDIR="$SSD_AGENT_ROOT/claude-tmp"
    export MAESTRO_SSD_AGENT_ROOT="$SSD_AGENT_ROOT"
  fi
fi

cd "$AGENT_ROOT"

# Determine the log file name from the target script's basename
SCRIPT_NAME="$(basename "$TARGET" | sed 's/\.[^.]*$//')"
LOG_DATE="$(date +%Y-%m-%d)"

if [ "$SSD_WRITABLE" = "1" ]; then
  LOG_FILE="$SSD_AGENT_ROOT/logs/launchd/${SCRIPT_NAME}-${LOG_DATE}.log"
  # Startup breadcrumb is best-effort ("|| true") — never fail the launch on it.
  echo "[wrapper $(date -u +%H:%M:%SZ)] starting $SCRIPT_NAME (SSD log: $LOG_FILE)" >> "$LOG_FILE" 2>/dev/null || true
  exec bash "$TARGET" "$@" >> "$LOG_FILE" 2>&1
else
  # Fall back to internal disk log if SSD isn't writable (e.g. macOS denies launchd
  # write access to external volumes until the user grants it via System Settings).
  FALLBACK_LOG="$AGENT_ROOT/logs/launchd/${SCRIPT_NAME}-${LOG_DATE}.log"
  mkdir -p "$(dirname "$FALLBACK_LOG")" 2>/dev/null || true
  exec bash "$TARGET" "$@" >> "$FALLBACK_LOG" 2>&1
fi
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/bin/bash
# launchd-wrapper-slack-events.sh — Bootstraps env for the slack-events server.
#
# launchd's default environment is bare (no HOME/PATH/USER), so set the
# basics, point Claude Code's temp dir at the external SSD when present,
# resolve a node binary, and exec the server.
set -e

AGENT_ROOT="$(cd "$(dirname "$0")/../.." && pwd -P)"
export AGENT_ROOT
export HOME="${HOME:-/Users/$(whoami)}"
export USER="${USER:-$(whoami)}"
export PATH="/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH"

# SSD temp-dir redirect. Guard the mkdir: under `set -e`, an unwritable or
# read-only volume would otherwise abort the whole bootstrap and take the
# server down. Skipping the export falls back to the default temp dir —
# the same degradation path the other launchd wrappers use.
if [ -d "/Volumes/4TB-SSD" ]; then
  AGENT_NAME="$(basename "$AGENT_ROOT" | sed 's/-ai$//')"
  CLAUDE_TMP_DIR="/Volumes/4TB-SSD/maestro/$AGENT_NAME/claude-tmp"
  if mkdir -p "$CLAUDE_TMP_DIR" 2>/dev/null; then
    export CLAUDE_CODE_TMPDIR="$CLAUDE_TMP_DIR"
  fi
fi

cd "$AGENT_ROOT"

# Resolve node — prefer pinned nvm installs, then homebrew, then system.
NODE_BIN=""
for candidate in \
  "$HOME/.nvm/versions/node/v24.11.1/bin/node" \
  "$HOME/.nvm/versions/node/v24/bin/node" \
  "$HOME/.nvm/versions/node/v22/bin/node" \
  "$HOME/.nvm/versions/node/v20/bin/node" \
  /opt/homebrew/bin/node \
  /usr/local/bin/node \
  /usr/bin/node; do
  if [ -x "$candidate" ]; then
    NODE_BIN="$candidate"
    break
  fi
done
# Last resort: newest nvm-managed node, whatever the version.
if [ -z "$NODE_BIN" ] && [ -d "$HOME/.nvm/versions/node" ]; then
  NODE_BIN=$(ls -1d "$HOME/.nvm/versions/node"/v*/bin/node 2>/dev/null | sort -V | tail -1)
fi
if [ -z "$NODE_BIN" ] || [ ! -x "$NODE_BIN" ]; then
  echo "[wrapper] FATAL: could not find node binary" >&2
  exit 127
fi

exec "$NODE_BIN" "$AGENT_ROOT/scripts/slack-events-server.mjs"
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
#!/bin/bash
# launchd-wrapper.sh — Bootstraps env for the maestro daemon under launchd.
#
# launchd's default environment is bare and doesn't include HOME, PATH, or
# AGENT_ROOT. We set them here, then exec the daemon. This avoids putting
# them in the .plist directly, which has been observed to cause EX_CONFIG
# (78) failures on some macOS versions when symlinks are involved.
#
# Storage: when /Volumes/{SSD_NAME} is mounted, all daemon runtime data goes
# to the SSD. The plist's StandardErrorPath/StandardOutPath remain on the
# internal disk (launchd refuses external volumes there), but those files
# only capture launchd-level startup errors. The daemon's own stdout/stderr
# is redirected into /Volumes/{SSD_NAME}/maestro/{agent}/logs/daemon/ at the
# bottom of this script via shell redirection.
#
# This wrapper is exec'd by ai.adaptic.{firstname}-daemon.plist.

set -e

AGENT_ROOT="$(cd "$(dirname "$0")/../.." && pwd -P)"
export AGENT_ROOT
export HOME="${HOME:-/Users/$(whoami)}"
export USER="${USER:-$(whoami)}"
export PATH="/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH"

# ── SSD redirect ────────────────────────────────────────────────────────────
# If an external SSD is mounted at /Volumes/{name}, redirect:
#   - Claude Code per-cwd temp (CLAUDE_CODE_TMPDIR)
#   - Daemon stdout/stderr (via shell redirection at exec time)
#   - state/, outputs/, memory/, knowledge/ are already symlinked at init time

# Detect SSD volume — first volume under /Volumes that's not a system mount.
# Override with MAESTRO_SSD_VOLUME env var if you have multiple SSDs.
SSD_VOLUME="${MAESTRO_SSD_VOLUME:-}"
if [ -z "$SSD_VOLUME" ]; then
  for v in /Volumes/*-SSD /Volumes/*SSD* /Volumes/maestro-data; do
    if [ -d "$v" ] && [ "$v" != "/Volumes/Macintosh HD" ]; then
      SSD_VOLUME="$v"
      break
    fi
  done
fi

AGENT_NAME="$(basename "$AGENT_ROOT" | sed 's/-ai$//')"
SSD_AGENT_ROOT=""
SSD_WRITABLE=0
if [ -n "$SSD_VOLUME" ] && [ -d "$SSD_VOLUME" ]; then
  SSD_AGENT_ROOT="$SSD_VOLUME/maestro/$AGENT_NAME"
  # Probe writability — a mounted volume is not necessarily writable by a
  # launchd-spawned process (macOS gates external-volume access).
  if mkdir -p "$SSD_AGENT_ROOT/claude-tmp" "$SSD_AGENT_ROOT/logs/daemon" 2>/dev/null && \
     touch "$SSD_AGENT_ROOT/.write-test-$$" 2>/dev/null; then
    rm -f "$SSD_AGENT_ROOT/.write-test-$$"
    SSD_WRITABLE=1
    export CLAUDE_CODE_TMPDIR="$SSD_AGENT_ROOT/claude-tmp"
  fi
fi

cd "$AGENT_ROOT"
# Resolve node binary — prefer nvm, fall back to homebrew, then system
NODE_BIN=""
for candidate in \
  "$HOME/.nvm/versions/node/v24.11.1/bin/node" \
  "$HOME/.nvm/versions/node/v24/bin/node" \
  "$HOME/.nvm/versions/node/v22/bin/node" \
  "$HOME/.nvm/versions/node/v20/bin/node" \
  /opt/homebrew/bin/node \
  /usr/local/bin/node \
  /usr/bin/node; do
  if [ -x "$candidate" ]; then
    NODE_BIN="$candidate"
    break
  fi
done
# Last resort: newest nvm-managed node, whatever the version.
if [ -z "$NODE_BIN" ] && [ -d "$HOME/.nvm/versions/node" ]; then
  NODE_BIN=$(ls -1d "$HOME/.nvm/versions/node"/v*/bin/node 2>/dev/null | sort -V | tail -1)
fi
if [ -z "$NODE_BIN" ] || [ ! -x "$NODE_BIN" ]; then
  echo "[wrapper] FATAL: could not find node binary" >&2
  exit 127
fi

# Exec the daemon. Prefer SSD log path if writable, otherwise fall back to
# internal disk so the daemon stays up even when macOS denies launchd write
# access to /Volumes/{name}.
# NOTE(review): this execs scripts/daemon/maestro-daemon.mjs, but this
# package's file listing ships scripts/daemon/sophie-daemon.mjs — confirm
# the entry-point filename (possibly renamed at scaffold/init time).
if [ "$SSD_WRITABLE" = "1" ]; then
  DAEMON_LOG="$SSD_AGENT_ROOT/logs/daemon/daemon-$(date +%Y-%m-%d).log"
  exec "$NODE_BIN" "$AGENT_ROOT/scripts/daemon/maestro-daemon.mjs" >> "$DAEMON_LOG" 2>&1
else
  DAEMON_LOG="$AGENT_ROOT/logs/daemon/daemon-$(date +%Y-%m-%d).log"
  mkdir -p "$(dirname "$DAEMON_LOG")" 2>/dev/null || true
  exec "$NODE_BIN" "$AGENT_ROOT/scripts/daemon/maestro-daemon.mjs" >> "$DAEMON_LOG" 2>&1
fi
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* session-router.mjs — Routes daemon Claude CLI invocations to either an
|
|
3
|
+
* existing live session (RESUME) or a fresh ephemeral spawn (EPHEMERAL).
|
|
4
|
+
*
|
|
5
|
+
* Per design memo `outputs/drafts/2026-04-27-claude-cli-session-router.md`
|
|
6
|
+
* §4 (architecture) and §5 (migration plan, step 2). This module is
|
|
7
|
+
* scaffold-only and is NOT yet wired into dispatcher.mjs / responder.mjs.
|
|
8
|
+
* It is intended to ship behind a SESSION_ROUTER_ENABLED=1 env flag
|
|
9
|
+
* (memo §8 step 4) so its mere existence cannot alter daemon behaviour.
|
|
10
|
+
*
|
|
11
|
+
* Public API:
|
|
12
|
+
* - routingKey(item) — pure function, derives a stable conversation key
|
|
13
|
+
* - createRouter(opts) — async factory returning { route, touch,
|
|
14
|
+
* recordExit, evictExpired, _readForTests }
|
|
15
|
+
*
|
|
16
|
+
* Registry on disk: a single JSON file with shape
|
|
17
|
+
* { sessions: { <key>: { ... } }, lru: [ <key>, <key>, ... ] }
|
|
18
|
+
* Concurrent writes use the temp-file + fs.rename atomic pattern. Memo §4.1
|
|
19
|
+
* explicitly rejects flock — "no stale lock files if the daemon dies".
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { promises as fsp } from "fs";
|
|
23
|
+
import { dirname } from "path";
|
|
24
|
+
|
|
25
|
+
// canonicalizeSlackChannel exists in some installs of scripts/daemon/session-lock.mjs.
// In maestro, it may not be exported (sophie-ai-only helper). Both failure modes
// — import throws OR import succeeds but the symbol is undefined — degrade to
// identity so this module never breaks the daemon. The fallback only fires at
// load time, not per-call.
let canonicalizeSlackChannel;
try {
  // Top-level await: module evaluation blocks until session-lock.mjs resolves
  // (or fails), so every later call sees a settled function, never undefined.
  const mod = await import("../session-lock.mjs");
  canonicalizeSlackChannel = typeof mod.canonicalizeSlackChannel === "function"
    ? mod.canonicalizeSlackChannel
    : (c) => c;
} catch {
  // TODO(session-router): session-lock.mjs unavailable or canonicaliser missing
  // — using identity. Fix the import or invent a shared canon helper.
  canonicalizeSlackChannel = (c) => c;
}

// Factory, not a shared constant: each call returns a fresh, independently
// mutable { sessions, lru } shape, so callers can't alias one another's state.
const EMPTY_REGISTRY = () => ({ sessions: {}, lru: [] });
|
|
43
|
+
|
|
44
|
+
/**
 * Derive the stable conversation key for an inbound item (memo §4.2 table).
 *
 * Pure — no I/O and no registry access. The key decides which live Claude
 * session, if any, a new message should resume.
 *
 * @param {object} item Inbound queue/inbox item
 * @returns {string} Routing key, e.g. "slack:D099N1JGKRQ:1777283277.123456"
 * @throws {TypeError} when item is not an object
 * @throws {Error} when a required field for the item's source is missing
 */
export function routingKey(item) {
  if (!item || typeof item !== "object") {
    throw new TypeError("routingKey: item must be an object");
  }

  switch (item.source) {
    case "slack": {
      const channel = canonicalizeSlackChannel(item.channel || item.channel_id || "");
      if (!channel) throw new Error("routingKey: slack item missing channel");
      // Threaded messages share one key per thread; bare DMs one per channel.
      if (item.thread_ts) return `slack:${channel}:${item.thread_ts}`;
      if (typeof channel === "string" && channel.startsWith("D")) {
        return `slack:${channel}`;
      }
      // Non-DM channel message outside a thread → per-message bucket
      // (effectively ephemeral).
      return `slack:${channel}:${item.ts || item.event_ts || ""}`;
    }

    case "gmail": {
      const threadId = item.thread_id || item.threadId;
      if (!threadId) throw new Error("routingKey: gmail item missing thread_id");
      return `gmail:${threadId}`;
    }

    case "calendar": {
      const eventId = item.event_id || item.eventId;
      if (!eventId) throw new Error("routingKey: calendar item missing event_id");
      return `calendar:${eventId}`;
    }

    case "internal": {
      if (!item.id) throw new Error("routingKey: internal item missing id");
      return `internal:${item.id}`;
    }

    case "backlog": {
      // Prefer a topic-level key; otherwise reuse the internal id namespace.
      if (item.topic_slug) return `backlog:${item.topic_slug}`;
      if (item.id) return `internal:${item.id}`;
      throw new Error("routingKey: backlog item missing topic_slug and id");
    }

    default:
      throw new Error(`routingKey: unknown source "${item.source}"`);
  }
}
|
|
93
|
+
|
|
94
|
+
/**
 * Load the registry file from disk.
 *
 * Degrades instead of throwing (memo §6): a missing file, an unreadable
 * file, or corrupted JSON all yield the empty registry shape, and a
 * parseable-but-malformed document has its `sessions`/`lru` fields
 * normalised before being returned.
 *
 * @param {string} path Registry file path.
 * @returns {Promise<{sessions: object, lru: string[]}>}
 */
async function readRegistry(path) {
  let doc;
  try {
    doc = JSON.parse(await fsp.readFile(path, "utf-8"));
  } catch {
    // ENOENT, permission errors, and JSON syntax errors all land here.
    return { sessions: {}, lru: [] };
  }
  if (!doc || typeof doc !== "object") return { sessions: {}, lru: [] };
  // Normalise shape defensively so callers never see a malformed registry.
  if (!doc.sessions || typeof doc.sessions !== "object") doc.sessions = {};
  if (!Array.isArray(doc.lru)) doc.lru = [];
  return doc;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Persist the registry atomically: write to a sibling temp file, then
 * fs.rename into place. Memo §4.1 chooses this pattern over flock so a
 * dying daemon can never leave a stale lock behind.
 *
 * @param {string} path Registry file path (parent dirs created as needed).
 * @param {object} registry Registry object to serialise (2-space JSON).
 */
async function writeRegistryAtomic(path, registry) {
  await fsp.mkdir(dirname(path), { recursive: true });
  // PID + timestamp suffix keeps concurrent writers off each other's temp files.
  const tmpPath = `${path}.tmp.${process.pid}.${Date.now()}`;
  await fsp.writeFile(tmpPath, JSON.stringify(registry, null, 2), "utf-8");
  await fsp.rename(tmpPath, path);
}
|
|
125
|
+
|
|
126
|
+
/**
 * Mark `key` most-recently-used: remove any existing occurrence, then
 * append it at the tail. Mutates `lru` in place.
 *
 * @param {string[]} lru Keys ordered oldest → newest.
 * @param {string} key Key to promote.
 */
function bumpLru(lru, key) {
  const pos = lru.indexOf(key);
  if (pos >= 0) lru.splice(pos, 1);
  lru.push(key);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Build a session router bound to an on-disk registry.
 *
 * @param {object} opts
 * @param {string} opts.registryPath Path to the JSON registry file.
 * @param {number} [opts.ttlSeconds=1800] Idle TTL (memo §4.3 default 30m).
 * @param {number} [opts.maxLiveSessions=8] LRU cap (memo §4.3, matches MAX_CLAUDE_PROCS).
 * @param {() => number} [opts.now] Injectable clock for tests.
 * @returns {Promise<{route: Function, touch: Function, recordExit: Function, evictExpired: Function, _readForTests: Function}>}
 */
export async function createRouter({
  registryPath,
  ttlSeconds = 1800,
  maxLiveSessions = 8,
  now = () => Date.now(),
} = {}) {
  if (!registryPath) {
    throw new Error("createRouter: registryPath is required");
  }

  // Load once up front so the first route() call never races concurrent
  // writers; later state lives in memory and is flushed by the mutators.
  let registry = await readRegistry(registryPath);

  /**
   * Decide how to handle a conversation key (memo §4.4): RESUME when a
   * healthy, unexpired live session exists; EPHEMERAL when the key is
   * unknown; EPHEMERAL_REPLACE when a stale or dead entry must be replaced.
   * A RESUME refreshes last_used_at in memory only — it is persisted by the
   * next touch()/recordExit() write.
   *
   * @param {string} key
   * @returns {{ decision: "EPHEMERAL"|"EPHEMERAL_REPLACE"|"RESUME", resumeId: string|null }}
   */
  function route(key) {
    const session = registry.sessions[key];
    if (!session) {
      return { decision: "EPHEMERAL", resumeId: null };
    }

    const expired = now() - session.last_used_at > ttlSeconds * 1000;
    const healthy = session.status === "live" && session.last_exit_code === 0;
    if (expired || !healthy) {
      return { decision: "EPHEMERAL_REPLACE", resumeId: null };
    }

    session.last_used_at = now();
    bumpLru(registry.lru, key);
    return { decision: "RESUME", resumeId: session.claude_session_id };
  }

  /**
   * Record a freshly-spawned (or refreshed) session for a key and flush to
   * disk. Enforces the LRU cap by shedding the oldest keys.
   *
   * @param {string} key
   * @param {object} info
   * @param {string} info.claudeSessionId CLI-resolved session id from JSON stdout.
   * @param {string} [info.daemonSessionId] Daemon-local id (s-<epoch>-<n>).
   * @param {string} [info.model] "sonnet" | "opus" | "haiku" | etc.
   */
  async function touch(key, { claudeSessionId, daemonSessionId, model } = {}) {
    const stamp = now();
    const prev = registry.sessions[key];

    registry.sessions[key] = {
      daemon_session_id: daemonSessionId || prev?.daemon_session_id || null,
      claude_session_id: claudeSessionId ?? prev?.claude_session_id ?? null,
      key,
      model: model || prev?.model || null,
      created_at: prev?.created_at || new Date(stamp).toISOString(),
      last_used_at: stamp,
      ttl_seconds: ttlSeconds,
      status: "live",
      last_exit_code: 0,
    };
    bumpLru(registry.lru, key);

    // Shed oldest keys (front of the lru array) until within the cap.
    while (registry.lru.length > maxLiveSessions) {
      const oldest = registry.lru.shift();
      if (oldest && oldest !== key) {
        delete registry.sessions[oldest];
      }
    }
    // Defensive: drop any session no longer tracked by the lru array.
    const tracked = new Set(registry.lru);
    for (const k of Object.keys(registry.sessions)) {
      if (!tracked.has(k)) delete registry.sessions[k];
    }

    await writeRegistryAtomic(registryPath, registry);
  }

  /**
   * Record the exit code of the session behind `key` and flush. A non-zero
   * code flips the entry to "killed" so the next route() replaces it.
   */
  async function recordExit(key, exitCode) {
    const session = registry.sessions[key];
    if (!session) return; // Key was never touched — nothing to record.
    session.last_exit_code = exitCode;
    if (exitCode !== 0) {
      session.status = "killed";
    }
    await writeRegistryAtomic(registryPath, registry);
  }

  /**
   * Drop every entry idle past the TTL (memo §4.3 — run at the top of each
   * dispatcher tick). Flushes only when something was actually removed.
   *
   * @returns {Promise<number>} Count of evicted entries.
   */
  async function evictExpired() {
    const cutoff = now() - ttlSeconds * 1000;
    let removed = 0;
    for (const [k, session] of Object.entries(registry.sessions)) {
      if (session.last_used_at >= cutoff) continue;
      delete registry.sessions[k];
      const pos = registry.lru.indexOf(k);
      if (pos !== -1) registry.lru.splice(pos, 1);
      removed += 1;
    }
    if (removed > 0) {
      await writeRegistryAtomic(registryPath, registry);
    }
    return removed;
  }

  /**
   * Test hook — returns a deep-cloned snapshot (JSON round-trip) so tests
   * cannot mutate the live registry by accident.
   */
  function _readForTests() {
    return JSON.parse(JSON.stringify(registry));
  }

  return { route, touch, recordExit, evictExpired, _readForTests };
}
|