@adaptic/maestro 1.1.7 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/init-maestro.md +502 -260
- package/README.md +47 -2
- package/bin/maestro.mjs +1 -1
- package/docs/guides/agents-observe-setup.md +64 -0
- package/docs/guides/ccxray-diagnostics.md +65 -0
- package/docs/guides/claude-mem-setup.md +79 -0
- package/docs/guides/claude-pace-setup.md +56 -0
- package/docs/guides/claudraband-sessions.md +98 -0
- package/docs/guides/clawteam-swarm.md +116 -0
- package/docs/guides/code-review-graph-setup.md +86 -0
- package/docs/guides/email-setup.md +399 -0
- package/docs/guides/media-generation-setup.md +349 -0
- package/docs/guides/outbound-governance-setup.md +438 -0
- package/docs/guides/pdf-generation-setup.md +315 -0
- package/docs/guides/poller-daemon-setup.md +550 -0
- package/docs/guides/rag-context-setup.md +459 -0
- package/docs/guides/self-optimization-pattern.md +82 -0
- package/docs/guides/slack-setup.md +350 -0
- package/docs/guides/twilio-subaccounts-setup.md +223 -0
- package/docs/guides/voice-sms-setup.md +698 -0
- package/docs/guides/webhook-relay-setup.md +349 -0
- package/docs/guides/whatsapp-setup.md +282 -0
- package/docs/runbooks/mac-mini-bootstrap.md +21 -0
- package/package.json +2 -1
- package/plugins/maestro-skills/plugin.json +16 -0
- package/plugins/maestro-skills/skills/agents-observe.md +110 -0
- package/plugins/maestro-skills/skills/ccxray-diagnostics.md +91 -0
- package/plugins/maestro-skills/skills/claude-pace.md +61 -0
- package/plugins/maestro-skills/skills/code-review-graph.md +99 -0
- package/scaffold/CLAUDE.md +64 -0
- package/scaffold/config/agent.ts.example +2 -1
- package/scaffold/config/caller-id-map.yaml +46 -0
- package/scaffold/config/known-agents.json +35 -0
- package/scripts/daemon/classifier.mjs +264 -50
- package/scripts/daemon/dispatcher.mjs +109 -5
- package/scripts/daemon/launchd-wrapper-generic.sh +96 -0
- package/scripts/daemon/launchd-wrapper-slack-events.sh +37 -0
- package/scripts/daemon/launchd-wrapper.sh +91 -0
- package/scripts/daemon/lib/session-router.mjs +274 -0
- package/scripts/daemon/lib/session-router.test.mjs +295 -0
- package/scripts/daemon/prompt-builder.mjs +51 -11
- package/scripts/daemon/responder.mjs +234 -19
- package/scripts/daemon/session-lock.mjs +194 -0
- package/scripts/daemon/sophie-daemon.mjs +16 -2
- package/scripts/email-signature.html +20 -4
- package/scripts/local-triggers/generate-plists.sh +62 -10
- package/scripts/media-generation/README.md +2 -0
- package/scripts/pdf-generation/README.md +2 -0
- package/scripts/poller/imap-client.mjs +4 -2
- package/scripts/poller/slack-poller.mjs +126 -59
- package/scripts/poller/trigger.mjs +12 -1
- package/scripts/setup/init-agent.sh +91 -1
- package/scripts/setup/install-dev-tools.sh +150 -0
- package/scripts/spawn-session.sh +21 -6
- package/workflows/continuous/backlog-executor.yaml +141 -0
- package/workflows/daily/evening-wrap.yaml +41 -1
- package/workflows/daily/morning-brief.yaml +17 -0
- package/workflows/event-driven/agent-failure-investigation.yaml +137 -0
- package/workflows/event-driven/pr-review.yaml +104 -0
- package/workflows/weekly/engineering-health.yaml +154 -0
|
@@ -1,39 +1,211 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* responder.mjs — Quick response layer
|
|
3
3
|
*
|
|
4
|
-
* Handles two scenarios
|
|
4
|
+
* Handles two scenarios with a single short-lived `claude --print` call:
|
|
5
5
|
*
|
|
6
|
-
* 1. SIMPLE REPLIES:
|
|
7
|
-
* then posts directly via Slack/Gmail API.
|
|
6
|
+
* 1. SIMPLE REPLIES: Invokes the Claude Code CLI to generate a response,
|
|
7
|
+
* then posts directly via Slack/Gmail API.
|
|
8
8
|
*
|
|
9
9
|
* 2. HOLDING MESSAGES: For complex items that need a full session,
|
|
10
10
|
* generates and sends an immediate acknowledgment so the sender
|
|
11
11
|
* knows it's being worked on.
|
|
12
12
|
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
13
|
+
* Migrated off `@anthropic-ai/sdk` per CEO directive (Slack DM
|
|
14
|
+
* D099N1JGKRQ, 2026-04-27 09:38Z + 11:33Z): all agent daemon model
|
|
15
|
+
* calls must funnel through Claude Code CLI sessions (Max
|
|
16
|
+
* subscription), not the Anthropic API.
|
|
15
17
|
*/
|
|
16
18
|
|
|
17
|
-
import Anthropic from "@anthropic-ai/sdk";
|
|
18
19
|
import { readFileSync, writeFileSync, readdirSync, appendFileSync, mkdirSync } from "fs";
|
|
19
|
-
import { execFileSync } from "child_process";
|
|
20
|
+
import { execFileSync, spawn } from "child_process";
|
|
20
21
|
import { join } from "path";
|
|
22
|
+
import { randomUUID } from "crypto";
|
|
21
23
|
import { checkRecentlySent, registerSent } from "./session-lock.mjs";
|
|
24
|
+
import { routingKey as deriveRoutingKey, createRouter } from "./lib/session-router.mjs";
|
|
22
25
|
|
|
23
26
|
const SOPHIE_AI_DIR = join(new URL(".", import.meta.url).pathname, "../..");
|
|
24
27
|
const SONNET_MODEL = "claude-sonnet-4-6";
|
|
28
|
+
const CLAUDE_BIN = process.env.CLAUDE_BIN || "/Users/sophie/.local/bin/claude";
|
|
29
|
+
const CLAUDE_CLI_TIMEOUT_MS = 60_000;
|
|
30
|
+
const SESSION_REGISTRY_PATH = join(SOPHIE_AI_DIR, "state", "daemon", "session-router-registry.json");
|
|
31
|
+
|
|
32
|
+
// Singleton router — lazily created on the first getRouter() call. The
// scaffold's createRouter is async, so the in-flight promise is cached and
// callers await it.
let routerPromise = null;

/**
 * Return the shared session-router promise, creating it on first use.
 *
 * A rejected creation attempt is not cached: the slot is cleared so the next
 * call retries, instead of replaying the same rejection for the lifetime of
 * the process. The caller that triggered the failed attempt still observes
 * the rejection.
 *
 * @returns {Promise<object>} resolves to whatever createRouter() resolves to
 */
function getRouter() {
  if (!routerPromise) {
    const pending = Promise.resolve(createRouter({ registryPath: SESSION_REGISTRY_PATH }));
    routerPromise = pending;
    pending.catch(() => {
      // Only clear the slot if a newer attempt has not already replaced it.
      if (routerPromise === pending) routerPromise = null;
    });
  }
  return routerPromise;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Adapt a daemon item to the shape that session-router's routingKey() takes.
 *
 * Daemon items look like {service, channel, thread_id, sender_email, ...};
 * the router key function was specced against {source, channel, thread_ts,
 * thread_id, ...} (memo §4.2). This function is the seam between the two.
 *
 * @param {object} item - daemon item; `service` selects the mapping
 * @returns {object|null} router-shaped item, or null when the service is not
 *   one of slack/gmail/calendar or its required identifier is missing. The
 *   caller then falls back to non-routed ephemeral handling.
 */
function deriveRouterItem(item) {
  if (!item || typeof item !== "object") return null;

  switch (item.service) {
    case "slack": {
      const channel = item.channel || item.channel_id;
      if (!channel) return null;
      const threadTs = item.thread_id || item.thread_ts || null;
      const messageTs = item.ts || item.timestamp || null;
      return { source: "slack", channel, thread_ts: threadTs, ts: messageTs };
    }

    case "gmail": {
      const threadId = item.thread_id || item.threadId;
      return threadId ? { source: "gmail", thread_id: threadId } : null;
    }

    case "calendar": {
      const eventId = item.event_id || item.eventId;
      return eventId ? { source: "calendar", event_id: eventId } : null;
    }

    default:
      return null;
  }
}
|
|
25
83
|
|
|
26
84
|
// Tokens are read from the environment on every call rather than at import
// time, because dotenv loads in the daemon main before this is first called.
// The user token wins; the bot token is the fallback.
function getSlackToken() {
  const { SLACK_USER_TOKEN: userToken, SLACK_BOT_TOKEN: botToken } = process.env;
  return userToken || botToken;
}
|
|
30
88
|
|
|
31
|
-
let anthropic = null;
|
|
32
89
|
let cachedPreamble = null;
|
|
33
90
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
91
|
+
// Spawn `claude --print` with the supplied system + user prompts and model.
// Mirrors the pattern used in classifier.mjs:
//   • child_process.spawn (not exec) — no shell is involved, which avoids
//     shell-escape injection on potentially-hostile sender content.
//   • System prompt rides on --append-system-prompt; user prompt is written
//     to stdin and the pipe is closed.
//   • Non-zero exit, timeout, spawn error or stdin write error all reject
//     so the caller can surface the failure.
//
// Session-router wire-up (b2-b4, cycle 474): when the caller supplies a
// `sessionId` (pre-minted UUID), the spawn adds
// `--session-id <uuid> --output-format json` and the one-line JSON stdout is
// parsed into `jsonResult` ({session_id, result, is_error} per the b1 report).
// When a `router` + `routingKey` are supplied, router.recordExit is called on
// close. Per b1 flag-verification report, NEVER combine `--resume` with
// `--session-id` (not needed: pre-minting + reusing the same UUID across
// spawns is the resume mechanism).
//
// @param {string} systemPrompt - passed via --append-system-prompt
// @param {string} userPrompt - written to the child's stdin
// @param {string} model - passed via --model
// @param {{ sessionId?: string|null, router?: object|null, routingKey?: string|null }} [opts]
// @returns {Promise<{ text: string, jsonResult: object|null, exitCode: number }>}
function runClaudeCLI(systemPrompt, userPrompt, model, opts = {}) {
  const { sessionId = null, router = null, routingKey = null } = opts;

  return new Promise((resolvePromise, rejectPromise) => {
    const args = [
      "--print",
      "--dangerously-skip-permissions",
      "--model", model,
      "--append-system-prompt", systemPrompt,
    ];
    if (sessionId) {
      // --output-format json is only valid in combination with --print (per
      // b1 report). We always pass --print above, so this is safe.
      args.push("--session-id", sessionId, "--output-format", "json");
    }

    const proc = spawn(CLAUDE_BIN, args, {
      stdio: ["pipe", "pipe", "pipe"],
      // Force claude CLI onto keychain OAuth (Max subscription); blank out any
      // stale ANTHROPIC_API_KEY/AUTH_TOKEN inherited from the daemon env.
      env: { ...process.env, ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
    });

    let stdout = "";
    let stderr = "";
    let settled = false;
    let killTimer = null;

    // `proc.killed` only records that a signal was sent, not that the child
    // is gone, so liveness is derived from exitCode/signalCode instead.
    const hasExited = () => proc.exitCode !== null || proc.signalCode !== null;

    const timer = setTimeout(() => {
      if (settled) return;
      settled = true;
      try { proc.kill("SIGTERM"); } catch (_) { /* noop */ }
      // Escalate to SIGKILL if the child ignores SIGTERM for 2 seconds.
      killTimer = setTimeout(() => {
        try { if (!hasExited()) proc.kill("SIGKILL"); } catch (_) { /* noop */ }
      }, 2000);
      rejectPromise(new Error(`claude CLI timed out after ${CLAUDE_CLI_TIMEOUT_MS}ms`));
    }, CLAUDE_CLI_TIMEOUT_MS);

    // Reject at most once and stop the timeout clock.
    const rejectOnce = (message) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      rejectPromise(new Error(message));
    };

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");
    proc.stdout.on("data", (chunk) => { stdout += chunk; });
    proc.stderr.on("data", (chunk) => { stderr += chunk; });

    proc.on("error", (err) => {
      rejectOnce(`claude CLI spawn error: ${err.message}`);
    });

    // Write failures on the pipe (e.g. EPIPE when the child exits before
    // reading its input) are emitted asynchronously as 'error' events; without
    // a listener they would surface as an uncaught exception in the daemon.
    proc.stdin.on("error", (err) => {
      rejectOnce(`claude CLI stdin write error: ${err.message}`);
      // The child never received its prompt; do not leave it running.
      try { if (!hasExited()) proc.kill("SIGTERM"); } catch (_) { /* noop */ }
    });

    proc.on("close", (code) => {
      if (killTimer) clearTimeout(killTimer);

      // (b4) Always notify the router of the exit, regardless of whether
      // the promise has already settled (timeout path) or not, so that
      // non-zero exits are recorded for the next routing decision.
      if (router && routingKey) {
        // Fire-and-forget; a recordExit failure must not mask the real
        // result or crash the daemon over a bookkeeping write.
        Promise.resolve(router.recordExit(routingKey, code)).catch((err) => {
          console.warn(`[responder] router.recordExit failed for ${routingKey}: ${err.message}`);
        });
      }

      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (code !== 0) {
        const tail = (stderr || "").trim().slice(-500);
        rejectPromise(new Error(`claude CLI exited ${code}: ${tail || "no stderr"}`));
        return;
      }

      // (b3) If we asked for JSON, parse it. Otherwise return raw text.
      if (sessionId) {
        const trimmed = (stdout || "").trim();
        try {
          const parsed = JSON.parse(trimmed);
          // Per b1 report: top-level `session_id` (snake_case UUID), `result`
          // (text), `is_error` (bool). Top-level `uuid` is the message UUID,
          // NOT the session id — do NOT use it.
          resolvePromise({ text: parsed.result ?? "", jsonResult: parsed, exitCode: code });
        } catch (parseErr) {
          // Legacy fallback (rollout-safety): older CLIs or unexpected output
          // shapes shouldn't crash the daemon. Log a warning, surface the raw
          // text, and let the caller decide whether to call router.touch.
          console.warn(`[responder] claude CLI JSON parse failed (sessionId=${sessionId}): ${parseErr.message} — falling back to raw stdout`);
          resolvePromise({ text: trimmed, jsonResult: null, exitCode: code });
        }
      } else {
        resolvePromise({ text: stdout, jsonResult: null, exitCode: code });
      }
    });

    try {
      proc.stdin.end(userPrompt, "utf8");
    } catch (err) {
      // Synchronous failures only (e.g. invalid chunk type); asynchronous
      // pipe errors are handled by the stdin 'error' listener above.
      rejectOnce(`claude CLI stdin write error: ${err.message}`);
    }
  });
}
|
|
38
210
|
|
|
39
211
|
function today() {
|
|
@@ -187,7 +359,7 @@ function loadConversationHistory(item) {
|
|
|
187
359
|
}
|
|
188
360
|
|
|
189
361
|
// ---------------------------------------------------------------------------
|
|
190
|
-
// Generate response text via
|
|
362
|
+
// Generate response text via `claude --print` CLI
|
|
191
363
|
// ---------------------------------------------------------------------------
|
|
192
364
|
|
|
193
365
|
async function generateResponse(item, classResult, isHolding = false) {
|
|
@@ -234,15 +406,58 @@ ${profile ? `\nSender profile:\n${profile}` : ""}`;
|
|
|
234
406
|
`\nClassification: ${classResult.summary}`,
|
|
235
407
|
].filter(Boolean).join("\n");
|
|
236
408
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
409
|
+
// (b2) Session-router decision. Compute the routing key from a daemon→router
|
|
410
|
+
// adapter view of the item. If the item can't be keyed (unknown service,
|
|
411
|
+
// missing channel/thread), fall back to pure ephemeral with no router calls.
|
|
412
|
+
const router = await getRouter();
|
|
413
|
+
const routerItem = deriveRouterItem(item);
|
|
414
|
+
let key = null;
|
|
415
|
+
let sessionId = null;
|
|
416
|
+
if (routerItem) {
|
|
417
|
+
try {
|
|
418
|
+
key = deriveRoutingKey(routerItem);
|
|
419
|
+
const decision = router.route(key);
|
|
420
|
+
if (decision.decision === "RESUME" && decision.resumeId) {
|
|
421
|
+
sessionId = decision.resumeId;
|
|
422
|
+
} else {
|
|
423
|
+
// EPHEMERAL or EPHEMERAL_REPLACE — pre-mint a fresh UUID. Reusing
|
|
424
|
+
// the same key on next call (with a different sessionId) is fine;
|
|
425
|
+
// touch() will overwrite the registry entry.
|
|
426
|
+
sessionId = randomUUID();
|
|
427
|
+
}
|
|
428
|
+
} catch (err) {
|
|
429
|
+
// routingKey() throws on malformed items. Don't crash — fall back to
|
|
430
|
+
// pure ephemeral with no session-router participation.
|
|
431
|
+
console.warn(`[responder] routingKey derivation failed: ${err.message} — falling back to non-routed ephemeral`);
|
|
432
|
+
key = null;
|
|
433
|
+
sessionId = null;
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
const cliResult = await runClaudeCLI(systemPrompt, userContent, model, {
|
|
438
|
+
sessionId,
|
|
439
|
+
router: key ? router : null,
|
|
440
|
+
routingKey: key,
|
|
243
441
|
});
|
|
244
442
|
|
|
245
|
-
|
|
443
|
+
const text = (cliResult.text || "").trim();
|
|
444
|
+
if (!text) {
|
|
445
|
+
throw new Error("claude CLI returned empty result text in generateResponse");
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
// (b3) On success, touch the registry with the CLI-resolved session_id so
|
|
449
|
+
// the next call routed to this key gets a RESUME decision. Skip touch on
|
|
450
|
+
// is_error responses or when JSON parsing failed (legacy fallback path).
|
|
451
|
+
if (key && cliResult.jsonResult && cliResult.jsonResult.is_error !== true) {
|
|
452
|
+
const claudeSessionId = cliResult.jsonResult.session_id || sessionId;
|
|
453
|
+
try {
|
|
454
|
+
await router.touch(key, { claudeSessionId, model });
|
|
455
|
+
} catch (err) {
|
|
456
|
+
console.warn(`[responder] router.touch failed for ${key}: ${err.message}`);
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
return text;
|
|
246
461
|
}
|
|
247
462
|
|
|
248
463
|
// ---------------------------------------------------------------------------
|
|
@@ -518,3 +518,197 @@ export function releaseThreadLock(channel, threadTs) {
|
|
|
518
518
|
// Lock already released or never existed — fine
|
|
519
519
|
}
|
|
520
520
|
}
|
|
521
|
+
|
|
522
|
+
// ---------------------------------------------------------------------------
// Item claims — prevents multiple parallel backlog sessions from picking up
// the same queue item simultaneously. Designed per:
//   outputs/research/2026-04-18-session-coordination-design.md
// ---------------------------------------------------------------------------

const ITEM_CLAIM_DIR = join(SOPHIE_AI_DIR, "state", "locks", "item-claims");
const DEFAULT_ITEM_CLAIM_TTL_MIN = 30; // Default TTL in minutes

// Ensure the claim directory exists. Best-effort: the claim functions below
// are fail-open (an unwritable claim still lets work proceed, and the sweep
// tolerates a missing directory), so a failure here must not abort module
// load for every importer.
try {
  mkdirSync(ITEM_CLAIM_DIR, { recursive: true });
} catch (err) {
  console.warn(`[session-lock] Could not create item-claim directory ${ITEM_CLAIM_DIR} (fail-open): ${err.message}`);
}
|
|
533
|
+
|
|
534
|
+
/**
 * Check whether a process with the given PID currently exists.
 * Uses kill(pid, 0) — signal 0 performs the existence/permission check
 * without delivering a signal.
 *
 * Only positive integer PIDs are probed. Zero and negative values address
 * process groups rather than a single process, so they are reported as not
 * running instead of being passed to kill. Numeric strings are accepted.
 *
 * @param {number|string} pid - Process ID to check
 * @returns {boolean} true if the process exists (including when it exists but
 *   we lack permission to signal it)
 */
function isProcessRunning(pid) {
  const numericPid = Number(pid);
  if (!Number.isInteger(numericPid) || numericPid <= 0) return false;

  try {
    process.kill(numericPid, 0); // Signal 0 = existence check
    return true;
  } catch (err) {
    return err.code === "EPERM"; // Process exists but we lack permission
  }
}
|
|
549
|
+
|
|
550
|
+
/**
 * Attempt to claim a queue item so only one session processes it.
 * Uses exclusive file creation (wx flag) so that only one creator succeeds.
 *
 * An existing claim blocks the caller only while it is within its TTL and its
 * holder is not known to be dead (a claim with no recorded pid is treated as
 * live, matching hasActiveClaim). Expired, orphaned (holder pid dead) and
 * unreadable claim files are removed first, so the exclusive create below can
 * actually take the claim over.
 *
 * Fail-open design: claim system failures never block work. The worst case
 * is duplicate work (the status quo), not blocked work.
 *
 * @param {string} itemId - Queue item ID (e.g. "ib-20260407-001b")
 * @param {object} metadata - { session_id, agent_description, ttl_minutes, source, queue_file, pid }
 * @returns {{ claimed: boolean, reason?: string, holder?: string }}
 *   reason is "active_claim" when a live claim already exists, or "race_lost"
 *   when another process created the claim between our check and our write.
 */
export function claimItem(itemId, metadata = {}) {
  const safeId = sanitiseItemId(itemId);
  const claimPath = join(ITEM_CLAIM_DIR, `${safeId}.claim`);
  const ttlMinutes = metadata.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN;

  // Check for existing claim
  if (existsSync(claimPath)) {
    try {
      const existing = JSON.parse(readFileSync(claimPath, "utf-8"));
      const existingTtl = existing.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN;
      const ageMinutes = (Date.now() - new Date(existing.claimed_at).getTime()) / 60000;

      if (ageMinutes < existingTtl) {
        // Claim is within TTL — it stands unless the holder is known dead.
        if (!existing.pid || isProcessRunning(existing.pid)) {
          return { claimed: false, reason: "active_claim", holder: existing.session_id };
        }
        console.log(`[session-lock] Overriding orphaned item claim for ${itemId}, pid ${existing.pid} is dead`);
      } else {
        // TTL expired (an unparsable claimed_at also lands here)
        console.log(`[session-lock] Overriding expired item claim for ${itemId}, age ${ageMinutes.toFixed(1)}m > ttl ${existingTtl}m`);
      }
    } catch {
      // Corrupted or vanished claim file — treat as no existing claim (fail-open)
    }

    // Remove the stale file; otherwise the exclusive create below would always
    // fail with EEXIST and the override would never take effect. Not atomic
    // with the create, but the worst case is duplicate work.
    try {
      unlinkSync(claimPath);
    } catch (err) {
      if (err.code !== "ENOENT") {
        console.warn(`[session-lock] Could not remove stale item claim for ${itemId}: ${err.message}`);
      }
    }
  }

  const claimData = {
    session_id: metadata.session_id || `claim-${Date.now()}`,
    claimed_at: new Date().toISOString(),
    agent_description: metadata.agent_description || "",
    ttl_minutes: ttlMinutes,
    source: metadata.source || "backlog",
    queue_file: metadata.queue_file || "",
    pid: metadata.pid || process.pid,
  };

  try {
    // Atomic exclusive create — fails with EEXIST if the file already exists
    writeFileSync(claimPath, JSON.stringify(claimData, null, 2), { flag: "wx" });
    return { claimed: true };
  } catch (err) {
    if (err.code === "EEXIST") {
      // Race condition — another process created the claim first
      try {
        const existing = JSON.parse(readFileSync(claimPath, "utf-8"));
        const ageMinutes = (Date.now() - new Date(existing.claimed_at).getTime()) / 60000;
        if (ageMinutes >= (existing.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN)) {
          // Stale — overwrite (non-atomic but acceptable)
          writeFileSync(claimPath, JSON.stringify(claimData, null, 2));
          return { claimed: true };
        }
        return { claimed: false, reason: "race_lost", holder: existing.session_id };
      } catch {
        return { claimed: false, reason: "race_lost" };
      }
    }
    // Fail-open: if we can't write the claim for any other reason, allow processing
    console.warn(`[session-lock] Item claim write failed for ${itemId} (fail-open): ${err.message}`);
    return { claimed: true };
  }
}
|
|
625
|
+
|
|
626
|
+
/**
 * Release a claim for a queue item (on session completion or error).
 *
 * Best-effort and never throws for filesystem errors: a claim file that is
 * already gone is the normal case and is ignored silently; any other failure
 * is logged so a claim that could not be removed is visible in the logs.
 *
 * @param {string} itemId - Queue item ID
 */
export function releaseItemClaim(itemId) {
  const safeId = sanitiseItemId(itemId);
  const claimPath = join(ITEM_CLAIM_DIR, `${safeId}.claim`);
  try {
    unlinkSync(claimPath);
  } catch (err) {
    // ENOENT: claim already released, swept, or never existed — fine
    if (err.code !== "ENOENT") {
      console.warn(`[session-lock] Item claim release failed for ${itemId}: ${err.message}`);
    }
  }
}
|
|
640
|
+
|
|
641
|
+
/**
 * Check if a queue item has an active (non-expired, live-process) claim.
 * Used by sweepBacklog to skip items already being worked on.
 *
 * A claim is active only when its age is a valid number below its TTL and its
 * recorded pid (if any) is still running. A claim whose claimed_at cannot be
 * parsed is treated as not active, which matches how claimItem handles it.
 *
 * @param {string} itemId - Queue item ID
 * @returns {boolean} true if there is an active claim on this item
 */
export function hasActiveClaim(itemId) {
  const safeId = sanitiseItemId(itemId);
  const claimPath = join(ITEM_CLAIM_DIR, `${safeId}.claim`);

  try {
    if (!existsSync(claimPath)) return false;

    const claim = JSON.parse(readFileSync(claimPath, "utf-8"));
    const ageMinutes = (Date.now() - new Date(claim.claimed_at).getTime()) / 60000;
    const ttl = claim.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN;

    // Expired claim, or an unparsable timestamp (NaN age) — not active.
    // Written as a negated "<" so that NaN falls on the not-active side.
    if (!(ageMinutes < ttl)) return false;

    // If PID is recorded and process is dead — not active (orphaned)
    if (claim.pid && !isProcessRunning(claim.pid)) return false;

    return true;
  } catch {
    // Fail-open: if we can't read/parse the claim, treat as no claim
    return false;
  }
}
|
|
671
|
+
|
|
672
|
+
/**
 * Sweep stale item claims. Called periodically from the daemon health interval.
 *
 * Cleanup rules, applied per claim file:
 *   - Unreadable JSON or unparsable claimed_at: remove (corrupted)
 *   - Past 2x TTL: unconditionally remove (well past expiry)
 *   - Past 1x TTL but within 2x: remove only if a pid is recorded and dead
 *
 * Never throws: a missing or unreadable claim directory sweeps nothing.
 *
 * @returns {number} Number of stale claims removed
 */
export function sweepStaleItemClaims() {
  let swept = 0;
  try {
    const files = readdirSync(ITEM_CLAIM_DIR).filter((f) => f.endsWith(".claim"));
    for (const file of files) {
      const claimPath = join(ITEM_CLAIM_DIR, file);
      try {
        const claim = JSON.parse(readFileSync(claimPath, "utf-8"));
        const ageMinutes = (Date.now() - new Date(claim.claimed_at).getTime()) / 60000;
        const ttl = claim.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN;

        if (!Number.isFinite(ageMinutes)) {
          // Missing/invalid claimed_at: neither age tier below would ever
          // match, so the file would otherwise linger forever.
          unlinkSync(claimPath);
          swept++;
          console.log(`[session-lock] Swept item claim with invalid claimed_at: ${claim.session_id}`);
        } else if (ageMinutes > ttl * 2) {
          // Well past TTL — remove unconditionally
          unlinkSync(claimPath);
          swept++;
          console.log(`[session-lock] Swept stale item claim: ${claim.session_id}, age ${ageMinutes.toFixed(1)}m (2x TTL=${ttl * 2}m)`);
        } else if (ageMinutes > ttl) {
          // Past TTL but within 2x — check if PID is dead
          if (claim.pid && !isProcessRunning(claim.pid)) {
            unlinkSync(claimPath);
            swept++;
            console.log(`[session-lock] Swept orphaned item claim: ${claim.session_id}, pid ${claim.pid} dead`);
          }
        }
      } catch {
        // Corrupted claim file (or a failed removal above) — try to remove it
        try { unlinkSync(claimPath); swept++; } catch {}
      }
    }
  } catch {
    // ITEM_CLAIM_DIR doesn't exist or unreadable — nothing to sweep
  }
  return swept;
}
|
|
@@ -34,7 +34,7 @@ import { dispatch, getStatus, availableSlots, canDispatchBacklog, resetActiveSes
|
|
|
34
34
|
import { buildPrompt } from "./prompt-builder.mjs";
|
|
35
35
|
import { sendQuickResponse, sendHoldingMessage, isQuickReply } from "./responder.mjs";
|
|
36
36
|
import { recordPoll, recordClassification, recordSession, writeHealthDashboard } from "./health.mjs";
|
|
37
|
-
import { acquireLock, updateLock, scanStaleLocks, acquireThreadLock, claimRequest } from "./session-lock.mjs";
|
|
37
|
+
import { acquireLock, updateLock, scanStaleLocks, acquireThreadLock, claimRequest, hasActiveClaim, sweepStaleItemClaims } from "./session-lock.mjs";
|
|
38
38
|
|
|
39
39
|
// ---------------------------------------------------------------------------
|
|
40
40
|
// Configuration
|
|
@@ -401,6 +401,15 @@ async function sweepBacklog() {
|
|
|
401
401
|
}
|
|
402
402
|
return false;
|
|
403
403
|
}
|
|
404
|
+
|
|
405
|
+
// File-based item claim check — survives daemon restart and is visible
|
|
406
|
+
// to concurrent launchd triggers. Complements in-memory activeBacklogKeys.
|
|
407
|
+
// (ib-20260407-001b: concurrent session coordination)
|
|
408
|
+
if (qi.id && hasActiveClaim(qi.id)) {
|
|
409
|
+
console.log(`[daemon] Backlog skip: "${qi.title}" — item claimed by another session`);
|
|
410
|
+
return false;
|
|
411
|
+
}
|
|
412
|
+
|
|
404
413
|
return true;
|
|
405
414
|
});
|
|
406
415
|
|
|
@@ -488,10 +497,15 @@ async function main() {
|
|
|
488
497
|
}
|
|
489
498
|
}, BACKLOG_INTERVAL);
|
|
490
499
|
|
|
491
|
-
// Health dashboard
|
|
500
|
+
// Health dashboard + stale claim sweep
|
|
492
501
|
setInterval(() => {
|
|
493
502
|
try {
|
|
494
503
|
writeHealthDashboard();
|
|
504
|
+
// Sweep stale item claims (ib-20260407-001b: concurrent session coordination)
|
|
505
|
+
const claimsSwept = sweepStaleItemClaims();
|
|
506
|
+
if (claimsSwept > 0) {
|
|
507
|
+
console.log(`[daemon] Swept ${claimsSwept} stale item claims`);
|
|
508
|
+
}
|
|
495
509
|
} catch (err) {
|
|
496
510
|
console.error("[daemon] Health write error:", err.message);
|
|
497
511
|
}
|
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
<!--
|
|
2
|
+
Email signature template — Maestro framework
|
|
3
|
+
|
|
4
|
+
This file is rewritten by /init-maestro Sub-agent 4 with the new agent's
|
|
5
|
+
identity. The placeholders below are replaced verbatim:
|
|
6
|
+
|
|
7
|
+
{{AGENT_NAME}} e.g. "Lucas Ferreira"
|
|
8
|
+
{{AGENT_TITLE}} e.g. "VP, Regulatory & Licensing"
|
|
9
|
+
{{AGENT_EMAIL}} e.g. "lucas@adaptic.ai"
|
|
10
|
+
{{AGENT_PHONE}} e.g. "+61 478 964 324" (pretty form)
|
|
11
|
+
{{COMPANY_ADDRESS}} e.g. "Level 1, Innovation One, DIFC, Dubai, UAE"
|
|
12
|
+
|
|
13
|
+
If you see this file unchanged in a deployed agent's repo, something went
|
|
14
|
+
wrong with init-maestro Sub-agent 4 — re-run the wizard or rewrite by hand.
|
|
15
|
+
-->
|
|
1
16
|
<div
|
|
2
17
|
style="
|
|
3
18
|
font-family: Arial, Helvetica, sans-serif;
|
|
@@ -5,8 +20,8 @@
|
|
|
5
20
|
color: #333;
|
|
6
21
|
"
|
|
7
22
|
>
|
|
8
|
-
<p style="margin: 0; font-weight: bold; font-size: 14px">
|
|
9
|
-
<p style="margin: 0; color: #666">
|
|
23
|
+
<p style="margin: 0; font-weight: bold; font-size: 14px">{{AGENT_NAME}}</p>
|
|
24
|
+
<p style="margin: 0; color: #666">{{AGENT_TITLE}}</p>
|
|
10
25
|
<br />
|
|
11
26
|
<a href="https://adaptic.ai"
|
|
12
27
|
><img
|
|
@@ -16,10 +31,11 @@
|
|
|
16
31
|
style="display: block; margin: 8px 0"
|
|
17
32
|
/></a>
|
|
18
33
|
<br />
|
|
19
|
-
<p style="margin: 0; font-size: 12px">
|
|
34
|
+
<p style="margin: 0; font-size: 12px">{{AGENT_EMAIL}}</p>
|
|
35
|
+
<p style="margin: 0; font-size: 12px">{{AGENT_PHONE}}</p>
|
|
20
36
|
<br />
|
|
21
37
|
<p style="margin: 0; font-size: 12px">
|
|
22
|
-
|
|
38
|
+
{{COMPANY_ADDRESS}}
|
|
23
39
|
</p>
|
|
24
40
|
<p
|
|
25
41
|
style="
|