@adaptic/maestro 1.1.8 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/.claude/commands/init-maestro.md +304 -8
  2. package/README.md +28 -0
  3. package/bin/maestro.mjs +258 -56
  4. package/docs/guides/agents-observe-setup.md +64 -0
  5. package/docs/guides/ccxray-diagnostics.md +65 -0
  6. package/docs/guides/claude-mem-setup.md +79 -0
  7. package/docs/guides/claude-pace-setup.md +56 -0
  8. package/docs/guides/claudraband-sessions.md +98 -0
  9. package/docs/guides/clawteam-swarm.md +116 -0
  10. package/docs/guides/code-review-graph-setup.md +86 -0
  11. package/docs/guides/self-optimization-pattern.md +82 -0
  12. package/docs/guides/slack-setup.md +4 -2
  13. package/docs/guides/twilio-subaccounts-setup.md +223 -0
  14. package/docs/guides/webhook-relay-setup.md +349 -0
  15. package/package.json +2 -1
  16. package/plugins/maestro-skills/plugin.json +16 -0
  17. package/plugins/maestro-skills/skills/agents-observe.md +110 -0
  18. package/plugins/maestro-skills/skills/ccxray-diagnostics.md +91 -0
  19. package/plugins/maestro-skills/skills/claude-pace.md +61 -0
  20. package/plugins/maestro-skills/skills/code-review-graph.md +99 -0
  21. package/scaffold/CLAUDE.md +64 -0
  22. package/scaffold/config/agent.ts.example +2 -1
  23. package/scaffold/config/known-agents.json +35 -0
  24. package/scripts/daemon/classifier.mjs +264 -50
  25. package/scripts/daemon/dispatcher.mjs +109 -5
  26. package/scripts/daemon/launchd-wrapper-generic.sh +96 -0
  27. package/scripts/daemon/launchd-wrapper-slack-events.sh +37 -0
  28. package/scripts/daemon/launchd-wrapper.sh +91 -0
  29. package/scripts/daemon/lib/session-router.mjs +274 -0
  30. package/scripts/daemon/lib/session-router.test.mjs +295 -0
  31. package/scripts/daemon/prompt-builder.mjs +51 -11
  32. package/scripts/daemon/responder.mjs +234 -19
  33. package/scripts/daemon/session-lock.mjs +194 -0
  34. package/scripts/daemon/sophie-daemon.mjs +16 -2
  35. package/scripts/email-signature.html +20 -4
  36. package/scripts/local-triggers/generate-plists.sh +62 -10
  37. package/scripts/poller/imap-client.mjs +4 -2
  38. package/scripts/poller/slack-poller.mjs +104 -52
  39. package/scripts/setup/init-agent.sh +91 -1
  40. package/scripts/setup/install-dev-tools.sh +150 -0
  41. package/scripts/spawn-session.sh +21 -6
  42. package/workflows/continuous/backlog-executor.yaml +141 -0
  43. package/workflows/daily/evening-wrap.yaml +41 -1
  44. package/workflows/daily/morning-brief.yaml +17 -0
  45. package/workflows/event-driven/agent-failure-investigation.yaml +137 -0
  46. package/workflows/event-driven/pr-review.yaml +104 -0
  47. package/workflows/weekly/engineering-health.yaml +154 -0
@@ -1,39 +1,211 @@
1
1
  /**
2
2
  * responder.mjs — Quick response layer
3
3
  *
4
- * Handles two scenarios without spawning claude --print sessions:
4
+ * Handles two scenarios with a single short-lived `claude --print` call:
5
5
  *
6
- * 1. SIMPLE REPLIES: Uses Anthropic Sonnet API to generate a response,
7
- * then posts directly via Slack/Gmail API. ~4 seconds total.
6
+ * 1. SIMPLE REPLIES: Invokes the Claude Code CLI to generate a response,
7
+ * then posts directly via Slack/Gmail API.
8
8
  *
9
9
  * 2. HOLDING MESSAGES: For complex items that need a full session,
10
10
  * generates and sends an immediate acknowledgment so the sender
11
11
  * knows it's being worked on.
12
12
  *
13
- * This eliminates CLI startup overhead for routine responses and
14
- * provides instant feedback for complex requests.
13
+ * Migrated off `@anthropic-ai/sdk` per CEO directive (Slack DM
14
+ * D099N1JGKRQ, 2026-04-27 09:38Z + 11:33Z): all agent daemon model
15
+ * calls must funnel through Claude Code CLI sessions (Max
16
+ * subscription), not the Anthropic API.
15
17
  */
16
18
 
17
- import Anthropic from "@anthropic-ai/sdk";
18
19
  import { readFileSync, writeFileSync, readdirSync, appendFileSync, mkdirSync } from "fs";
19
- import { execFileSync } from "child_process";
20
+ import { execFileSync, spawn } from "child_process";
20
21
  import { join } from "path";
22
+ import { randomUUID } from "crypto";
21
23
  import { checkRecentlySent, registerSent } from "./session-lock.mjs";
24
+ import { routingKey as deriveRoutingKey, createRouter } from "./lib/session-router.mjs";
22
25
 
23
26
  const SOPHIE_AI_DIR = join(new URL(".", import.meta.url).pathname, "../..");
24
27
  const SONNET_MODEL = "claude-sonnet-4-6";
28
+ const CLAUDE_BIN = process.env.CLAUDE_BIN || "/Users/sophie/.local/bin/claude";
29
+ const CLAUDE_CLI_TIMEOUT_MS = 60_000;
30
+ const SESSION_REGISTRY_PATH = join(SOPHIE_AI_DIR, "state", "daemon", "session-router-registry.json");
31
+
32
// Singleton router — lazily created on first generateResponse() call. The
// scaffold's createRouter is async (eager registry read), so we cache the
// promise and await it inside generateResponse.
let routerPromise = null;

/**
 * Return the shared session-router promise, creating it on first use.
 *
 * A rejected creation is NOT kept in the cache: if createRouter() fails
 * (for example, the registry file is unreadable at that moment) the cached
 * promise is cleared so the next call retries instead of replaying the same
 * rejection for the lifetime of the process.
 *
 * @returns {Promise<object>} whatever createRouter() resolves to
 */
function getRouter() {
  if (!routerPromise) {
    const pending = createRouter({ registryPath: SESSION_REGISTRY_PATH });
    routerPromise = pending;
    // Side-channel handler only: the caller still observes the rejection on
    // the promise returned below.
    Promise.resolve(pending).catch(() => {
      if (routerPromise === pending) routerPromise = null;
    });
  }
  return routerPromise;
}
42
+
43
/**
 * Adapt a daemon item ({service, channel, thread_id, ...}) to the
 * {source, channel, thread_ts, thread_id, ...} shape consumed by the
 * session-router's routingKey().
 *
 * Yields null whenever the item cannot be keyed: it is not an object, its
 * service is not one of slack/gmail/calendar, or the identifying field for
 * that service is absent. The caller then falls back to a fresh pre-minted
 * UUID with EPHEMERAL semantics.
 *
 * @param {object} item - daemon-shaped inbound item
 * @returns {object|null} router-shaped item, or null when it cannot be keyed
 */
function deriveRouterItem(item) {
  if (!item || typeof item !== "object") return null;

  switch (item.service) {
    case "slack": {
      const slackChannel = item.channel || item.channel_id;
      if (!slackChannel) return null;
      const threadTs = item.thread_id || item.thread_ts || null;
      const messageTs = item.ts || item.timestamp || null;
      return { source: "slack", channel: slackChannel, thread_ts: threadTs, ts: messageTs };
    }
    case "gmail": {
      const threadId = item.thread_id || item.threadId;
      return threadId ? { source: "gmail", thread_id: threadId } : null;
    }
    case "calendar": {
      const eventId = item.event_id || item.eventId;
      return eventId ? { source: "calendar", event_id: eventId } : null;
    }
    default:
      return null;
  }
}
25
83
 
26
84
  // Lazy token access — dotenv loads in daemon main before these are called
27
85
  function getSlackToken() {
28
86
  return process.env.SLACK_USER_TOKEN || process.env.SLACK_BOT_TOKEN;
29
87
  }
30
88
 
31
- let anthropic = null;
32
89
  let cachedPreamble = null;
33
90
 
34
- function getClient() {
35
- if (!anthropic) anthropic = new Anthropic();
36
- return anthropic;
91
// Spawn `claude --print` with the supplied system + user prompts and model.
// Mirrors the pattern used in classifier.mjs:
//   • child_process.spawn (not exec) — avoids shell-escape injection on
//     potentially-hostile sender content.
//   • System prompt rides on --append-system-prompt; user prompt is written
//     to stdin and the pipe is closed.
//   • Non-zero exit, timeout, spawn error or stdin write error all reject
//     so the caller can surface the failure.
//
// Session-router wire-up (b2-b4, cycle 474): when the caller supplies a
// `sessionId` (pre-minted UUID) and a `router` + `routingKey`, the spawn
// adds `--session-id <uuid> --output-format json`, parses the one-line JSON
// stdout into {session_id, result, is_error} and calls router.recordExit on
// close (router.touch is left to the caller). Per b1 flag-verification
// report, NEVER combine `--resume` with `--session-id` (not needed:
// pre-minting + reusing the same UUID across spawns is the resume mechanism).
//
// @param {string} systemPrompt - passed via --append-system-prompt
// @param {string} userPrompt - written to the child's stdin
// @param {string} model - passed via --model
// @param {{sessionId?: string|null, router?: object|null, routingKey?: string|null}} [opts]
// @returns {Promise<{ text: string, jsonResult: object|null, exitCode: number }>}
function runClaudeCLI(systemPrompt, userPrompt, model, opts = {}) {
  const { sessionId = null, router = null, routingKey = null } = opts;

  return new Promise((resolvePromise, rejectPromise) => {
    const args = [
      "--print",
      "--dangerously-skip-permissions",
      "--model", model,
      "--append-system-prompt", systemPrompt,
    ];
    if (sessionId) {
      // --output-format json is only valid in combination with --print (per
      // b1 report). We always pass --print above, so this is safe.
      args.push("--session-id", sessionId, "--output-format", "json");
    }

    const proc = spawn(CLAUDE_BIN, args, {
      stdio: ["pipe", "pipe", "pipe"],
      // Force claude CLI onto keychain OAuth (Max subscription); strip any
      // stale ANTHROPIC_API_KEY/AUTH_TOKEN inherited from the daemon env.
      env: { ...process.env, ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
    });

    let stdout = "";
    let stderr = "";
    let settled = false;
    let killTimer = null;

    // `proc.killed` only records that a signal was SENT, so it is already
    // true right after SIGTERM. Whether the child is actually gone is
    // reflected by exitCode/signalCode.
    const hasExited = () => proc.exitCode !== null || proc.signalCode !== null;

    const timer = setTimeout(() => {
      if (settled) return;
      settled = true;
      try { proc.kill("SIGTERM"); } catch (_) { /* noop */ }
      // Escalate to SIGKILL if the child ignored SIGTERM.
      killTimer = setTimeout(() => {
        try { if (!hasExited()) proc.kill("SIGKILL"); } catch (_) { /* noop */ }
      }, 2000);
      rejectPromise(new Error(`claude CLI timed out after ${CLAUDE_CLI_TIMEOUT_MS}ms`));
    }, CLAUDE_CLI_TIMEOUT_MS);

    // Reject exactly once; later failures after settlement are ignored.
    const rejectOnce = (message) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      rejectPromise(new Error(message));
    };

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");
    proc.stdout.on("data", (chunk) => { stdout += chunk; });
    proc.stderr.on("data", (chunk) => { stderr += chunk; });

    // Write failures on the pipe (EPIPE, closed socket) arrive as stream
    // 'error' events, not synchronous throws; without a listener they would
    // surface as an uncaught exception.
    proc.stdin.on("error", (err) => {
      rejectOnce(`claude CLI stdin write error: ${err.message}`);
    });

    proc.on("error", (err) => {
      rejectOnce(`claude CLI spawn error: ${err.message}`);
    });

    proc.on("close", (code) => {
      clearTimeout(killTimer);

      // (b4) Always notify the router of the exit, regardless of whether
      // the promise has already settled (timeout path) or not.
      if (router && routingKey) {
        // Fire-and-forget. Wrapping the call in a promise executor turns a
        // synchronous throw from recordExit into a rejection too, so a
        // bookkeeping failure can never escape this event handler.
        new Promise((resolve) => resolve(router.recordExit(routingKey, code))).catch((err) => {
          console.warn(`[responder] router.recordExit failed for ${routingKey}: ${err.message}`);
        });
      }

      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (code !== 0) {
        const tail = (stderr || "").trim().slice(-500);
        rejectPromise(new Error(`claude CLI exited ${code}: ${tail || "no stderr"}`));
        return;
      }

      // (b3) If we asked for JSON, parse it. Otherwise return raw text.
      if (sessionId) {
        const trimmed = (stdout || "").trim();
        try {
          const parsed = JSON.parse(trimmed);
          // Per b1 report: top-level `session_id` (snake_case UUID), `result`
          // (text), `is_error` (bool). Top-level `uuid` is the message UUID,
          // NOT the session id — do NOT use it.
          resolvePromise({ text: parsed.result ?? "", jsonResult: parsed, exitCode: code });
        } catch (parseErr) {
          // Legacy fallback (rollout-safety): older CLIs or unexpected output
          // shapes shouldn't crash the daemon. Log a warning, surface the raw
          // text, and let the caller decide whether to call router.touch.
          console.warn(`[responder] claude CLI JSON parse failed (sessionId=${sessionId}): ${parseErr.message} — falling back to raw stdout`);
          resolvePromise({ text: trimmed, jsonResult: null, exitCode: code });
        }
      } else {
        resolvePromise({ text: stdout, jsonResult: null, exitCode: code });
      }
    });

    try {
      proc.stdin.end(userPrompt, "utf8");
    } catch (err) {
      rejectOnce(`claude CLI stdin write error: ${err.message}`);
    }
  });
}
38
210
 
39
211
  function today() {
@@ -187,7 +359,7 @@ function loadConversationHistory(item) {
187
359
  }
188
360
 
189
361
  // ---------------------------------------------------------------------------
190
- // Generate response text via Anthropic Sonnet API
362
+ // Generate response text via `claude --print` CLI
191
363
  // ---------------------------------------------------------------------------
192
364
 
193
365
  async function generateResponse(item, classResult, isHolding = false) {
@@ -234,15 +406,58 @@ ${profile ? `\nSender profile:\n${profile}` : ""}`;
234
406
  `\nClassification: ${classResult.summary}`,
235
407
  ].filter(Boolean).join("\n");
236
408
 
237
- const client = getClient();
238
- const response = await client.messages.create({
239
- model,
240
- max_tokens: 512,
241
- system: systemPrompt,
242
- messages: [{ role: "user", content: userContent }],
409
+ // (b2) Session-router decision. Compute the routing key from a daemon→router
410
+ // adapter view of the item. If the item can't be keyed (unknown service,
411
+ // missing channel/thread), fall back to pure ephemeral with no router calls.
412
+ const router = await getRouter();
413
+ const routerItem = deriveRouterItem(item);
414
+ let key = null;
415
+ let sessionId = null;
416
+ if (routerItem) {
417
+ try {
418
+ key = deriveRoutingKey(routerItem);
419
+ const decision = router.route(key);
420
+ if (decision.decision === "RESUME" && decision.resumeId) {
421
+ sessionId = decision.resumeId;
422
+ } else {
423
+ // EPHEMERAL or EPHEMERAL_REPLACE — pre-mint a fresh UUID. Reusing
424
+ // the same key on next call (with a different sessionId) is fine;
425
+ // touch() will overwrite the registry entry.
426
+ sessionId = randomUUID();
427
+ }
428
+ } catch (err) {
429
+ // routingKey() throws on malformed items. Don't crash — fall back to
430
+ // pure ephemeral with no session-router participation.
431
+ console.warn(`[responder] routingKey derivation failed: ${err.message} — falling back to non-routed ephemeral`);
432
+ key = null;
433
+ sessionId = null;
434
+ }
435
+ }
436
+
437
+ const cliResult = await runClaudeCLI(systemPrompt, userContent, model, {
438
+ sessionId,
439
+ router: key ? router : null,
440
+ routingKey: key,
243
441
  });
244
442
 
245
- return response.content[0].text.trim();
443
+ const text = (cliResult.text || "").trim();
444
+ if (!text) {
445
+ throw new Error("claude CLI returned empty result text in generateResponse");
446
+ }
447
+
448
+ // (b3) On success, touch the registry with the CLI-resolved session_id so
449
+ // the next call routed to this key gets a RESUME decision. Skip touch on
450
+ // is_error responses or when JSON parsing failed (legacy fallback path).
451
+ if (key && cliResult.jsonResult && cliResult.jsonResult.is_error !== true) {
452
+ const claudeSessionId = cliResult.jsonResult.session_id || sessionId;
453
+ try {
454
+ await router.touch(key, { claudeSessionId, model });
455
+ } catch (err) {
456
+ console.warn(`[responder] router.touch failed for ${key}: ${err.message}`);
457
+ }
458
+ }
459
+
460
+ return text;
246
461
  }
247
462
 
248
463
  // ---------------------------------------------------------------------------
@@ -518,3 +518,197 @@ export function releaseThreadLock(channel, threadTs) {
518
518
  // Lock already released or never existed — fine
519
519
  }
520
520
  }
521
+
522
+ // ---------------------------------------------------------------------------
523
+ // Item claims — prevents multiple parallel backlog sessions from picking up
524
+ // the same queue item simultaneously. Designed per:
525
+ // outputs/research/2026-04-18-session-coordination-design.md
526
+ // ---------------------------------------------------------------------------
527
+
528
+ const ITEM_CLAIM_DIR = join(SOPHIE_AI_DIR, "state", "locks", "item-claims");
529
+ const DEFAULT_ITEM_CLAIM_TTL_MIN = 30; // Default TTL in minutes
530
+
531
+ // Ensure directory exists
532
+ mkdirSync(ITEM_CLAIM_DIR, { recursive: true });
533
+
534
/**
 * Check if a process is still running (macOS-compatible).
 * Uses kill(pid, 0) — signal 0 checks existence without sending a signal.
 *
 * Only positive integer pids are probed. Zero and negative values address
 * process groups in kill(2), so they would report "running" without saying
 * anything about a single process; those, and non-numeric values, return
 * false.
 *
 * @param {number} pid - Process ID to check
 * @returns {boolean} true if process is running
 */
function isProcessRunning(pid) {
  const numericPid = Number(pid);
  if (!Number.isInteger(numericPid) || numericPid <= 0) return false;

  try {
    process.kill(numericPid, 0); // Signal 0 = existence check
    return true;
  } catch (err) {
    return err.code === "EPERM"; // Process exists but we lack permission
  }
}
549
+
550
/**
 * Decide whether a parsed claim record still blocks other sessions.
 * A claim is active only while it is inside its TTL AND records a pid that
 * is still running; an unparseable timestamp (NaN age) counts as inactive.
 *
 * @param {object} claim - parsed contents of a .claim file
 * @returns {boolean} true if the claim must be respected
 */
function isItemClaimActive(claim) {
  const ageMinutes = (Date.now() - new Date(claim.claimed_at).getTime()) / 60000;
  const ttl = claim.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN;
  return ageMinutes < ttl && Boolean(claim.pid) && isProcessRunning(claim.pid);
}

/**
 * Attempt to claim a queue item so only one session processes it.
 * Uses exclusive file creation (wx flag) for POSIX-atomic acquisition.
 *
 * An existing claim file that is expired, orphaned (holder pid dead) or
 * unreadable is overridden. Because that file still exists, the exclusive
 * create fails with EEXIST; the EEXIST branch therefore re-reads the file
 * and overwrites it when it is still not an active claim.
 *
 * Fail-open design: claim system failures never block work. The worst case
 * is duplicate work (the status quo), not blocked work.
 *
 * @param {string} itemId - Queue item ID (e.g. "ib-20260407-001b")
 * @param {object} metadata - { session_id, agent_description, ttl_minutes, source, queue_file, pid }
 * @returns {{ claimed: boolean, reason?: string, holder?: string }}
 */
export function claimItem(itemId, metadata = {}) {
  const safeId = sanitiseItemId(itemId);
  const claimPath = join(ITEM_CLAIM_DIR, `${safeId}.claim`);
  const ttlMinutes = metadata.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN;

  // Set when the pre-check could not parse the existing file; used below to
  // tell a genuinely corrupt file from one another process is mid-write on.
  let sawCorruptClaim = false;

  // Check for existing claim
  if (existsSync(claimPath)) {
    try {
      const existing = JSON.parse(readFileSync(claimPath, "utf-8"));
      const ageMinutes = (Date.now() - new Date(existing.claimed_at).getTime()) / 60000;

      if (ageMinutes < (existing.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN)) {
        // Claim is within TTL — check if the holding process is actually alive
        if (existing.pid && isProcessRunning(existing.pid)) {
          return { claimed: false, reason: "active_claim", holder: existing.session_id };
        }
        // Process is dead but claim not expired — override (orphaned claim)
        console.log(`[session-lock] Overriding orphaned item claim for ${itemId}, pid ${existing.pid} is dead`);
      } else {
        // TTL expired — override
        console.log(`[session-lock] Overriding expired item claim for ${itemId}, age ${ageMinutes.toFixed(1)}m > ttl ${existing.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN}m`);
      }
    } catch {
      // Corrupted claim file — treat as no existing claim (fail-open)
      sawCorruptClaim = true;
    }
  }

  const claimData = {
    session_id: metadata.session_id || `claim-${Date.now()}`,
    claimed_at: new Date().toISOString(),
    agent_description: metadata.agent_description || "",
    ttl_minutes: ttlMinutes,
    source: metadata.source || "backlog",
    queue_file: metadata.queue_file || "",
    pid: metadata.pid || process.pid,
  };
  const payload = JSON.stringify(claimData, null, 2);

  try {
    // Atomic exclusive create — fails with EEXIST if any claim file is present
    writeFileSync(claimPath, payload, { flag: "wx" });
    return { claimed: true };
  } catch (err) {
    if (err.code !== "EEXIST") {
      // Fail-open: if we can't write the claim for any other reason, allow processing
      console.warn(`[session-lock] Item claim write failed for ${itemId} (fail-open): ${err.message}`);
      return { claimed: true };
    }

    // A claim file is present: either the stale one seen above, or one that
    // another process created in the meantime. Re-read to find out which.
    let current = null;
    try {
      current = JSON.parse(readFileSync(claimPath, "utf-8"));
    } catch {
      current = null;
    }

    // An unreadable file is only overridden when the pre-check already saw
    // it unreadable; otherwise assume a concurrent writer just created it.
    const overridable = current ? !isItemClaimActive(current) : sawCorruptClaim;
    if (!overridable) {
      return current
        ? { claimed: false, reason: "race_lost", holder: current.session_id }
        : { claimed: false, reason: "race_lost" };
    }

    try {
      // Stale, orphaned or corrupt — overwrite (non-atomic but acceptable)
      writeFileSync(claimPath, payload);
      return { claimed: true };
    } catch {
      return { claimed: false, reason: "race_lost" };
    }
  }
}
625
+
626
/**
 * Drop the claim file for a queue item, typically once the owning session
 * has finished or failed. Removal errors are ignored.
 *
 * @param {string} itemId - Queue item ID
 */
export function releaseItemClaim(itemId) {
  const claimFileName = `${sanitiseItemId(itemId)}.claim`;
  try {
    unlinkSync(join(ITEM_CLAIM_DIR, claimFileName));
  } catch {
    // Nothing to remove (already released, expired, or never claimed) — fine
  }
}
640
+
641
/**
 * Check if a queue item has an active (non-expired, live-process) claim.
 * Used by sweepBacklog to skip items already being worked on.
 *
 * A claim whose `claimed_at` is missing or unparseable yields a NaN age and
 * is reported as not active, matching how claimItem treats such a claim
 * (overridable) and keeping the fail-open contract.
 *
 * @param {string} itemId - Queue item ID
 * @returns {boolean} true if there is an active claim on this item
 */
export function hasActiveClaim(itemId) {
  const safeId = sanitiseItemId(itemId);
  const claimPath = join(ITEM_CLAIM_DIR, `${safeId}.claim`);

  try {
    if (!existsSync(claimPath)) return false;

    const claim = JSON.parse(readFileSync(claimPath, "utf-8"));
    const ageMinutes = (Date.now() - new Date(claim.claimed_at).getTime()) / 60000;
    const ttl = claim.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN;

    // Expired claim, or an age that cannot be computed (NaN) — not active.
    // Written as a negated `<` so NaN lands on the "not active" side.
    if (!(ageMinutes < ttl)) return false;

    // If PID is recorded and process is dead — not active (orphaned)
    if (claim.pid && !isProcessRunning(claim.pid)) return false;

    return true;
  } catch {
    // Fail-open: if we can't read/parse the claim, treat as no claim
    return false;
  }
}
671
+
672
/**
 * Sweep stale item claims. Called periodically from the daemon health interval.
 *
 * Cleanup rules:
 * - Unparseable file, or a claim whose age cannot be computed (missing or
 *   invalid `claimed_at`): remove — it can never expire on its own
 * - Past 2x TTL: unconditionally remove (well past expiry)
 * - Past 1x TTL but within 2x: remove only if PID is dead (orphaned)
 *
 * A file that cannot be deleted is left in place and not counted.
 *
 * @returns {number} Number of stale claims removed
 */
export function sweepStaleItemClaims() {
  let files;
  try {
    files = readdirSync(ITEM_CLAIM_DIR).filter((f) => f.endsWith(".claim"));
  } catch {
    // ITEM_CLAIM_DIR doesn't exist or unreadable — nothing to sweep
    return 0;
  }

  // Best-effort delete; reports whether the file was actually removed.
  const removeClaim = (claimPath) => {
    try {
      unlinkSync(claimPath);
      return true;
    } catch {
      return false;
    }
  };

  let swept = 0;
  for (const file of files) {
    const claimPath = join(ITEM_CLAIM_DIR, file);

    let claim = null;
    let ageMinutes = NaN;
    try {
      claim = JSON.parse(readFileSync(claimPath, "utf-8"));
      ageMinutes = (Date.now() - new Date(claim.claimed_at).getTime()) / 60000;
    } catch {
      claim = null;
    }

    if (claim === null || !Number.isFinite(ageMinutes)) {
      // Corrupted claim file (or unusable timestamp) — remove it
      if (removeClaim(claimPath)) swept++;
      continue;
    }

    const ttl = claim.ttl_minutes || DEFAULT_ITEM_CLAIM_TTL_MIN;

    if (ageMinutes > ttl * 2) {
      // Well past TTL — remove unconditionally
      if (removeClaim(claimPath)) {
        swept++;
        console.log(`[session-lock] Swept stale item claim: ${claim.session_id}, age ${ageMinutes.toFixed(1)}m (2x TTL=${ttl * 2}m)`);
      }
    } else if (ageMinutes > ttl) {
      // Past TTL but within 2x — check if PID is dead
      if (claim.pid && !isProcessRunning(claim.pid) && removeClaim(claimPath)) {
        swept++;
        console.log(`[session-lock] Swept orphaned item claim: ${claim.session_id}, pid ${claim.pid} dead`);
      }
    }
  }
  return swept;
}
@@ -34,7 +34,7 @@ import { dispatch, getStatus, availableSlots, canDispatchBacklog, resetActiveSes
34
34
  import { buildPrompt } from "./prompt-builder.mjs";
35
35
  import { sendQuickResponse, sendHoldingMessage, isQuickReply } from "./responder.mjs";
36
36
  import { recordPoll, recordClassification, recordSession, writeHealthDashboard } from "./health.mjs";
37
- import { acquireLock, updateLock, scanStaleLocks, acquireThreadLock, claimRequest } from "./session-lock.mjs";
37
+ import { acquireLock, updateLock, scanStaleLocks, acquireThreadLock, claimRequest, hasActiveClaim, sweepStaleItemClaims } from "./session-lock.mjs";
38
38
 
39
39
  // ---------------------------------------------------------------------------
40
40
  // Configuration
@@ -401,6 +401,15 @@ async function sweepBacklog() {
401
401
  }
402
402
  return false;
403
403
  }
404
+
405
+ // File-based item claim check — survives daemon restart and is visible
406
+ // to concurrent launchd triggers. Complements in-memory activeBacklogKeys.
407
+ // (ib-20260407-001b: concurrent session coordination)
408
+ if (qi.id && hasActiveClaim(qi.id)) {
409
+ console.log(`[daemon] Backlog skip: "${qi.title}" — item claimed by another session`);
410
+ return false;
411
+ }
412
+
404
413
  return true;
405
414
  });
406
415
 
@@ -488,10 +497,15 @@ async function main() {
488
497
  }
489
498
  }, BACKLOG_INTERVAL);
490
499
 
491
- // Health dashboard
500
+ // Health dashboard + stale claim sweep
492
501
  setInterval(() => {
493
502
  try {
494
503
  writeHealthDashboard();
504
+ // Sweep stale item claims (ib-20260407-001b: concurrent session coordination)
505
+ const claimsSwept = sweepStaleItemClaims();
506
+ if (claimsSwept > 0) {
507
+ console.log(`[daemon] Swept ${claimsSwept} stale item claims`);
508
+ }
495
509
  } catch (err) {
496
510
  console.error("[daemon] Health write error:", err.message);
497
511
  }
@@ -1,3 +1,18 @@
1
+ <!--
2
+ Email signature template — Maestro framework
3
+
4
+ This file is rewritten by /init-maestro Sub-agent 4 with the new agent's
5
+ identity. The placeholders below are replaced verbatim:
6
+
7
+ {{AGENT_NAME}} e.g. "Lucas Ferreira"
8
+ {{AGENT_TITLE}} e.g. "VP, Regulatory & Licensing"
9
+ {{AGENT_EMAIL}} e.g. "lucas@adaptic.ai"
10
+ {{AGENT_PHONE}} e.g. "+61 478 964 324" (pretty form)
11
+ {{COMPANY_ADDRESS}} e.g. "Level 1, Innovation One, DIFC, Dubai, UAE"
12
+
13
+ If you see this file unchanged in a deployed agent's repo, something went
14
+ wrong with init-maestro Sub-agent 4 — re-run the wizard or rewrite by hand.
15
+ -->
1
16
  <div
2
17
  style="
3
18
  font-family: Arial, Helvetica, sans-serif;
@@ -5,8 +20,8 @@
5
20
  color: #333;
6
21
  "
7
22
  >
8
- <p style="margin: 0; font-weight: bold; font-size: 14px">Sophie Nguyen</p>
9
- <p style="margin: 0; color: #666">Chief of Staff</p>
23
+ <p style="margin: 0; font-weight: bold; font-size: 14px">{{AGENT_NAME}}</p>
24
+ <p style="margin: 0; color: #666">{{AGENT_TITLE}}</p>
10
25
  <br />
11
26
  <a href="https://adaptic.ai"
12
27
  ><img
@@ -16,10 +31,11 @@
16
31
  style="display: block; margin: 8px 0"
17
32
  /></a>
18
33
  <br />
19
- <p style="margin: 0; font-size: 12px">sophie@adaptic.ai</p>
34
+ <p style="margin: 0; font-size: 12px">{{AGENT_EMAIL}}</p>
35
+ <p style="margin: 0; font-size: 12px">{{AGENT_PHONE}}</p>
20
36
  <br />
21
37
  <p style="margin: 0; font-size: 12px">
22
- Level 1, Innovation One, Dubai International Financial Centre, Dubai, UAE
38
+ {{COMPANY_ADDRESS}}
23
39
  </p>
24
40
  <p
25
41
  style="