@adaptic/maestro 1.1.8 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/.claude/commands/init-maestro.md +304 -8
  2. package/README.md +28 -0
  3. package/bin/maestro.mjs +1 -1
  4. package/docs/guides/agents-observe-setup.md +64 -0
  5. package/docs/guides/ccxray-diagnostics.md +65 -0
  6. package/docs/guides/claude-mem-setup.md +79 -0
  7. package/docs/guides/claude-pace-setup.md +56 -0
  8. package/docs/guides/claudraband-sessions.md +98 -0
  9. package/docs/guides/clawteam-swarm.md +116 -0
  10. package/docs/guides/code-review-graph-setup.md +86 -0
  11. package/docs/guides/self-optimization-pattern.md +82 -0
  12. package/docs/guides/slack-setup.md +4 -2
  13. package/docs/guides/twilio-subaccounts-setup.md +223 -0
  14. package/docs/guides/webhook-relay-setup.md +349 -0
  15. package/package.json +2 -1
  16. package/plugins/maestro-skills/plugin.json +16 -0
  17. package/plugins/maestro-skills/skills/agents-observe.md +110 -0
  18. package/plugins/maestro-skills/skills/ccxray-diagnostics.md +91 -0
  19. package/plugins/maestro-skills/skills/claude-pace.md +61 -0
  20. package/plugins/maestro-skills/skills/code-review-graph.md +99 -0
  21. package/scaffold/CLAUDE.md +64 -0
  22. package/scaffold/config/agent.ts.example +2 -1
  23. package/scaffold/config/known-agents.json +35 -0
  24. package/scripts/daemon/classifier.mjs +264 -50
  25. package/scripts/daemon/dispatcher.mjs +109 -5
  26. package/scripts/daemon/launchd-wrapper-generic.sh +96 -0
  27. package/scripts/daemon/launchd-wrapper-slack-events.sh +37 -0
  28. package/scripts/daemon/launchd-wrapper.sh +91 -0
  29. package/scripts/daemon/lib/session-router.mjs +274 -0
  30. package/scripts/daemon/lib/session-router.test.mjs +295 -0
  31. package/scripts/daemon/prompt-builder.mjs +51 -11
  32. package/scripts/daemon/responder.mjs +234 -19
  33. package/scripts/daemon/session-lock.mjs +194 -0
  34. package/scripts/daemon/sophie-daemon.mjs +16 -2
  35. package/scripts/email-signature.html +20 -4
  36. package/scripts/local-triggers/generate-plists.sh +62 -10
  37. package/scripts/poller/imap-client.mjs +4 -2
  38. package/scripts/poller/slack-poller.mjs +104 -52
  39. package/scripts/setup/init-agent.sh +91 -1
  40. package/scripts/setup/install-dev-tools.sh +150 -0
  41. package/scripts/spawn-session.sh +21 -6
  42. package/workflows/continuous/backlog-executor.yaml +141 -0
  43. package/workflows/daily/evening-wrap.yaml +41 -1
  44. package/workflows/daily/morning-brief.yaml +17 -0
  45. package/workflows/event-driven/agent-failure-investigation.yaml +137 -0
  46. package/workflows/event-driven/pr-review.yaml +104 -0
  47. package/workflows/weekly/engineering-health.yaml +154 -0
@@ -5,7 +5,8 @@
5
5
  import { spawn } from "child_process";
6
6
  import { appendFileSync, mkdirSync, writeFileSync, readFileSync, renameSync } from "fs";
7
7
  import { join } from "path";
8
- import { releaseLock, releaseThreadLock, releaseRequestClaim } from "./session-lock.mjs";
8
+ import { releaseLock, releaseThreadLock, releaseRequestClaim, claimItem, releaseItemClaim } from "./session-lock.mjs";
9
+ import { recordSession } from "./health.mjs";
9
10
 
10
11
  const SOPHIE_AI_DIR = join(new URL(".", import.meta.url).pathname, "../..");
11
12
  const CLAUDE_BIN = process.env.CLAUDE_BIN || "/Users/sophie/.local/bin/claude";
@@ -27,6 +28,12 @@ const priorityQueue = []; // critical/high items
27
28
  const normalQueue = []; // normal items
28
29
  let sessionCounter = 0;
29
30
 
31
+ // Tracks sessions whose proc.on("error") handler has already fired.
32
+ // Prevents double-counting + double-cleanup when a spawn failure (ENOENT,
33
+ // EACCES, ETIMEDOUT) triggers both "error" and a trailing "close" event.
34
+ // See ib-20260416-daemon-etimedout-failed-event + cycle 135 memo.
35
+ const spawnErrorHandled = new Set();
36
+
30
37
  // Backlog dedup: track which items have active sessions to prevent retry storms
31
38
  const activeBacklogKeys = new Set(); // backlog item key -> true (while session is running)
32
39
  const backlogRetryCount = new Map(); // backlog item key -> number of times dispatched
@@ -196,6 +203,26 @@ export function dispatch(prompt, item, classResult, source = "inbox") {
196
203
  }
197
204
  }
198
205
 
206
+ // Item-claim acquisition: file-based claim visible across daemon restarts
207
+ // and concurrent launchd triggers. Complements the in-memory activeBacklogKeys.
208
+ // (ib-20260407-001b: concurrent session coordination)
209
+ if (source === "backlog" && item.id) {
210
+ const sessionId = `s-${Date.now()}-${sessionCounter + 1}`;
211
+ const claim = claimItem(item.id, {
212
+ session_id: sessionId,
213
+ agent_description: classResult.summary || item.title || "",
214
+ ttl_minutes: classResult.model === "opus" ? 120 : 30,
215
+ source: "backlog",
216
+ queue_file: item.source_file || "",
217
+ pid: process.pid, // daemon PID; child PID not yet known
218
+ });
219
+ if (!claim.claimed) {
220
+ console.log(`[dispatcher] Item claim denied for ${item.id}: ${claim.reason} (holder: ${claim.holder || "unknown"})`);
221
+ logSession({ event: "skipped", reason: `item_claim_denied: ${claim.reason}`, summary: classResult.summary, holder: claim.holder });
222
+ return;
223
+ }
224
+ }
225
+
199
226
  // Backlog items respect the reserved slot cap
200
227
  if (source === "backlog") {
201
228
  const backlogCount = countBySource("backlog");
@@ -250,9 +277,13 @@ function spawnSession(entry) {
250
277
  prompt,
251
278
  ];
252
279
 
280
+ // Strip Anthropic API credentials from spawn env so claude CLI falls through
281
+ // to the keychain OAuth (Max subscription) per CEO directive 2026-04-27.
282
+ // A stale ANTHROPIC_API_KEY in the daemon's inherited env will otherwise
283
+ // override the OAuth token and cause "Invalid API key" failures.
253
284
  const proc = spawn(CLAUDE_BIN, args, {
254
285
  cwd: SOPHIE_AI_DIR,
255
- env: { ...process.env },
286
+ env: { ...process.env, ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
256
287
  stdio: ["ignore", "pipe", "pipe"],
257
288
  });
258
289
 
@@ -287,8 +318,17 @@ function spawnSession(entry) {
287
318
 
288
319
  proc.on("close", (code) => {
289
320
  clearTimeout(timer);
321
+ // If proc.on("error") already fired for this session (spawn failure path
322
+ // — ENOENT, EACCES, ETIMEDOUT), cleanup + metric + lock release already
323
+ // happened. Skip to avoid double-count and double-release.
324
+ if (spawnErrorHandled.has(sessionId)) {
325
+ spawnErrorHandled.delete(sessionId);
326
+ drainQueue();
327
+ return;
328
+ }
290
329
  activeSessions.delete(sessionId);
291
330
  removeActiveSession(sessionId);
331
+ recordSession(true, code === 0);
292
332
  const duration = ((Date.now() - startTime) / 1000).toFixed(1);
293
333
 
294
334
  // Release item lock — MUST use same key order as acquireLock in daemon
@@ -312,12 +352,15 @@ function spawnSession(entry) {
312
352
  });
313
353
  }
314
354
 
315
- // Release backlog tracking
355
+ // Release backlog tracking + item claim
316
356
  if (source === "backlog") {
317
357
  const key = backlogKey(item);
318
358
  activeBacklogKeys.delete(key);
319
359
  const retries = backlogRetryCount.get(key) || 0;
320
360
 
361
+ // Release file-based item claim (ib-20260407-001b)
362
+ if (item.id) releaseItemClaim(item.id);
363
+
321
364
  // If session timed out (143=SIGTERM) and hit retry limit, log it
322
365
  if (code === 143 && retries >= MAX_BACKLOG_RETRIES) {
323
366
  console.warn(`[dispatcher] Backlog item "${classResult.summary}" exhausted ${MAX_BACKLOG_RETRIES} retries — will not retry`);
@@ -347,9 +390,70 @@ function spawnSession(entry) {
347
390
 
348
391
  proc.on("error", (err) => {
349
392
  clearTimeout(timer);
393
+ // Mark so the trailing proc.on("close") doesn't double-process.
394
+ spawnErrorHandled.add(sessionId);
350
395
  activeSessions.delete(sessionId);
351
- console.error(`[dispatcher] Session ${sessionId} error: ${err.message}`);
352
- logSession({ event: "error", sessionId, error: err.message });
396
+ removeActiveSession(sessionId);
397
+ recordSession(true, false);
398
+ const duration = ((Date.now() - startTime) / 1000).toFixed(1);
399
+ const errorCode = err.code || "unknown";
400
+
401
+ // Release item lock — mirror of close-handler logic.
402
+ const itemId = item.raw_ref || item.id || item.title;
403
+ if (itemId) releaseLock(itemId);
404
+
405
+ // Release thread lock so new messages in this thread can be processed.
406
+ if (item.thread_id) {
407
+ const channel = item.channel_id || (item.raw_ref ? (item.raw_ref.match(/slack:([^:]+):/) || [])[1] : null) || item.channel;
408
+ if (channel) releaseThreadLock(channel, item.thread_id);
409
+ }
410
+
411
+ // Release request claim + emit explicit claim_released event so
412
+ // reconciliation audits (cycle 124 Agent C pattern) can distinguish
413
+ // genuine in-flight sessions from silent-exit ETIMEDOUT failures
414
+ // without cross-referencing logs/daemon/responses.jsonl.
415
+ let claimReleased = false;
416
+ if (classResult && classResult.summary) {
417
+ releaseRequestClaim({
418
+ recipient: item.channel_id || item.channel || item.sender || "unknown",
419
+ subject: classResult.summary || item.subject || "",
420
+ action_type: classResult.action || "respond",
421
+ });
422
+ claimReleased = true;
423
+ }
424
+
425
+ // Release backlog tracking + item claim.
426
+ if (source === "backlog") {
427
+ const key = backlogKey(item);
428
+ activeBacklogKeys.delete(key);
429
+ // Release file-based item claim (ib-20260407-001b)
430
+ if (item.id) releaseItemClaim(item.id);
431
+ }
432
+
433
+ console.error(`[dispatcher] Session ${sessionId} failed: ${errorCode} (${err.message})`);
434
+
435
+ // Rich "failed" event — see ib-20260416-daemon-etimedout-failed-event.
436
+ logSession({
437
+ event: "failed",
438
+ sessionId,
439
+ error: errorCode,
440
+ error_message: err.message,
441
+ model,
442
+ source,
443
+ priority: classResult?.priority,
444
+ summary: classResult?.summary,
445
+ duration_s: parseFloat(duration),
446
+ active_count: activeSessions.size,
447
+ });
448
+
449
+ if (claimReleased) {
450
+ logSession({
451
+ event: "claim_released",
452
+ sessionId,
453
+ reason: `spawn_failed_${errorCode}`,
454
+ });
455
+ }
456
+
353
457
  drainQueue();
354
458
  });
355
459
  }
@@ -0,0 +1,96 @@
1
#!/bin/bash
# launchd-wrapper-generic.sh — Universal env bootstrap for ANY maestro
# script spawned under launchd.
#
# Usage in a plist:
#   <key>ProgramArguments</key>
#   <array>
#     <string>/path/to/scripts/daemon/launchd-wrapper-generic.sh</string>
#     <string>/path/to/script-to-run.sh</string>
#     <string>arg1</string>
#     <string>arg2</string>
#   </array>
#
# What it does:
#   1. Sets HOME, PATH, USER, AGENT_ROOT (launchd's bare env doesn't include them)
#   2. Detects an external SSD, creates the per-agent dirs on it, and exports
#      CLAUDE_CODE_TMPDIR + MAESTRO_SSD_AGENT_ROOT when the SSD is writable
#   3. Redirects stdout/stderr to a date-stamped log file — on the SSD when
#      writable, otherwise on the internal disk
#   4. Exec's the target script (via bash) with all remaining args
#
# NOTE(review): unlike launchd-wrapper.sh, this wrapper does NOT resolve a
# node binary — targets are run with bash and must locate node themselves.
# (An earlier version of this header claimed node selection; no such code
# exists below.)
#
# Why a wrapper instead of a plist EnvironmentVariables block?
# Putting these env vars directly in the plist has been observed to cause
# EX_CONFIG (78) failures on macOS Sequoia + when symlinks are involved.
# A wrapper script is more portable and easier to debug.

set -e

# launchd gives no argv sanity — fail loudly with EX_USAGE (64) if the plist
# forgot to pass the target script.
if [ $# -lt 1 ]; then
  echo "[wrapper] FATAL: no target script provided" >&2
  exit 64
fi

TARGET="$1"
shift

# Resolve AGENT_ROOT by walking UP from the target script's directory until a
# directory containing both package.json and config/ is found (the agent repo
# root). E.g. /Users/lucas/lucas-ai/scripts/local-triggers/run-trigger.sh
# → /Users/lucas/lucas-ai
TARGET_DIR="$(cd "$(dirname "$TARGET")" && pwd -P)"
CANDIDATE="$TARGET_DIR"
while [ "$CANDIDATE" != "/" ]; do
  if [ -f "$CANDIDATE/package.json" ] && [ -d "$CANDIDATE/config" ]; then
    AGENT_ROOT="$CANDIDATE"
    break
  fi
  CANDIDATE="$(dirname "$CANDIDATE")"
done
# No marker found anywhere up the tree → fall back to the script's own dir.
AGENT_ROOT="${AGENT_ROOT:-$TARGET_DIR}"

export AGENT_ROOT
export HOME="${HOME:-/Users/$(whoami)}"
export USER="${USER:-$(whoami)}"
export PATH="/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH"

# ── SSD detection ───────────────────────────────────────────────────────────
# An explicit MAESTRO_SSD_VOLUME override wins; otherwise probe common
# external-volume names, excluding the internal "Macintosh HD" mount.
SSD_VOLUME="${MAESTRO_SSD_VOLUME:-}"
if [ -z "$SSD_VOLUME" ]; then
  for v in /Volumes/*-SSD /Volumes/*SSD* /Volumes/maestro-data; do
    if [ -d "$v" ] && [ "$v" != "/Volumes/Macintosh HD" ]; then
      SSD_VOLUME="$v"
      break
    fi
  done
fi

# Agent name = repo dir name minus the conventional "-ai" suffix.
AGENT_NAME="$(basename "$AGENT_ROOT" | sed 's/-ai$//')"
SSD_AGENT_ROOT=""
SSD_WRITABLE=0
if [ -n "$SSD_VOLUME" ] && [ -d "$SSD_VOLUME" ]; then
  SSD_AGENT_ROOT="$SSD_VOLUME/maestro/$AGENT_NAME"
  # Prove writability with a real touch — macOS can mount the volume yet
  # still deny launchd-spawned processes write access to it.
  if mkdir -p "$SSD_AGENT_ROOT/claude-tmp" "$SSD_AGENT_ROOT/logs/launchd" 2>/dev/null && \
    touch "$SSD_AGENT_ROOT/.write-test-$$" 2>/dev/null; then
    rm -f "$SSD_AGENT_ROOT/.write-test-$$"
    SSD_WRITABLE=1
    export CLAUDE_CODE_TMPDIR="$SSD_AGENT_ROOT/claude-tmp"
    export MAESTRO_SSD_AGENT_ROOT="$SSD_AGENT_ROOT"
  fi
fi

cd "$AGENT_ROOT"

# Log file name is derived from the target's basename sans extension.
SCRIPT_NAME="$(basename "$TARGET" | sed 's/\.[^.]*$//')"
LOG_DATE="$(date +%Y-%m-%d)"

if [ "$SSD_WRITABLE" = "1" ]; then
  LOG_FILE="$SSD_AGENT_ROOT/logs/launchd/${SCRIPT_NAME}-${LOG_DATE}.log"
  echo "[wrapper $(date -u +%H:%M:%SZ)] starting $SCRIPT_NAME (SSD log: $LOG_FILE)" >> "$LOG_FILE" 2>/dev/null || true
  exec bash "$TARGET" "$@" >> "$LOG_FILE" 2>&1
else
  # Fall back to internal disk log if SSD isn't writable (e.g. macOS denies launchd
  # write access to external volumes until the user grants it via System Settings).
  FALLBACK_LOG="$AGENT_ROOT/logs/launchd/${SCRIPT_NAME}-${LOG_DATE}.log"
  mkdir -p "$(dirname "$FALLBACK_LOG")" 2>/dev/null || true
  exec bash "$TARGET" "$@" >> "$FALLBACK_LOG" 2>&1
fi
@@ -0,0 +1,37 @@
1
#!/bin/bash
# launchd-wrapper-slack-events.sh — Bootstraps env for the slack-events server.
#
# launchd starts children with an almost-empty environment (no HOME, PATH, or
# USER), so we reconstruct it, optionally steer Claude Code's temp dir to the
# external SSD, resolve a node binary (nvm → homebrew → system), and exec the
# server.
set -e
AGENT_ROOT="$(cd "$(dirname "$0")/../.." && pwd -P)"
export AGENT_ROOT
export HOME="${HOME:-/Users/$(whoami)}"
export USER="${USER:-$(whoami)}"
export PATH="/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH"
if [ -d "/Volumes/4TB-SSD" ]; then
  AGENT_NAME="$(basename "$AGENT_ROOT" | sed 's/-ai$//')"
  CLAUDE_TMP_DIR="/Volumes/4TB-SSD/maestro/$AGENT_NAME/claude-tmp"
  # BUGFIX: the mkdir must be guarded. Under `set -e`, an unguarded failure
  # (SSD mounted read-only, or macOS denying launchd write access to external
  # volumes) aborted the whole wrapper and the server never started. Mirror
  # the write-safe pattern used by launchd-wrapper.sh / launchd-wrapper-generic.sh:
  # only export CLAUDE_CODE_TMPDIR when the directory is actually usable.
  if mkdir -p "$CLAUDE_TMP_DIR" 2>/dev/null; then
    export CLAUDE_CODE_TMPDIR="$CLAUDE_TMP_DIR"
  fi
fi
cd "$AGENT_ROOT"
# Resolve node: pinned nvm versions first, then homebrew, then system.
NODE_BIN=""
for candidate in \
  "$HOME/.nvm/versions/node/v24.11.1/bin/node" \
  "$HOME/.nvm/versions/node/v24/bin/node" \
  "$HOME/.nvm/versions/node/v22/bin/node" \
  "$HOME/.nvm/versions/node/v20/bin/node" \
  /opt/homebrew/bin/node \
  /usr/local/bin/node \
  /usr/bin/node; do
  if [ -x "$candidate" ]; then
    NODE_BIN="$candidate"
    break
  fi
done
# Last resort: highest-versioned nvm node present on disk.
if [ -z "$NODE_BIN" ] && [ -d "$HOME/.nvm/versions/node" ]; then
  NODE_BIN=$(ls -1d "$HOME/.nvm/versions/node"/v*/bin/node 2>/dev/null | sort -V | tail -1)
fi
if [ -z "$NODE_BIN" ] || [ ! -x "$NODE_BIN" ]; then
  echo "[wrapper] FATAL: could not find node binary" >&2
  exit 127
fi
exec "$NODE_BIN" "$AGENT_ROOT/scripts/slack-events-server.mjs"
@@ -0,0 +1,91 @@
1
#!/bin/bash
# launchd-wrapper.sh — env bootstrap that launches the maestro daemon.
#
# launchd hands its children an almost-empty environment (no HOME, PATH, or
# AGENT_ROOT), so this script reconstructs it and then exec's the daemon.
# Keeping the setup here — rather than in an EnvironmentVariables block in
# the .plist — sidesteps the EX_CONFIG (78) failures observed on some macOS
# versions when symlinks are involved.
#
# Storage: when an external SSD is mounted under /Volumes, daemon runtime
# data is steered there. The plist's StandardOutPath/StandardErrorPath stay
# on the internal disk (launchd refuses external volumes for those) and only
# capture launchd-level startup errors; the daemon's own stdout/stderr is
# shell-redirected at the bottom of this script into
# /Volumes/{SSD_NAME}/maestro/{agent}/logs/daemon/.
#
# This wrapper is exec'd by ai.adaptic.{firstname}-daemon.plist.

set -e

AGENT_ROOT="$(cd "$(dirname "$0")/../.." && pwd -P)"
export AGENT_ROOT
export HOME="${HOME:-/Users/$(whoami)}"
export USER="${USER:-$(whoami)}"
export PATH="/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH"

# ── SSD redirect ────────────────────────────────────────────────────────────
# Honour an explicit MAESTRO_SSD_VOLUME override; otherwise probe the usual
# external-volume names, skipping the internal "Macintosh HD" mount.
SSD_VOLUME="${MAESTRO_SSD_VOLUME:-}"
if [ -z "$SSD_VOLUME" ]; then
  for vol in /Volumes/*-SSD /Volumes/*SSD* /Volumes/maestro-data; do
    if [ -d "$vol" ] && [ "$vol" != "/Volumes/Macintosh HD" ]; then
      SSD_VOLUME="$vol"
      break
    fi
  done
fi

# Agent name = repo dir name minus the conventional "-ai" suffix.
AGENT_NAME="$(basename "$AGENT_ROOT" | sed 's/-ai$//')"

# Prove the SSD is actually writable (macOS may mount it yet still deny
# launchd-spawned processes write access) before pointing Claude Code's
# temp dir at it.
SSD_AGENT_ROOT=""
SSD_WRITABLE=0
if [ -n "$SSD_VOLUME" ] && [ -d "$SSD_VOLUME" ]; then
  SSD_AGENT_ROOT="$SSD_VOLUME/maestro/$AGENT_NAME"
  if mkdir -p "$SSD_AGENT_ROOT/claude-tmp" "$SSD_AGENT_ROOT/logs/daemon" 2>/dev/null && \
    touch "$SSD_AGENT_ROOT/.write-test-$$" 2>/dev/null; then
    rm -f "$SSD_AGENT_ROOT/.write-test-$$"
    SSD_WRITABLE=1
    export CLAUDE_CODE_TMPDIR="$SSD_AGENT_ROOT/claude-tmp"
  fi
fi

cd "$AGENT_ROOT"

# ── node resolution ─────────────────────────────────────────────────────────
# Pinned nvm installs first, then homebrew, then the system binary.
NODE_BIN=""
for node_candidate in \
  "$HOME/.nvm/versions/node/v24.11.1/bin/node" \
  "$HOME/.nvm/versions/node/v24/bin/node" \
  "$HOME/.nvm/versions/node/v22/bin/node" \
  "$HOME/.nvm/versions/node/v20/bin/node" \
  /opt/homebrew/bin/node \
  /usr/local/bin/node \
  /usr/bin/node; do
  if [ -x "$node_candidate" ]; then
    NODE_BIN="$node_candidate"
    break
  fi
done
# Last resort: highest-versioned nvm node present on disk.
if [ -z "$NODE_BIN" ] && [ -d "$HOME/.nvm/versions/node" ]; then
  NODE_BIN=$(ls -1d "$HOME/.nvm/versions/node"/v*/bin/node 2>/dev/null | sort -V | tail -1)
fi
if [ -z "$NODE_BIN" ] || [ ! -x "$NODE_BIN" ]; then
  echo "[wrapper] FATAL: could not find node binary" >&2
  exit 127
fi

# ── launch ──────────────────────────────────────────────────────────────────
# Pick the log destination once, then exec with a single redirect: SSD when
# writable, internal disk otherwise — so the daemon still starts when macOS
# blocks writes to /Volumes/{name}.
if [ "$SSD_WRITABLE" = "1" ]; then
  DAEMON_LOG="$SSD_AGENT_ROOT/logs/daemon/daemon-$(date +%Y-%m-%d).log"
else
  DAEMON_LOG="$AGENT_ROOT/logs/daemon/daemon-$(date +%Y-%m-%d).log"
  mkdir -p "$(dirname "$DAEMON_LOG")" 2>/dev/null || true
fi
exec "$NODE_BIN" "$AGENT_ROOT/scripts/daemon/maestro-daemon.mjs" >> "$DAEMON_LOG" 2>&1
@@ -0,0 +1,274 @@
1
/**
 * session-router.mjs — Routes daemon Claude CLI invocations to either an
 * existing live session (RESUME) or a fresh ephemeral spawn (EPHEMERAL).
 *
 * Per design memo `outputs/drafts/2026-04-27-claude-cli-session-router.md`
 * §4 (architecture) and §5 (migration plan, step 2). This module is
 * scaffold-only and is NOT yet wired into dispatcher.mjs / responder.mjs.
 * It is intended to ship behind a SESSION_ROUTER_ENABLED=1 env flag
 * (memo §8 step 4) so its mere existence cannot alter daemon behaviour.
 *
 * Public API:
 *   - routingKey(item)  — pure function, derives a stable conversation key
 *   - createRouter(opts) — async factory returning { route, touch,
 *     recordExit, evictExpired, _readForTests }
 *
 * Registry on disk: a single JSON file with shape
 *   { sessions: { <key>: { ... } }, lru: [ <key>, <key>, ... ] }
 * Concurrent writes use the temp-file + fs.rename atomic pattern. Memo §4.1
 * explicitly rejects flock — "no stale lock files if the daemon dies".
 */

import { promises as fsp } from "fs";
import { dirname } from "path";

// Optional Slack-channel canonicaliser. Some installs of
// scripts/daemon/session-lock.mjs export canonicalizeSlackChannel; in plain
// maestro it may be absent (sophie-ai-only helper). Start from the identity
// function and upgrade only when the real helper is importable, so neither a
// failing import nor a missing export can ever break the daemon. Resolution
// happens once, at module load — never per call.
let canonicalizeSlackChannel = (c) => c;
try {
  const lockMod = await import("../session-lock.mjs");
  if (typeof lockMod.canonicalizeSlackChannel === "function") {
    canonicalizeSlackChannel = lockMod.canonicalizeSlackChannel;
  }
} catch {
  // TODO(session-router): session-lock.mjs unavailable — identity stays in
  // place. Fix the import or introduce a shared canonicalisation helper.
}

const EMPTY_REGISTRY = () => ({ sessions: {}, lru: [] });

/**
 * Pure key-derivation function (memo §4.2 table).
 *
 * Key shapes by source:
 *   slack    → thread > DM channel > channel+ts single-message bucket
 *   gmail    → thread id
 *   calendar → event id
 *   internal → item id
 *   backlog  → topic slug, else item id (under the "internal:" namespace)
 *
 * @param {object} item Inbound queue/inbox item
 * @returns {string} Routing key, e.g. "slack:D099N1JGKRQ:1777283277.123456"
 * @throws {TypeError} When item is not an object.
 * @throws {Error} When the source is unknown or a required field is missing.
 */
export function routingKey(item) {
  if (!item || typeof item !== "object") {
    throw new TypeError("routingKey: item must be an object");
  }
  const source = item.source;

  switch (source) {
    case "slack": {
      const channel = canonicalizeSlackChannel(item.channel || item.channel_id || "");
      if (!channel) throw new Error("routingKey: slack item missing channel");
      // Threaded message → one conversation per thread.
      if (item.thread_ts) return `slack:${channel}:${item.thread_ts}`;
      // Un-threaded DM ("D…" channel) → one conversation per DM channel.
      if (typeof channel === "string" && channel.startsWith("D")) {
        return `slack:${channel}`;
      }
      // Un-threaded channel message → per-message bucket (effectively ephemeral).
      return `slack:${channel}:${item.ts || item.event_ts || ""}`;
    }
    case "gmail": {
      const threadId = item.thread_id || item.threadId;
      if (!threadId) throw new Error("routingKey: gmail item missing thread_id");
      return `gmail:${threadId}`;
    }
    case "calendar": {
      const eventId = item.event_id || item.eventId;
      if (!eventId) throw new Error("routingKey: calendar item missing event_id");
      return `calendar:${eventId}`;
    }
    case "internal": {
      if (!item.id) throw new Error("routingKey: internal item missing id");
      return `internal:${item.id}`;
    }
    case "backlog": {
      if (item.topic_slug) return `backlog:${item.topic_slug}`;
      // NOTE(review): id-only backlog items share the "internal:" namespace —
      // presumably so a backlog item resumes its originating internal
      // session; confirm against the memo's §4.2 table.
      if (item.id) return `internal:${item.id}`;
      throw new Error("routingKey: backlog item missing topic_slug and id");
    }
    default:
      throw new Error(`routingKey: unknown source "${source}"`);
  }
}
93
+
94
/**
 * Read the session registry from disk.
 *
 * Degrades to an empty registry shape on ANY read failure — missing file,
 * unreadable file, or corrupted JSON (memo §6: "Read failure on the
 * main file falls back to an empty registry"). The previous explicit ENOENT
 * branch was redundant: both arms of the catch returned the same empty
 * shape, so the error-code check was dead differentiation.
 *
 * @param {string} path Registry JSON file path.
 * @returns {Promise<{sessions: object, lru: Array}>} Parsed registry with
 *   `sessions`/`lru` coerced to the expected shapes.
 */
async function readRegistry(path) {
  try {
    const parsed = JSON.parse(await fsp.readFile(path, "utf-8"));
    // Defensive shape repair: tolerate partially-written or hand-edited files.
    if (!parsed || typeof parsed !== "object") return { sessions: {}, lru: [] };
    if (!parsed.sessions || typeof parsed.sessions !== "object") parsed.sessions = {};
    if (!Array.isArray(parsed.lru)) parsed.lru = [];
    return parsed;
  } catch {
    // ENOENT, EACCES, JSON SyntaxError — all degrade to the empty registry.
    return { sessions: {}, lru: [] };
  }
}
113
+
114
/**
 * Persist the registry with an atomic write.
 *
 * Serializes to a process/time-unique sibling temp file, then fs.rename()s
 * it over the target — readers observe either the old file or the new one,
 * never a partial write. Memo §4.1 specifies this exact pattern instead of
 * flock to avoid stale-lock pathologies. The parent directory is created on
 * demand.
 *
 * @param {string} path Destination registry file.
 * @param {object} registry Registry object to serialize (pretty-printed).
 */
async function writeRegistryAtomic(path, registry) {
  await fsp.mkdir(dirname(path), { recursive: true });
  const payload = JSON.stringify(registry, null, 2);
  const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
  await fsp.writeFile(tmp, payload, "utf-8");
  await fsp.rename(tmp, path);
}
125
+
126
/**
 * Promote `key` to most-recently-used.
 *
 * Mutates `lru` in place: removes the first existing occurrence of `key`
 * (if any), then appends it — index 0 stays the oldest entry and the last
 * index the newest.
 *
 * @param {Array} lru Key list ordered oldest → newest.
 * @param {string} key Key to promote.
 */
function bumpLru(lru, key) {
  const existingIdx = lru.indexOf(key);
  if (existingIdx !== -1) {
    lru.splice(existingIdx, 1);
  }
  lru.push(key);
}
134
+
135
/**
 * Create a session router bound to a registry path.
 *
 * The registry is loaded from disk ONCE here; route() then works against the
 * in-memory copy, while touch()/recordExit()/evictExpired() persist it back
 * via writeRegistryAtomic(). NOTE(review): nothing re-reads the file after
 * this initial load, so a concurrent writer in another process would be
 * clobbered on our next write — presumably acceptable for a single-daemon
 * deployment; confirm against the memo.
 *
 * @param {object} opts
 * @param {string} opts.registryPath Path to the JSON registry file.
 * @param {number} [opts.ttlSeconds=1800] Idle TTL (memo §4.3 default 30m).
 * @param {number} [opts.maxLiveSessions=8] LRU cap (memo §4.3, matches MAX_CLAUDE_PROCS).
 * @param {() => number} [opts.now] Injectable clock for tests.
 * @returns {Promise<object>} { route, touch, recordExit, evictExpired, _readForTests }
 * @throws {Error} If registryPath is omitted.
 */
export async function createRouter({
  registryPath,
  ttlSeconds = 1800,
  maxLiveSessions = 8,
  now = () => Date.now(),
} = {}) {
  if (!registryPath) {
    throw new Error("createRouter: registryPath is required");
  }

  // Eagerly load so first call doesn't race with concurrent writers.
  let registry = await readRegistry(registryPath);

  /**
   * Decide RESUME / EPHEMERAL / EPHEMERAL_REPLACE for a key (memo §4.4).
   *
   *   - no entry for key                         → EPHEMERAL (spawn fresh)
   *   - entry expired, not "live", or last exit
   *     was non-zero                             → EPHEMERAL_REPLACE
   *   - healthy entry                            → RESUME with its claude_session_id
   *
   * On RESUME, last_used_at and the LRU position are bumped IN MEMORY only;
   * they reach disk on the next touch()/recordExit() write.
   *
   * @param {string} key
   * @returns {{ decision: "EPHEMERAL"|"EPHEMERAL_REPLACE"|"RESUME", resumeId: string|null }}
   */
  function route(key) {
    const entry = registry.sessions[key];
    if (!entry) return { decision: "EPHEMERAL", resumeId: null };

    const ttlMs = ttlSeconds * 1000;
    // Idle past the TTL → stale; replace rather than resume.
    if (now() - entry.last_used_at > ttlMs) {
      return { decision: "EPHEMERAL_REPLACE", resumeId: null };
    }
    // recordExit() flips status to "killed" on a non-zero exit.
    if (entry.status !== "live") {
      return { decision: "EPHEMERAL_REPLACE", resumeId: null };
    }
    // Strict !== 0 also rejects null/undefined exit codes.
    if (entry.last_exit_code !== 0) {
      return { decision: "EPHEMERAL_REPLACE", resumeId: null };
    }

    // Touch in-memory; persisted on next touch()/recordExit() write.
    entry.last_used_at = now();
    bumpLru(registry.lru, key);
    return { decision: "RESUME", resumeId: entry.claude_session_id };
  }

  /**
   * Insert or refresh a session entry after a successful spawn, then persist.
   * Enforces the LRU cap by evicting the oldest entries when over capacity.
   *
   * NOTE(review): daemon_session_id/model fall back with `||` (an empty
   * string yields the previous value) while claude_session_id uses `??`
   * (an empty string is kept) — confirm the asymmetry is intentional.
   *
   * @param {string} key
   * @param {object} info
   * @param {string} info.claudeSessionId CLI-resolved session id from JSON stdout.
   * @param {string} [info.daemonSessionId] Daemon-local id (s-<epoch>-<n>).
   * @param {string} [info.model] "sonnet" | "opus" | "haiku" | etc.
   */
  async function touch(key, { claudeSessionId, daemonSessionId, model } = {}) {
    const ts = now();
    const isoNow = new Date(ts).toISOString();
    const existing = registry.sessions[key];

    // Rebuild the entry wholesale: created_at survives from any previous
    // entry; status and last_exit_code reset to healthy defaults.
    const entry = {
      daemon_session_id: daemonSessionId || existing?.daemon_session_id || null,
      claude_session_id: claudeSessionId ?? existing?.claude_session_id ?? null,
      key,
      model: model || existing?.model || null,
      created_at: existing?.created_at || isoNow,
      last_used_at: ts,
      ttl_seconds: ttlSeconds,
      status: "live",
      last_exit_code: 0,
    };
    registry.sessions[key] = entry;
    bumpLru(registry.lru, key);

    // Enforce LRU cap. The lru array stores keys ordered oldest → newest.
    // `evictKey !== key` avoids deleting the entry just written; bumpLru()
    // put `key` at the back, so it can only be shifted out in the degenerate
    // maxLiveSessions=0 case (where the sweep below drops it anyway).
    while (registry.lru.length > maxLiveSessions) {
      const evictKey = registry.lru.shift();
      if (evictKey && evictKey !== key) {
        delete registry.sessions[evictKey];
      }
    }
    // Defensive: drop sessions that fell out of the lru array entirely.
    for (const k of Object.keys(registry.sessions)) {
      if (!registry.lru.includes(k)) delete registry.sessions[k];
    }

    await writeRegistryAtomic(registryPath, registry);
  }

  /**
   * Record process exit for a key, then persist. A non-zero exit code flips
   * status to "killed" so the next route() returns EPHEMERAL_REPLACE; a
   * clean (0) exit leaves the entry resumable.
   *
   * @param {string} key
   * @param {number|null} exitCode Exit code as reported by the caller
   *   (anything !== 0, including null, marks the session killed).
   */
  async function recordExit(key, exitCode) {
    const entry = registry.sessions[key];
    if (!entry) return; // No-op — key was never touched.
    entry.last_exit_code = exitCode;
    if (exitCode !== 0) {
      entry.status = "killed";
    }
    await writeRegistryAtomic(registryPath, registry);
  }

  /**
   * Sweep entries idle past the TTL (memo §4.3 — runs at top of each
   * dispatcher tick). Writes to disk only if something was evicted.
   *
   * @returns {Promise<number>} Count of evicted entries.
   */
  async function evictExpired() {
    const ttlMs = ttlSeconds * 1000;
    const cutoff = now() - ttlMs;
    let evicted = 0;
    for (const [k, e] of Object.entries(registry.sessions)) {
      if (e.last_used_at < cutoff) {
        delete registry.sessions[k];
        // Keep the LRU array in lockstep with the sessions map.
        const idx = registry.lru.indexOf(k);
        if (idx !== -1) registry.lru.splice(idx, 1);
        evicted++;
      }
    }
    if (evicted > 0) {
      await writeRegistryAtomic(registryPath, registry);
    }
    return evicted;
  }

  /**
   * Test hook — returns a deep-cloned snapshot so tests cannot mutate
   * the live registry by accident. (JSON round-trip clone is safe here:
   * registry entries are plain JSON data.)
   */
  function _readForTests() {
    return JSON.parse(JSON.stringify(registry));
  }

  return { route, touch, recordExit, evictExpired, _readForTests };
}