npm - @botcord/daemon - Versions diffs - 0.2.75 → 0.2.77 - Mend

@botcord/daemon 0.2.75 → 0.2.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/dist/cloud-auth.d.ts +47 -0
package/dist/cloud-auth.js +51 -0
package/dist/cloud-daemon.d.ts +43 -0
package/dist/cloud-daemon.js +252 -0
package/dist/cloud-mode.d.ts +45 -0
package/dist/cloud-mode.js +55 -0
package/dist/cloud-settle.d.ts +81 -0
package/dist/cloud-settle.js +100 -0
package/dist/daemon-singleton.d.ts +26 -0
package/dist/daemon-singleton.js +91 -0
package/dist/daemon.d.ts +1 -1
package/dist/daemon.js +15 -6
package/dist/doctor.d.ts +4 -1
package/dist/doctor.js +15 -4
package/dist/gateway/channels/botcord.d.ts +1 -1
package/dist/gateway/channels/botcord.js +280 -52
package/dist/gateway/dispatcher.d.ts +34 -1
package/dist/gateway/dispatcher.js +277 -20
package/dist/gateway/gateway.d.ts +9 -1
package/dist/gateway/gateway.js +4 -1
package/dist/gateway/runtime-errors.d.ts +6 -0
package/dist/gateway/runtime-errors.js +14 -0
package/dist/gateway/runtimes/claude-code.d.ts +8 -0
package/dist/gateway/runtimes/claude-code.js +92 -4
package/dist/gateway/runtimes/deepseek-tui.js +19 -5
package/dist/gateway/transcript.d.ts +1 -1
package/dist/gateway/types.d.ts +33 -0
package/dist/index.js +71 -80
package/dist/provision.d.ts +2 -0
package/dist/provision.js +39 -1
package/dist/status-render.js +17 -0
package/package.json +2 -2
package/src/__tests__/cloud-auth.test.ts +42 -0
package/src/__tests__/cloud-daemon.test.ts +237 -0
package/src/__tests__/cloud-mode.test.ts +65 -0
package/src/__tests__/cloud-settle.test.ts +287 -0
package/src/__tests__/daemon-singleton.test.ts +89 -0
package/src/__tests__/doctor.test.ts +34 -0
package/src/__tests__/runtime-discovery.test.ts +90 -0
package/src/__tests__/status-render.test.ts +34 -0
package/src/cloud-auth.ts +78 -0
package/src/cloud-daemon.ts +338 -0
package/src/cloud-mode.ts +70 -0
package/src/cloud-settle.ts +182 -0
package/src/daemon-singleton.ts +122 -0
package/src/daemon.ts +18 -5
package/src/doctor.ts +18 -5
package/src/gateway/__tests__/botcord-channel.test.ts +98 -0
package/src/gateway/__tests__/claude-code-adapter.test.ts +101 -1
package/src/gateway/__tests__/deepseek-tui-adapter.test.ts +19 -0
package/src/gateway/__tests__/dispatcher.test.ts +120 -0
package/src/gateway/channels/botcord.ts +299 -43
package/src/gateway/dispatcher.ts +354 -21
package/src/gateway/gateway.ts +16 -1
package/src/gateway/runtime-errors.ts +15 -0
package/src/gateway/runtimes/claude-code.ts +98 -2
package/src/gateway/runtimes/deepseek-tui.ts +23 -5
package/src/gateway/transcript.ts +1 -1
package/src/gateway/types.ts +34 -0
package/src/index.ts +83 -74
package/src/provision.ts +45 -1
package/src/status-render.ts +24 -0

package/src/gateway/runtimes/claude-code.ts CHANGED Viewed

@@ -1,5 +1,8 @@
+import { execFileSync, type ExecFileSyncOptions } from "node:child_process";
 import path from "node:path";
 import { NdjsonStreamAdapter, type NdjsonEventCtx } from "./ndjson-stream.js";
+import { consoleLogger } from "../log.js";
+import { looksLikeRuntimeAuthFailure } from "../runtime-errors.js";
 import {
   firstExistingPath,
   readCommandVersion,
@@ -18,6 +21,24 @@ const CLAUDE_DESKTOP_CLI_RELATIVE_PATH = path.join(
 );
 const CLAUDE_DESKTOP_CLI_SYSTEM_PATH =
   "/Applications/Claude Code URL Handler.app/Contents/MacOS/claude";
+const log = consoleLogger;
+const CLAUDE_CODE_AUTH_ENV_DENYLIST = [
+  "ANTHROPIC_API_KEY",
+  "ANTHROPIC_AUTH_TOKEN",
+  "ANTHROPIC_BASE_URL",
+  "ANTHROPIC_CUSTOM_HEADERS",
+  "CLAUDE_CODE_OAUTH_TOKEN",
+];
+export function scrubClaudeCodeAuthEnv(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
+  const out = { ...env };
+  for (const key of CLAUDE_CODE_AUTH_ENV_DENYLIST) {
+    delete out[key];
+  }
+  return out;
+}
 function isValidClaudeSessionId(sessionId: string): boolean {
   if (sessionId.length === 0 || sessionId.length > 512) return false;
   if (sessionId.startsWith("-")) return false;
@@ -125,6 +146,63 @@ export function probeClaude(deps: ProbeDeps = {}): RuntimeProbeResult {
   };
 }
+export interface ClaudeAuthProbeResult {
+  checked: boolean;
+  ok: boolean;
+  message: string;
+}
+export function probeClaudeAuth(deps: ProbeDeps = {}): ClaudeAuthProbeResult {
+  const command = resolveClaudeCommand(deps);
+  if (!command) return { checked: false, ok: false, message: "claude command not found" };
+  return runClaudeAuthProbe(command, deps);
+}
+function runClaudeAuthProbe(command: string, deps: ProbeDeps = {}): ClaudeAuthProbeResult {
+  const execFn = deps.execFileSyncFn ?? execFileSync;
+  const env = scrubClaudeCodeAuthEnv(deps.env ?? process.env);
+  try {
+    const raw = execFn(command, ["-p", "ping", "--output-format", "stream-json"], {
+      stdio: ["ignore", "pipe", "pipe"],
+      env,
+      timeout: 20_000,
+    } as ExecFileSyncOptions);
+    const output = Buffer.isBuffer(raw) ? raw.toString("utf8") : String(raw ?? "");
+    const authFailure = claudeAuthFailureFromOutput(output);
+    if (authFailure) return { checked: true, ok: false, message: authFailure };
+    return { checked: true, ok: true, message: "claude-code auth ok" };
+  } catch (err) {
+    const e = err as Error & { stdout?: Buffer | string; stderr?: Buffer | string };
+    const output = `${bufferishToString(e.stdout)}\n${bufferishToString(e.stderr)}`.trim();
+    const authFailure = claudeAuthFailureFromOutput(output);
+    return {
+      checked: true,
+      ok: false,
+      message: authFailure || e.message || "claude-code auth probe failed",
+    };
+  }
+}
+function bufferishToString(raw: Buffer | string | undefined): string {
+  return Buffer.isBuffer(raw) ? raw.toString("utf8") : String(raw ?? "");
+}
+function claudeAuthFailureFromOutput(output: string): string | null {
+  for (const line of output.split(/\r?\n/)) {
+    const s = line.trim();
+    if (!s) continue;
+    try {
+      const obj = JSON.parse(s) as { type?: string; result?: unknown; total_cost_usd?: unknown };
+      if (obj.type === "result" && typeof obj.result === "string" && looksLikeRuntimeAuthFailure(obj.result)) {
+        return obj.result;
+      }
+    } catch {
+      if (looksLikeRuntimeAuthFailure(s)) return s;
+    }
+  }
+  return looksLikeRuntimeAuthFailure(output) ? output : null;
+}
 /**
  * Claude Code adapter — spawns `claude -p "<text>" --output-format stream-json`
  * (with `--resume <sid>` when available) and parses the ndjson stream.
@@ -197,6 +275,10 @@ export class ClaudeCodeAdapter extends NdjsonStreamAdapter {
     return args;
   }
+  protected override spawnEnv(opts: RuntimeRunOptions): NodeJS.ProcessEnv {
+    return scrubClaudeCodeAuthEnv(super.spawnEnv(opts));
+  }
   protected handleEvent(raw: unknown, ctx: NdjsonEventCtx): void {
     const obj = raw as {
       type?: string;
@@ -229,8 +311,22 @@ export class ClaudeCodeAdapter extends NdjsonStreamAdapter {
     if (obj.type === "result") {
       if (typeof obj.total_cost_usd === "number") ctx.state.costUsd = obj.total_cost_usd;
       if (obj.subtype === "success") {
-        if (typeof obj.session_id === "string") ctx.state.newSessionId = obj.session_id;
-        if (typeof obj.result === "string") ctx.state.finalText = obj.result;
+        const result = typeof obj.result === "string" ? obj.result : "";
+        const looksLikeAuthFailure =
+          obj.total_cost_usd === 0 && looksLikeRuntimeAuthFailure(result);
+        if (looksLikeAuthFailure) {
+          log.error("claude-code authentication failed; check ~/.claude login or unset stale Anthropic env vars", {
+            error: result,
+          });
+          ctx.state.newSessionId = "";
+          ctx.state.finalText = "";
+          ctx.state.assistantTextChunks = [];
+          ctx.state.assistantTextBytes = 0;
+          ctx.state.errorText = result;
+        } else {
+          if (typeof obj.session_id === "string") ctx.state.newSessionId = obj.session_id;
+          if (typeof obj.result === "string") ctx.state.finalText = obj.result;
+        }
       } else {
         // Non-success result (e.g. resume targeted a missing UUID). Claude Code
         // still emits a fresh `session_id` for the just-spawned empty session —

package/src/gateway/runtimes/deepseek-tui.ts CHANGED Viewed

@@ -379,12 +379,12 @@ export class DeepseekTuiAdapter implements RuntimeAdapter {
         } else if (eventName === "item.delta" && payload?.payload?.kind === "agent_message") {
           append(stringField(payload.payload, "delta") ?? "");
         }
-        if (eventName === "turn.started") {
+        if (eventName === "turn.started" || embeddedDeepseekEvent(payload) === "turn.started") {
           opts.onStatus?.({ kind: "thinking", phase: "started", label: "Thinking" });
         } else if (eventName === "tool.started" || isToolStarted(payload)) {
           const label = stringField(payload, "name") ?? stringField(payload?.payload?.tool, "name") ?? "tool";
           opts.onStatus?.({ kind: "thinking", phase: "updated", label });
-        } else if (eventName === "turn.completed" || eventName === "done") {
+        } else if (isDeepseekTerminalEvent(eventName, payload)) {
           opts.onStatus?.({ kind: "thinking", phase: "stopped" });
           return true;
         }
@@ -451,15 +451,33 @@ function normalizeDeepseekEvent(eventName: string, payload: any, seq: number): S
   if (eventName === "item.delta" && payload?.payload?.kind === "agent_message") {
     return { raw: { event: eventName, payload }, kind: "assistant_text", seq };
   }
-  if (eventName === "turn.started" || eventName === "status") {
+  if (eventName === "turn.started" || eventName === "status" || embeddedDeepseekEvent(payload) === "turn.started") {
     return { raw: { event: eventName, payload }, kind: "system", seq };
   }
-  if (eventName === "error" || eventName === "turn.completed" || eventName === "done") {
+  if (eventName === "error" || isDeepseekTerminalEvent(eventName, payload)) {
     return { raw: { event: eventName, payload }, kind: "other", seq };
   }
   return null;
 }
+function embeddedDeepseekEvent(payload: any): string | undefined {
+  return stringField(payload, "event") ?? stringField(payload?.payload, "event");
+}
+function isDeepseekTerminalEvent(eventName: string, payload: any): boolean {
+  const embedded = embeddedDeepseekEvent(payload);
+  return (
+    eventName === "turn.completed" ||
+    eventName === "turn.finished" ||
+    eventName === "turn.done" ||
+    eventName === "done" ||
+    embedded === "turn.completed" ||
+    embedded === "turn.finished" ||
+    embedded === "turn.done" ||
+    embedded === "done"
+  );
+}
 function isToolStarted(payload: any): boolean {
   return payload?.event === "item.started" && !!payload?.payload?.tool;
 }
@@ -488,7 +506,7 @@ function extractDeepseekError(eventName: string, payload: any): string | undefin
       stringField(payload?.payload, "error")
     );
   }
-  if (eventName === "turn.completed") {
+  if (isDeepseekTerminalEvent(eventName, payload)) {
     const turn = payload?.payload?.turn ?? payload?.turn;
     const status = stringField(turn, "status");
     const err = stringField(turn, "error");

package/src/gateway/transcript.ts CHANGED Viewed

@@ -114,7 +114,7 @@ export interface OutboundTranscriptRecord extends TranscriptRecordBase {
 export interface TurnErrorTranscriptRecord extends TranscriptRecordBase {
   kind: "turn_error";
-  phase: "runtime" | "timeout";
+  phase: "runtime" | "timeout" | "budget";
   error: string;
   durationMs: number;
 }

package/src/gateway/types.ts CHANGED Viewed

@@ -240,10 +240,26 @@ export interface TurnStatusSnapshot {
   startedAt: number;
 }
+/** Per-runtime auth circuit breaker state exposed through daemon snapshots. */
+export interface RuntimeCircuitBreakerSnapshot {
+  key: string;
+  runtime: string;
+  channel: string;
+  accountId: string;
+  conversationId: string;
+  threadId?: string | null;
+  failures: number;
+  openedAt: number;
+  blockedUntil: number;
+  lastFailureAt: number;
+  lastError: string;
+}
 /** Aggregate gateway state combining channel and turn snapshots. */
 export interface GatewayRuntimeSnapshot {
   channels: Record<string, ChannelStatusSnapshot>;
   turns: Record<string, TurnStatusSnapshot>;
+  runtimeCircuitBreakers?: Record<string, RuntimeCircuitBreakerSnapshot>;
 }
 // ---------------------------------------------------------------------------
@@ -383,6 +399,15 @@ export interface RuntimeRunOptions {
   systemContext?: string;
   /** Channel-agnostic bag for dispatch-time data (traceId, channel, conversation, etc.). */
   context?: Record<string, unknown>;
+  /**
+   * Cloud Agent run budget. Present only for Hub-issued `cloud_run` envelopes.
+   * Dispatcher enforces wall time and tool-call count; runtimes may also use it
+   * to apply provider-native limits when available.
+   */
+  budget?: {
+    maxWallTimeMs?: number;
+    maxToolCalls?: number;
+  };
   /** Called for every parsed block while the turn is in progress. */
   onBlock?: (block: StreamBlock) => void;
   /**
@@ -421,6 +446,15 @@ export interface RuntimeRunResult {
   costUsd?: number;
   /** Populated when the runtime reported a hard error. */
   error?: string;
+  /**
+   * Optional token-count breakdown reported by the runtime. Used by the
+   * cloud daemon's ``cloud_run`` settle hook to charge a run against the
+   * user's Cloud Credits. Adapters that don't surface usage data leave
+   * these undefined; the settle path treats undefined as ``0``.
+   */
+  inputCacheHitTokens?: number;
+  inputCacheMissTokens?: number;
+  outputTokens?: number;
 }
 /** Detection result for whether a runtime binary/SDK is usable on this machine. */

package/src/index.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env node
 import { spawn } from "node:child_process";
-import { existsSync, readFileSync, writeFileSync, unlinkSync, readdirSync, statSync, rmSync } from "node:fs";
+import { existsSync, readFileSync, unlinkSync, readdirSync, statSync, rmSync } from "node:fs";
 import { homedir, hostname } from "node:os";
 import path from "node:path";
 import { augmentProcessPath } from "./path-env.js";
@@ -9,7 +9,6 @@ import {
   saveConfig,
   initDefaultConfig,
   resolveConfiguredAgentIds,
-  PID_PATH,
   SNAPSHOT_PATH,
   CONFIG_FILE_PATH,
   CONFIG_MISSING,
@@ -17,6 +16,14 @@ import {
   type RouteRule,
   type RouteRuleMatch,
 } from "./config.js";
+import {
+  ensureNoOtherDaemonFromPidFile,
+  pidAlive,
+  readPid,
+  removePidFile,
+  stopDaemonFromPidFileForRestart,
+  writeCurrentPid,
+} from "./daemon-singleton.js";
 import { resolveBootAgents } from "./agent-discovery.js";
 import {
   defaultTranscriptRoot,
@@ -65,6 +72,8 @@ import {
   mergeOpenclawGateways,
   openclawDiscoveryConfigEnabled,
 } from "./openclaw-discovery.js";
+import { isCloudMode, loadCloudModeConfig } from "./cloud-mode.js";
+import { startCloudDaemon } from "./cloud-daemon.js";
 augmentProcessPath();
@@ -135,7 +144,10 @@ Commands:
   route list
   route remove --room <rm_xxx>|--prefix <rm_xxx>
   config                                  Print resolved config
-  doctor [--json] [--bundle] [--full-log] Scan local runtimes (${ADAPTER_LIST});
+  doctor [--json] [--auth-check] [--bundle] [--full-log]
+                                          Scan local runtimes (${ADAPTER_LIST});
+                                          --auth-check also runs a Claude Code
+                                          ping probe and may contact Anthropic.
                                           --bundle also writes a zip under
                                           ~/.botcord/diagnostics/. Bundles
                                           daemon.log plus the latest 5 rotated
@@ -226,60 +238,6 @@ function parseArgs(argv: string[]): ParsedArgs {
   return { cmd: cmd ?? "", sub, flags, lists };
 }
-function readPid(): number | null {
-  if (!existsSync(PID_PATH)) return null;
-  const raw = readFileSync(PID_PATH, "utf8").trim();
-  const pid = Number(raw);
-  return Number.isFinite(pid) && pid > 0 ? pid : null;
-}
-function pidAlive(pid: number): boolean {
-  try {
-    process.kill(pid, 0);
-    return true;
-  } catch {
-    return false;
-  }
-}
-async function waitForPidExit(pid: number, timeoutMs: number): Promise<boolean> {
-  const deadline = Date.now() + timeoutMs;
-  while (Date.now() < deadline) {
-    if (!pidAlive(pid)) return true;
-    await delay(100);
-  }
-  return !pidAlive(pid);
-}
-async function stopExistingDaemonForRestart(pid: number): Promise<void> {
-  if (pid === process.pid) return;
-  log.info("existing daemon found; restarting", { pid });
-  try {
-    process.kill(pid, "SIGTERM");
-  } catch {
-    try {
-      unlinkSync(PID_PATH);
-    } catch {
-      // ignore
-    }
-    return;
-  }
-  if (!(await waitForPidExit(pid, 5_000))) {
-    log.warn("existing daemon did not stop after SIGTERM; sending SIGKILL", { pid });
-    try {
-      process.kill(pid, "SIGKILL");
-    } catch {
-      // ignore
-    }
-    await waitForPidExit(pid, 2_000);
-  }
-  try {
-    unlinkSync(PID_PATH);
-  } catch {
-    // ignore
-  }
-}
 /**
  * Load the daemon config, auto-creating `~/.botcord/daemon/config.json`
  * with sensible defaults on first run. `--agent` (repeated) pins explicit
@@ -596,6 +554,16 @@ async function ensureUserAuthForStart(args: ParsedArgs): Promise<UserAuthRecord
 }
 async function cmdStart(args: ParsedArgs): Promise<void> {
+  // Cloud-mode short-circuit: the Hub-managed E2B sandbox launches the
+  // daemon with `BOTCORD_CLOUD_DAEMON_ACCESS_TOKEN` set in the environment.
+  // In that case we skip the entire device-code / install-token / on-disk
+  // user-auth flow and dial `/cloud/daemon/ws` directly with the injected
+  // JWT. See ``packages/daemon/src/cloud-mode.ts`` + the design doc §4.
+  if (isCloudMode()) {
+    await cmdStartCloud(args);
+    return;
+  }
   let cfg = loadOrInitConfig(args);
   cfg = await refreshDiscoveredOpenclawGateways(cfg, "start");
   // Foreground is now the default. --background (alias -d) detaches.
@@ -616,13 +584,10 @@ async function cmdStart(args: ParsedArgs): Promise<void> {
   // var so we don't try to re-prompt for credentials it already has.
   if (process.env.BOTCORD_DAEMON_CHILD !== "1") {
     await ensureUserAuthForStart(args);
-    const existing = readPid();
-    if (existing && pidAlive(existing)) {
-      await stopExistingDaemonForRestart(existing);
-    }
+    await stopDaemonFromPidFileForRestart({ logger: log });
   } else {
-    const existing = readPid();
-    if (existing && existing !== process.pid && pidAlive(existing)) {
+    const existing = ensureNoOtherDaemonFromPidFile();
+    if (existing) {
       console.error(`daemon already running (pid ${existing})`);
       process.exit(1);
     }
@@ -657,17 +622,13 @@ async function cmdStart(args: ParsedArgs): Promise<void> {
   }
   // Foreground: we ARE the daemon.
-  writeFileSync(PID_PATH, String(process.pid), { mode: 0o600 });
+  writeCurrentPid();
   const handle = await startDaemon({ config: cfg, configPath: CONFIG_FILE_PATH });
   const shutdown = async (sig: string) => {
     log.info("signal received", { sig });
     await handle.stop(sig);
-    try {
-      unlinkSync(PID_PATH);
-    } catch {
-      // ignore
-    }
+    removePidFile();
     process.exit(0);
   };
   process.on("SIGTERM", () => shutdown("SIGTERM"));
@@ -680,6 +641,57 @@ async function cmdStart(args: ParsedArgs): Promise<void> {
   });
 }
+/**
+ * Cloud-mode start: launched by the Hub-managed E2B sandbox provider.
+ *
+ * No login flow and no on-disk credentials at boot. The daemon still uses
+ * the same PID-file singleton guard as local foreground starts because E2B
+ * resume hooks can run the startup command more than once in one sandbox.
+ *
+ * Always foreground — `--background` / `-d` is silently ignored because
+ * E2B sandboxes don't have a meaningful detach concept.
+ */
+async function cmdStartCloud(_args: ParsedArgs): Promise<void> {
+  const cloudConfig = loadCloudModeConfig();
+  log.info("cmd start (cloud mode)", {
+    cloudDaemonInstanceId: cloudConfig.cloudDaemonInstanceId,
+    daemonInstanceId: cloudConfig.daemonInstanceId,
+    hubUrl: cloudConfig.hubUrl,
+  });
+  await stopDaemonFromPidFileForRestart({ logger: log });
+  writeCurrentPid();
+  // Cloud daemons always start with an empty in-memory config — every
+  // agent + route arrives over the control plane. We synthesize the
+  // shape `Gateway` expects without ever touching `~/.botcord/daemon/config.json`.
+  const cfg: DaemonConfig = {
+    defaultRoute: { adapter: "deepseek-tui", cwd: homedir() },
+    routes: [],
+    streamBlocks: true,
+  };
+  saveConfig(cfg);
+  log.info("cloud mode config initialized", { configPath: CONFIG_FILE_PATH });
+  const handle = await startCloudDaemon({
+    cloudConfig,
+    config: cfg,
+    configPath: CONFIG_FILE_PATH,
+  });
+  const shutdown = async (sig: string): Promise<void> => {
+    log.info("signal received", { sig });
+    await handle.stop(sig);
+    removePidFile();
+    process.exit(0);
+  };
+  process.on("SIGTERM", () => void shutdown("SIGTERM"));
+  process.on("SIGINT", () => void shutdown("SIGINT"));
+  await new Promise<void>(() => {
+    // Deliberately never resolves; `shutdown()` calls `process.exit(0)`.
+  });
+}
 async function cmdStop(): Promise<void> {
   const pid = readPid();
   log.info("cmd stop", { pid });
@@ -689,11 +701,7 @@ async function cmdStop(): Promise<void> {
   }
   if (!pidAlive(pid)) {
     console.error(`pid ${pid} not alive; removing stale pid file`);
-    try {
-      unlinkSync(PID_PATH);
-    } catch {
-      // ignore
-    }
+    removePidFile();
     process.exit(1);
   }
   process.kill(pid, "SIGTERM");
@@ -1408,6 +1416,7 @@ async function cmdDoctor(args: ParsedArgs): Promise<void> {
     fileReader: fsFileReader,
     fetcher: defaultHttpFetcher,
     timeoutMs: 5_000,
+    authCheck: args.flags["auth-check"] === true,
   });
   if (args.flags.json === true) {

package/src/provision.ts CHANGED Viewed

@@ -337,7 +337,10 @@ export function createProvisioner(opts: ProvisionerOptions): (
         } catch {
           cfgForProbe = undefined;
         }
-        const snapshot = await collectRuntimeSnapshotAsync({ cfg: cfgForProbe });
+        const snapshot = attachRuntimeHealth(
+          await collectRuntimeSnapshotAsync({ cfg: cfgForProbe }),
+          gateway.snapshot(),
+        );
         daemonLog.debug("list_runtimes", { count: snapshot.runtimes.length });
         return { ok: true, result: snapshot };
       }
@@ -1795,6 +1798,47 @@ export function collectRuntimeSnapshot(opts: { force?: boolean } = {}): ListRunt
   return value;
 }
+export function attachRuntimeHealth(
+  snapshot: ListRuntimesResult,
+  live: GatewayRuntimeSnapshot,
+): ListRuntimesResult {
+  const breakers = Object.values(live.runtimeCircuitBreakers ?? {});
+  if (breakers.length === 0) return snapshot;
+  const byRuntime = new Map<string, typeof breakers>();
+  for (const breaker of breakers) {
+    const list = byRuntime.get(breaker.runtime) ?? [];
+    if (list.length < 32) list.push(breaker);
+    byRuntime.set(breaker.runtime, list);
+  }
+  return {
+    ...snapshot,
+    runtimes: snapshot.runtimes.map((runtime) => {
+      const runtimeBreakers = byRuntime.get(runtime.id);
+      if (!runtimeBreakers?.length) return runtime;
+      return {
+        ...runtime,
+        health: {
+          ...((runtime as { health?: Record<string, unknown> }).health ?? {}),
+          circuitBreakers: runtimeBreakers.map((b) => ({
+            key: b.key,
+            channel: b.channel,
+            accountId: b.accountId,
+            conversationId: b.conversationId,
+            threadId: b.threadId ?? null,
+            failures: b.failures,
+            openedAt: b.openedAt,
+            blockedUntil: b.blockedUntil,
+            lastFailureAt: b.lastFailureAt,
+            lastError: b.lastError,
+          })),
+        },
+      };
+    }),
+  };
+}
 /** Maximum number of `endpoints[]` entries persisted per runtime (RFC §3.8.2). */
 export const RUNTIME_ENDPOINTS_CAP = 32;

package/src/status-render.ts CHANGED Viewed

@@ -85,6 +85,28 @@ function renderTurns(
   return out;
 }
+function renderRuntimeCircuitBreakers(
+  snap: GatewayRuntimeSnapshot,
+  now: number,
+): string[] {
+  const entries = Object.values(snap.runtimeCircuitBreakers ?? {});
+  if (entries.length === 0) return ["Runtime circuit breakers:", "  (none)"];
+  const out: string[] = ["Runtime circuit breakers:"];
+  const keyW = Math.max(3, ...entries.map((b) => b.key.length));
+  const rtW = Math.max(7, ...entries.map((b) => b.runtime.length));
+  const convW = Math.max(12, ...entries.map((b) => b.conversationId.length));
+  out.push(
+    `  ${pad("KEY", keyW)}  ${pad("RUNTIME", rtW)}  ${pad("CONVERSATION", convW)}  FAILS  BLOCKED FOR  LAST ERROR`,
+  );
+  for (const b of entries) {
+    const blockedFor = relTime(b.blockedUntil - now).replace(" ago", "");
+    out.push(
+      `  ${pad(b.key, keyW)}  ${pad(b.runtime, rtW)}  ${pad(b.conversationId, convW)}  ${pad(String(b.failures), 5)}  ${pad(blockedFor, 11)}  ${b.lastError}`,
+    );
+  }
+  return out;
+}
 /**
  * Format a human-readable status block. Kept pure so it can be unit-tested
  * without touching disk or spawning a daemon.
@@ -125,6 +147,8 @@ export function renderStatus(input: StatusRenderInput, now: number = Date.now())
     lines.push(...renderChannels(input.snapshot));
     lines.push("");
     lines.push(...renderTurns(input.snapshot, now));
+    lines.push("");
+    lines.push(...renderRuntimeCircuitBreakers(input.snapshot, now));
   } else if (input.alive) {
     lines.push("snapshot: unavailable (daemon running but no snapshot file found)");
   }