@botcord/daemon 0.2.75 → 0.2.77

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (62)
  1. package/dist/cloud-auth.d.ts +47 -0
  2. package/dist/cloud-auth.js +51 -0
  3. package/dist/cloud-daemon.d.ts +43 -0
  4. package/dist/cloud-daemon.js +252 -0
  5. package/dist/cloud-mode.d.ts +45 -0
  6. package/dist/cloud-mode.js +55 -0
  7. package/dist/cloud-settle.d.ts +81 -0
  8. package/dist/cloud-settle.js +100 -0
  9. package/dist/daemon-singleton.d.ts +26 -0
  10. package/dist/daemon-singleton.js +91 -0
  11. package/dist/daemon.d.ts +1 -1
  12. package/dist/daemon.js +15 -6
  13. package/dist/doctor.d.ts +4 -1
  14. package/dist/doctor.js +15 -4
  15. package/dist/gateway/channels/botcord.d.ts +1 -1
  16. package/dist/gateway/channels/botcord.js +280 -52
  17. package/dist/gateway/dispatcher.d.ts +34 -1
  18. package/dist/gateway/dispatcher.js +277 -20
  19. package/dist/gateway/gateway.d.ts +9 -1
  20. package/dist/gateway/gateway.js +4 -1
  21. package/dist/gateway/runtime-errors.d.ts +6 -0
  22. package/dist/gateway/runtime-errors.js +14 -0
  23. package/dist/gateway/runtimes/claude-code.d.ts +8 -0
  24. package/dist/gateway/runtimes/claude-code.js +92 -4
  25. package/dist/gateway/runtimes/deepseek-tui.js +19 -5
  26. package/dist/gateway/transcript.d.ts +1 -1
  27. package/dist/gateway/types.d.ts +33 -0
  28. package/dist/index.js +71 -80
  29. package/dist/provision.d.ts +2 -0
  30. package/dist/provision.js +39 -1
  31. package/dist/status-render.js +17 -0
  32. package/package.json +2 -2
  33. package/src/__tests__/cloud-auth.test.ts +42 -0
  34. package/src/__tests__/cloud-daemon.test.ts +237 -0
  35. package/src/__tests__/cloud-mode.test.ts +65 -0
  36. package/src/__tests__/cloud-settle.test.ts +287 -0
  37. package/src/__tests__/daemon-singleton.test.ts +89 -0
  38. package/src/__tests__/doctor.test.ts +34 -0
  39. package/src/__tests__/runtime-discovery.test.ts +90 -0
  40. package/src/__tests__/status-render.test.ts +34 -0
  41. package/src/cloud-auth.ts +78 -0
  42. package/src/cloud-daemon.ts +338 -0
  43. package/src/cloud-mode.ts +70 -0
  44. package/src/cloud-settle.ts +182 -0
  45. package/src/daemon-singleton.ts +122 -0
  46. package/src/daemon.ts +18 -5
  47. package/src/doctor.ts +18 -5
  48. package/src/gateway/__tests__/botcord-channel.test.ts +98 -0
  49. package/src/gateway/__tests__/claude-code-adapter.test.ts +101 -1
  50. package/src/gateway/__tests__/deepseek-tui-adapter.test.ts +19 -0
  51. package/src/gateway/__tests__/dispatcher.test.ts +120 -0
  52. package/src/gateway/channels/botcord.ts +299 -43
  53. package/src/gateway/dispatcher.ts +354 -21
  54. package/src/gateway/gateway.ts +16 -1
  55. package/src/gateway/runtime-errors.ts +15 -0
  56. package/src/gateway/runtimes/claude-code.ts +98 -2
  57. package/src/gateway/runtimes/deepseek-tui.ts +23 -5
  58. package/src/gateway/transcript.ts +1 -1
  59. package/src/gateway/types.ts +34 -0
  60. package/src/index.ts +83 -74
  61. package/src/provision.ts +45 -1
  62. package/src/status-render.ts +24 -0
@@ -1,5 +1,8 @@
1
+ import { execFileSync, type ExecFileSyncOptions } from "node:child_process";
1
2
  import path from "node:path";
2
3
  import { NdjsonStreamAdapter, type NdjsonEventCtx } from "./ndjson-stream.js";
4
+ import { consoleLogger } from "../log.js";
5
+ import { looksLikeRuntimeAuthFailure } from "../runtime-errors.js";
3
6
  import {
4
7
  firstExistingPath,
5
8
  readCommandVersion,
@@ -18,6 +21,24 @@ const CLAUDE_DESKTOP_CLI_RELATIVE_PATH = path.join(
18
21
  );
19
22
  const CLAUDE_DESKTOP_CLI_SYSTEM_PATH =
20
23
  "/Applications/Claude Code URL Handler.app/Contents/MacOS/claude";
24
+ const log = consoleLogger;
25
+
26
+ const CLAUDE_CODE_AUTH_ENV_DENYLIST = [
27
+ "ANTHROPIC_API_KEY",
28
+ "ANTHROPIC_AUTH_TOKEN",
29
+ "ANTHROPIC_BASE_URL",
30
+ "ANTHROPIC_CUSTOM_HEADERS",
31
+ "CLAUDE_CODE_OAUTH_TOKEN",
32
+ ];
33
+
34
+ export function scrubClaudeCodeAuthEnv(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
35
+ const out = { ...env };
36
+ for (const key of CLAUDE_CODE_AUTH_ENV_DENYLIST) {
37
+ delete out[key];
38
+ }
39
+ return out;
40
+ }
41
+
21
42
  function isValidClaudeSessionId(sessionId: string): boolean {
22
43
  if (sessionId.length === 0 || sessionId.length > 512) return false;
23
44
  if (sessionId.startsWith("-")) return false;
@@ -125,6 +146,63 @@ export function probeClaude(deps: ProbeDeps = {}): RuntimeProbeResult {
125
146
  };
126
147
  }
127
148
 
149
+ export interface ClaudeAuthProbeResult {
150
+ checked: boolean;
151
+ ok: boolean;
152
+ message: string;
153
+ }
154
+
155
+ export function probeClaudeAuth(deps: ProbeDeps = {}): ClaudeAuthProbeResult {
156
+ const command = resolveClaudeCommand(deps);
157
+ if (!command) return { checked: false, ok: false, message: "claude command not found" };
158
+ return runClaudeAuthProbe(command, deps);
159
+ }
160
+
161
+ function runClaudeAuthProbe(command: string, deps: ProbeDeps = {}): ClaudeAuthProbeResult {
162
+ const execFn = deps.execFileSyncFn ?? execFileSync;
163
+ const env = scrubClaudeCodeAuthEnv(deps.env ?? process.env);
164
+ try {
165
+ const raw = execFn(command, ["-p", "ping", "--output-format", "stream-json"], {
166
+ stdio: ["ignore", "pipe", "pipe"],
167
+ env,
168
+ timeout: 20_000,
169
+ } as ExecFileSyncOptions);
170
+ const output = Buffer.isBuffer(raw) ? raw.toString("utf8") : String(raw ?? "");
171
+ const authFailure = claudeAuthFailureFromOutput(output);
172
+ if (authFailure) return { checked: true, ok: false, message: authFailure };
173
+ return { checked: true, ok: true, message: "claude-code auth ok" };
174
+ } catch (err) {
175
+ const e = err as Error & { stdout?: Buffer | string; stderr?: Buffer | string };
176
+ const output = `${bufferishToString(e.stdout)}\n${bufferishToString(e.stderr)}`.trim();
177
+ const authFailure = claudeAuthFailureFromOutput(output);
178
+ return {
179
+ checked: true,
180
+ ok: false,
181
+ message: authFailure || e.message || "claude-code auth probe failed",
182
+ };
183
+ }
184
+ }
185
+
186
+ function bufferishToString(raw: Buffer | string | undefined): string {
187
+ return Buffer.isBuffer(raw) ? raw.toString("utf8") : String(raw ?? "");
188
+ }
189
+
190
+ function claudeAuthFailureFromOutput(output: string): string | null {
191
+ for (const line of output.split(/\r?\n/)) {
192
+ const s = line.trim();
193
+ if (!s) continue;
194
+ try {
195
+ const obj = JSON.parse(s) as { type?: string; result?: unknown; total_cost_usd?: unknown };
196
+ if (obj.type === "result" && typeof obj.result === "string" && looksLikeRuntimeAuthFailure(obj.result)) {
197
+ return obj.result;
198
+ }
199
+ } catch {
200
+ if (looksLikeRuntimeAuthFailure(s)) return s;
201
+ }
202
+ }
203
+ return looksLikeRuntimeAuthFailure(output) ? output : null;
204
+ }
205
+
128
206
  /**
129
207
  * Claude Code adapter — spawns `claude -p "<text>" --output-format stream-json`
130
208
  * (with `--resume <sid>` when available) and parses the ndjson stream.
@@ -197,6 +275,10 @@ export class ClaudeCodeAdapter extends NdjsonStreamAdapter {
197
275
  return args;
198
276
  }
199
277
 
278
+ protected override spawnEnv(opts: RuntimeRunOptions): NodeJS.ProcessEnv {
279
+ return scrubClaudeCodeAuthEnv(super.spawnEnv(opts));
280
+ }
281
+
200
282
  protected handleEvent(raw: unknown, ctx: NdjsonEventCtx): void {
201
283
  const obj = raw as {
202
284
  type?: string;
@@ -229,8 +311,22 @@ export class ClaudeCodeAdapter extends NdjsonStreamAdapter {
229
311
  if (obj.type === "result") {
230
312
  if (typeof obj.total_cost_usd === "number") ctx.state.costUsd = obj.total_cost_usd;
231
313
  if (obj.subtype === "success") {
232
- if (typeof obj.session_id === "string") ctx.state.newSessionId = obj.session_id;
233
- if (typeof obj.result === "string") ctx.state.finalText = obj.result;
314
+ const result = typeof obj.result === "string" ? obj.result : "";
315
+ const looksLikeAuthFailure =
316
+ obj.total_cost_usd === 0 && looksLikeRuntimeAuthFailure(result);
317
+ if (looksLikeAuthFailure) {
318
+ log.error("claude-code authentication failed; check ~/.claude login or unset stale Anthropic env vars", {
319
+ error: result,
320
+ });
321
+ ctx.state.newSessionId = "";
322
+ ctx.state.finalText = "";
323
+ ctx.state.assistantTextChunks = [];
324
+ ctx.state.assistantTextBytes = 0;
325
+ ctx.state.errorText = result;
326
+ } else {
327
+ if (typeof obj.session_id === "string") ctx.state.newSessionId = obj.session_id;
328
+ if (typeof obj.result === "string") ctx.state.finalText = obj.result;
329
+ }
234
330
  } else {
235
331
  // Non-success result (e.g. resume targeted a missing UUID). Claude Code
236
332
  // still emits a fresh `session_id` for the just-spawned empty session —
@@ -379,12 +379,12 @@ export class DeepseekTuiAdapter implements RuntimeAdapter {
379
379
  } else if (eventName === "item.delta" && payload?.payload?.kind === "agent_message") {
380
380
  append(stringField(payload.payload, "delta") ?? "");
381
381
  }
382
- if (eventName === "turn.started") {
382
+ if (eventName === "turn.started" || embeddedDeepseekEvent(payload) === "turn.started") {
383
383
  opts.onStatus?.({ kind: "thinking", phase: "started", label: "Thinking" });
384
384
  } else if (eventName === "tool.started" || isToolStarted(payload)) {
385
385
  const label = stringField(payload, "name") ?? stringField(payload?.payload?.tool, "name") ?? "tool";
386
386
  opts.onStatus?.({ kind: "thinking", phase: "updated", label });
387
- } else if (eventName === "turn.completed" || eventName === "done") {
387
+ } else if (isDeepseekTerminalEvent(eventName, payload)) {
388
388
  opts.onStatus?.({ kind: "thinking", phase: "stopped" });
389
389
  return true;
390
390
  }
@@ -451,15 +451,33 @@ function normalizeDeepseekEvent(eventName: string, payload: any, seq: number): S
451
451
  if (eventName === "item.delta" && payload?.payload?.kind === "agent_message") {
452
452
  return { raw: { event: eventName, payload }, kind: "assistant_text", seq };
453
453
  }
454
- if (eventName === "turn.started" || eventName === "status") {
454
+ if (eventName === "turn.started" || eventName === "status" || embeddedDeepseekEvent(payload) === "turn.started") {
455
455
  return { raw: { event: eventName, payload }, kind: "system", seq };
456
456
  }
457
- if (eventName === "error" || eventName === "turn.completed" || eventName === "done") {
457
+ if (eventName === "error" || isDeepseekTerminalEvent(eventName, payload)) {
458
458
  return { raw: { event: eventName, payload }, kind: "other", seq };
459
459
  }
460
460
  return null;
461
461
  }
462
462
 
463
+ function embeddedDeepseekEvent(payload: any): string | undefined {
464
+ return stringField(payload, "event") ?? stringField(payload?.payload, "event");
465
+ }
466
+
467
+ function isDeepseekTerminalEvent(eventName: string, payload: any): boolean {
468
+ const embedded = embeddedDeepseekEvent(payload);
469
+ return (
470
+ eventName === "turn.completed" ||
471
+ eventName === "turn.finished" ||
472
+ eventName === "turn.done" ||
473
+ eventName === "done" ||
474
+ embedded === "turn.completed" ||
475
+ embedded === "turn.finished" ||
476
+ embedded === "turn.done" ||
477
+ embedded === "done"
478
+ );
479
+ }
480
+
463
481
  function isToolStarted(payload: any): boolean {
464
482
  return payload?.event === "item.started" && !!payload?.payload?.tool;
465
483
  }
@@ -488,7 +506,7 @@ function extractDeepseekError(eventName: string, payload: any): string | undefin
488
506
  stringField(payload?.payload, "error")
489
507
  );
490
508
  }
491
- if (eventName === "turn.completed") {
509
+ if (isDeepseekTerminalEvent(eventName, payload)) {
492
510
  const turn = payload?.payload?.turn ?? payload?.turn;
493
511
  const status = stringField(turn, "status");
494
512
  const err = stringField(turn, "error");
@@ -114,7 +114,7 @@ export interface OutboundTranscriptRecord extends TranscriptRecordBase {
114
114
 
115
115
  export interface TurnErrorTranscriptRecord extends TranscriptRecordBase {
116
116
  kind: "turn_error";
117
- phase: "runtime" | "timeout";
117
+ phase: "runtime" | "timeout" | "budget";
118
118
  error: string;
119
119
  durationMs: number;
120
120
  }
@@ -240,10 +240,26 @@ export interface TurnStatusSnapshot {
240
240
  startedAt: number;
241
241
  }
242
242
 
243
+ /** Per-runtime auth circuit breaker state exposed through daemon snapshots. */
244
+ export interface RuntimeCircuitBreakerSnapshot {
245
+ key: string;
246
+ runtime: string;
247
+ channel: string;
248
+ accountId: string;
249
+ conversationId: string;
250
+ threadId?: string | null;
251
+ failures: number;
252
+ openedAt: number;
253
+ blockedUntil: number;
254
+ lastFailureAt: number;
255
+ lastError: string;
256
+ }
257
+
243
258
  /** Aggregate gateway state combining channel and turn snapshots. */
244
259
  export interface GatewayRuntimeSnapshot {
245
260
  channels: Record<string, ChannelStatusSnapshot>;
246
261
  turns: Record<string, TurnStatusSnapshot>;
262
+ runtimeCircuitBreakers?: Record<string, RuntimeCircuitBreakerSnapshot>;
247
263
  }
248
264
 
249
265
  // ---------------------------------------------------------------------------
@@ -383,6 +399,15 @@ export interface RuntimeRunOptions {
383
399
  systemContext?: string;
384
400
  /** Channel-agnostic bag for dispatch-time data (traceId, channel, conversation, etc.). */
385
401
  context?: Record<string, unknown>;
402
+ /**
403
+ * Cloud Agent run budget. Present only for Hub-issued `cloud_run` envelopes.
404
+ * Dispatcher enforces wall time and tool-call count; runtimes may also use it
405
+ * to apply provider-native limits when available.
406
+ */
407
+ budget?: {
408
+ maxWallTimeMs?: number;
409
+ maxToolCalls?: number;
410
+ };
386
411
  /** Called for every parsed block while the turn is in progress. */
387
412
  onBlock?: (block: StreamBlock) => void;
388
413
  /**
@@ -421,6 +446,15 @@ export interface RuntimeRunResult {
421
446
  costUsd?: number;
422
447
  /** Populated when the runtime reported a hard error. */
423
448
  error?: string;
449
+ /**
450
+ * Optional token-count breakdown reported by the runtime. Used by the
451
+ * cloud daemon's ``cloud_run`` settle hook to charge a run against the
452
+ * user's Cloud Credits. Adapters that don't surface usage data leave
453
+ * these undefined; the settle path treats undefined as ``0``.
454
+ */
455
+ inputCacheHitTokens?: number;
456
+ inputCacheMissTokens?: number;
457
+ outputTokens?: number;
424
458
  }
425
459
 
426
460
  /** Detection result for whether a runtime binary/SDK is usable on this machine. */
package/src/index.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  import { spawn } from "node:child_process";
3
- import { existsSync, readFileSync, writeFileSync, unlinkSync, readdirSync, statSync, rmSync } from "node:fs";
3
+ import { existsSync, readFileSync, unlinkSync, readdirSync, statSync, rmSync } from "node:fs";
4
4
  import { homedir, hostname } from "node:os";
5
5
  import path from "node:path";
6
6
  import { augmentProcessPath } from "./path-env.js";
@@ -9,7 +9,6 @@ import {
9
9
  saveConfig,
10
10
  initDefaultConfig,
11
11
  resolveConfiguredAgentIds,
12
- PID_PATH,
13
12
  SNAPSHOT_PATH,
14
13
  CONFIG_FILE_PATH,
15
14
  CONFIG_MISSING,
@@ -17,6 +16,14 @@ import {
17
16
  type RouteRule,
18
17
  type RouteRuleMatch,
19
18
  } from "./config.js";
19
+ import {
20
+ ensureNoOtherDaemonFromPidFile,
21
+ pidAlive,
22
+ readPid,
23
+ removePidFile,
24
+ stopDaemonFromPidFileForRestart,
25
+ writeCurrentPid,
26
+ } from "./daemon-singleton.js";
20
27
  import { resolveBootAgents } from "./agent-discovery.js";
21
28
  import {
22
29
  defaultTranscriptRoot,
@@ -65,6 +72,8 @@ import {
65
72
  mergeOpenclawGateways,
66
73
  openclawDiscoveryConfigEnabled,
67
74
  } from "./openclaw-discovery.js";
75
+ import { isCloudMode, loadCloudModeConfig } from "./cloud-mode.js";
76
+ import { startCloudDaemon } from "./cloud-daemon.js";
68
77
 
69
78
  augmentProcessPath();
70
79
 
@@ -135,7 +144,10 @@ Commands:
135
144
  route list
136
145
  route remove --room <rm_xxx>|--prefix <rm_xxx>
137
146
  config Print resolved config
138
- doctor [--json] [--bundle] [--full-log] Scan local runtimes (${ADAPTER_LIST});
147
+ doctor [--json] [--auth-check] [--bundle] [--full-log]
148
+ Scan local runtimes (${ADAPTER_LIST});
149
+ --auth-check also runs a Claude Code
150
+ ping probe and may contact Anthropic.
139
151
  --bundle also writes a zip under
140
152
  ~/.botcord/diagnostics/. Bundles
141
153
  daemon.log plus the latest 5 rotated
@@ -226,60 +238,6 @@ function parseArgs(argv: string[]): ParsedArgs {
226
238
  return { cmd: cmd ?? "", sub, flags, lists };
227
239
  }
228
240
 
229
- function readPid(): number | null {
230
- if (!existsSync(PID_PATH)) return null;
231
- const raw = readFileSync(PID_PATH, "utf8").trim();
232
- const pid = Number(raw);
233
- return Number.isFinite(pid) && pid > 0 ? pid : null;
234
- }
235
-
236
- function pidAlive(pid: number): boolean {
237
- try {
238
- process.kill(pid, 0);
239
- return true;
240
- } catch {
241
- return false;
242
- }
243
- }
244
-
245
- async function waitForPidExit(pid: number, timeoutMs: number): Promise<boolean> {
246
- const deadline = Date.now() + timeoutMs;
247
- while (Date.now() < deadline) {
248
- if (!pidAlive(pid)) return true;
249
- await delay(100);
250
- }
251
- return !pidAlive(pid);
252
- }
253
-
254
- async function stopExistingDaemonForRestart(pid: number): Promise<void> {
255
- if (pid === process.pid) return;
256
- log.info("existing daemon found; restarting", { pid });
257
- try {
258
- process.kill(pid, "SIGTERM");
259
- } catch {
260
- try {
261
- unlinkSync(PID_PATH);
262
- } catch {
263
- // ignore
264
- }
265
- return;
266
- }
267
- if (!(await waitForPidExit(pid, 5_000))) {
268
- log.warn("existing daemon did not stop after SIGTERM; sending SIGKILL", { pid });
269
- try {
270
- process.kill(pid, "SIGKILL");
271
- } catch {
272
- // ignore
273
- }
274
- await waitForPidExit(pid, 2_000);
275
- }
276
- try {
277
- unlinkSync(PID_PATH);
278
- } catch {
279
- // ignore
280
- }
281
- }
282
-
283
241
  /**
284
242
  * Load the daemon config, auto-creating `~/.botcord/daemon/config.json`
285
243
  * with sensible defaults on first run. `--agent` (repeated) pins explicit
@@ -596,6 +554,16 @@ async function ensureUserAuthForStart(args: ParsedArgs): Promise<UserAuthRecord
596
554
  }
597
555
 
598
556
  async function cmdStart(args: ParsedArgs): Promise<void> {
557
+ // Cloud-mode short-circuit: the Hub-managed E2B sandbox launches the
558
+ // daemon with `BOTCORD_CLOUD_DAEMON_ACCESS_TOKEN` set in the environment.
559
+ // In that case we skip the entire device-code / install-token / on-disk
560
+ // user-auth flow and dial `/cloud/daemon/ws` directly with the injected
561
+ // JWT. See ``packages/daemon/src/cloud-mode.ts`` + the design doc §4.
562
+ if (isCloudMode()) {
563
+ await cmdStartCloud(args);
564
+ return;
565
+ }
566
+
599
567
  let cfg = loadOrInitConfig(args);
600
568
  cfg = await refreshDiscoveredOpenclawGateways(cfg, "start");
601
569
  // Foreground is now the default. --background (alias -d) detaches.
@@ -616,13 +584,10 @@ async function cmdStart(args: ParsedArgs): Promise<void> {
616
584
  // var so we don't try to re-prompt for credentials it already has.
617
585
  if (process.env.BOTCORD_DAEMON_CHILD !== "1") {
618
586
  await ensureUserAuthForStart(args);
619
- const existing = readPid();
620
- if (existing && pidAlive(existing)) {
621
- await stopExistingDaemonForRestart(existing);
622
- }
587
+ await stopDaemonFromPidFileForRestart({ logger: log });
623
588
  } else {
624
- const existing = readPid();
625
- if (existing && existing !== process.pid && pidAlive(existing)) {
589
+ const existing = ensureNoOtherDaemonFromPidFile();
590
+ if (existing) {
626
591
  console.error(`daemon already running (pid ${existing})`);
627
592
  process.exit(1);
628
593
  }
@@ -657,17 +622,13 @@ async function cmdStart(args: ParsedArgs): Promise<void> {
657
622
  }
658
623
 
659
624
  // Foreground: we ARE the daemon.
660
- writeFileSync(PID_PATH, String(process.pid), { mode: 0o600 });
625
+ writeCurrentPid();
661
626
  const handle = await startDaemon({ config: cfg, configPath: CONFIG_FILE_PATH });
662
627
 
663
628
  const shutdown = async (sig: string) => {
664
629
  log.info("signal received", { sig });
665
630
  await handle.stop(sig);
666
- try {
667
- unlinkSync(PID_PATH);
668
- } catch {
669
- // ignore
670
- }
631
+ removePidFile();
671
632
  process.exit(0);
672
633
  };
673
634
  process.on("SIGTERM", () => shutdown("SIGTERM"));
@@ -680,6 +641,57 @@ async function cmdStart(args: ParsedArgs): Promise<void> {
680
641
  });
681
642
  }
682
643
 
644
+ /**
645
+ * Cloud-mode start: launched by the Hub-managed E2B sandbox provider.
646
+ *
647
+ * No login flow and no on-disk credentials at boot. The daemon still uses
648
+ * the same PID-file singleton guard as local foreground starts because E2B
649
+ * resume hooks can run the startup command more than once in one sandbox.
650
+ *
651
+ * Always foreground — `--background` / `-d` is silently ignored because
652
+ * E2B sandboxes don't have a meaningful detach concept.
653
+ */
654
+ async function cmdStartCloud(_args: ParsedArgs): Promise<void> {
655
+ const cloudConfig = loadCloudModeConfig();
656
+ log.info("cmd start (cloud mode)", {
657
+ cloudDaemonInstanceId: cloudConfig.cloudDaemonInstanceId,
658
+ daemonInstanceId: cloudConfig.daemonInstanceId,
659
+ hubUrl: cloudConfig.hubUrl,
660
+ });
661
+ await stopDaemonFromPidFileForRestart({ logger: log });
662
+ writeCurrentPid();
663
+
664
+ // Cloud daemons always start with an empty in-memory config — every
665
+ // agent + route arrives over the control plane. We synthesize the
666
+ // shape `Gateway` expects without ever touching `~/.botcord/daemon/config.json`.
667
+ const cfg: DaemonConfig = {
668
+ defaultRoute: { adapter: "deepseek-tui", cwd: homedir() },
669
+ routes: [],
670
+ streamBlocks: true,
671
+ };
672
+ saveConfig(cfg);
673
+ log.info("cloud mode config initialized", { configPath: CONFIG_FILE_PATH });
674
+
675
+ const handle = await startCloudDaemon({
676
+ cloudConfig,
677
+ config: cfg,
678
+ configPath: CONFIG_FILE_PATH,
679
+ });
680
+
681
+ const shutdown = async (sig: string): Promise<void> => {
682
+ log.info("signal received", { sig });
683
+ await handle.stop(sig);
684
+ removePidFile();
685
+ process.exit(0);
686
+ };
687
+ process.on("SIGTERM", () => void shutdown("SIGTERM"));
688
+ process.on("SIGINT", () => void shutdown("SIGINT"));
689
+
690
+ await new Promise<void>(() => {
691
+ // Deliberately never resolves; `shutdown()` calls `process.exit(0)`.
692
+ });
693
+ }
694
+
683
695
  async function cmdStop(): Promise<void> {
684
696
  const pid = readPid();
685
697
  log.info("cmd stop", { pid });
@@ -689,11 +701,7 @@ async function cmdStop(): Promise<void> {
689
701
  }
690
702
  if (!pidAlive(pid)) {
691
703
  console.error(`pid ${pid} not alive; removing stale pid file`);
692
- try {
693
- unlinkSync(PID_PATH);
694
- } catch {
695
- // ignore
696
- }
704
+ removePidFile();
697
705
  process.exit(1);
698
706
  }
699
707
  process.kill(pid, "SIGTERM");
@@ -1408,6 +1416,7 @@ async function cmdDoctor(args: ParsedArgs): Promise<void> {
1408
1416
  fileReader: fsFileReader,
1409
1417
  fetcher: defaultHttpFetcher,
1410
1418
  timeoutMs: 5_000,
1419
+ authCheck: args.flags["auth-check"] === true,
1411
1420
  });
1412
1421
 
1413
1422
  if (args.flags.json === true) {
package/src/provision.ts CHANGED
@@ -337,7 +337,10 @@ export function createProvisioner(opts: ProvisionerOptions): (
337
337
  } catch {
338
338
  cfgForProbe = undefined;
339
339
  }
340
- const snapshot = await collectRuntimeSnapshotAsync({ cfg: cfgForProbe });
340
+ const snapshot = attachRuntimeHealth(
341
+ await collectRuntimeSnapshotAsync({ cfg: cfgForProbe }),
342
+ gateway.snapshot(),
343
+ );
341
344
  daemonLog.debug("list_runtimes", { count: snapshot.runtimes.length });
342
345
  return { ok: true, result: snapshot };
343
346
  }
@@ -1795,6 +1798,47 @@ export function collectRuntimeSnapshot(opts: { force?: boolean } = {}): ListRunt
1795
1798
  return value;
1796
1799
  }
1797
1800
 
1801
+ export function attachRuntimeHealth(
1802
+ snapshot: ListRuntimesResult,
1803
+ live: GatewayRuntimeSnapshot,
1804
+ ): ListRuntimesResult {
1805
+ const breakers = Object.values(live.runtimeCircuitBreakers ?? {});
1806
+ if (breakers.length === 0) return snapshot;
1807
+
1808
+ const byRuntime = new Map<string, typeof breakers>();
1809
+ for (const breaker of breakers) {
1810
+ const list = byRuntime.get(breaker.runtime) ?? [];
1811
+ if (list.length < 32) list.push(breaker);
1812
+ byRuntime.set(breaker.runtime, list);
1813
+ }
1814
+
1815
+ return {
1816
+ ...snapshot,
1817
+ runtimes: snapshot.runtimes.map((runtime) => {
1818
+ const runtimeBreakers = byRuntime.get(runtime.id);
1819
+ if (!runtimeBreakers?.length) return runtime;
1820
+ return {
1821
+ ...runtime,
1822
+ health: {
1823
+ ...((runtime as { health?: Record<string, unknown> }).health ?? {}),
1824
+ circuitBreakers: runtimeBreakers.map((b) => ({
1825
+ key: b.key,
1826
+ channel: b.channel,
1827
+ accountId: b.accountId,
1828
+ conversationId: b.conversationId,
1829
+ threadId: b.threadId ?? null,
1830
+ failures: b.failures,
1831
+ openedAt: b.openedAt,
1832
+ blockedUntil: b.blockedUntil,
1833
+ lastFailureAt: b.lastFailureAt,
1834
+ lastError: b.lastError,
1835
+ })),
1836
+ },
1837
+ };
1838
+ }),
1839
+ };
1840
+ }
1841
+
1798
1842
  /** Maximum number of `endpoints[]` entries persisted per runtime (RFC §3.8.2). */
1799
1843
  export const RUNTIME_ENDPOINTS_CAP = 32;
1800
1844
 
@@ -85,6 +85,28 @@ function renderTurns(
85
85
  return out;
86
86
  }
87
87
 
88
+ function renderRuntimeCircuitBreakers(
89
+ snap: GatewayRuntimeSnapshot,
90
+ now: number,
91
+ ): string[] {
92
+ const entries = Object.values(snap.runtimeCircuitBreakers ?? {});
93
+ if (entries.length === 0) return ["Runtime circuit breakers:", " (none)"];
94
+ const out: string[] = ["Runtime circuit breakers:"];
95
+ const keyW = Math.max(3, ...entries.map((b) => b.key.length));
96
+ const rtW = Math.max(7, ...entries.map((b) => b.runtime.length));
97
+ const convW = Math.max(12, ...entries.map((b) => b.conversationId.length));
98
+ out.push(
99
+ ` ${pad("KEY", keyW)} ${pad("RUNTIME", rtW)} ${pad("CONVERSATION", convW)} FAILS BLOCKED FOR LAST ERROR`,
100
+ );
101
+ for (const b of entries) {
102
+ const blockedFor = relTime(b.blockedUntil - now).replace(" ago", "");
103
+ out.push(
104
+ ` ${pad(b.key, keyW)} ${pad(b.runtime, rtW)} ${pad(b.conversationId, convW)} ${pad(String(b.failures), 5)} ${pad(blockedFor, 11)} ${b.lastError}`,
105
+ );
106
+ }
107
+ return out;
108
+ }
109
+
88
110
  /**
89
111
  * Format a human-readable status block. Kept pure so it can be unit-tested
90
112
  * without touching disk or spawning a daemon.
@@ -125,6 +147,8 @@ export function renderStatus(input: StatusRenderInput, now: number = Date.now())
125
147
  lines.push(...renderChannels(input.snapshot));
126
148
  lines.push("");
127
149
  lines.push(...renderTurns(input.snapshot, now));
150
+ lines.push("");
151
+ lines.push(...renderRuntimeCircuitBreakers(input.snapshot, now));
128
152
  } else if (input.alive) {
129
153
  lines.push("snapshot: unavailable (daemon running but no snapshot file found)");
130
154
  }