@botcord/daemon 0.2.74 → 0.2.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/cloud-auth.d.ts +47 -0
  2. package/dist/cloud-auth.js +51 -0
  3. package/dist/cloud-daemon.d.ts +43 -0
  4. package/dist/cloud-daemon.js +252 -0
  5. package/dist/cloud-mode.d.ts +45 -0
  6. package/dist/cloud-mode.js +55 -0
  7. package/dist/cloud-settle.d.ts +81 -0
  8. package/dist/cloud-settle.js +100 -0
  9. package/dist/daemon-singleton.d.ts +26 -0
  10. package/dist/daemon-singleton.js +91 -0
  11. package/dist/daemon.d.ts +1 -1
  12. package/dist/daemon.js +15 -6
  13. package/dist/doctor.d.ts +4 -1
  14. package/dist/doctor.js +15 -4
  15. package/dist/gateway/channels/botcord.d.ts +1 -1
  16. package/dist/gateway/channels/botcord.js +48 -5
  17. package/dist/gateway/dispatcher.d.ts +34 -1
  18. package/dist/gateway/dispatcher.js +277 -20
  19. package/dist/gateway/gateway.d.ts +9 -1
  20. package/dist/gateway/gateway.js +4 -1
  21. package/dist/gateway/runtime-errors.d.ts +6 -0
  22. package/dist/gateway/runtime-errors.js +14 -0
  23. package/dist/gateway/runtimes/claude-code.d.ts +8 -0
  24. package/dist/gateway/runtimes/claude-code.js +92 -4
  25. package/dist/gateway/runtimes/deepseek-tui.js +19 -5
  26. package/dist/gateway/transcript.d.ts +1 -1
  27. package/dist/gateway/types.d.ts +33 -0
  28. package/dist/index.js +71 -80
  29. package/dist/provision.d.ts +2 -0
  30. package/dist/provision.js +39 -1
  31. package/dist/status-render.js +17 -0
  32. package/package.json +2 -2
  33. package/src/__tests__/cloud-auth.test.ts +42 -0
  34. package/src/__tests__/cloud-daemon.test.ts +237 -0
  35. package/src/__tests__/cloud-mode.test.ts +65 -0
  36. package/src/__tests__/cloud-settle.test.ts +287 -0
  37. package/src/__tests__/daemon-singleton.test.ts +89 -0
  38. package/src/__tests__/doctor.test.ts +34 -0
  39. package/src/__tests__/runtime-discovery.test.ts +90 -0
  40. package/src/__tests__/status-render.test.ts +34 -0
  41. package/src/cloud-auth.ts +78 -0
  42. package/src/cloud-daemon.ts +338 -0
  43. package/src/cloud-mode.ts +70 -0
  44. package/src/cloud-settle.ts +182 -0
  45. package/src/daemon-singleton.ts +122 -0
  46. package/src/daemon.ts +18 -5
  47. package/src/doctor.ts +18 -5
  48. package/src/gateway/__tests__/botcord-channel.test.ts +74 -0
  49. package/src/gateway/__tests__/claude-code-adapter.test.ts +101 -1
  50. package/src/gateway/__tests__/deepseek-tui-adapter.test.ts +19 -0
  51. package/src/gateway/__tests__/dispatcher.test.ts +120 -0
  52. package/src/gateway/channels/botcord.ts +54 -7
  53. package/src/gateway/dispatcher.ts +354 -21
  54. package/src/gateway/gateway.ts +16 -1
  55. package/src/gateway/runtime-errors.ts +15 -0
  56. package/src/gateway/runtimes/claude-code.ts +98 -2
  57. package/src/gateway/runtimes/deepseek-tui.ts +23 -5
  58. package/src/gateway/transcript.ts +1 -1
  59. package/src/gateway/types.ts +34 -0
  60. package/src/index.ts +83 -74
  61. package/src/provision.ts +45 -1
  62. package/src/status-render.ts +24 -0
@@ -98,6 +98,18 @@ describe("probeChannel", () => {
98
98
  expect(result.hubMessage).toContain("503");
99
99
  });
100
100
 
101
+ it("treats HTTP 404 as reachable because the probe only needs the Hub host", async () => {
102
+ const credsPath = "/creds/ag_123.json";
103
+ const result = await probeChannel(ch, {
104
+ credentialsPath: () => credsPath,
105
+ fileReader: fileReader({ [credsPath]: okCreds }),
106
+ fetcher: fetcher({ ok: false, status: 404 }),
107
+ timeoutMs: 1000,
108
+ });
109
+ expect(result.hubOk).toBe(true);
110
+ expect(result.hubMessage).toContain("404");
111
+ });
112
+
101
113
  it("probeChannels returns an entry per input channel", async () => {
102
114
  const credsPath = "/creds/ag_123.json";
103
115
  const results = await probeChannels({
@@ -200,6 +212,28 @@ describe("renderDoctor", () => {
200
212
  expect(out).toContain("✓");
201
213
  });
202
214
 
215
+ it("renders optional runtime auth check results", () => {
216
+ const out = renderDoctor({
217
+ runtimes: [
218
+ {
219
+ id: "claude-code",
220
+ displayName: "Claude Code",
221
+ binary: "claude",
222
+ supportsRun: true,
223
+ result: { available: true, version: "1.0.0", path: "/usr/bin/claude" },
224
+ auth: {
225
+ checked: true,
226
+ ok: false,
227
+ message: "Failed to authenticate. API Error: 403 Request not allowed",
228
+ },
229
+ },
230
+ ],
231
+ channels: [],
232
+ });
233
+ expect(out).toContain("auth failed");
234
+ expect(out).toContain("Failed to authenticate");
235
+ });
236
+
203
237
  it("shows 'No channels configured.' when the channel list is empty", () => {
204
238
  const out = renderDoctor({
205
239
  runtimes: [
@@ -28,6 +28,7 @@ vi.mock("../adapters/runtimes.js", async () => {
28
28
  });
29
29
 
30
30
  const {
31
+ attachRuntimeHealth,
31
32
  collectRuntimeSnapshot,
32
33
  collectRuntimeSnapshotAsync,
33
34
  clearRuntimeProbeCache,
@@ -366,4 +367,93 @@ describe("pushRuntimeSnapshot (first-connect push)", () => {
366
367
  expect(ok).toBe(false);
367
368
  expect(send).toHaveBeenCalledOnce();
368
369
  });
370
+
371
+ it("attaches live runtime circuit breaker health to the pushed runtime entry", () => {
372
+ setRuntimes([
373
+ {
374
+ id: "claude-code",
375
+ displayName: "Claude Code",
376
+ binary: "claude",
377
+ supportsRun: true,
378
+ result: { available: true },
379
+ },
380
+ ]);
381
+ const send = vi.fn(() => true);
382
+ const ok = pushRuntimeSnapshot(
383
+ { send },
384
+ {
385
+ channels: {},
386
+ turns: {},
387
+ runtimeCircuitBreakers: {
388
+ "claude-code:botcord:ag_1:rm_oc_a:": {
389
+ key: "claude-code:botcord:ag_1:rm_oc_a:",
390
+ runtime: "claude-code",
391
+ channel: "botcord",
392
+ accountId: "ag_1",
393
+ conversationId: "rm_oc_a",
394
+ threadId: null,
395
+ failures: 3,
396
+ openedAt: 1000,
397
+ blockedUntil: 2000,
398
+ lastFailureAt: 1500,
399
+ lastError: "Failed to authenticate",
400
+ },
401
+ },
402
+ },
403
+ );
404
+ expect(ok).toBe(true);
405
+ const frame = send.mock.calls[0]![0] as {
406
+ params: { runtimes: Array<{ id: string; health?: { circuitBreakers?: unknown[] } }> };
407
+ };
408
+ expect(frame.params.runtimes[0].health?.circuitBreakers).toEqual([
409
+ expect.objectContaining({
410
+ conversationId: "rm_oc_a",
411
+ failures: 3,
412
+ lastError: "Failed to authenticate",
413
+ }),
414
+ ]);
415
+ });
416
+ });
417
+
418
+ describe("attachRuntimeHealth", () => {
419
+ it("groups live circuit breakers onto matching runtime entries", () => {
420
+ const snap = {
421
+ runtimes: [
422
+ { id: "claude-code", available: true },
423
+ { id: "codex", available: true },
424
+ ],
425
+ probedAt: 1000,
426
+ };
427
+ const out = attachRuntimeHealth(snap, {
428
+ channels: {},
429
+ turns: {},
430
+ runtimeCircuitBreakers: {
431
+ "claude-code:botcord:ag_1:rm_oc_a:": {
432
+ key: "claude-code:botcord:ag_1:rm_oc_a:",
433
+ runtime: "claude-code",
434
+ channel: "botcord",
435
+ accountId: "ag_1",
436
+ conversationId: "rm_oc_a",
437
+ threadId: null,
438
+ failures: 3,
439
+ openedAt: 1000,
440
+ blockedUntil: 2000,
441
+ lastFailureAt: 1500,
442
+ lastError: "Failed to authenticate",
443
+ },
444
+ },
445
+ });
446
+ expect(out.runtimes[0]).toMatchObject({
447
+ id: "claude-code",
448
+ health: {
449
+ circuitBreakers: [
450
+ {
451
+ conversationId: "rm_oc_a",
452
+ lastError: "Failed to authenticate",
453
+ },
454
+ ],
455
+ },
456
+ });
457
+ expect(out.runtimes[1]).toEqual({ id: "codex", available: true });
458
+ });
369
459
  });
@@ -8,6 +8,7 @@ function snapshot(
8
8
  return {
9
9
  channels: overrides.channels ?? {},
10
10
  turns: overrides.turns ?? {},
11
+ runtimeCircuitBreakers: overrides.runtimeCircuitBreakers,
11
12
  };
12
13
  }
13
14
 
@@ -69,6 +70,7 @@ describe("renderStatus", () => {
69
70
  expect(out).toContain("rm_oc_abc");
70
71
  expect(out).toContain("claude-code");
71
72
  expect(out).toMatch(/12s ago/);
73
+ expect(out).toContain("Runtime circuit breakers:");
72
74
  expect(out).not.toContain("⚠ stale");
73
75
  });
74
76
 
@@ -134,4 +136,36 @@ describe("renderStatus", () => {
134
136
  expect(out).toContain("Channels:");
135
137
  expect(out).toContain("(none)");
136
138
  });
139
+
140
+ it("renders open runtime auth circuit breakers", () => {
141
+ const now = 1_700_000_000_000;
142
+ const out = renderStatus(
143
+ {
144
+ pid: 1,
145
+ alive: true,
146
+ snapshot: snapshot({
147
+ runtimeCircuitBreakers: {
148
+ "claude-code:botcord:ag_1:rm_oc_a:": {
149
+ key: "claude-code:botcord:ag_1:rm_oc_a:",
150
+ runtime: "claude-code",
151
+ channel: "botcord",
152
+ accountId: "ag_1",
153
+ conversationId: "rm_oc_a",
154
+ failures: 3,
155
+ openedAt: now - 1000,
156
+ blockedUntil: now + 60_000,
157
+ lastFailureAt: now - 1000,
158
+ lastError: "Failed to authenticate",
159
+ },
160
+ },
161
+ }),
162
+ snapshotAgeMs: 100,
163
+ },
164
+ now,
165
+ );
166
+ expect(out).toContain("Runtime circuit breakers:");
167
+ expect(out).toContain("claude-code");
168
+ expect(out).toContain("rm_oc_a");
169
+ expect(out).toContain("Failed to authenticate");
170
+ });
137
171
  });
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Cloud-daemon auth manager.
3
+ *
4
+ * Implements the subset of `UserAuthManager` surface that `ControlChannel`
5
+ * uses (`current`, `ensureAccessToken`) so the same channel implementation
6
+ * can be reused for `/cloud/daemon/ws`. Unlike the user variant there is
7
+ * no refresh token: the Hub-managed E2B provider rotates the access token
8
+ * by relaunching the daemon. When the embedded JWT expires, the WS server
9
+ * closes with 4401 and `ControlChannel.onClose` writes the auth-expired
10
+ * flag — at which point the provider would resume the sandbox with a
11
+ * fresh token.
12
+ *
13
+ * Plan §6.4: `auth-expired.flag` is still written so any external monitor
14
+ * watching the sandbox filesystem can detect the situation; the cloud
15
+ * provider doesn't read this file directly today (it relies on
16
+ * `daemon_instances.last_seen_at` going stale instead).
17
+ *
18
+ * Field names match `UserAuthRecord` for drop-in compatibility with
19
+ * `ControlChannel.start()` which reads `auth.current.{userId,hubUrl,label}`.
20
+ */
21
+ import type { UserAuthRecord, UserAuthManager } from "./user-auth.js";
22
+ import type { CloudModeConfig } from "./cloud-mode.js";
23
+
24
/**
 * Auth holder for cloud-daemon mode, backed by the values in
 * {@link CloudModeConfig}.
 *
 * It exposes `current` and `ensureAccessToken`. The class does not declare
 * `implements UserAuthManager`, so the compiler does not check its shape
 * against that type; callers obtain a `UserAuthManager`-typed reference via
 * `asUserAuthManager`.
 */
export class CloudAuthManager {
  /** Built once in the constructor and never reassigned afterwards. */
  private readonly record: UserAuthRecord;

  /**
   * @param cfg Resolved cloud-mode configuration; its hub URL, instance ids
   *   and access token are copied into the auth record.
   */
  constructor(cfg: CloudModeConfig) {
    this.record = {
      version: 1,
      // There is no user id in the cloud config, so the cloud daemon instance
      // id is surfaced as the "who am I" string instead. (Original author's
      // note: the JWT carries the real user id and the Hub already knows the
      // binding.)
      userId: cfg.cloudDaemonInstanceId,
      daemonInstanceId: cfg.daemonInstanceId,
      hubUrl: cfg.hubUrl,
      accessToken: cfg.accessToken,
      // Cloud mode has no refresh token; the empty string only keeps the
      // record's type intact. `ensureAccessToken` below returns the stored
      // token unconditionally and never reads `refreshToken` or `expiresAt`.
      refreshToken: "",
      // Largest safe integer, so the record never looks expired to anything
      // that compares against it. Per the original note, expiry is enforced
      // by the Hub closing the WebSocket with 4401.
      expiresAt: Number.MAX_SAFE_INTEGER,
      loggedInAt: new Date().toISOString(),
      label: `cloud:${cfg.cloudDaemonInstanceId}`,
    };
  }

  /** The auth record built at construction time (same object on every read). */
  get current(): UserAuthRecord {
    return this.record;
  }

  /**
   * Resolve to the access token captured at construction time.
   *
   * No refresh is attempted here. Per the original author's note, the
   * provider rotates the token by relaunching the daemon.
   *
   * @returns The access token from the config passed to the constructor.
   */
  async ensureAccessToken(): Promise<string> {
    return this.record.accessToken;
  }
}
67
+
68
/**
 * Re-type a {@link CloudAuthManager} as a `UserAuthManager`.
 *
 * This is a compile-time cast only: the very same object is returned and
 * nothing is converted at runtime. The cast goes through `unknown`, so the
 * compiler does not verify that the two shapes agree. Keeping it in one
 * helper means the cast is written down in a single place.
 *
 * @param mgr The cloud auth manager to expose.
 * @returns `mgr` itself, typed as `UserAuthManager`.
 */
export function asUserAuthManager(mgr: CloudAuthManager): UserAuthManager {
  const untyped: unknown = mgr;
  return untyped as UserAuthManager;
}
@@ -0,0 +1,338 @@
1
+ /**
2
+ * Cloud-daemon mode runtime entrypoint.
3
+ *
4
+ * Equivalent to {@link startDaemon} for cloud-mode operation: skips local
5
+ * user-auth, skips local on-disk credentials, dials
6
+ * `${HUB_URL}/cloud/daemon/ws` with the env-injected JWT, and reuses the
7
+ * existing provisioner so `provision_agent` / `revoke_agent` frames work
8
+ * the same way they do for local daemons.
9
+ *
10
+ * See ``docs/cloud-agent-technical-design.md`` §4 + §6.
11
+ */
12
+ import { shouldWake, type AttentionPolicy } from "@botcord/protocol-core";
13
+ import {
14
+ Gateway,
15
+ resolveTranscriptEnabled,
16
+ type ChannelAdapter,
17
+ type GatewayChannelConfig,
18
+ type GatewayInboundMessage,
19
+ type GatewayLogger,
20
+ type GatewayRuntimeSnapshot,
21
+ } from "./gateway/index.js";
22
+ import { ActivityTracker } from "./activity-tracker.js";
23
+ import type { DaemonConfig } from "./config.js";
24
+ import { SESSIONS_PATH, SNAPSHOT_PATH } from "./config.js";
25
+ import { ControlChannel } from "./control-channel.js";
26
+ import { toGatewayConfig } from "./daemon-config-map.js";
27
+ import { log as daemonLog } from "./log.js";
28
+ import { createProvisioner } from "./provision.js";
29
+ import { createDaemonChannel, pushRuntimeSnapshot } from "./daemon.js";
30
+ import { SnapshotWriter } from "./snapshot-writer.js";
31
+ import { createDaemonSystemContextBuilder } from "./system-context.js";
32
+ import { readWorkingMemorySnapshot } from "./working-memory.js";
33
+ import { createRoomStaticContextBuilder } from "./room-context.js";
34
+ import { createRoomContextFetcher } from "./room-context-fetcher.js";
35
+ import { composeBotCordUserTurn } from "./turn-text.js";
36
+ import { PolicyResolver, type DaemonAttentionPolicy } from "./gateway/policy-resolver.js";
37
+ import { scanMention } from "./mention-scan.js";
38
+ import { createActivityRecorder } from "./daemon.js";
39
+ import { CloudAuthManager, asUserAuthManager } from "./cloud-auth.js";
40
+ import type { CloudModeConfig } from "./cloud-mode.js";
41
+ import { buildCloudRunSettleHook } from "./cloud-settle.js";
42
+ import type { InstalledAgentInfo, OnAgentInstalledHook } from "./provision.js";
43
+
44
+ // Cloud daemons follow the same cadence as local — keeps dashboard
45
+ // "runtimes last detected" behavior identical across both kinds.
46
+ const DEFAULT_TURN_TIMEOUT_MS = 30 * 60 * 1000;
47
+ const DEFAULT_SNAPSHOT_INTERVAL_MS = 5_000;
48
+
49
/**
 * Snapshot interval in milliseconds, taken from the
 * `BOTCORD_DAEMON_SNAPSHOT_INTERVAL_MS` environment variable.
 *
 * @returns The variable parsed with `Number(...)` when it is set, finite and
 *   greater than zero; otherwise `DEFAULT_SNAPSHOT_INTERVAL_MS`.
 */
function resolveSnapshotIntervalMs(): number {
  const override = process.env.BOTCORD_DAEMON_SNAPSHOT_INTERVAL_MS;
  // An unset or empty variable is treated like an unparseable one.
  const parsed = override ? Number(override) : Number.NaN;
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? parsed : DEFAULT_SNAPSHOT_INTERVAL_MS;
}
56
+
57
/** Inputs for {@link startCloudDaemon}. */
export interface CloudDaemonRuntimeOptions {
  /** Cloud configuration resolved from the environment (see {@link loadCloudModeConfig}). */
  cloudConfig: CloudModeConfig;
  /**
   * Initial daemon configuration. It is translated into the gateway config
   * with an empty agent list, and its `transcript.enabled` flag feeds the
   * transcript setting. Per the original author's note, cloud daemons boot
   * with zero agents and gain them only through `provision_agent` frames.
   */
  config: DaemonConfig;
  /**
   * Path of the daemon config file.
   * NOTE(review): `startCloudDaemon` as shown in this file never reads this
   * field — confirm whether a consumer elsewhere relies on it.
   */
  configPath: string;
  /** Session store location; `SESSIONS_PATH` is used when omitted. */
  sessionStorePath?: string;
  /** Snapshot file location; `SNAPSHOT_PATH` is used when omitted. */
  snapshotPath?: string;
  /**
   * Snapshot write interval in milliseconds. When omitted, the value comes
   * from `resolveSnapshotIntervalMs()`.
   */
  snapshotIntervalMs?: number;
  /** Logger to use; when omitted, calls are forwarded to the daemon log. */
  log?: GatewayLogger;
  /** Test hook — replaces the `ControlChannel` constructor. */
  controlChannelFactory?: typeof ControlChannel;
  /** When true, no control channel is created (gateway-only tests). */
  disableControlChannel?: boolean;
  /**
   * Test hook — replaces `createProvisioner`. Whichever factory is used is
   * called with `{ gateway, policyResolver, onAgentInstalled }`.
   */
  provisionerFactory?: typeof createProvisioner;
}
82
+
83
/** What {@link startCloudDaemon} resolves to. */
export interface CloudDaemonHandle {
  /**
   * Shut the daemon down. Calling it again returns the promise from the
   * first call. `reason` is passed to the gateway and included in the logs.
   */
  stop: (reason?: string) => Promise<void>;
  /** Current runtime snapshot, read from the gateway on each call. */
  snapshot: () => GatewayRuntimeSnapshot;
}
88
+
89
/**
 * Pick the logger the cloud daemon should use.
 *
 * @param opt Caller-supplied logger, if any.
 * @returns `opt` itself when provided; otherwise a logger whose four methods
 *   forward to the matching `daemonLog` method.
 */
function buildLogger(opt: GatewayLogger | undefined): GatewayLogger {
  if (!opt) {
    const viaDaemonLog: GatewayLogger = {
      info: (msg, meta) => daemonLog.info(msg, meta),
      warn: (msg, meta) => daemonLog.warn(msg, meta),
      error: (msg, meta) => daemonLog.error(msg, meta),
      debug: (msg, meta) => daemonLog.debug(msg, meta),
    };
    return viaDaemonLog;
  }
  return opt;
}
98
+
99
/**
 * Start the daemon in cloud mode.
 *
 * The gateway is created from a config translated with an empty agent list
 * and started first. Unless `disableControlChannel` is set, a control
 * channel is then opened on `/cloud/daemon/ws` using a
 * {@link CloudAuthManager}; incoming frames are handed to the provisioner,
 * whose `onAgentInstalled` hook fills the per-agent maps kept here. A
 * control-channel start failure is logged as a warning and does not abort
 * startup. Finally a snapshot writer is started.
 *
 * @param opts See {@link CloudDaemonRuntimeOptions}.
 * @returns A handle exposing `stop` and `snapshot`.
 */
export async function startCloudDaemon(
  opts: CloudDaemonRuntimeOptions,
): Promise<CloudDaemonHandle> {
  const log = buildLogger(opts.log);
  const cloud = opts.cloudConfig;

  log.info("cloud daemon starting", {
    cloudDaemonInstanceId: cloud.cloudDaemonInstanceId,
    daemonInstanceId: cloud.daemonInstanceId,
    hubUrl: cloud.hubUrl,
  });

  // Per-agent state. Every map starts empty and is only written by
  // `onAgentInstalled` further down.
  const activityTracker = new ActivityTracker();
  const credentialPathByAgentId = new Map<string, string>();
  const agentHubUrls = new Map<string, string>();
  const agentDisplayNames = new Map<string, string>();

  // Hub URL captured at boot; used for any agent that has no entry of its own.
  const defaultHubUrl = cloud.hubUrl;
  const resolveHubUrl = (accountId: string): string | undefined => {
    const known = agentHubUrls.get(accountId);
    return known ?? defaultHubUrl;
  };

  // Translate the daemon config with no agents bound.
  const gatewayConfig = toGatewayConfig(opts.config, {
    agentIds: [],
    agentRuntimes: {},
  });

  const fetchRoomInfo = createRoomContextFetcher({
    credentialPathByAgentId,
    hubBaseUrl: cloud.hubUrl,
    log,
  });
  const roomContextBuilder = createRoomStaticContextBuilder({
    fetchRoomInfo,
    log,
  });

  type PerAgentBuilder = (
    msg: GatewayInboundMessage,
  ) => Promise<string | undefined> | string | undefined;
  const systemContextBuilders = new Map<string, PerAgentBuilder>();

  // Delegates to the builder registered for the message's account, if any.
  const buildSystemContext = (
    message: GatewayInboundMessage,
  ): Promise<string | undefined> | string | undefined =>
    systemContextBuilders.get(message.accountId)?.(message);

  const buildMemoryContext = (message: GatewayInboundMessage) =>
    readWorkingMemorySnapshot(message.accountId);

  const recordActivity = createActivityRecorder({ activityTracker });
  const onInbound = (msg: GatewayInboundMessage): void => {
    recordActivity(msg);
  };

  // Turn-completion hook: pulls the envelope type and `cloud_run.run_id` out
  // of the raw message and forwards them, with timing and token counts, to
  // the settle hook. Whether anything is actually settled is up to that hook.
  const settleHook = buildCloudRunSettleHook({
    hubUrl: cloud.hubUrl,
    accessToken: cloud.accessToken,
    log,
  });
  const onTurnComplete = async (event: {
    message: GatewayInboundMessage;
    result?: import("./gateway/types.js").RuntimeRunResult;
    wallTimeMs: number;
    error?: unknown;
  }): Promise<void> => {
    type CloudRunEnvelope = {
      type?: string;
      payload?: { cloud_run?: { run_id?: unknown } | null } | null;
    };
    const raw = event.message.raw as { envelope?: unknown } | undefined;
    const envelope = raw?.envelope as CloudRunEnvelope | undefined;
    const runId = envelope?.payload?.cloud_run?.run_id;
    const usage = event.result;

    await settleHook({
      envelopeType: envelope?.type,
      // Only a string run id is forwarded; anything else becomes undefined.
      runId: typeof runId === "string" ? runId : undefined,
      wallTimeMs: event.wallTimeMs,
      // Each counter is included only when the runtime reported it.
      tokens: {
        ...(usage?.inputCacheHitTokens !== undefined
          ? { inputCacheHitTokens: usage.inputCacheHitTokens }
          : {}),
        ...(usage?.inputCacheMissTokens !== undefined
          ? { inputCacheMissTokens: usage.inputCacheMissTokens }
          : {}),
        ...(usage?.outputTokens !== undefined
          ? { outputTokens: usage.outputTokens }
          : {}),
      },
      messageId: event.message.id,
    });
  };

  // No global policy source is wired here: the fetcher always yields undefined.
  const policyResolver = new PolicyResolver({
    fetchGlobal: async (_agentId: string) => undefined,
  });

  // Decides whether an inbound message should wake the agent.
  const attentionGate = async (msg: GatewayInboundMessage): Promise<boolean> => {
    const policy: DaemonAttentionPolicy = await policyResolver.resolve(
      msg.accountId,
      msg.conversation.id,
    );
    if (policy.mode === "allowed_senders") {
      const allowed = policy.allowedSenderIds ?? [];
      return allowed.includes(msg.sender.id);
    }
    // Combine the channel-reported mention flag with a local text scan.
    const mentionedInText = scanMention(msg.text, {
      agentId: msg.accountId,
      displayName: agentDisplayNames.get(msg.accountId),
    });
    return shouldWake(policy as AttentionPolicy, {
      mentioned: msg.mentioned === true || mentionedInText,
      text: msg.text,
    });
  };

  // Records a newly installed agent in the per-agent maps and registers its
  // system-context builder once.
  const onAgentInstalled: OnAgentInstalledHook = (info: InstalledAgentInfo) => {
    const { agentId } = info;
    credentialPathByAgentId.set(agentId, info.credentialsFile);
    if (info.hubUrl) agentHubUrls.set(agentId, info.hubUrl);
    if (info.displayName) agentDisplayNames.set(agentId, info.displayName);
    if (systemContextBuilders.has(agentId)) return;
    systemContextBuilders.set(
      agentId,
      createDaemonSystemContextBuilder({
        agentId,
        activityTracker,
        roomContextBuilder,
        // No loop-risk guard is supplied. Original author's note: cloud
        // daemons run isolated and the runtime adapter's wall-time budget
        // covers the equivalent.
        loopRiskBuilder: () => null,
      }),
    );
  };

  const gateway = new Gateway({
    config: gatewayConfig,
    sessionStorePath: opts.sessionStorePath ?? SESSIONS_PATH,
    createChannel: (chCfg: GatewayChannelConfig): ChannelAdapter =>
      createDaemonChannel(chCfg, {
        credentialPathByAgentId,
        hubBaseUrl: cloud.hubUrl,
      }),
    log,
    turnTimeoutMs: DEFAULT_TURN_TIMEOUT_MS,
    buildSystemContext,
    buildMemoryContext,
    onInbound,
    onTurnComplete,
    composeUserTurn: composeBotCordUserTurn,
    attentionGate,
    resolveHubUrl,
    transcriptEnabled: resolveTranscriptEnabled(
      process.env.BOTCORD_TRANSCRIPT,
      opts.config.transcript?.enabled,
    ),
  });

  await gateway.start();
  log.info("cloud daemon gateway started (zero agents at boot)");

  let controlChannel: ControlChannel | null = null;
  if (!opts.disableControlChannel) {
    const auth = asUserAuthManager(new CloudAuthManager(cloud));
    const makeProvisioner = opts.provisionerFactory ?? createProvisioner;
    const provisioner = makeProvisioner({
      gateway,
      policyResolver,
      onAgentInstalled,
    });
    const ChannelCtor = opts.controlChannelFactory ?? ControlChannel;
    controlChannel = new ChannelCtor({
      auth,
      // Cloud-specific WebSocket endpoint.
      path: "/cloud/daemon/ws",
      handle: async (frame) => provisioner(frame),
      label: `cloud:${cloud.cloudDaemonInstanceId}`,
    });
    try {
      await controlChannel.start();
      // Push a runtime snapshot over the channel as soon as it is up.
      const pushed = pushRuntimeSnapshot(controlChannel);
      log.info("cloud control-channel started; runtime_snapshot pushed", {
        ok: pushed,
      });
    } catch (err) {
      // A failed start is not fatal to startup; it is only logged.
      log.warn("cloud control-channel start failed; daemon will retry", {
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }

  const snapshotWriter = new SnapshotWriter({
    path: opts.snapshotPath ?? SNAPSHOT_PATH,
    intervalMs: opts.snapshotIntervalMs ?? resolveSnapshotIntervalMs(),
    snapshot: () => gateway.snapshot(),
    log,
  });
  snapshotWriter.start();

  // Memoised shutdown: the first call does the work, later calls get the
  // same promise back.
  let stopping: Promise<void> | null = null;
  const stop = (reason?: string): Promise<void> => {
    if (stopping) return stopping;
    const meta = { reason: reason ?? null };
    log.info("cloud daemon stopping", meta);
    snapshotWriter.stop();
    snapshotWriter.writeFinal();
    // Control-channel stop errors are swallowed; gateway stop errors are not.
    const controlStopped =
      controlChannel?.stop().catch(() => undefined) ?? Promise.resolve();
    const gatewayStopped = gateway.stop(reason);
    stopping = Promise.all([controlStopped, gatewayStopped])
      .then(() => undefined)
      .finally(() => {
        // Runs whether or not the stops above succeeded.
        snapshotWriter.remove();
        log.info("cloud daemon stopped", { reason: reason ?? null });
      });
    return stopping;
  };

  return {
    stop,
    snapshot: () => gateway.snapshot(),
  };
}
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Cloud-daemon mode detection + env-driven configuration.
3
+ *
4
+ * A "cloud daemon" is a `botcord-daemon` process running inside a Hub-managed
5
+ * E2B sandbox. It is configured exclusively through environment variables
6
+ * (no on-disk `user-auth.json`) and connects to `/cloud/daemon/ws` with a
7
+ * `cloud-daemon-access` JWT instead of the device-code-issued user token.
8
+ *
9
+ * The Hub-side provider that launches the daemon is
10
+ * `backend/hub/services/cloud_daemon_provider_e2b.py` — keep the env-var
11
+ * names below in sync with `_build_env` there.
12
+ *
13
+ * See ``docs/cloud-agent-technical-design.md`` §3-4.
14
+ */
15
+
16
/** Names of the environment variables read for cloud-daemon mode. */
export const CLOUD_ENV_VARS = {
  HUB_URL: "BOTCORD_HUB_URL",
  CLOUD_DAEMON_INSTANCE_ID: "BOTCORD_CLOUD_DAEMON_INSTANCE_ID",
  DAEMON_INSTANCE_ID: "BOTCORD_DAEMON_INSTANCE_ID",
  ACCESS_TOKEN: "BOTCORD_CLOUD_DAEMON_ACCESS_TOKEN",
} as const;

/**
 * Resolved cloud-mode configuration. Every field is a non-empty string:
 * {@link loadCloudModeConfig} throws rather than return a partial config.
 */
export interface CloudModeConfig {
  hubUrl: string;
  cloudDaemonInstanceId: string;
  daemonInstanceId: string;
  accessToken: string;
}

/**
 * Report whether the process should run in cloud-daemon mode.
 *
 * Only `BOTCORD_CLOUD_DAEMON_ACCESS_TOKEN` is consulted; the other cloud
 * variables may be set or unset without affecting the answer.
 *
 * @param env Environment to inspect; defaults to `process.env`.
 * @returns `true` when the access-token variable is a non-empty string.
 */
export function isCloudMode(env: NodeJS.ProcessEnv = process.env): boolean {
  const token = env[CLOUD_ENV_VARS.ACCESS_TOKEN];
  return typeof token === "string" && token !== "";
}

/**
 * Build the cloud-mode configuration from environment variables.
 *
 * All four variables in {@link CLOUD_ENV_VARS} are required, including
 * `BOTCORD_DAEMON_INSTANCE_ID` — there is no fallback to the cloud daemon
 * instance id. Variables are checked in the order hub URL, cloud daemon
 * instance id, daemon instance id, access token, and the first one that is
 * missing or empty is reported.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns The resolved configuration.
 * @throws Error naming the first required variable that is missing or empty,
 *   so the daemon fails fast instead of continuing with partial cloud config.
 */
export function loadCloudModeConfig(
  env: NodeJS.ProcessEnv = process.env,
): CloudModeConfig {
  // Read one variable, rejecting both "unset" and "set to the empty string".
  const read = (name: string): string => {
    const value = env[name];
    if (typeof value === "string" && value.length > 0) return value;
    throw new Error(
      `cloud-daemon mode: required env var "${name}" is missing or empty`,
    );
  };

  const hubUrl = read(CLOUD_ENV_VARS.HUB_URL);
  const cloudDaemonInstanceId = read(CLOUD_ENV_VARS.CLOUD_DAEMON_INSTANCE_ID);
  const daemonInstanceId = read(CLOUD_ENV_VARS.DAEMON_INSTANCE_ID);
  const accessToken = read(CLOUD_ENV_VARS.ACCESS_TOKEN);
  return { hubUrl, cloudDaemonInstanceId, daemonInstanceId, accessToken };
}