@botcord/daemon 0.2.75 → 0.2.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cloud-auth.d.ts +47 -0
- package/dist/cloud-auth.js +51 -0
- package/dist/cloud-daemon.d.ts +43 -0
- package/dist/cloud-daemon.js +252 -0
- package/dist/cloud-mode.d.ts +45 -0
- package/dist/cloud-mode.js +55 -0
- package/dist/cloud-settle.d.ts +81 -0
- package/dist/cloud-settle.js +100 -0
- package/dist/daemon-singleton.d.ts +26 -0
- package/dist/daemon-singleton.js +91 -0
- package/dist/daemon.d.ts +1 -1
- package/dist/daemon.js +15 -6
- package/dist/doctor.d.ts +4 -1
- package/dist/doctor.js +15 -4
- package/dist/gateway/channels/botcord.d.ts +1 -1
- package/dist/gateway/channels/botcord.js +48 -5
- package/dist/gateway/dispatcher.d.ts +34 -1
- package/dist/gateway/dispatcher.js +277 -20
- package/dist/gateway/gateway.d.ts +9 -1
- package/dist/gateway/gateway.js +4 -1
- package/dist/gateway/runtime-errors.d.ts +6 -0
- package/dist/gateway/runtime-errors.js +14 -0
- package/dist/gateway/runtimes/claude-code.d.ts +8 -0
- package/dist/gateway/runtimes/claude-code.js +92 -4
- package/dist/gateway/runtimes/deepseek-tui.js +19 -5
- package/dist/gateway/transcript.d.ts +1 -1
- package/dist/gateway/types.d.ts +33 -0
- package/dist/index.js +71 -80
- package/dist/provision.d.ts +2 -0
- package/dist/provision.js +39 -1
- package/dist/status-render.js +17 -0
- package/package.json +2 -2
- package/src/__tests__/cloud-auth.test.ts +42 -0
- package/src/__tests__/cloud-daemon.test.ts +237 -0
- package/src/__tests__/cloud-mode.test.ts +65 -0
- package/src/__tests__/cloud-settle.test.ts +287 -0
- package/src/__tests__/daemon-singleton.test.ts +89 -0
- package/src/__tests__/doctor.test.ts +34 -0
- package/src/__tests__/runtime-discovery.test.ts +90 -0
- package/src/__tests__/status-render.test.ts +34 -0
- package/src/cloud-auth.ts +78 -0
- package/src/cloud-daemon.ts +338 -0
- package/src/cloud-mode.ts +70 -0
- package/src/cloud-settle.ts +182 -0
- package/src/daemon-singleton.ts +122 -0
- package/src/daemon.ts +18 -5
- package/src/doctor.ts +18 -5
- package/src/gateway/__tests__/botcord-channel.test.ts +74 -0
- package/src/gateway/__tests__/claude-code-adapter.test.ts +101 -1
- package/src/gateway/__tests__/deepseek-tui-adapter.test.ts +19 -0
- package/src/gateway/__tests__/dispatcher.test.ts +120 -0
- package/src/gateway/channels/botcord.ts +54 -7
- package/src/gateway/dispatcher.ts +354 -21
- package/src/gateway/gateway.ts +16 -1
- package/src/gateway/runtime-errors.ts +15 -0
- package/src/gateway/runtimes/claude-code.ts +98 -2
- package/src/gateway/runtimes/deepseek-tui.ts +23 -5
- package/src/gateway/transcript.ts +1 -1
- package/src/gateway/types.ts +34 -0
- package/src/index.ts +83 -74
- package/src/provision.ts +45 -1
- package/src/status-render.ts +24 -0
|
@@ -98,6 +98,18 @@ describe("probeChannel", () => {
|
|
|
98
98
|
expect(result.hubMessage).toContain("503");
|
|
99
99
|
});
|
|
100
100
|
|
|
101
|
+
it("treats HTTP 404 as reachable because the probe only needs the Hub host", async () => {
  // A 404 still proves the Hub host answered, so the probe must report success
  // while surfacing the status code in the message.
  const credsPath = "/creds/ag_123.json";
  const files = fileReader({ [credsPath]: okCreds });
  const notFound = fetcher({ ok: false, status: 404 });

  const { hubOk, hubMessage } = await probeChannel(ch, {
    credentialsPath: () => credsPath,
    fileReader: files,
    fetcher: notFound,
    timeoutMs: 1000,
  });

  expect(hubOk).toBe(true);
  expect(hubMessage).toContain("404");
});
|
|
112
|
+
|
|
101
113
|
it("probeChannels returns an entry per input channel", async () => {
|
|
102
114
|
const credsPath = "/creds/ag_123.json";
|
|
103
115
|
const results = await probeChannels({
|
|
@@ -200,6 +212,28 @@ describe("renderDoctor", () => {
|
|
|
200
212
|
expect(out).toContain("✓");
|
|
201
213
|
});
|
|
202
214
|
|
|
215
|
+
it("renders optional runtime auth check results", () => {
  // A runtime whose binary is available but whose auth probe failed.
  const authMessage = "Failed to authenticate. API Error: 403 Request not allowed";
  const out = renderDoctor({
    runtimes: [
      {
        id: "claude-code",
        displayName: "Claude Code",
        binary: "claude",
        supportsRun: true,
        result: { available: true, version: "1.0.0", path: "/usr/bin/claude" },
        auth: { checked: true, ok: false, message: authMessage },
      },
    ],
    channels: [],
  });

  // Both the failure label and the underlying error text must be rendered.
  for (const expected of ["auth failed", "Failed to authenticate"]) {
    expect(out).toContain(expected);
  }
});
|
|
236
|
+
|
|
203
237
|
it("shows 'No channels configured.' when the channel list is empty", () => {
|
|
204
238
|
const out = renderDoctor({
|
|
205
239
|
runtimes: [
|
|
@@ -28,6 +28,7 @@ vi.mock("../adapters/runtimes.js", async () => {
|
|
|
28
28
|
});
|
|
29
29
|
|
|
30
30
|
const {
|
|
31
|
+
attachRuntimeHealth,
|
|
31
32
|
collectRuntimeSnapshot,
|
|
32
33
|
collectRuntimeSnapshotAsync,
|
|
33
34
|
clearRuntimeProbeCache,
|
|
@@ -366,4 +367,93 @@ describe("pushRuntimeSnapshot (first-connect push)", () => {
|
|
|
366
367
|
expect(ok).toBe(false);
|
|
367
368
|
expect(send).toHaveBeenCalledOnce();
|
|
368
369
|
});
|
|
370
|
+
|
|
371
|
+
it("attaches live runtime circuit breaker health to the pushed runtime entry", () => {
  const breakerKey = "claude-code:botcord:ag_1:rm_oc_a:";
  setRuntimes([
    {
      id: "claude-code",
      displayName: "Claude Code",
      binary: "claude",
      supportsRun: true,
      result: { available: true },
    },
  ]);
  const send = vi.fn(() => true);

  // Push with a gateway snapshot that carries one open breaker for claude-code.
  const ok = pushRuntimeSnapshot(
    { send },
    {
      channels: {},
      turns: {},
      runtimeCircuitBreakers: {
        [breakerKey]: {
          key: breakerKey,
          runtime: "claude-code",
          channel: "botcord",
          accountId: "ag_1",
          conversationId: "rm_oc_a",
          threadId: null,
          failures: 3,
          openedAt: 1000,
          blockedUntil: 2000,
          lastFailureAt: 1500,
          lastError: "Failed to authenticate",
        },
      },
    },
  );
  expect(ok).toBe(true);

  // The first frame handed to `send` is the runtime snapshot.
  const frame = send.mock.calls[0]![0] as {
    params: { runtimes: Array<{ id: string; health?: { circuitBreakers?: unknown[] } }> };
  };
  const { runtimes } = frame.params;
  expect(runtimes[0].health?.circuitBreakers).toEqual([
    expect.objectContaining({
      conversationId: "rm_oc_a",
      failures: 3,
      lastError: "Failed to authenticate",
    }),
  ]);
});
|
|
416
|
+
});
|
|
417
|
+
|
|
418
|
+
describe("attachRuntimeHealth", () => {
  it("groups live circuit breakers onto matching runtime entries", () => {
    const breakerKey = "claude-code:botcord:ag_1:rm_oc_a:";
    const snap = {
      runtimes: [
        { id: "claude-code", available: true },
        { id: "codex", available: true },
      ],
      probedAt: 1000,
    };

    const { runtimes } = attachRuntimeHealth(snap, {
      channels: {},
      turns: {},
      runtimeCircuitBreakers: {
        [breakerKey]: {
          key: breakerKey,
          runtime: "claude-code",
          channel: "botcord",
          accountId: "ag_1",
          conversationId: "rm_oc_a",
          threadId: null,
          failures: 3,
          openedAt: 1000,
          blockedUntil: 2000,
          lastFailureAt: 1500,
          lastError: "Failed to authenticate",
        },
      },
    });

    // Only the runtime named by the breaker gains a `health` section...
    expect(runtimes[0]).toMatchObject({
      id: "claude-code",
      health: {
        circuitBreakers: [
          {
            conversationId: "rm_oc_a",
            lastError: "Failed to authenticate",
          },
        ],
      },
    });
    // ...and a runtime without breakers is passed through unchanged.
    expect(runtimes[1]).toEqual({ id: "codex", available: true });
  });
});
|
|
@@ -8,6 +8,7 @@ function snapshot(
|
|
|
8
8
|
return {
|
|
9
9
|
channels: overrides.channels ?? {},
|
|
10
10
|
turns: overrides.turns ?? {},
|
|
11
|
+
runtimeCircuitBreakers: overrides.runtimeCircuitBreakers,
|
|
11
12
|
};
|
|
12
13
|
}
|
|
13
14
|
|
|
@@ -69,6 +70,7 @@ describe("renderStatus", () => {
|
|
|
69
70
|
expect(out).toContain("rm_oc_abc");
|
|
70
71
|
expect(out).toContain("claude-code");
|
|
71
72
|
expect(out).toMatch(/12s ago/);
|
|
73
|
+
expect(out).toContain("Runtime circuit breakers:");
|
|
72
74
|
expect(out).not.toContain("⚠ stale");
|
|
73
75
|
});
|
|
74
76
|
|
|
@@ -134,4 +136,36 @@ describe("renderStatus", () => {
|
|
|
134
136
|
expect(out).toContain("Channels:");
|
|
135
137
|
expect(out).toContain("(none)");
|
|
136
138
|
});
|
|
139
|
+
|
|
140
|
+
it("renders open runtime auth circuit breakers", () => {
  const now = 1_700_000_000_000;
  const breakerKey = "claude-code:botcord:ag_1:rm_oc_a:";

  // One breaker opened a second ago and still blocked for another minute.
  const out = renderStatus(
    {
      pid: 1,
      alive: true,
      snapshot: snapshot({
        runtimeCircuitBreakers: {
          [breakerKey]: {
            key: breakerKey,
            runtime: "claude-code",
            channel: "botcord",
            accountId: "ag_1",
            conversationId: "rm_oc_a",
            failures: 3,
            openedAt: now - 1000,
            blockedUntil: now + 60_000,
            lastFailureAt: now - 1000,
            lastError: "Failed to authenticate",
          },
        },
      }),
      snapshotAgeMs: 100,
    },
    now,
  );

  // Section header, runtime id, conversation id and the last error all appear.
  for (const expected of [
    "Runtime circuit breakers:",
    "claude-code",
    "rm_oc_a",
    "Failed to authenticate",
  ]) {
    expect(out).toContain(expected);
  }
});
|
|
137
171
|
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cloud-daemon auth manager.
|
|
3
|
+
*
|
|
4
|
+
* Implements the subset of `UserAuthManager` surface that `ControlChannel`
|
|
5
|
+
* uses (`current`, `ensureAccessToken`) so the same channel implementation
|
|
6
|
+
* can be reused for `/cloud/daemon/ws`. Unlike the user variant there is
|
|
7
|
+
* no refresh token: the Hub-managed E2B provider rotates the access token
|
|
8
|
+
* by relaunching the daemon. When the embedded JWT expires, the WS server
|
|
9
|
+
* closes with 4401 and `ControlChannel.onClose` writes the auth-expired
|
|
10
|
+
* flag — at which point the provider would resume the sandbox with a
|
|
11
|
+
* fresh token.
|
|
12
|
+
*
|
|
13
|
+
* Plan §6.4: `auth-expired.flag` is still written so any external monitor
|
|
14
|
+
* watching the sandbox filesystem can detect the situation; the cloud
|
|
15
|
+
* provider doesn't read this file directly today (it relies on
|
|
16
|
+
* `daemon_instances.last_seen_at` going stale instead).
|
|
17
|
+
*
|
|
18
|
+
* Field names match `UserAuthRecord` for drop-in compatibility with
|
|
19
|
+
* `ControlChannel.start()` which reads `auth.current.{userId,hubUrl,label}`.
|
|
20
|
+
*/
|
|
21
|
+
import type { UserAuthRecord, UserAuthManager } from "./user-auth.js";
|
|
22
|
+
import type { CloudModeConfig } from "./cloud-mode.js";
|
|
23
|
+
|
|
24
|
+
/**
 * Auth holder for cloud mode, built once from the env-derived
 * {@link CloudModeConfig}. It exposes the two members `ControlChannel` reads
 * from a `UserAuthManager` (`current` and `ensureAccessToken`) so the same
 * channel code can serve the cloud endpoint.
 */
export class CloudAuthManager {
  private record: UserAuthRecord;

  /**
   * @param cfg Resolved cloud-mode configuration; its values are copied into a
   *   `UserAuthRecord`-shaped object at construction time.
   */
  constructor(cfg: CloudModeConfig) {
    const { cloudDaemonInstanceId, daemonInstanceId, hubUrl, accessToken } = cfg;
    this.record = {
      version: 1,
      // The env does not carry the owning user's id (the JWT holds it on the
      // server side), so the cloud daemon instance id doubles as a stable
      // identity string for logs.
      userId: cloudDaemonInstanceId,
      daemonInstanceId,
      hubUrl,
      accessToken,
      // Cloud mode has no refresh token; an empty string keeps the record
      // shape intact. The far-future `expiresAt` is meant to keep callers off
      // the refresh path; expiry is signalled by the Hub closing the WS with
      // 4401 instead.
      refreshToken: "",
      // NOTE(review): Number.MAX_SAFE_INTEGER is larger than the maximum valid
      // Date time value (8.64e15 ms), so formatting this through `new Date(...)`
      // yields an Invalid Date — confirm no consumer renders `expiresAt`.
      expiresAt: Number.MAX_SAFE_INTEGER,
      loggedInAt: new Date().toISOString(),
      label: `cloud:${cloudDaemonInstanceId}`,
    };
  }

  /** The auth record assembled in the constructor (same object on every read). */
  get current(): UserAuthRecord {
    return this.record;
  }

  /**
   * Resolves with the access token supplied at construction. Nothing is
   * refreshed locally: per the cloud design the provider rotates the token by
   * relaunching the daemon rather than through a refresh endpoint.
   */
  async ensureAccessToken(): Promise<string> {
    const { accessToken } = this.record;
    return accessToken;
  }
}
|
|
67
|
+
|
|
68
|
+
/**
 * Re-types a {@link CloudAuthManager} as a `UserAuthManager` so it can be
 * handed to `ControlChannel`.
 *
 * This is a compile-time cast only: the very same object is returned and no
 * members are added or translated. Keeping it in one helper means the cast is
 * written (and explained) in a single place.
 *
 * @param mgr The cloud auth manager to expose.
 * @returns `mgr` itself, typed as `UserAuthManager`.
 */
export function asUserAuthManager(mgr: CloudAuthManager): UserAuthManager {
  const widened: unknown = mgr;
  return widened as UserAuthManager;
}
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cloud-daemon mode runtime entrypoint.
|
|
3
|
+
*
|
|
4
|
+
* Equivalent to {@link startDaemon} for cloud-mode operation: skips local
|
|
5
|
+
* user-auth, skips local on-disk credentials, dials
|
|
6
|
+
* `${HUB_URL}/cloud/daemon/ws` with the env-injected JWT, and reuses the
|
|
7
|
+
* existing provisioner so `provision_agent` / `revoke_agent` frames work
|
|
8
|
+
* the same way they do for local daemons.
|
|
9
|
+
*
|
|
10
|
+
* See ``docs/cloud-agent-technical-design.md`` §4 + §6.
|
|
11
|
+
*/
|
|
12
|
+
import { shouldWake, type AttentionPolicy } from "@botcord/protocol-core";
|
|
13
|
+
import {
|
|
14
|
+
Gateway,
|
|
15
|
+
resolveTranscriptEnabled,
|
|
16
|
+
type ChannelAdapter,
|
|
17
|
+
type GatewayChannelConfig,
|
|
18
|
+
type GatewayInboundMessage,
|
|
19
|
+
type GatewayLogger,
|
|
20
|
+
type GatewayRuntimeSnapshot,
|
|
21
|
+
} from "./gateway/index.js";
|
|
22
|
+
import { ActivityTracker } from "./activity-tracker.js";
|
|
23
|
+
import type { DaemonConfig } from "./config.js";
|
|
24
|
+
import { SESSIONS_PATH, SNAPSHOT_PATH } from "./config.js";
|
|
25
|
+
import { ControlChannel } from "./control-channel.js";
|
|
26
|
+
import { toGatewayConfig } from "./daemon-config-map.js";
|
|
27
|
+
import { log as daemonLog } from "./log.js";
|
|
28
|
+
import { createProvisioner } from "./provision.js";
|
|
29
|
+
import { createDaemonChannel, pushRuntimeSnapshot } from "./daemon.js";
|
|
30
|
+
import { SnapshotWriter } from "./snapshot-writer.js";
|
|
31
|
+
import { createDaemonSystemContextBuilder } from "./system-context.js";
|
|
32
|
+
import { readWorkingMemorySnapshot } from "./working-memory.js";
|
|
33
|
+
import { createRoomStaticContextBuilder } from "./room-context.js";
|
|
34
|
+
import { createRoomContextFetcher } from "./room-context-fetcher.js";
|
|
35
|
+
import { composeBotCordUserTurn } from "./turn-text.js";
|
|
36
|
+
import { PolicyResolver, type DaemonAttentionPolicy } from "./gateway/policy-resolver.js";
|
|
37
|
+
import { scanMention } from "./mention-scan.js";
|
|
38
|
+
import { createActivityRecorder } from "./daemon.js";
|
|
39
|
+
import { CloudAuthManager, asUserAuthManager } from "./cloud-auth.js";
|
|
40
|
+
import type { CloudModeConfig } from "./cloud-mode.js";
|
|
41
|
+
import { buildCloudRunSettleHook } from "./cloud-settle.js";
|
|
42
|
+
import type { InstalledAgentInfo, OnAgentInstalledHook } from "./provision.js";
|
|
43
|
+
|
|
44
|
+
// Timing defaults for the cloud daemon. Per the original note these follow the
// same cadence as the local daemon so dashboard behavior is identical across
// both kinds.
const DEFAULT_TURN_TIMEOUT_MS = 30 * 60 * 1000;
const DEFAULT_SNAPSHOT_INTERVAL_MS = 5_000;

/**
 * Snapshot write interval in milliseconds.
 *
 * Reads `BOTCORD_DAEMON_SNAPSHOT_INTERVAL_MS`; when it is unset, empty, or does
 * not convert (via `Number`) to a finite number greater than zero, the
 * built-in default is returned instead.
 */
function resolveSnapshotIntervalMs(): number {
  const override = process.env.BOTCORD_DAEMON_SNAPSHOT_INTERVAL_MS;
  if (override === undefined || override === "") {
    return DEFAULT_SNAPSHOT_INTERVAL_MS;
  }
  const parsed = Number(override);
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? parsed : DEFAULT_SNAPSHOT_INTERVAL_MS;
}
|
|
56
|
+
|
|
57
|
+
/** Inputs for {@link startCloudDaemon}. */
export interface CloudDaemonRuntimeOptions {
  /** Cloud configuration resolved from the environment (see {@link loadCloudModeConfig}). */
  cloudConfig: CloudModeConfig;
  /**
   * Initial daemon configuration. A cloud daemon boots without agents and only
   * gains them through `provision_agent` frames on the cloud control plane, so
   * the `agents[]` / `routes[]` arrays are expected to be seeded empty.
   */
  config: DaemonConfig;
  configPath: string;
  /** Session store location; `SESSIONS_PATH` is used when omitted. */
  sessionStorePath?: string;
  /** Snapshot file location; `SNAPSHOT_PATH` is used when omitted. */
  snapshotPath?: string;
  /** Snapshot write interval in ms; resolved from the environment when omitted. */
  snapshotIntervalMs?: number;
  /** Logger override; the daemon logger is wrapped when omitted. */
  log?: GatewayLogger;
  /** Test hook: substitute the control-channel constructor. */
  controlChannelFactory?: typeof ControlChannel;
  /** Do not create a control channel at all (for tests that exercise only the gateway). */
  disableControlChannel?: boolean;
  /**
   * Test hook: supply the provisioner factory. Defaults to
   * `createProvisioner`, invoked with `{ gateway, policyResolver, onAgentInstalled }`.
   */
  provisionerFactory?: typeof createProvisioner;
}
|
|
82
|
+
|
|
83
|
+
/** What {@link startCloudDaemon} resolves with: the controls for a running cloud daemon. */
export interface CloudDaemonHandle {
  /** Shuts the daemon down; the optional reason is logged and passed to the gateway. */
  stop: (reason?: string) => Promise<void>;
  /** Returns the gateway's current runtime snapshot. */
  snapshot: () => GatewayRuntimeSnapshot;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Picks the logger the cloud daemon will use.
 *
 * @param opt Caller-supplied logger, if any.
 * @returns `opt` when provided; otherwise an adapter whose four level methods
 *   forward the message and metadata to the daemon-wide logger.
 */
function buildLogger(opt: GatewayLogger | undefined): GatewayLogger {
  return opt
    ? opt
    : {
        info: (msg, meta) => daemonLog.info(msg, meta),
        warn: (msg, meta) => daemonLog.warn(msg, meta),
        error: (msg, meta) => daemonLog.error(msg, meta),
        debug: (msg, meta) => daemonLog.debug(msg, meta),
      };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Start a daemon in cloud mode.
 *
 * The gateway is brought up with no channels; agents are added later when
 * `provision_agent` frames arrive and are handled by the shared provisioner.
 * Compared with the local daemon, what differs here is the control-channel
 * auth (env-injected token wrapped in {@link CloudAuthManager}), the WS path
 * (`/cloud/daemon/ws`), and the lack of a local user-auth file.
 *
 * @param opts See {@link CloudDaemonRuntimeOptions}.
 * @returns A handle exposing `stop` and `snapshot`.
 */
export async function startCloudDaemon(
  opts: CloudDaemonRuntimeOptions,
): Promise<CloudDaemonHandle> {
  const logger = buildLogger(opts.log);
  const { cloudConfig: cloudCfg } = opts;

  logger.info("cloud daemon starting", {
    cloudDaemonInstanceId: cloudCfg.cloudDaemonInstanceId,
    daemonInstanceId: cloudCfg.daemonInstanceId,
    hubUrl: cloudCfg.hubUrl,
  });

  // Per-agent state. Everything starts empty because no agent is bound at
  // boot; `onAgentInstalled` fills these in as agents are provisioned.
  const activityTracker = new ActivityTracker();
  const credentialPathByAgentId = new Map<string, string>();
  const hubUrlByAgentId = new Map<string, string>();
  const displayNameByAgent = new Map<string, string>();

  // Agents without a recorded hub URL resolve to the cloud-mode hub URL.
  const fallbackHubUrl = cloudCfg.hubUrl;
  const resolveHubUrl = (accountId: string): string | undefined => {
    const known = hubUrlByAgentId.get(accountId);
    return known ?? fallbackHubUrl;
  };

  // Translate the daemon config with an empty agent set.
  const gwConfig = toGatewayConfig(opts.config, { agentIds: [], agentRuntimes: {} });

  const roomContextFetcher = createRoomContextFetcher({
    credentialPathByAgentId,
    hubBaseUrl: cloudCfg.hubUrl,
    log: logger,
  });
  const roomContextBuilder = createRoomStaticContextBuilder({
    fetchRoomInfo: roomContextFetcher,
    log: logger,
  });

  // System-context builders are registered per agent and looked up by the
  // inbound message's account id; unknown agents get no system context.
  type PerAgentBuilder = (
    msg: GatewayInboundMessage,
  ) => Promise<string | undefined> | string | undefined;
  const scBuilders = new Map<string, PerAgentBuilder>();
  const buildSystemContext = (
    message: GatewayInboundMessage,
  ): Promise<string | undefined> | string | undefined => {
    const builder = scBuilders.get(message.accountId);
    if (!builder) return undefined;
    return builder(message);
  };
  const buildMemoryContext = (message: GatewayInboundMessage) =>
    readWorkingMemorySnapshot(message.accountId);

  const recordActivity = createActivityRecorder({ activityTracker });
  const onInbound = (msg: GatewayInboundMessage): void => {
    recordActivity(msg);
  };

  // After each turn, hand the envelope type, `cloud_run` run id, wall time and
  // token counts to the settle hook. This function only reshapes the event;
  // the hook itself comes from `buildCloudRunSettleHook`.
  const settleHook = buildCloudRunSettleHook({
    hubUrl: cloudCfg.hubUrl,
    accessToken: cloudCfg.accessToken,
    log: logger,
  });
  type CloudRunEnvelope = {
    type?: string;
    payload?: { cloud_run?: { run_id?: unknown } | null } | null;
  };
  const onTurnComplete = async (event: {
    message: GatewayInboundMessage;
    result?: import("./gateway/types.js").RuntimeRunResult;
    wallTimeMs: number;
    error?: unknown;
  }): Promise<void> => {
    const raw = event.message.raw as { envelope?: unknown } | undefined;
    const envelope = raw?.envelope as CloudRunEnvelope | undefined;
    const rawRunId = envelope?.payload?.cloud_run?.run_id;
    const usage = event.result;
    await settleHook({
      envelopeType: envelope?.type,
      runId: typeof rawRunId === "string" ? rawRunId : undefined,
      wallTimeMs: event.wallTimeMs,
      // Only counters that are actually defined are forwarded.
      tokens: {
        ...(usage?.inputCacheHitTokens !== undefined
          ? { inputCacheHitTokens: usage.inputCacheHitTokens }
          : {}),
        ...(usage?.inputCacheMissTokens !== undefined
          ? { inputCacheMissTokens: usage.inputCacheMissTokens }
          : {}),
        ...(usage?.outputTokens !== undefined
          ? { outputTokens: usage.outputTokens }
          : {}),
      },
      messageId: event.message.id,
    });
  };

  const policyResolver = new PolicyResolver({
    fetchGlobal: async (_agentId: string) => undefined,
  });

  // Decide whether an inbound message should wake the agent.
  const attentionGate = async (msg: GatewayInboundMessage): Promise<boolean> => {
    const policy: DaemonAttentionPolicy = await policyResolver.resolve(
      msg.accountId,
      msg.conversation.id,
    );
    if (policy.mode === "allowed_senders") {
      const allowed = policy.allowedSenderIds ?? [];
      return allowed.includes(msg.sender.id);
    }
    // Combine the message's own `mentioned` flag with a local text scan.
    const localMention = scanMention(msg.text, {
      agentId: msg.accountId,
      displayName: displayNameByAgent.get(msg.accountId),
    });
    return shouldWake(policy as AttentionPolicy, {
      mentioned: msg.mentioned === true || localMention,
      text: msg.text,
    });
  };

  // Record a newly provisioned agent in the per-agent maps and register its
  // system-context builder the first time the agent is seen.
  const onAgentInstalled: OnAgentInstalledHook = (info: InstalledAgentInfo) => {
    const { agentId } = info;
    credentialPathByAgentId.set(agentId, info.credentialsFile);
    if (info.hubUrl) hubUrlByAgentId.set(agentId, info.hubUrl);
    if (info.displayName) displayNameByAgent.set(agentId, info.displayName);
    if (scBuilders.has(agentId)) return;
    scBuilders.set(
      agentId,
      createDaemonSystemContextBuilder({
        agentId,
        activityTracker,
        roomContextBuilder,
        // No loop-risk guard is wired for cloud daemons here; the original
        // note says the runtime adapter's wall-time budget covers it.
        loopRiskBuilder: () => null,
      }),
    );
  };

  const gateway = new Gateway({
    config: gwConfig,
    sessionStorePath: opts.sessionStorePath ?? SESSIONS_PATH,
    createChannel: (chCfg: GatewayChannelConfig): ChannelAdapter =>
      createDaemonChannel(chCfg, {
        credentialPathByAgentId,
        hubBaseUrl: cloudCfg.hubUrl,
      }),
    log: logger,
    turnTimeoutMs: DEFAULT_TURN_TIMEOUT_MS,
    buildSystemContext,
    buildMemoryContext,
    onInbound,
    onTurnComplete,
    composeUserTurn: composeBotCordUserTurn,
    attentionGate,
    resolveHubUrl,
    transcriptEnabled: resolveTranscriptEnabled(
      process.env.BOTCORD_TRANSCRIPT,
      opts.config.transcript?.enabled,
    ),
  });

  await gateway.start();
  logger.info("cloud daemon gateway started (zero agents at boot)");

  let controlChannel: ControlChannel | null = null;
  if (!opts.disableControlChannel) {
    const auth = asUserAuthManager(new CloudAuthManager(cloudCfg));
    const makeProvisioner = opts.provisionerFactory ?? createProvisioner;
    const provisioner = makeProvisioner({
      gateway,
      policyResolver,
      onAgentInstalled,
    });
    const ControlChannelCtor = opts.controlChannelFactory ?? ControlChannel;
    controlChannel = new ControlChannelCtor({
      auth,
      // Cloud daemons dial a dedicated WS path; frames go to the provisioner.
      path: "/cloud/daemon/ws",
      handle: async (frame) => provisioner(frame),
      label: `cloud:${cloudCfg.cloudDaemonInstanceId}`,
    });
    try {
      await controlChannel.start();
      // Push a runtime snapshot as soon as the channel is up.
      const pushed = pushRuntimeSnapshot(controlChannel);
      logger.info("cloud control-channel started; runtime_snapshot pushed", {
        ok: pushed,
      });
    } catch (err) {
      // A failed start is logged, not fatal: boot continues.
      const message = err instanceof Error ? err.message : String(err);
      logger.warn("cloud control-channel start failed; daemon will retry", {
        error: message,
      });
    }
  }

  const snapshotWriter = new SnapshotWriter({
    path: opts.snapshotPath ?? SNAPSHOT_PATH,
    intervalMs: opts.snapshotIntervalMs ?? resolveSnapshotIntervalMs(),
    snapshot: () => gateway.snapshot(),
    log: logger,
  });
  snapshotWriter.start();

  // `stop` is idempotent: the first call creates the shutdown promise and
  // every later call returns that same promise.
  let stopping: Promise<void> | null = null;
  const stop = (reason?: string): Promise<void> => {
    if (stopping) return stopping;
    const reasonMeta = reason ?? null;
    logger.info("cloud daemon stopping", { reason: reasonMeta });
    snapshotWriter.stop();
    snapshotWriter.writeFinal();
    // Control-channel stop errors are swallowed so they cannot block shutdown.
    const controlStopped = controlChannel
      ? controlChannel.stop().catch(() => undefined)
      : Promise.resolve();
    const gatewayStopped = gateway.stop(reason);
    stopping = Promise.all([controlStopped, gatewayStopped])
      .then(() => undefined)
      .finally(() => {
        snapshotWriter.remove();
        logger.info("cloud daemon stopped", { reason: reasonMeta });
      });
    return stopping;
  };

  return {
    stop,
    snapshot: () => gateway.snapshot(),
  };
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cloud-daemon mode detection + env-driven configuration.
|
|
3
|
+
*
|
|
4
|
+
* A "cloud daemon" is a `botcord-daemon` process running inside a Hub-managed
|
|
5
|
+
* E2B sandbox. It is configured exclusively through environment variables
|
|
6
|
+
* (no on-disk `user-auth.json`) and connects to `/cloud/daemon/ws` with a
|
|
7
|
+
* `cloud-daemon-access` JWT instead of the device-code-issued user token.
|
|
8
|
+
*
|
|
9
|
+
* The Hub-side provider that launches the daemon is
|
|
10
|
+
* `backend/hub/services/cloud_daemon_provider_e2b.py` — keep the env-var
|
|
11
|
+
* names below in sync with `_build_env` there.
|
|
12
|
+
*
|
|
13
|
+
* See ``docs/cloud-agent-technical-design.md`` §3-4.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/** Names of the environment variables the cloud provider injects. */
export const CLOUD_ENV_VARS = {
  HUB_URL: "BOTCORD_HUB_URL",
  CLOUD_DAEMON_INSTANCE_ID: "BOTCORD_CLOUD_DAEMON_INSTANCE_ID",
  DAEMON_INSTANCE_ID: "BOTCORD_DAEMON_INSTANCE_ID",
  ACCESS_TOKEN: "BOTCORD_CLOUD_DAEMON_ACCESS_TOKEN",
} as const;

/**
 * Resolved cloud-mode configuration. Every field is a non-empty string once
 * {@link loadCloudModeConfig} has returned.
 */
export interface CloudModeConfig {
  hubUrl: string;
  cloudDaemonInstanceId: string;
  daemonInstanceId: string;
  accessToken: string;
}

/**
 * Detection signal — true when `BOTCORD_CLOUD_DAEMON_ACCESS_TOKEN` is set to a
 * non-empty string.
 *
 * Only the access token flips the mode; the other cloud env vars may be set
 * (e.g. during development) without enabling cloud mode.
 *
 * @param env Environment to inspect; defaults to `process.env`.
 */
export function isCloudMode(env: NodeJS.ProcessEnv = process.env): boolean {
  const token = env[CLOUD_ENV_VARS.ACCESS_TOKEN];
  return typeof token === "string" && token.length > 0;
}

/**
 * Resolve the cloud-mode configuration from env vars.
 *
 * `BOTCORD_HUB_URL`, `BOTCORD_CLOUD_DAEMON_INSTANCE_ID` and
 * `BOTCORD_CLOUD_DAEMON_ACCESS_TOKEN` are required; a missing or empty value
 * throws so the daemon fails fast instead of running with partial cloud
 * config. `BOTCORD_DAEMON_INSTANCE_ID` falls back to the cloud daemon instance
 * id when it is missing or empty (the documented allowance for tests; the
 * provider is expected to always set it in production).
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns The resolved configuration.
 * @throws Error naming the first required variable that is missing or empty.
 */
export function loadCloudModeConfig(
  env: NodeJS.ProcessEnv = process.env,
): CloudModeConfig {
  // Empty strings are treated the same as an unset variable.
  const readNonEmpty = (name: string): string | undefined => {
    const v = env[name];
    return typeof v === "string" && v.length > 0 ? v : undefined;
  };
  const requireString = (name: string): string => {
    const v = readNonEmpty(name);
    if (v === undefined) {
      throw new Error(
        `cloud-daemon mode: required env var "${name}" is missing or empty`,
      );
    }
    return v;
  };

  // Required values are validated in declaration order so the error always
  // names the first missing variable.
  const hubUrl = requireString(CLOUD_ENV_VARS.HUB_URL);
  const cloudDaemonInstanceId = requireString(CLOUD_ENV_VARS.CLOUD_DAEMON_INSTANCE_ID);
  const daemonInstanceId =
    readNonEmpty(CLOUD_ENV_VARS.DAEMON_INSTANCE_ID) ?? cloudDaemonInstanceId;
  const accessToken = requireString(CLOUD_ENV_VARS.ACCESS_TOKEN);

  return { hubUrl, cloudDaemonInstanceId, daemonInstanceId, accessToken };
}
|