@botcord/daemon 0.2.62 → 0.2.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import type { LogFileEntry } from "./log.js";
2
- export type AcpTraceStream = "child_start" | "child_exit" | "child_error" | "stderr" | "stdout_non_json" | "rpc_in" | "rpc_out";
2
+ export type AcpTraceStream = "child_start" | "child_exit" | "child_error" | "stderr" | "stdout_non_json" | "turn_context" | "rpc_in" | "rpc_out";
3
3
  export interface AcpTraceMeta {
4
4
  runtime: string;
5
5
  accountId?: string;
@@ -13,6 +13,10 @@ export interface AcpTraceMeta {
13
13
  }
14
14
  export interface AcpTraceEvent {
15
15
  stream: AcpTraceStream;
16
+ turnId?: string;
17
+ messageId?: string;
18
+ roomId?: string;
19
+ topicId?: string | null;
16
20
  direction?: "in" | "out";
17
21
  pid?: number;
18
22
  id?: number | string;
package/dist/acp-logs.js CHANGED
@@ -64,9 +64,10 @@ function writeAcpTrace(file, meta, event, verbose) {
64
64
  ts: new Date().toISOString(),
65
65
  runtime: meta.runtime,
66
66
  accountId: meta.accountId,
67
- turnId: meta.turnId,
68
- roomId: meta.roomId,
69
- topicId: meta.topicId ?? undefined,
67
+ turnId: event.turnId ?? meta.turnId,
68
+ messageId: event.messageId,
69
+ roomId: event.roomId ?? meta.roomId,
70
+ topicId: event.topicId ?? meta.topicId ?? undefined,
70
71
  gatewayName: meta.gatewayName,
71
72
  gatewayUrl: meta.gatewayUrl,
72
73
  hermesProfile: meta.hermesProfile,
package/dist/config.d.ts CHANGED
@@ -139,7 +139,7 @@ export interface DaemonConfig {
139
139
  streamBlocks: boolean;
140
140
  /**
141
141
  * Persistent transcript-logging settings (design §3 / §6). Defaults to
142
- * disabled — see `BOTCORD_TRANSCRIPT` for env-driven temporary overrides.
142
+ * enabled — see `BOTCORD_TRANSCRIPT` for env-driven temporary overrides.
143
143
  */
144
144
  transcript?: TranscriptConfig;
145
145
  /**
@@ -162,8 +162,8 @@ export interface DaemonConfig {
162
162
  thirdPartyGateways?: ThirdPartyGatewayProfile[];
163
163
  }
164
164
  /**
165
- * Persistent transcript settings (design §6). Default-off — `botcord-daemon
166
- * transcript enable` flips `enabled` and `transcript disable` flips it back.
165
+ * Persistent transcript settings (design §6). Default-on — `botcord-daemon
166
+ * transcript disable` sets `enabled=false`, and `transcript enable` flips it back.
167
167
  * The env var `BOTCORD_TRANSCRIPT` can override at boot.
168
168
  */
169
169
  export interface TranscriptConfig {
@@ -73,6 +73,7 @@ export declare class ControlChannel {
73
73
  private readonly seenFrameIds;
74
74
  private connectInflight;
75
75
  private connected;
76
+ private connectionSeq;
76
77
  constructor(opts: ControlChannelOptions);
77
78
  /** True once the initial WS handshake succeeded. Flipped back on close. */
78
79
  get isConnected(): boolean;
@@ -12,6 +12,7 @@ import { log as daemonLog } from "./log.js";
12
12
  import { AuthRefreshRejectedError, writeAuthExpiredFlag, } from "./user-auth.js";
13
13
  /** Exponential backoff plan for transient disconnects. */
14
14
  const RECONNECT_BACKOFF_MS = [1000, 2000, 4000, 8000, 16000, 30000];
15
+ const RECONNECT_JITTER_RATIO = 0.25;
15
16
  /**
16
17
  * Keepalive cadence. Has to stay below the smallest idle-timeout in any
17
18
  * intermediary on the daemon → Hub WS path. Cloudflare and AWS ALB both
@@ -38,6 +39,10 @@ export function controlSigningInput(frame) {
38
39
  };
39
40
  return jcsCanonicalize(obj) ?? "{}";
40
41
  }
42
+ function withReconnectJitter(delayMs) {
43
+ const jitterMs = Math.floor(Math.random() * delayMs * RECONNECT_JITTER_RATIO);
44
+ return { delayMs: delayMs + jitterMs, jitterMs };
45
+ }
41
46
  /**
42
47
  * Long-lived, self-healing WS connection that carries control frames
43
48
  * between the Hub and the local daemon. Owns reconnect/backoff and
@@ -60,6 +65,7 @@ export class ControlChannel {
60
65
  seenFrameIds = [];
61
66
  connectInflight = null;
62
67
  connected = false;
68
+ connectionSeq = 0;
63
69
  constructor(opts) {
64
70
  this.auth = opts.auth;
65
71
  this.handle = opts.handle;
@@ -170,9 +176,22 @@ export class ControlChannel {
170
176
  const record = this.auth.current;
171
177
  if (!record)
172
178
  throw new Error("control-channel: no user-auth");
179
+ const current = this.ws;
180
+ if (current &&
181
+ (current.readyState === WebSocket.CONNECTING || current.readyState === WebSocket.OPEN)) {
182
+ daemonLog.debug("control-channel connect skipped (socket already active)", {
183
+ readyState: current.readyState,
184
+ });
185
+ return;
186
+ }
187
+ if (this.reconnectTimer) {
188
+ clearTimeout(this.reconnectTimer);
189
+ this.reconnectTimer = null;
190
+ }
173
191
  const accessToken = await this.auth.ensureAccessToken();
174
192
  const url = buildDaemonWebSocketUrl(record.hubUrl, this.path, this.label ? { label: this.label } : undefined);
175
193
  daemonLog.info("control-channel connecting", { url });
194
+ const connectionId = ++this.connectionSeq;
176
195
  const ws = new this.webSocketCtor(url, {
177
196
  headers: { Authorization: `Bearer ${accessToken}` },
178
197
  });
@@ -180,6 +199,16 @@ export class ControlChannel {
180
199
  await new Promise((resolve, reject) => {
181
200
  const onOpen = () => {
182
201
  ws.removeListener("error", onError);
202
+ if (this.stopRequested || this.ws !== ws || connectionId !== this.connectionSeq) {
203
+ try {
204
+ ws.close(1000, "stale control-channel connection");
205
+ }
206
+ catch {
207
+ // ignore
208
+ }
209
+ resolve();
210
+ return;
211
+ }
183
212
  this.connected = true;
184
213
  this.reconnectAttempts = 0;
185
214
  daemonLog.info("control-channel connected", { url });
@@ -188,13 +217,21 @@ export class ControlChannel {
188
217
  };
189
218
  const onError = (err) => {
190
219
  ws.removeListener("open", onOpen);
220
+ if (this.ws !== ws || connectionId !== this.connectionSeq) {
221
+ resolve();
222
+ return;
223
+ }
191
224
  reject(err);
192
225
  };
193
226
  ws.once("open", onOpen);
194
227
  ws.once("error", onError);
195
228
  });
196
- ws.on("message", (data) => this.onMessage(data));
197
- ws.on("close", (code, reason) => this.onClose(code, reason));
229
+ ws.on("message", (data) => {
230
+ if (this.ws !== ws || connectionId !== this.connectionSeq)
231
+ return;
232
+ void this.onMessage(data);
233
+ });
234
+ ws.on("close", (code, reason) => this.onClose(code, reason, ws, connectionId));
198
235
  ws.on("error", (err) => daemonLog.warn("control-channel error", {
199
236
  error: err instanceof Error ? err.message : String(err),
200
237
  }));
@@ -231,8 +268,12 @@ export class ControlChannel {
231
268
  this.keepaliveTimer = null;
232
269
  }
233
270
  }
234
- onClose(code, reason) {
271
+ onClose(code, reason, ws, connectionId) {
235
272
  const reasonText = reason?.toString() || "";
273
+ if (ws && (this.ws !== ws || connectionId !== this.connectionSeq)) {
274
+ daemonLog.debug("control-channel stale close ignored", { code, reason: reasonText });
275
+ return;
276
+ }
236
277
  this.connected = false;
237
278
  this.stopKeepalive();
238
279
  this.ws = null;
@@ -252,6 +293,13 @@ export class ControlChannel {
252
293
  scheduleReconnect(err) {
253
294
  if (this.stopRequested)
254
295
  return;
296
+ if (this.reconnectTimer)
297
+ return;
298
+ const current = this.ws;
299
+ if (current &&
300
+ (current.readyState === WebSocket.CONNECTING || current.readyState === WebSocket.OPEN)) {
301
+ return;
302
+ }
255
303
  if (err instanceof AuthRefreshRejectedError) {
256
304
  this.stopRequested = true;
257
305
  daemonLog.warn("control-channel: refresh rejected; halting reconnect (re-login required)", {
@@ -261,22 +309,25 @@ export class ControlChannel {
261
309
  }
262
310
  const attempt = this.reconnectAttempts;
263
311
  this.reconnectAttempts = attempt + 1;
264
- const delay = this.backoff[Math.min(attempt, this.backoff.length - 1)];
312
+ const baseDelayMs = this.backoff[Math.min(attempt, this.backoff.length - 1)];
313
+ const { delayMs, jitterMs } = withReconnectJitter(baseDelayMs);
265
314
  if (err) {
266
315
  daemonLog.warn("control-channel reconnect scheduled", {
267
- delayMs: delay,
316
+ delayMs,
317
+ baseDelayMs,
318
+ jitterMs,
268
319
  error: err instanceof Error ? err.message : String(err),
269
320
  });
270
321
  }
271
322
  else {
272
- daemonLog.info("control-channel reconnect scheduled", { delayMs: delay });
323
+ daemonLog.info("control-channel reconnect scheduled", { delayMs, baseDelayMs, jitterMs });
273
324
  }
274
325
  this.reconnectTimer = setTimeout(() => {
275
326
  this.reconnectTimer = null;
276
327
  if (this.stopRequested)
277
328
  return;
278
329
  this.connect().catch((err) => this.scheduleReconnect(err));
279
- }, delay);
330
+ }, delayMs);
280
331
  }
281
332
  async onMessage(data) {
282
333
  let frame;
package/dist/daemon.js CHANGED
@@ -367,7 +367,7 @@ export async function startDaemon(opts) {
367
367
  composeUserTurn: composeBotCordUserTurn,
368
368
  attentionGate,
369
369
  resolveHubUrl,
370
- transcriptEnabled: resolveTranscriptEnabled(process.env.BOTCORD_TRANSCRIPT, opts.config.transcript?.enabled === true),
370
+ transcriptEnabled: resolveTranscriptEnabled(process.env.BOTCORD_TRANSCRIPT, opts.config.transcript?.enabled),
371
371
  });
372
372
  logger.info("daemon starting", {
373
373
  agents: agentIds,
@@ -5,6 +5,7 @@ export interface CreateDiagnosticBundleOptions {
5
5
  logFile?: string;
6
6
  configFile?: string;
7
7
  snapshotFile?: string;
8
+ sessionsFile?: string;
8
9
  doctor?: {
9
10
  text: string;
10
11
  json: unknown;
@@ -1,12 +1,15 @@
1
- import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
1
+ import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
2
2
  import { homedir, hostname, platform, release, arch } from "node:os";
3
3
  import path from "node:path";
4
4
  import { Buffer } from "node:buffer";
5
+ import { createRequire } from "node:module";
6
+ import { fileURLToPath } from "node:url";
5
7
  import { deflateRawSync } from "node:zlib";
6
8
  import { AUTH_EXPIRED_FLAG_PATH, USER_AUTH_PATH, } from "./user-auth.js";
7
- import { CONFIG_FILE_PATH, PID_PATH, SNAPSHOT_PATH, loadConfig, saveConfig, } from "./config.js";
9
+ import { CONFIG_FILE_PATH, PID_PATH, SESSIONS_PATH, SNAPSHOT_PATH, loadConfig, saveConfig, } from "./config.js";
8
10
  import { listDaemonLogFiles, LOG_FILE_PATH } from "./log.js";
9
11
  import { listAcpTraceLogFiles, listRuntimeLogFiles } from "./acp-logs.js";
12
+ import { defaultTranscriptRoot } from "./gateway/transcript.js";
10
13
  import { channelsFromDaemonConfig, defaultHttpFetcher, renderDoctor, runDoctor, } from "./doctor.js";
11
14
  import { detectRuntimes } from "./adapters/runtimes.js";
12
15
  import { log as daemonLog } from "./log.js";
@@ -14,6 +17,27 @@ import { discoverLocalOpenclawGateways, mergeOpenclawGateways, openclawDiscovery
14
17
  const DIAGNOSTICS_DIR = path.join(homedir(), ".botcord", "diagnostics");
15
18
  const MAX_UPLOAD_BYTES = 50 * 1024 * 1024;
16
19
  const DEFAULT_ROTATED_LOGS_IN_BUNDLE = 5;
20
+ const require = createRequire(import.meta.url);
21
+ const MODULE_PATH = fileURLToPath(import.meta.url);
22
+ const ENV_ALLOWLIST = new Set([
23
+ "NODE_ENV",
24
+ "PATH",
25
+ "BOTCORD_HUB",
26
+ "BOTCORD_DAEMON_HOME",
27
+ "BOTCORD_DAEMON_CONFIG",
28
+ "BOTCORD_DAEMON_LOG",
29
+ "BOTCORD_DAEMON_SNAPSHOT_INTERVAL_MS",
30
+ "BOTCORD_HERMES_AGENT_BIN",
31
+ "BOTCORD_CLAUDE_CODE_BIN",
32
+ "BOTCORD_CODEX_BIN",
33
+ "BOTCORD_GEMINI_BIN",
34
+ "BOTCORD_DEEPSEEK_TUI_BIN",
35
+ "BOTCORD_KIMI_CLI_BIN",
36
+ "OPENCLAW_ACP_URL",
37
+ ]);
38
+ const TRANSCRIPT_LOG_DIAGNOSTICS_DEFAULT = 10;
39
+ const TRANSCRIPT_LOG_DIAGNOSTICS_ALL = 50;
40
+ const TRANSCRIPT_LOG_MAX_FILE_BYTES = 2 * 1024 * 1024;
17
41
  const SECRET_PATTERNS = [
18
42
  [/(Authorization:\s*Bearer\s+)[^\s"']+/gi, "$1[REDACTED]"],
19
43
  [/("?(?:accessToken|access_token|refreshToken|refresh_token|token|privateKey|private_key|secret)"?\s*:\s*")[^"]+(")/gi, "$1[REDACTED]$2"],
@@ -38,6 +62,82 @@ function safeReadText(file) {
38
62
  return `read failed: ${err instanceof Error ? err.message : String(err)}\n`;
39
63
  }
40
64
  }
65
+ function readJsonFile(file) {
66
+ try {
67
+ const parsed = JSON.parse(readFileSync(file, "utf8"));
68
+ return parsed && typeof parsed === "object" && !Array.isArray(parsed)
69
+ ? parsed
70
+ : null;
71
+ }
72
+ catch {
73
+ return null;
74
+ }
75
+ }
76
+ function findDaemonPackageJson(startFile) {
77
+ let dir = path.dirname(startFile);
78
+ for (let i = 0; i < 6; i += 1) {
79
+ const candidate = path.join(dir, "package.json");
80
+ const parsed = readJsonFile(candidate);
81
+ if (parsed?.name === "@botcord/daemon")
82
+ return parsed;
83
+ const next = path.dirname(dir);
84
+ if (next === dir)
85
+ break;
86
+ dir = next;
87
+ }
88
+ return null;
89
+ }
90
+ function readInstalledPackageVersion(packageJsonSpecifier) {
91
+ try {
92
+ const pkgPath = require.resolve(packageJsonSpecifier);
93
+ const parsed = readJsonFile(pkgPath);
94
+ return typeof parsed?.version === "string" ? parsed.version : null;
95
+ }
96
+ catch {
97
+ return null;
98
+ }
99
+ }
100
+ function daemonRuntimeSummary() {
101
+ const pkg = findDaemonPackageJson(MODULE_PATH);
102
+ const version = typeof pkg?.version === "string" ? pkg.version : null;
103
+ const startedAtMs = Date.now() - Math.round(process.uptime() * 1000);
104
+ return {
105
+ packageName: typeof pkg?.name === "string" ? pkg.name : "@botcord/daemon",
106
+ version,
107
+ modulePath: MODULE_PATH,
108
+ entrypoint: process.argv[1] ?? null,
109
+ execPath: process.execPath,
110
+ argv: process.argv.map((arg) => redact(arg)),
111
+ execArgv: process.execArgv.map((arg) => redact(arg)),
112
+ cwd: process.cwd(),
113
+ pid: process.pid,
114
+ ppid: process.ppid,
115
+ uptimeSec: Math.round(process.uptime()),
116
+ startedAt: new Date(startedAtMs).toISOString(),
117
+ versions: {
118
+ node: process.version,
119
+ v8: process.versions.v8,
120
+ uv: process.versions.uv,
121
+ openssl: process.versions.openssl,
122
+ },
123
+ packages: {
124
+ "@botcord/daemon": version,
125
+ "@botcord/cli": readInstalledPackageVersion("@botcord/cli/package.json"),
126
+ "@botcord/protocol-core": readInstalledPackageVersion("@botcord/protocol-core/package.json"),
127
+ },
128
+ };
129
+ }
130
+ function safeEnvironmentSummary() {
131
+ const out = {};
132
+ for (const [key, value] of Object.entries(process.env)) {
133
+ if (!value)
134
+ continue;
135
+ if (!ENV_ALLOWLIST.has(key) && !key.startsWith("BOTCORD_DAEMON_"))
136
+ continue;
137
+ out[key] = redact(value);
138
+ }
139
+ return out;
140
+ }
41
141
  function readUserAuthSummary() {
42
142
  const raw = safeReadText(USER_AUTH_PATH);
43
143
  if (!raw)
@@ -252,6 +352,50 @@ function bundledLogs(logFile, includeAllLogs) {
252
352
  ...(includeAllLogs ? rotated : rotated.slice(0, DEFAULT_ROTATED_LOGS_IN_BUNDLE)),
253
353
  ];
254
354
  }
355
+ function listTranscriptLogFiles(includeAll) {
356
+ const root = defaultTranscriptRoot();
357
+ const out = [];
358
+ collectTranscriptFiles(root, root, out, 5);
359
+ const limit = includeAll ? TRANSCRIPT_LOG_DIAGNOSTICS_ALL : TRANSCRIPT_LOG_DIAGNOSTICS_DEFAULT;
360
+ return out
361
+ .sort((a, b) => b.mtimeMs - a.mtimeMs || b.name.localeCompare(a.name))
362
+ .slice(0, limit);
363
+ }
364
+ function collectTranscriptFiles(root, dir, out, maxDepth) {
365
+ if (maxDepth < 0)
366
+ return;
367
+ let names;
368
+ try {
369
+ names = readdirSync(dir);
370
+ }
371
+ catch {
372
+ return;
373
+ }
374
+ for (const name of names) {
375
+ const file = path.join(dir, name);
376
+ try {
377
+ const st = statSync(file);
378
+ if (st.isDirectory()) {
379
+ collectTranscriptFiles(root, file, out, maxDepth - 1);
380
+ }
381
+ else if (st.isFile() &&
382
+ name.endsWith(".jsonl") &&
383
+ file.includes(`${path.sep}transcripts${path.sep}`) &&
384
+ st.size <= TRANSCRIPT_LOG_MAX_FILE_BYTES) {
385
+ out.push({
386
+ path: file,
387
+ name: path.relative(root, file) || name,
388
+ sizeBytes: st.size,
389
+ mtimeMs: st.mtimeMs,
390
+ active: true,
391
+ });
392
+ }
393
+ }
394
+ catch {
395
+ // ignore files that disappear while collecting diagnostics
396
+ }
397
+ }
398
+ }
255
399
  export async function createDiagnosticBundle(opts = {}) {
256
400
  const createdAt = new Date();
257
401
  const stamp = createdAt.toISOString().replace(/[:.]/g, "-");
@@ -260,10 +404,12 @@ export async function createDiagnosticBundle(opts = {}) {
260
404
  const logFile = opts.logFile ?? LOG_FILE_PATH;
261
405
  const configFile = opts.configFile ?? CONFIG_FILE_PATH;
262
406
  const snapshotFile = opts.snapshotFile ?? SNAPSHOT_PATH;
407
+ const sessionsFile = opts.sessionsFile ?? SESSIONS_PATH;
263
408
  const includeAllLogs = opts.includeAllLogs === true;
264
409
  const logs = bundledLogs(logFile, includeAllLogs);
265
410
  const acpLogs = listAcpTraceLogFiles(includeAllLogs);
266
411
  const runtimeLogs = listRuntimeLogFiles(includeAllLogs);
412
+ const transcriptLogs = listTranscriptLogFiles(includeAllLogs);
267
413
  mkdirSync(diagnosticsDir, { recursive: true, mode: 0o700 });
268
414
  const doctor = opts.doctor ?? await buildDoctorEntries();
269
415
  const status = {
@@ -273,9 +419,12 @@ export async function createDiagnosticBundle(opts = {}) {
273
419
  release: release(),
274
420
  arch: arch(),
275
421
  node: process.version,
422
+ daemon: daemonRuntimeSummary(),
423
+ environment: safeEnvironmentSummary(),
276
424
  pidPath: PID_PATH,
277
425
  pid: process.pid,
278
426
  configPath: configFile,
427
+ sessionsPath: sessionsFile,
279
428
  snapshotPath: snapshotFile,
280
429
  logPath: logFile,
281
430
  logsBundled: logs.map((entry) => ({
@@ -294,6 +443,11 @@ export async function createDiagnosticBundle(opts = {}) {
294
443
  path: entry.path,
295
444
  sizeBytes: entry.sizeBytes,
296
445
  })),
446
+ transcriptLogsBundled: transcriptLogs.map((entry) => ({
447
+ name: entry.name,
448
+ path: entry.path,
449
+ sizeBytes: entry.sizeBytes,
450
+ })),
297
451
  logsBundleMode: includeAllLogs ? "all" : `active_plus_${DEFAULT_ROTATED_LOGS_IN_BUNDLE}_rotated`,
298
452
  diagnosticsDir,
299
453
  userAuth: readUserAuthSummary(),
@@ -333,6 +487,13 @@ export async function createDiagnosticBundle(opts = {}) {
333
487
  data: log ?? `no runtime log file at ${entry.path}\n`,
334
488
  });
335
489
  }
490
+ for (const entry of transcriptLogs) {
491
+ const log = safeReadText(entry.path);
492
+ entries.push({
493
+ name: `transcripts/${entry.name.split(path.sep).join("/")}`,
494
+ data: log ?? `no transcript log file at ${entry.path}\n`,
495
+ });
496
+ }
336
497
  const config = safeReadText(configFile);
337
498
  entries.push({
338
499
  name: "config.json.redacted",
@@ -343,6 +504,11 @@ export async function createDiagnosticBundle(opts = {}) {
343
504
  name: "snapshot.json",
344
505
  data: snapshot ?? `no snapshot file at ${snapshotFile}\n`,
345
506
  });
507
+ const sessions = safeReadText(sessionsFile);
508
+ entries.push({
509
+ name: "sessions.json.redacted",
510
+ data: sessions ?? `no sessions file at ${sessionsFile}\n`,
511
+ });
346
512
  const zip = createZip(entries);
347
513
  const out = path.join(diagnosticsDir, filename);
348
514
  writeFileSync(out, zip, { mode: 0o600 });
@@ -3,6 +3,7 @@ import { BotCordClient, buildHubWebSocketUrl, defaultCredentialsFile, loadStored
3
3
  import { sanitizeUntrustedContent } from "./sanitize.js";
4
4
  import { revokeAgent } from "../../provision.js";
5
5
  const RECONNECT_BACKOFF = [1000, 2000, 4000, 8000, 16000, 30000];
6
+ const RECONNECT_JITTER_RATIO = 0.25;
6
7
  const KEEPALIVE_INTERVAL = 20_000;
7
8
  const MAX_AUTH_FAILURES = 5;
8
9
  const SEEN_MESSAGES_CAP = 500;
@@ -10,6 +11,10 @@ const OWNER_CHAT_PREFIX = "rm_oc_";
10
11
  const DM_ROOM_PREFIX = "rm_dm_";
11
12
  const INBOX_POLL_LIMIT = 50;
12
13
  const CHANNEL_PERMANENT_STOP = "channel_permanent_stop";
14
+ function withReconnectJitter(delayMs) {
15
+ const jitterMs = Math.floor(Math.random() * delayMs * RECONNECT_JITTER_RATIO);
16
+ return { delayMs: delayMs + jitterMs, jitterMs };
17
+ }
13
18
  function isUnclaimedAgentError(err) {
14
19
  const status = err?.status;
15
20
  if (status !== 403)
@@ -344,6 +349,7 @@ export function createBotCordChannel(options) {
344
349
  let reconnectTimer = null;
345
350
  let keepaliveTimer = null;
346
351
  let reconnectAttempt = 0;
352
+ let connectionSeq = 0;
347
353
  let consecutiveAuthFailures = 0;
348
354
  let running = true;
349
355
  let permanentStopping = false;
@@ -465,22 +471,36 @@ export function createBotCordChannel(options) {
465
471
  function scheduleReconnect() {
466
472
  if (!running)
467
473
  return;
468
- const delay = RECONNECT_BACKOFF[Math.min(reconnectAttempt, RECONNECT_BACKOFF.length - 1)];
474
+ if (reconnectTimer)
475
+ return;
476
+ if (ws && (ws.readyState === WebSocket.CONNECTING || ws.readyState === WebSocket.OPEN)) {
477
+ return;
478
+ }
479
+ const baseDelayMs = RECONNECT_BACKOFF[Math.min(reconnectAttempt, RECONNECT_BACKOFF.length - 1)];
480
+ const { delayMs, jitterMs } = withReconnectJitter(baseDelayMs);
469
481
  reconnectAttempt += 1;
470
482
  markStatus({
471
483
  connected: false,
472
484
  restartPending: true,
473
485
  reconnectAttempts: reconnectAttempt,
474
486
  });
475
- log.info("botcord ws reconnect scheduled", { delayMs: delay, attempt: reconnectAttempt });
487
+ log.info("botcord ws reconnect scheduled", {
488
+ delayMs,
489
+ baseDelayMs,
490
+ jitterMs,
491
+ attempt: reconnectAttempt,
492
+ });
476
493
  reconnectTimer = setTimeout(() => {
477
494
  reconnectTimer = null;
478
495
  void connect();
479
- }, delay);
496
+ }, delayMs);
480
497
  }
481
498
  async function connect() {
482
499
  if (!running)
483
500
  return;
501
+ if (ws && (ws.readyState === WebSocket.CONNECTING || ws.readyState === WebSocket.OPEN)) {
502
+ return;
503
+ }
484
504
  const agentId = options.agentId;
485
505
  markStatus({ connected: false, restartPending: false });
486
506
  if (pendingRefresh) {
@@ -506,8 +526,11 @@ export function createBotCordChannel(options) {
506
526
  }
507
527
  const url = buildHubWebSocketUrl(hubUrl);
508
528
  log.info("botcord ws connecting", { url, agentId });
529
+ const connectionId = ++connectionSeq;
530
+ let socket;
509
531
  try {
510
- ws = new wsCtor(url);
532
+ socket = new wsCtor(url);
533
+ ws = socket;
511
534
  }
512
535
  catch (err) {
513
536
  log.error("botcord ws construct failed", { agentId, err: String(err) });
@@ -515,10 +538,21 @@ export function createBotCordChannel(options) {
515
538
  scheduleReconnect();
516
539
  return;
517
540
  }
518
- ws.on("open", () => {
519
- ws.send(JSON.stringify({ type: "auth", token }));
541
+ socket.on("open", () => {
542
+ if (!running || ws !== socket || connectionId !== connectionSeq) {
543
+ try {
544
+ socket.close();
545
+ }
546
+ catch {
547
+ // ignore
548
+ }
549
+ return;
550
+ }
551
+ socket.send(JSON.stringify({ type: "auth", token }));
520
552
  });
521
- ws.on("message", (data) => {
553
+ socket.on("message", (data) => {
554
+ if (ws !== socket || connectionId !== connectionSeq)
555
+ return;
522
556
  let msg = null;
523
557
  try {
524
558
  msg = JSON.parse(String(data));
@@ -540,10 +574,12 @@ export function createBotCordChannel(options) {
540
574
  });
541
575
  log.info("botcord ws authenticated", { agentId: msg.agent_id });
542
576
  void fireInbox("ws_auth_ok");
577
+ if (keepaliveTimer)
578
+ clearInterval(keepaliveTimer);
543
579
  keepaliveTimer = setInterval(() => {
544
- if (ws && ws.readyState === WebSocket.OPEN) {
580
+ if (ws === socket && socket.readyState === WebSocket.OPEN) {
545
581
  try {
546
- ws.send(JSON.stringify({ type: "ping" }));
582
+ socket.send(JSON.stringify({ type: "ping" }));
547
583
  }
548
584
  catch {
549
585
  // ignore
@@ -562,10 +598,15 @@ export function createBotCordChannel(options) {
562
598
  log.warn("botcord ws server error", { agentId, msg });
563
599
  }
564
600
  });
565
- ws.on("close", (code, reason) => {
601
+ socket.on("close", (code, reason) => {
566
602
  const reasonStr = reason?.toString() || "";
603
+ if (ws !== socket || connectionId !== connectionSeq) {
604
+ log.debug("botcord ws stale close ignored", { agentId, code, reason: reasonStr });
605
+ return;
606
+ }
567
607
  log.info("botcord ws closed", { agentId, code, reason: reasonStr });
568
608
  clearTimers();
609
+ ws = null;
569
610
  markStatus({ connected: false });
570
611
  if (!running) {
571
612
  if (permanentStopping)
@@ -606,7 +647,9 @@ export function createBotCordChannel(options) {
606
647
  }
607
648
  scheduleReconnect();
608
649
  });
609
- ws.on("error", (err) => {
650
+ socket.on("error", (err) => {
651
+ if (ws !== socket || connectionId !== connectionSeq)
652
+ return;
610
653
  log.warn("botcord ws error", { agentId, err: String(err) });
611
654
  markStatus({ lastError: String(err) });
612
655
  });