@botcord/daemon 0.2.62 → 0.2.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,7 @@ import {
25
25
 
26
26
  /** Exponential backoff plan for transient disconnects. */
27
27
  const RECONNECT_BACKOFF_MS = [1000, 2000, 4000, 8000, 16000, 30000];
28
+ const RECONNECT_JITTER_RATIO = 0.25;
28
29
  /**
29
30
  * Keepalive cadence. Has to stay below the smallest idle-timeout in any
30
31
  * intermediary on the daemon → Hub WS path. Cloudflare and AWS ALB both
@@ -55,6 +56,11 @@ export function controlSigningInput(
55
56
  return jcsCanonicalize(obj) ?? "{}";
56
57
  }
57
58
 
59
/**
 * Stretch a reconnect delay by a random, non-negative amount of jitter.
 *
 * The jitter is a whole number of milliseconds drawn from
 * `[0, delayMs * RECONNECT_JITTER_RATIO)` and is only ever added to the base
 * delay, never subtracted from it.
 *
 * @param delayMs Base delay in milliseconds.
 * @returns The jittered delay together with the jitter that was applied.
 */
function withReconnectJitter(delayMs: number): { delayMs: number; jitterMs: number } {
  // Keep the left-to-right multiplication order so rounding matches exactly.
  const scaledRoll = Math.random() * delayMs * RECONNECT_JITTER_RATIO;
  const extraMs = Math.floor(scaledRoll);
  const jitteredDelayMs = delayMs + extraMs;
  return { delayMs: jitteredDelayMs, jitterMs: extraMs };
}
63
+
58
64
  /** Handler invoked for each inbound frame. Return value is the ack payload. */
59
65
  export type ControlFrameHandler = (
60
66
  frame: ControlFrame,
@@ -110,6 +116,7 @@ export class ControlChannel {
110
116
  private readonly seenFrameIds: string[] = [];
111
117
  private connectInflight: Promise<void> | null = null;
112
118
  private connected = false;
119
+ private connectionSeq = 0;
113
120
 
114
121
  constructor(opts: ControlChannelOptions) {
115
122
  this.auth = opts.auth;
@@ -220,6 +227,20 @@ export class ControlChannel {
220
227
  private async connect(): Promise<void> {
221
228
  const record = this.auth.current;
222
229
  if (!record) throw new Error("control-channel: no user-auth");
230
+ const current = this.ws;
231
+ if (
232
+ current &&
233
+ (current.readyState === WebSocket.CONNECTING || current.readyState === WebSocket.OPEN)
234
+ ) {
235
+ daemonLog.debug("control-channel connect skipped (socket already active)", {
236
+ readyState: current.readyState,
237
+ });
238
+ return;
239
+ }
240
+ if (this.reconnectTimer) {
241
+ clearTimeout(this.reconnectTimer);
242
+ this.reconnectTimer = null;
243
+ }
223
244
 
224
245
  const accessToken = await this.auth.ensureAccessToken();
225
246
  const url = buildDaemonWebSocketUrl(
@@ -229,6 +250,7 @@ export class ControlChannel {
229
250
  );
230
251
  daemonLog.info("control-channel connecting", { url });
231
252
 
253
+ const connectionId = ++this.connectionSeq;
232
254
  const ws = new this.webSocketCtor(url, {
233
255
  headers: { Authorization: `Bearer ${accessToken}` },
234
256
  });
@@ -237,6 +259,15 @@ export class ControlChannel {
237
259
  await new Promise<void>((resolve, reject) => {
238
260
  const onOpen = (): void => {
239
261
  ws.removeListener("error", onError);
262
+ if (this.stopRequested || this.ws !== ws || connectionId !== this.connectionSeq) {
263
+ try {
264
+ ws.close(1000, "stale control-channel connection");
265
+ } catch {
266
+ // ignore
267
+ }
268
+ resolve();
269
+ return;
270
+ }
240
271
  this.connected = true;
241
272
  this.reconnectAttempts = 0;
242
273
  daemonLog.info("control-channel connected", { url });
@@ -245,14 +276,21 @@ export class ControlChannel {
245
276
  };
246
277
  const onError = (err: Error): void => {
247
278
  ws.removeListener("open", onOpen);
279
+ if (this.ws !== ws || connectionId !== this.connectionSeq) {
280
+ resolve();
281
+ return;
282
+ }
248
283
  reject(err);
249
284
  };
250
285
  ws.once("open", onOpen);
251
286
  ws.once("error", onError);
252
287
  });
253
288
 
254
- ws.on("message", (data) => this.onMessage(data));
255
- ws.on("close", (code, reason) => this.onClose(code, reason));
289
+ ws.on("message", (data) => {
290
+ if (this.ws !== ws || connectionId !== this.connectionSeq) return;
291
+ void this.onMessage(data);
292
+ });
293
+ ws.on("close", (code, reason) => this.onClose(code, reason, ws, connectionId));
256
294
  ws.on("error", (err) =>
257
295
  daemonLog.warn("control-channel error", {
258
296
  error: err instanceof Error ? err.message : String(err),
@@ -292,8 +330,12 @@ export class ControlChannel {
292
330
  }
293
331
  }
294
332
 
295
- private onClose(code: number, reason: Buffer): void {
333
+ private onClose(code: number, reason: Buffer, ws?: WebSocket, connectionId?: number): void {
296
334
  const reasonText = reason?.toString() || "";
335
+ if (ws && (this.ws !== ws || connectionId !== this.connectionSeq)) {
336
+ daemonLog.debug("control-channel stale close ignored", { code, reason: reasonText });
337
+ return;
338
+ }
297
339
  this.connected = false;
298
340
  this.stopKeepalive();
299
341
  this.ws = null;
@@ -314,6 +356,14 @@ export class ControlChannel {
314
356
 
315
357
  private scheduleReconnect(err?: unknown): void {
316
358
  if (this.stopRequested) return;
359
+ if (this.reconnectTimer) return;
360
+ const current = this.ws;
361
+ if (
362
+ current &&
363
+ (current.readyState === WebSocket.CONNECTING || current.readyState === WebSocket.OPEN)
364
+ ) {
365
+ return;
366
+ }
317
367
  if (err instanceof AuthRefreshRejectedError) {
318
368
  this.stopRequested = true;
319
369
  daemonLog.warn("control-channel: refresh rejected; halting reconnect (re-login required)", {
@@ -323,20 +373,23 @@ export class ControlChannel {
323
373
  }
324
374
  const attempt = this.reconnectAttempts;
325
375
  this.reconnectAttempts = attempt + 1;
326
- const delay = this.backoff[Math.min(attempt, this.backoff.length - 1)];
376
+ const baseDelayMs = this.backoff[Math.min(attempt, this.backoff.length - 1)];
377
+ const { delayMs, jitterMs } = withReconnectJitter(baseDelayMs);
327
378
  if (err) {
328
379
  daemonLog.warn("control-channel reconnect scheduled", {
329
- delayMs: delay,
380
+ delayMs,
381
+ baseDelayMs,
382
+ jitterMs,
330
383
  error: err instanceof Error ? err.message : String(err),
331
384
  });
332
385
  } else {
333
- daemonLog.info("control-channel reconnect scheduled", { delayMs: delay });
386
+ daemonLog.info("control-channel reconnect scheduled", { delayMs, baseDelayMs, jitterMs });
334
387
  }
335
388
  this.reconnectTimer = setTimeout(() => {
336
389
  this.reconnectTimer = null;
337
390
  if (this.stopRequested) return;
338
391
  this.connect().catch((err) => this.scheduleReconnect(err));
339
- }, delay);
392
+ }, delayMs);
340
393
  }
341
394
 
342
395
  private async onMessage(data: WebSocket.RawData): Promise<void> {
package/src/daemon.ts CHANGED
@@ -518,7 +518,7 @@ export async function startDaemon(opts: DaemonRuntimeOptions): Promise<DaemonHan
518
518
  resolveHubUrl,
519
519
  transcriptEnabled: resolveTranscriptEnabled(
520
520
  process.env.BOTCORD_TRANSCRIPT,
521
- opts.config.transcript?.enabled === true,
521
+ opts.config.transcript?.enabled,
522
522
  ),
523
523
  });
524
524
 
@@ -1,7 +1,9 @@
1
- import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
1
+ import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
2
2
  import { homedir, hostname, platform, release, arch } from "node:os";
3
3
  import path from "node:path";
4
4
  import { Buffer } from "node:buffer";
5
+ import { createRequire } from "node:module";
6
+ import { fileURLToPath } from "node:url";
5
7
  import { deflateRawSync } from "node:zlib";
6
8
  import {
7
9
  AUTH_EXPIRED_FLAG_PATH,
@@ -12,6 +14,7 @@ import {
12
14
  import {
13
15
  CONFIG_FILE_PATH,
14
16
  PID_PATH,
17
+ SESSIONS_PATH,
15
18
  SNAPSHOT_PATH,
16
19
  loadConfig,
17
20
  saveConfig,
@@ -19,6 +22,7 @@ import {
19
22
  } from "./config.js";
20
23
  import { listDaemonLogFiles, LOG_FILE_PATH, type LogFileEntry } from "./log.js";
21
24
  import { listAcpTraceLogFiles, listRuntimeLogFiles } from "./acp-logs.js";
25
+ import { defaultTranscriptRoot } from "./gateway/transcript.js";
22
26
  import {
23
27
  channelsFromDaemonConfig,
24
28
  defaultHttpFetcher,
@@ -38,12 +42,34 @@ import {
38
42
  const DIAGNOSTICS_DIR = path.join(homedir(), ".botcord", "diagnostics");
39
43
  const MAX_UPLOAD_BYTES = 50 * 1024 * 1024;
40
44
  const DEFAULT_ROTATED_LOGS_IN_BUNDLE = 5;
45
+ const require = createRequire(import.meta.url);
46
+ const MODULE_PATH = fileURLToPath(import.meta.url);
47
+ const ENV_ALLOWLIST = new Set([
48
+ "NODE_ENV",
49
+ "PATH",
50
+ "BOTCORD_HUB",
51
+ "BOTCORD_DAEMON_HOME",
52
+ "BOTCORD_DAEMON_CONFIG",
53
+ "BOTCORD_DAEMON_LOG",
54
+ "BOTCORD_DAEMON_SNAPSHOT_INTERVAL_MS",
55
+ "BOTCORD_HERMES_AGENT_BIN",
56
+ "BOTCORD_CLAUDE_CODE_BIN",
57
+ "BOTCORD_CODEX_BIN",
58
+ "BOTCORD_GEMINI_BIN",
59
+ "BOTCORD_DEEPSEEK_TUI_BIN",
60
+ "BOTCORD_KIMI_CLI_BIN",
61
+ "OPENCLAW_ACP_URL",
62
+ ]);
63
+ const TRANSCRIPT_LOG_DIAGNOSTICS_DEFAULT = 10;
64
+ const TRANSCRIPT_LOG_DIAGNOSTICS_ALL = 50;
65
+ const TRANSCRIPT_LOG_MAX_FILE_BYTES = 2 * 1024 * 1024;
41
66
 
42
67
  export interface CreateDiagnosticBundleOptions {
43
68
  diagnosticsDir?: string;
44
69
  logFile?: string;
45
70
  configFile?: string;
46
71
  snapshotFile?: string;
72
+ sessionsFile?: string;
47
73
  doctor?: { text: string; json: unknown };
48
74
  includeAllLogs?: boolean;
49
75
  }
@@ -89,6 +115,81 @@ function safeReadText(file: string): string | null {
89
115
  }
90
116
  }
91
117
 
118
/**
 * Read `file` as UTF-8 JSON and return it only when the top-level value is a
 * plain object.
 *
 * Best-effort by design: a missing or unreadable file, malformed JSON, or a
 * top-level value that is `null`, a primitive, or an array all yield `null`
 * instead of throwing.
 *
 * @param file Path of the JSON file to read.
 * @returns The parsed object, or `null` when it cannot be read as one.
 */
function readJsonFile(file: string): Record<string, unknown> | null {
  let parsed: unknown;
  try {
    const raw = readFileSync(file, "utf8");
    // readFileSync keeps a leading UTF-8 byte-order mark as U+FEFF, and
    // JSON.parse rejects it; strip it so BOM-prefixed files still parse.
    const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
    parsed = JSON.parse(text);
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return null;
  }
  return parsed as Record<string, unknown>;
}
128
+
129
/**
 * Locate the `@botcord/daemon` package manifest by walking up from a file.
 *
 * Starting at the directory containing `startFile`, at most six directory
 * levels are inspected. The first `package.json` whose `name` is exactly
 * `"@botcord/daemon"` wins; manifests with any other name are skipped.
 *
 * @param startFile File path whose parent directories are searched.
 * @returns The parsed manifest, or `null` when none is found within the limit
 *   or the filesystem root is reached first.
 */
function findDaemonPackageJson(startFile: string): Record<string, unknown> | null {
  const maxLevels = 6;
  let current = path.dirname(startFile);
  let remaining = maxLevels;
  while (remaining > 0) {
    remaining -= 1;
    const manifest = readJsonFile(path.join(current, "package.json"));
    if (manifest?.name === "@botcord/daemon") return manifest;
    const parent = path.dirname(current);
    // dirname() of the filesystem root is the root itself: nothing left to climb.
    if (parent === current) return null;
    current = parent;
  }
  return null;
}
141
+
142
/**
 * Report the `version` field of an installed package's manifest.
 *
 * @param packageJsonSpecifier Module specifier of the manifest to resolve,
 *   e.g. `"@botcord/cli/package.json"`.
 * @returns The version string, or `null` when the specifier cannot be
 *   resolved, the manifest cannot be read, or `version` is not a string.
 */
function readInstalledPackageVersion(packageJsonSpecifier: string): string | null {
  let manifest: Record<string, unknown> | null;
  try {
    // require.resolve throws when the package is not installed.
    manifest = readJsonFile(require.resolve(packageJsonSpecifier));
  } catch {
    return null;
  }
  const version = manifest?.version;
  return typeof version === "string" ? version : null;
}
151
+
152
/**
 * Describe the running daemon process for inclusion in a diagnostic bundle.
 *
 * Collects the package name and version (from the nearest `@botcord/daemon`
 * manifest above this module), process identity and launch details, uptime,
 * runtime versions, and the versions of related `@botcord/*` packages.
 * Command-line arguments are passed through `redact` before being reported.
 *
 * @returns A JSON-serialisable summary object.
 */
function daemonRuntimeSummary(): Record<string, unknown> {
  const manifest = findDaemonPackageJson(MODULE_PATH);
  const daemonVersion = typeof manifest?.version === "string" ? manifest.version : null;
  // Start time is derived by subtracting the process uptime from "now".
  const bootEpochMs = Date.now() - Math.round(process.uptime() * 1000);

  const packageName = typeof manifest?.name === "string" ? manifest.name : "@botcord/daemon";
  const entrypoint = process.argv[1] ?? null;
  const argv = process.argv.map((arg) => redact(arg));
  const execArgv = process.execArgv.map((arg) => redact(arg));
  const cwd = process.cwd();
  const uptimeSec = Math.round(process.uptime());
  const startedAt = new Date(bootEpochMs).toISOString();
  const versions = {
    node: process.version,
    v8: process.versions.v8,
    uv: process.versions.uv,
    openssl: process.versions.openssl,
  };
  const packages = {
    "@botcord/daemon": daemonVersion,
    "@botcord/cli": readInstalledPackageVersion("@botcord/cli/package.json"),
    "@botcord/protocol-core": readInstalledPackageVersion("@botcord/protocol-core/package.json"),
  };

  return {
    packageName,
    version: daemonVersion,
    modulePath: MODULE_PATH,
    entrypoint,
    execPath: process.execPath,
    argv,
    execArgv,
    cwd,
    pid: process.pid,
    ppid: process.ppid,
    uptimeSec,
    startedAt,
    versions,
    packages,
  };
}
182
+
183
/**
 * Build the subset of `process.env` that is reported in diagnostics.
 *
 * A variable is included only when it has a non-empty value and its name is
 * either in `ENV_ALLOWLIST` or starts with `BOTCORD_DAEMON_`. Every reported
 * value is passed through `redact` first.
 *
 * @returns A map from variable name to redacted value.
 */
function safeEnvironmentSummary(): Record<string, string> {
  const isReportable = (name: string): boolean =>
    ENV_ALLOWLIST.has(name) || name.startsWith("BOTCORD_DAEMON_");

  const summary: Record<string, string> = {};
  Object.entries(process.env).forEach(([name, raw]) => {
    // Unset and empty variables carry no diagnostic value.
    if (raw && isReportable(name)) {
      summary[name] = redact(raw);
    }
  });
  return summary;
}
192
+
92
193
  function readUserAuthSummary(): Record<string, unknown> | null {
93
194
  const raw = safeReadText(USER_AUTH_PATH);
94
195
  if (!raw) return null;
@@ -319,6 +420,55 @@ function bundledLogs(logFile: string, includeAllLogs: boolean): LogFileEntry[] {
319
420
  ];
320
421
  }
321
422
 
423
/**
 * Pick the transcript log files to include in a diagnostic bundle.
 *
 * Files are gathered from the default transcript root (searched up to five
 * directory levels below it), ordered newest first with ties broken by
 * descending name, and truncated to the applicable cap.
 *
 * @param includeAll When true the larger `TRANSCRIPT_LOG_DIAGNOSTICS_ALL` cap
 *   applies; otherwise `TRANSCRIPT_LOG_DIAGNOSTICS_DEFAULT`.
 * @returns The selected entries, newest first.
 */
function listTranscriptLogFiles(includeAll: boolean): LogFileEntry[] {
  const transcriptRoot = defaultTranscriptRoot();
  const found: LogFileEntry[] = [];
  collectTranscriptFiles(transcriptRoot, transcriptRoot, found, 5);

  const newestFirst = (a: LogFileEntry, b: LogFileEntry): number => {
    const byMtime = b.mtimeMs - a.mtimeMs;
    return byMtime || b.name.localeCompare(a.name);
  };
  found.sort(newestFirst);

  const cap = includeAll ? TRANSCRIPT_LOG_DIAGNOSTICS_ALL : TRANSCRIPT_LOG_DIAGNOSTICS_DEFAULT;
  return found.slice(0, cap);
}
432
+
433
/**
 * Recursively gather transcript `.jsonl` files beneath `dir` into `out`.
 *
 * An entry is appended for every regular file whose name ends in `.jsonl`,
 * whose full path contains a `transcripts` path segment, and whose size does
 * not exceed `TRANSCRIPT_LOG_MAX_FILE_BYTES`. Directories that cannot be
 * listed and entries that cannot be stat'ed are skipped silently.
 *
 * @param root Base directory; entry names are reported relative to it.
 * @param dir Directory to scan on this call.
 * @param out Accumulator that matching entries are pushed onto.
 * @param maxDepth Remaining directory levels to descend; a negative value
 *   stops the walk.
 */
function collectTranscriptFiles(
  root: string,
  dir: string,
  out: LogFileEntry[],
  maxDepth: number,
): void {
  if (maxDepth < 0) return;

  let children: string[];
  try {
    children = readdirSync(dir);
  } catch {
    return;
  }

  const transcriptSegment = `${path.sep}transcripts${path.sep}`;
  for (const child of children) {
    const fullPath = path.join(dir, child);
    try {
      const info = statSync(fullPath);
      if (info.isDirectory()) {
        collectTranscriptFiles(root, fullPath, out, maxDepth - 1);
        continue;
      }
      const eligible =
        info.isFile() &&
        child.endsWith(".jsonl") &&
        fullPath.includes(transcriptSegment) &&
        info.size <= TRANSCRIPT_LOG_MAX_FILE_BYTES;
      if (!eligible) continue;
      out.push({
        path: fullPath,
        name: path.relative(root, fullPath) || child,
        sizeBytes: info.size,
        mtimeMs: info.mtimeMs,
        active: true,
      });
    } catch {
      // ignore files that disappear while collecting diagnostics
    }
  }
}
471
+
322
472
  export async function createDiagnosticBundle(
323
473
  opts: CreateDiagnosticBundleOptions = {},
324
474
  ): Promise<DiagnosticBundleResult> {
@@ -329,10 +479,12 @@ export async function createDiagnosticBundle(
329
479
  const logFile = opts.logFile ?? LOG_FILE_PATH;
330
480
  const configFile = opts.configFile ?? CONFIG_FILE_PATH;
331
481
  const snapshotFile = opts.snapshotFile ?? SNAPSHOT_PATH;
482
+ const sessionsFile = opts.sessionsFile ?? SESSIONS_PATH;
332
483
  const includeAllLogs = opts.includeAllLogs === true;
333
484
  const logs = bundledLogs(logFile, includeAllLogs);
334
485
  const acpLogs = listAcpTraceLogFiles(includeAllLogs);
335
486
  const runtimeLogs = listRuntimeLogFiles(includeAllLogs);
487
+ const transcriptLogs = listTranscriptLogFiles(includeAllLogs);
336
488
  mkdirSync(diagnosticsDir, { recursive: true, mode: 0o700 });
337
489
 
338
490
  const doctor = opts.doctor ?? await buildDoctorEntries();
@@ -343,9 +495,12 @@ export async function createDiagnosticBundle(
343
495
  release: release(),
344
496
  arch: arch(),
345
497
  node: process.version,
498
+ daemon: daemonRuntimeSummary(),
499
+ environment: safeEnvironmentSummary(),
346
500
  pidPath: PID_PATH,
347
501
  pid: process.pid,
348
502
  configPath: configFile,
503
+ sessionsPath: sessionsFile,
349
504
  snapshotPath: snapshotFile,
350
505
  logPath: logFile,
351
506
  logsBundled: logs.map((entry) => ({
@@ -364,6 +519,11 @@ export async function createDiagnosticBundle(
364
519
  path: entry.path,
365
520
  sizeBytes: entry.sizeBytes,
366
521
  })),
522
+ transcriptLogsBundled: transcriptLogs.map((entry) => ({
523
+ name: entry.name,
524
+ path: entry.path,
525
+ sizeBytes: entry.sizeBytes,
526
+ })),
367
527
  logsBundleMode: includeAllLogs ? "all" : `active_plus_${DEFAULT_ROTATED_LOGS_IN_BUNDLE}_rotated`,
368
528
  diagnosticsDir,
369
529
  userAuth: readUserAuthSummary(),
@@ -403,6 +563,13 @@ export async function createDiagnosticBundle(
403
563
  data: log ?? `no runtime log file at ${entry.path}\n`,
404
564
  });
405
565
  }
566
+ for (const entry of transcriptLogs) {
567
+ const log = safeReadText(entry.path);
568
+ entries.push({
569
+ name: `transcripts/${entry.name.split(path.sep).join("/")}`,
570
+ data: log ?? `no transcript log file at ${entry.path}\n`,
571
+ });
572
+ }
406
573
  const config = safeReadText(configFile);
407
574
  entries.push({
408
575
  name: "config.json.redacted",
@@ -413,6 +580,11 @@ export async function createDiagnosticBundle(
413
580
  name: "snapshot.json",
414
581
  data: snapshot ?? `no snapshot file at ${snapshotFile}\n`,
415
582
  });
583
+ const sessions = safeReadText(sessionsFile);
584
+ entries.push({
585
+ name: "sessions.json.redacted",
586
+ data: sessions ?? `no sessions file at ${sessionsFile}\n`,
587
+ });
416
588
 
417
589
  const zip = createZip(entries);
418
590
  const out = path.join(diagnosticsDir, filename);
@@ -1276,6 +1276,50 @@ describe("Dispatcher", () => {
1276
1276
  expect(blockTypes).toEqual(["system", "tool_use"]);
1277
1277
  });
1278
1278
 
1279
it("transcript: records runtime blocks even when channel streaming is disabled", async () => {
  // A single tool-call block emitted by the fake runtime.
  const toolCallBlock: StreamBlock = {
    raw: {
      params: {
        update: {
          sessionUpdate: "tool_call",
          toolCall: { name: "botcord_send", rawInput: { text: "hello" } },
        },
      },
    },
    kind: "tool_use",
    seq: 7,
  };
  const blocks: StreamBlock[] = [toolCallBlock];
  const runtime = new FakeRuntime({ blocks, reply: "ok", newSessionId: "sid" });
  const channel = new FakeChannel();
  const written: import("../transcript.js").TranscriptRecord[] = [];
  const { store, dir } = await makeStore();
  tempDirs.push(dir);

  const channels = new Map<string, ChannelAdapter>([[channel.id, channel]]);
  const dispatcher = new Dispatcher({
    config: baseConfig(),
    channels,
    runtime: () => runtime,
    sessionStore: store,
    log: silentLogger(),
    transcript: { enabled: true, rootDir: dir, write: (rec) => written.push(rec) },
  });

  // The envelope marks the trace as not streamable.
  const envelope = makeEnvelope({ trace: { id: "tr", streamable: false } });
  await dispatcher.handle(envelope);

  // Nothing was streamed to the channel, yet the block reached the transcript.
  expect(channel.streams).toHaveLength(0);
  const recordedBlock = written.find((rec) => rec.kind === "block");
  expect(recordedBlock).toMatchObject({
    kind: "block",
    blockType: "tool_use",
    seq: 7,
    summary: { type: "tool_use", name: "botcord_send" },
  });
});
1322
+
1279
1323
  it("runtime throws: sends error reply, does not write session", async () => {
1280
1324
  const runtime = new FakeRuntime({ throwError: "boom" });
1281
1325
  const channel = new FakeChannel();
@@ -1,5 +1,5 @@
1
- import { mkdtemp, readFile, rm } from "node:fs/promises";
2
- import { existsSync, statSync } from "node:fs";
1
+ import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
2
+ import { existsSync, statSync, utimesSync } from "node:fs";
3
3
  import { tmpdir } from "node:os";
4
4
  import path from "node:path";
5
5
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
@@ -7,7 +7,9 @@ import { Dispatcher, type RuntimeFactory } from "../dispatcher.js";
7
7
  import { SessionStore } from "../session-store.js";
8
8
  import {
9
9
  createTranscriptWriter,
10
+ cleanupTranscriptFiles,
10
11
  resolveTranscriptEnabled,
12
+ TRANSCRIPT_RETENTION_MS,
11
13
  TRANSCRIPT_TEXT_LIMIT,
12
14
  truncateTextField,
13
15
  type TranscriptRecord,
@@ -239,6 +241,10 @@ describe("resolveTranscriptEnabled", () => {
239
241
  expect(resolveTranscriptEnabled("yes", true)).toBe(true);
240
242
  expect(resolveTranscriptEnabled("yes", false)).toBe(false);
241
243
  });
244
it("defaults on when env and config are both unset", () => {
  // With no config value supplied, both an absent env value and the env
  // value "yes" are expected to resolve to enabled.
  const envValues: Array<string | undefined> = [undefined, "yes"];
  for (const envValue of envValues) {
    expect(resolveTranscriptEnabled(envValue, undefined)).toBe(true);
  }
});
242
248
  });
243
249
 
244
250
  describe("truncateTextField", () => {
@@ -460,6 +466,25 @@ describe("Dispatcher transcript integration", () => {
460
466
  expect(files).toContain("_default.jsonl");
461
467
  });
462
468
 
469
it("cleans transcript files older than the retention window", async () => {
  const workDir = await mkdtemp(path.join(tmpdir(), "transcript-clean-"));
  cleanups.push(() => rm(workDir, { recursive: true, force: true }));

  const oldFile = transcriptFilePath(workDir, "ag_me", "rm_old", null);
  const freshFile = transcriptFilePath(workDir, "ag_me", "rm_fresh", null);
  const fixtures = [oldFile, freshFile];
  for (const file of fixtures) {
    await mkdir(path.dirname(file), { recursive: true });
  }
  for (const file of fixtures) {
    await writeFile(file, "{}\n", { mode: 0o600 });
  }

  // Back-date only the old file to one minute past the retention window.
  const expiredAt = new Date(Date.now() - TRANSCRIPT_RETENTION_MS - 60_000);
  utimesSync(oldFile, expiredAt, expiredAt);

  const cutoffMs = Date.now() - TRANSCRIPT_RETENTION_MS;
  const removed = cleanupTranscriptFiles(workDir, cutoffMs);

  expect(removed).toBe(1);
  expect(existsSync(oldFile)).toBe(false);
  expect(existsSync(freshFile)).toBe(true);
});
487
+
463
488
  it("disabled writer does not create files", async () => {
464
489
  const tmp = await mkdtemp(path.join(tmpdir(), "transcript-off-"));
465
490
  cleanups.push(() => rm(tmp, { recursive: true, force: true }));
@@ -25,6 +25,7 @@ import { sanitizeUntrustedContent } from "./sanitize.js";
25
25
  import { revokeAgent } from "../../provision.js";
26
26
 
27
27
  const RECONNECT_BACKOFF = [1000, 2000, 4000, 8000, 16000, 30000];
28
+ const RECONNECT_JITTER_RATIO = 0.25;
28
29
  const KEEPALIVE_INTERVAL = 20_000;
29
30
  const MAX_AUTH_FAILURES = 5;
30
31
  const SEEN_MESSAGES_CAP = 500;
@@ -33,6 +34,11 @@ const DM_ROOM_PREFIX = "rm_dm_";
33
34
  const INBOX_POLL_LIMIT = 50;
34
35
  const CHANNEL_PERMANENT_STOP = "channel_permanent_stop";
35
36
 
37
/**
 * Add random positive jitter to a reconnect delay.
 *
 * The jitter is an integer number of milliseconds in
 * `[0, delayMs * RECONNECT_JITTER_RATIO)`; it only ever lengthens the delay.
 *
 * @param delayMs Base backoff delay in milliseconds.
 * @returns The lengthened delay and the jitter component on its own.
 */
function withReconnectJitter(delayMs: number): { delayMs: number; jitterMs: number } {
  // Multiplication order is kept as-is so the floored result is unchanged.
  const jitter = Math.floor(Math.random() * delayMs * RECONNECT_JITTER_RATIO);
  const result = { delayMs: delayMs + jitter, jitterMs: jitter };
  return result;
}
41
+
36
42
  type InboxDrainTrigger =
37
43
  | "ws_auth_ok"
38
44
  | "ws_inbox_update"
@@ -477,6 +483,7 @@ export function createBotCordChannel(options: BotCordChannelOptions): ChannelAda
477
483
  let reconnectTimer: NodeJS.Timeout | null = null;
478
484
  let keepaliveTimer: NodeJS.Timeout | null = null;
479
485
  let reconnectAttempt = 0;
486
+ let connectionSeq = 0;
480
487
  let consecutiveAuthFailures = 0;
481
488
  let running = true;
482
489
  let permanentStopping = false;
@@ -603,23 +610,36 @@ export function createBotCordChannel(options: BotCordChannelOptions): ChannelAda
603
610
 
604
611
  function scheduleReconnect() {
605
612
  if (!running) return;
606
- const delay =
613
+ if (reconnectTimer) return;
614
+ if (ws && (ws.readyState === WebSocket.CONNECTING || ws.readyState === WebSocket.OPEN)) {
615
+ return;
616
+ }
617
+ const baseDelayMs =
607
618
  RECONNECT_BACKOFF[Math.min(reconnectAttempt, RECONNECT_BACKOFF.length - 1)];
619
+ const { delayMs, jitterMs } = withReconnectJitter(baseDelayMs);
608
620
  reconnectAttempt += 1;
609
621
  markStatus({
610
622
  connected: false,
611
623
  restartPending: true,
612
624
  reconnectAttempts: reconnectAttempt,
613
625
  });
614
- log.info("botcord ws reconnect scheduled", { delayMs: delay, attempt: reconnectAttempt });
626
+ log.info("botcord ws reconnect scheduled", {
627
+ delayMs,
628
+ baseDelayMs,
629
+ jitterMs,
630
+ attempt: reconnectAttempt,
631
+ });
615
632
  reconnectTimer = setTimeout(() => {
616
633
  reconnectTimer = null;
617
634
  void connect();
618
- }, delay);
635
+ }, delayMs);
619
636
  }
620
637
 
621
638
  async function connect() {
622
639
  if (!running) return;
640
+ if (ws && (ws.readyState === WebSocket.CONNECTING || ws.readyState === WebSocket.OPEN)) {
641
+ return;
642
+ }
623
643
  const agentId = options.agentId;
624
644
  markStatus({ connected: false, restartPending: false });
625
645
  if (pendingRefresh) {
@@ -644,8 +664,11 @@ export function createBotCordChannel(options: BotCordChannelOptions): ChannelAda
644
664
  const url = buildHubWebSocketUrl(hubUrl);
645
665
  log.info("botcord ws connecting", { url, agentId });
646
666
 
667
+ const connectionId = ++connectionSeq;
668
+ let socket: WebSocket;
647
669
  try {
648
- ws = new wsCtor(url);
670
+ socket = new wsCtor(url);
671
+ ws = socket;
649
672
  } catch (err) {
650
673
  log.error("botcord ws construct failed", { agentId, err: String(err) });
651
674
  markStatus({ lastError: String(err) });
@@ -653,11 +676,20 @@ export function createBotCordChannel(options: BotCordChannelOptions): ChannelAda
653
676
  return;
654
677
  }
655
678
 
656
- ws.on("open", () => {
657
- ws!.send(JSON.stringify({ type: "auth", token }));
679
+ socket.on("open", () => {
680
+ if (!running || ws !== socket || connectionId !== connectionSeq) {
681
+ try {
682
+ socket.close();
683
+ } catch {
684
+ // ignore
685
+ }
686
+ return;
687
+ }
688
+ socket.send(JSON.stringify({ type: "auth", token }));
658
689
  });
659
690
 
660
- ws.on("message", (data: WebSocket.RawData) => {
691
+ socket.on("message", (data: WebSocket.RawData) => {
692
+ if (ws !== socket || connectionId !== connectionSeq) return;
661
693
  let msg: { type?: string; agent_id?: string } | null = null;
662
694
  try {
663
695
  msg = JSON.parse(String(data));
@@ -677,10 +709,11 @@ export function createBotCordChannel(options: BotCordChannelOptions): ChannelAda
677
709
  });
678
710
  log.info("botcord ws authenticated", { agentId: msg.agent_id });
679
711
  void fireInbox("ws_auth_ok");
712
+ if (keepaliveTimer) clearInterval(keepaliveTimer);
680
713
  keepaliveTimer = setInterval(() => {
681
- if (ws && ws.readyState === WebSocket.OPEN) {
714
+ if (ws === socket && socket.readyState === WebSocket.OPEN) {
682
715
  try {
683
- ws.send(JSON.stringify({ type: "ping" }));
716
+ socket.send(JSON.stringify({ type: "ping" }));
684
717
  } catch {
685
718
  // ignore
686
719
  }
@@ -696,10 +729,15 @@ export function createBotCordChannel(options: BotCordChannelOptions): ChannelAda
696
729
  }
697
730
  });
698
731
 
699
- ws.on("close", (code: number, reason: Buffer) => {
732
+ socket.on("close", (code: number, reason: Buffer) => {
700
733
  const reasonStr = reason?.toString() || "";
734
+ if (ws !== socket || connectionId !== connectionSeq) {
735
+ log.debug("botcord ws stale close ignored", { agentId, code, reason: reasonStr });
736
+ return;
737
+ }
701
738
  log.info("botcord ws closed", { agentId, code, reason: reasonStr });
702
739
  clearTimers();
740
+ ws = null;
703
741
  markStatus({ connected: false });
704
742
  if (!running) {
705
743
  if (permanentStopping) return;
@@ -740,7 +778,8 @@ export function createBotCordChannel(options: BotCordChannelOptions): ChannelAda
740
778
  scheduleReconnect();
741
779
  });
742
780
 
743
- ws.on("error", (err: Error) => {
781
+ socket.on("error", (err: Error) => {
782
+ if (ws !== socket || connectionId !== connectionSeq) return;
744
783
  log.warn("botcord ws error", { agentId, err: String(err) });
745
784
  markStatus({ lastError: String(err) });
746
785
  });