@xmoxmo/bncr 0.2.5 → 0.2.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/src/channel.ts CHANGED
@@ -77,7 +77,7 @@ import { buildDownlinkHealth as buildDownlinkHealthFromRuntime } from './core/do
77
77
  import { buildExtendedDiagnostics as buildExtendedDiagnosticsFromRuntime } from './core/extended-diagnostics.ts';
78
78
  import { observeLeaseState, matchesTransferOwner as matchesTransferOwnerFromRuntime } from './core/lease-state.ts';
79
79
  import { emitBncrLog, emitBncrLogLine } from './core/logging.ts';
80
- import { resolveBncrChannelPolicy } from './core/policy.ts';
80
+ import { resolveBncrChannelPolicy, resolveBncrConfigWarnings } from './core/policy.ts';
81
81
  import {
82
82
  buildRegisterTraceSummary as buildRegisterTraceSummaryFromEntries,
83
83
  classifyRegisterTrace as classifyRegisterTraceFromStack,
@@ -323,17 +323,35 @@ const BNCR_FILE_ABORT_EVENT = 'plugin.bncr.file.abort';
323
323
  const CONNECT_TTL_MS = 120_000;
324
324
  const RECENT_INBOUND_SEND_WINDOW_MS = 60_000;
325
325
  const MAX_RETRY = 10;
326
+ const MAX_DEAD_LETTER_ENTRIES = 1000;
327
+ const MAX_SESSION_ROUTE_ENTRIES = 1000;
328
+ const MAX_ACCOUNT_ACTIVITY_ENTRIES = 1000;
326
329
  const PUSH_DRAIN_INTERVAL_MS = 500;
330
+ const PUSH_DRAIN_ACCOUNT_BUDGET = 5;
331
+ const PUSH_DRAIN_ACCOUNT_TIME_BUDGET_MS = 2_000;
327
332
  const PUSH_ACK_TIMEOUT_MS = 30_000;
333
+ const ADAPTIVE_ACK_TIMEOUT_DEFAULT_ENABLED = true;
334
+ const RECOMMENDED_ACK_TIMEOUT_MIN_MS = PUSH_ACK_TIMEOUT_MS;
335
+ const RECOMMENDED_ACK_TIMEOUT_MAX_MS = 90_000;
336
+ const ADAPTIVE_ACK_TIMEOUT_OBSERVATION_TTL_MS = 60 * 60 * 1000;
337
+ const ADAPTIVE_ACK_TIMEOUT_RECOVERY_OK_THRESHOLD = 3;
338
+ const ADAPTIVE_ACK_TIMEOUT_LOG_THROTTLE_MS = 5 * 60 * 1000;
328
339
  const OUTBOUND_READY_TTL_MS = 30_000;
329
340
  const PREFERRED_OUTBOUND_TTL_MS = 12_000;
330
341
  const FILE_FORCE_CHUNK = true; // 统一走 WS 分块,保留 base64 仅作兜底
342
+ const LOG_DEDUPE_STATE_TTL_MS = 10 * 60 * 1000;
343
+ const LOG_DEDUPE_STATE_MAX_ENTRIES = 1_000;
331
344
  const FILE_INLINE_THRESHOLD = 5 * 1024 * 1024; // fallback 阈值(仅 FILE_FORCE_CHUNK=false 时生效)
332
345
  const FILE_CHUNK_SIZE = 256 * 1024; // 256KB
346
+ const INBOUND_FILE_TRANSFER_MAX_BYTES = 50 * 1024 * 1024;
347
+ const INBOUND_FILE_TRANSFER_MAX_CHUNKS = Math.ceil(INBOUND_FILE_TRANSFER_MAX_BYTES / FILE_CHUNK_SIZE) + 1;
333
348
  const FILE_CHUNK_RETRY = 3;
334
349
  const FILE_ACK_TIMEOUT_MS = 30_000;
335
350
  const FILE_TRANSFER_ACK_TTL_MS = 30_000;
351
+ const MAX_EARLY_FILE_ACKS = 1000;
352
+ const INTERNAL_SLEEP_MAX_MS = 120_000;
336
353
  const FILE_TRANSFER_KEEP_MS = 6 * 60 * 60 * 1000;
354
+ const FILE_TRANSFER_TERMINAL_KEEP_MS = 10 * 60 * 1000;
337
355
  const REGISTER_WARMUP_WINDOW_MS = 30_000;
338
356
  let BNCR_DEBUG_VERBOSE = false; // 全局调试日志开关(默认关闭)
339
357
 
@@ -355,9 +373,16 @@ type FileSendTransferState = {
355
373
  ownerConnId?: string;
356
374
  ownerClientId?: string;
357
375
  completedPath?: string;
376
+ terminalAt?: number;
358
377
  error?: string;
359
378
  };
360
379
 
380
+ type ChannelAccountWorkerHandle = {
381
+ timer: NodeJS.Timeout;
382
+ finish: (reason: string) => void;
383
+ cleanupAbortListener?: () => void;
384
+ };
385
+
361
386
  type FileRecvTransferState = {
362
387
  transferId: string;
363
388
  accountId: string;
@@ -376,6 +401,7 @@ type FileRecvTransferState = {
376
401
  ownerConnId?: string;
377
402
  ownerClientId?: string;
378
403
  completedPath?: string;
404
+ terminalAt?: number;
379
405
  error?: string;
380
406
  };
381
407
 
@@ -501,6 +527,28 @@ function asString(v: unknown, fallback = ''): string {
501
527
  return String(v);
502
528
  }
503
529
 
530
+ function finiteNumberOr(value: unknown, fallback: number): number {
531
+ const n = Number(value);
532
+ return Number.isFinite(n) ? n : fallback;
533
+ }
534
+
535
+ function optionalFiniteNumber(value: unknown): number | undefined {
536
+ if (value == null || value === '') return undefined;
537
+ const n = Number(value);
538
+ return Number.isFinite(n) ? n : undefined;
539
+ }
540
+
541
+ function finiteNonNegativeNumberOrNull(value: unknown): number | null {
542
+ const n = Number(value);
543
+ return Number.isFinite(n) && n >= 0 ? n : null;
544
+ }
545
+
546
+ function clampFiniteNumber(value: unknown, fallback: number, min: number, max: number): number {
547
+ const n = Number(value);
548
+ const finite = Number.isFinite(n) ? n : fallback;
549
+ return Math.max(min, Math.min(finite, max));
550
+ }
551
+
504
552
  function isPlainObject(value: unknown): value is Record<string, unknown> {
505
553
  return typeof value === 'object' && value !== null && !Array.isArray(value);
506
554
  }
@@ -658,7 +706,15 @@ class BncrBridgeRuntime {
658
706
  private lastAckOkByAccount = new Map<string, number>();
659
707
  private lastAckTimeoutByAccount = new Map<string, number>();
660
708
  private ackTimeoutCountByAccount = new Map<string, number>();
661
- private channelAccountTimers = new Map<string, NodeJS.Timeout>();
709
+ private lateAckOkCountByAccount = new Map<string, number>();
710
+ private lastLateAckOkByAccount = new Map<string, number>();
711
+ private lastAckQueueLatencyMsByAccount = new Map<string, number>();
712
+ private lastAckPushLatencyMsByAccount = new Map<string, number>();
713
+ private lastLateAckQueueLatencyMsByAccount = new Map<string, number>();
714
+ private lastLateAckPushLatencyMsByAccount = new Map<string, number>();
715
+ private adaptiveAckRecoveryOkCountByAccount = new Map<string, number>();
716
+ private adaptiveAckTimeoutLogStateByAccount = new Map<string, { at: number; timeoutMs: number; reason: string }>();
717
+ private channelAccountWorkers = new Map<string, ChannelAccountWorkerHandle>();
662
718
  private logDedupeState = new Map<string, { at: number; sig: string }>();
663
719
  private canonicalAgentId: string | null = null;
664
720
  private canonicalAgentSource: 'startup' | 'runtime' | 'fallback-main' | null = null;
@@ -666,6 +722,7 @@ class BncrBridgeRuntime {
666
722
 
667
723
  // 内置健康/回归计数(替代独立脚本)
668
724
  private startedAt = now();
725
+ private stopped = false;
669
726
  private connectEventsByAccount = new Map<string, number>();
670
727
  private inboundEventsByAccount = new Map<string, number>();
671
728
  private activityEventsByAccount = new Map<string, number>();
@@ -682,6 +739,7 @@ class BncrBridgeRuntime {
682
739
  // then move storage + resolver/wait APIs together rather than partially splitting the map only.
683
740
  string,
684
741
  {
742
+ promise: Promise<'acked' | 'timeout'>;
685
743
  resolve: (result: 'acked' | 'timeout') => void;
686
744
  timer: NodeJS.Timeout;
687
745
  }
@@ -694,6 +752,7 @@ class BncrBridgeRuntime {
694
752
  private fileAckWaiters = new Map<
695
753
  string,
696
754
  {
755
+ promise: Promise<Record<string, unknown>>;
697
756
  resolve: (payload: Record<string, unknown>) => void;
698
757
  reject: (err: Error) => void;
699
758
  timer: NodeJS.Timeout;
@@ -701,6 +760,15 @@ class BncrBridgeRuntime {
701
760
  >();
702
761
  private earlyFileAcks = new Map<string, FileAckPayloadState>();
703
762
 
763
+ private rememberEarlyFileAck(key: string, state: FileAckPayloadState) {
764
+ this.earlyFileAcks.set(key, state);
765
+ while (this.earlyFileAcks.size > MAX_EARLY_FILE_ACKS) {
766
+ const oldestKey = this.earlyFileAcks.keys().next().value;
767
+ if (!oldestKey) break;
768
+ this.earlyFileAcks.delete(oldestKey);
769
+ }
770
+ }
771
+
704
772
  constructor(api: OpenClawPluginApi) {
705
773
  this.api = api;
706
774
  }
@@ -756,11 +824,27 @@ class BncrBridgeRuntime {
756
824
  this.logError(scope, this.buildDebugJsonMessage(event, payload), options);
757
825
  }
758
826
 
827
+ private pruneLogDedupeState(currentTime = now()) {
828
+ for (const [key, entry] of this.logDedupeState.entries()) {
829
+ if (currentTime - entry.at > LOG_DEDUPE_STATE_TTL_MS) {
830
+ this.logDedupeState.delete(key);
831
+ }
832
+ }
833
+
834
+ while (this.logDedupeState.size > LOG_DEDUPE_STATE_MAX_ENTRIES) {
835
+ const oldestKey = this.logDedupeState.keys().next().value;
836
+ if (!oldestKey) break;
837
+ this.logDedupeState.delete(oldestKey);
838
+ }
839
+ }
840
+
759
841
  private shouldEmitDedupLog(key: string, sig: string, windowMs = 5 * 60 * 1000) {
760
842
  const t = now();
843
+ this.pruneLogDedupeState(t);
761
844
  const prev = this.logDedupeState.get(key) || null;
762
845
  if (prev && prev.sig === sig && t - prev.at < windowMs) return false;
763
846
  this.logDedupeState.set(key, { at: t, sig });
847
+ this.pruneLogDedupeState(t);
764
848
  return true;
765
849
  }
766
850
 
@@ -859,10 +943,9 @@ class BncrBridgeRuntime {
859
943
  }
860
944
 
861
945
  private clearChannelAccountWorker(accountId: string, reason: string) {
862
- const timer = this.channelAccountTimers.get(accountId);
863
- if (!timer) return false;
864
- clearInterval(timer);
865
- this.channelAccountTimers.delete(accountId);
946
+ const worker = this.channelAccountWorkers.get(accountId);
947
+ if (!worker) return false;
948
+ worker.finish(reason);
866
949
  this.logInfo(
867
950
  'health',
868
951
  `status-worker cleared ${JSON.stringify({ bridge: this.bridgeId, accountId, reason })}`,
@@ -871,6 +954,12 @@ class BncrBridgeRuntime {
871
954
  return true;
872
955
  }
873
956
 
957
+ private clearAllChannelAccountWorkers(reason: string) {
958
+ for (const accountId of Array.from(this.channelAccountWorkers.keys())) {
959
+ this.clearChannelAccountWorker(accountId, reason);
960
+ }
961
+ }
962
+
874
963
  private captureDriftSnapshot(
875
964
  summary: ReturnType<BncrBridgeRuntime['buildRegisterTraceSummary']>,
876
965
  ) {
@@ -1106,14 +1195,18 @@ class BncrBridgeRuntime {
1106
1195
  }
1107
1196
 
1108
1197
  startService = async (ctx: OpenClawPluginServiceContext, debug?: boolean) => {
1198
+ this.stopped = false;
1109
1199
  this.statePath = path.join(ctx.stateDir, 'bncr-bridge-state.json');
1110
- await this.loadState();
1111
1200
  try {
1112
1201
  const cfg = this.api.runtime.config.current();
1113
1202
  this.initializeCanonicalAgentId(cfg);
1203
+ for (const warning of resolveBncrConfigWarnings(cfg?.channels?.[CHANNEL_ID] || {})) {
1204
+ this.logWarn('config', warning);
1205
+ }
1114
1206
  } catch {
1115
1207
  // ignore startup canonical agent initialization errors
1116
1208
  }
1209
+ await this.loadState();
1117
1210
  if (typeof debug === 'boolean') BNCR_DEBUG_VERBOSE = debug;
1118
1211
  await this.refreshDebugFlagFromConfig({ forceLog: true });
1119
1212
  const bootDiag = this.buildIntegratedDiagnostics(BNCR_DEFAULT_ACCOUNT_ID);
@@ -1129,15 +1222,34 @@ class BncrBridgeRuntime {
1129
1222
  };
1130
1223
 
1131
1224
  stopService = async () => {
1132
- if (this.pushTimer) {
1133
- clearTimeout(this.pushTimer);
1134
- this.pushTimer = null;
1135
- }
1225
+ this.cleanupRuntimeWaitersAndTimers('service stopped');
1136
1226
  await this.flushState();
1137
1227
  this.logInfo('debug', 'service stopped', { debugOnly: true });
1138
1228
  };
1139
1229
 
1140
1230
  shutdown() {
1231
+ this.cleanupRuntimeWaitersAndTimers('shutdown');
1232
+ }
1233
+
1234
+ private cleanupRuntimeWaitersAndTimers(reason: string) {
1235
+ this.logInfo(
1236
+ 'lifecycle',
1237
+ `cleanup ${JSON.stringify({
1238
+ bridge: this.bridgeId,
1239
+ reason,
1240
+ messageAckWaiters: this.messageAckWaiters.size,
1241
+ fileAckWaiters: this.fileAckWaiters.size,
1242
+ earlyFileAcks: this.earlyFileAcks.size,
1243
+ outbox: this.outbox.size,
1244
+ runningDrainAccounts: this.pushDrainRunningAccounts.size,
1245
+ channelAccountWorkers: this.channelAccountWorkers.size,
1246
+ hasSaveTimer: !!this.saveTimer,
1247
+ hasPushTimer: !!this.pushTimer,
1248
+ })}`,
1249
+ { debugOnly: true },
1250
+ );
1251
+ this.stopped = true;
1252
+ this.clearAllChannelAccountWorkers(reason);
1141
1253
  if (this.saveTimer) {
1142
1254
  clearTimeout(this.saveTimer);
1143
1255
  this.saveTimer = null;
@@ -1148,19 +1260,23 @@ class BncrBridgeRuntime {
1148
1260
  }
1149
1261
  for (const waiter of this.messageAckWaiters.values()) {
1150
1262
  clearTimeout(waiter.timer);
1263
+ waiter.resolve('timeout');
1151
1264
  }
1152
1265
  this.messageAckWaiters.clear();
1153
1266
  for (const waiter of this.fileAckWaiters.values()) {
1154
1267
  clearTimeout(waiter.timer);
1268
+ waiter.reject(new Error(reason));
1155
1269
  }
1156
1270
  this.fileAckWaiters.clear();
1157
1271
  this.earlyFileAcks.clear();
1158
1272
  }
1159
1273
 
1160
1274
  private scheduleSave() {
1275
+ if (this.stopped) return;
1161
1276
  if (this.saveTimer) return;
1162
1277
  this.saveTimer = setTimeout(() => {
1163
1278
  this.saveTimer = null;
1279
+ if (this.stopped) return;
1164
1280
  void this.flushState();
1165
1281
  }, 300);
1166
1282
  }
@@ -1297,7 +1413,13 @@ class BncrBridgeRuntime {
1297
1413
  }
1298
1414
 
1299
1415
  private buildIntegratedDiagnostics(accountId: string) {
1300
- return buildIntegratedDiagnosticsFromRuntime(this.buildRuntimeStatusInput(accountId));
1416
+ const ackObservability = this.buildRuntimeAckObservability(accountId);
1417
+ const ackStrategy = this.buildRuntimeAckStrategy(ackObservability);
1418
+ return {
1419
+ ...buildIntegratedDiagnosticsFromRuntime(this.buildRuntimeStatusInput(accountId)),
1420
+ ackObservability,
1421
+ ackStrategy,
1422
+ };
1301
1423
  }
1302
1424
 
1303
1425
  private buildDownlinkHealth(accountId: string) {
@@ -1347,10 +1469,10 @@ class BncrBridgeRuntime {
1347
1469
  sessionKey: normalized.sessionKey,
1348
1470
  route,
1349
1471
  payload,
1350
- createdAt: Number(entry.createdAt || now()),
1351
- retryCount: Number(entry.retryCount || 0),
1352
- nextAttemptAt: Number(entry.nextAttemptAt || now()),
1353
- lastAttemptAt: entry.lastAttemptAt ? Number(entry.lastAttemptAt) : undefined,
1472
+ createdAt: finiteNumberOr(entry.createdAt, now()),
1473
+ retryCount: finiteNumberOr(entry.retryCount, 0),
1474
+ nextAttemptAt: finiteNumberOr(entry.nextAttemptAt, now()),
1475
+ lastAttemptAt: optionalFiniteNumber(entry.lastAttemptAt),
1354
1476
  lastError: entry.lastError ? asString(entry.lastError) : undefined,
1355
1477
  };
1356
1478
 
@@ -1358,7 +1480,10 @@ class BncrBridgeRuntime {
1358
1480
  }
1359
1481
 
1360
1482
  this.deadLetter = [];
1361
- for (const entry of Array.isArray(data.deadLetter) ? data.deadLetter : []) {
1483
+ const persistedDeadLetter = Array.isArray(data.deadLetter)
1484
+ ? data.deadLetter.slice(-MAX_DEAD_LETTER_ENTRIES)
1485
+ : [];
1486
+ for (const entry of persistedDeadLetter) {
1362
1487
  if (!entry?.messageId) continue;
1363
1488
  const accountId = normalizeAccountId(entry.accountId);
1364
1489
  const sessionKey = asString(entry.sessionKey || '').trim();
@@ -1379,17 +1504,20 @@ class BncrBridgeRuntime {
1379
1504
  sessionKey: normalized.sessionKey,
1380
1505
  route,
1381
1506
  payload,
1382
- createdAt: Number(entry.createdAt || now()),
1383
- retryCount: Number(entry.retryCount || 0),
1384
- nextAttemptAt: Number(entry.nextAttemptAt || now()),
1385
- lastAttemptAt: entry.lastAttemptAt ? Number(entry.lastAttemptAt) : undefined,
1507
+ createdAt: finiteNumberOr(entry.createdAt, now()),
1508
+ retryCount: finiteNumberOr(entry.retryCount, 0),
1509
+ nextAttemptAt: finiteNumberOr(entry.nextAttemptAt, now()),
1510
+ lastAttemptAt: optionalFiniteNumber(entry.lastAttemptAt),
1386
1511
  lastError: entry.lastError ? asString(entry.lastError) : undefined,
1387
1512
  });
1388
1513
  }
1389
1514
 
1390
1515
  this.sessionRoutes.clear();
1391
1516
  this.routeAliases.clear();
1392
- for (const item of data.sessionRoutes || []) {
1517
+ const persistedSessionRoutes = Array.isArray(data.sessionRoutes)
1518
+ ? data.sessionRoutes.slice(-MAX_SESSION_ROUTE_ENTRIES)
1519
+ : [];
1520
+ for (const item of persistedSessionRoutes) {
1393
1521
  const normalized = normalizeStoredSessionKey(
1394
1522
  asString(item?.sessionKey || ''),
1395
1523
  this.canonicalAgentId,
@@ -1398,7 +1526,7 @@ class BncrBridgeRuntime {
1398
1526
 
1399
1527
  const route = parseRouteLike(item?.route) || normalized.route;
1400
1528
  const accountId = normalizeAccountId(item?.accountId);
1401
- const updatedAt = Number(item?.updatedAt || now());
1529
+ const updatedAt = finiteNumberOr(item?.updatedAt, now());
1402
1530
 
1403
1531
  const info = {
1404
1532
  accountId,
@@ -1411,14 +1539,17 @@ class BncrBridgeRuntime {
1411
1539
  }
1412
1540
 
1413
1541
  this.lastSessionByAccount.clear();
1414
- for (const item of data.lastSessionByAccount || []) {
1542
+ const persistedLastSessionByAccount = Array.isArray(data.lastSessionByAccount)
1543
+ ? data.lastSessionByAccount.slice(-MAX_ACCOUNT_ACTIVITY_ENTRIES)
1544
+ : [];
1545
+ for (const item of persistedLastSessionByAccount) {
1415
1546
  const accountId = normalizeAccountId(item?.accountId);
1416
1547
  const normalized = normalizeStoredSessionKey(
1417
1548
  asString(item?.sessionKey || ''),
1418
1549
  this.canonicalAgentId,
1419
1550
  );
1420
- const updatedAt = Number(item?.updatedAt || 0);
1421
- if (!normalized || !Number.isFinite(updatedAt) || updatedAt <= 0) continue;
1551
+ const updatedAt = finiteNumberOr(item?.updatedAt, 0);
1552
+ if (!normalized || updatedAt <= 0) continue;
1422
1553
 
1423
1554
  this.lastSessionByAccount.set(accountId, {
1424
1555
  sessionKey: normalized.sessionKey,
@@ -1429,33 +1560,42 @@ class BncrBridgeRuntime {
1429
1560
  }
1430
1561
 
1431
1562
  this.lastActivityByAccount.clear();
1432
- for (const item of data.lastActivityByAccount || []) {
1563
+ const persistedLastActivityByAccount = Array.isArray(data.lastActivityByAccount)
1564
+ ? data.lastActivityByAccount.slice(-MAX_ACCOUNT_ACTIVITY_ENTRIES)
1565
+ : [];
1566
+ for (const item of persistedLastActivityByAccount) {
1433
1567
  const accountId = normalizeAccountId(item?.accountId);
1434
- const updatedAt = Number(item?.updatedAt || 0);
1435
- if (!Number.isFinite(updatedAt) || updatedAt <= 0) continue;
1568
+ const updatedAt = finiteNumberOr(item?.updatedAt, 0);
1569
+ if (updatedAt <= 0) continue;
1436
1570
  this.lastActivityByAccount.set(accountId, updatedAt);
1437
1571
  }
1438
1572
 
1439
1573
  this.lastInboundByAccount.clear();
1440
- for (const item of data.lastInboundByAccount || []) {
1574
+ const persistedLastInboundByAccount = Array.isArray(data.lastInboundByAccount)
1575
+ ? data.lastInboundByAccount.slice(-MAX_ACCOUNT_ACTIVITY_ENTRIES)
1576
+ : [];
1577
+ for (const item of persistedLastInboundByAccount) {
1441
1578
  const accountId = normalizeAccountId(item?.accountId);
1442
- const updatedAt = Number(item?.updatedAt || 0);
1443
- if (!Number.isFinite(updatedAt) || updatedAt <= 0) continue;
1579
+ const updatedAt = finiteNumberOr(item?.updatedAt, 0);
1580
+ if (updatedAt <= 0) continue;
1444
1581
  this.lastInboundByAccount.set(accountId, updatedAt);
1445
1582
  }
1446
1583
 
1447
1584
  this.lastOutboundByAccount.clear();
1448
- for (const item of data.lastOutboundByAccount || []) {
1585
+ const persistedLastOutboundByAccount = Array.isArray(data.lastOutboundByAccount)
1586
+ ? data.lastOutboundByAccount.slice(-MAX_ACCOUNT_ACTIVITY_ENTRIES)
1587
+ : [];
1588
+ for (const item of persistedLastOutboundByAccount) {
1449
1589
  const accountId = normalizeAccountId(item?.accountId);
1450
- const updatedAt = Number(item?.updatedAt || 0);
1451
- if (!Number.isFinite(updatedAt) || updatedAt <= 0) continue;
1590
+ const updatedAt = finiteNumberOr(item?.updatedAt, 0);
1591
+ if (updatedAt <= 0) continue;
1452
1592
  this.lastOutboundByAccount.set(accountId, updatedAt);
1453
1593
  }
1454
1594
 
1455
1595
  this.lastDriftSnapshot =
1456
1596
  data.lastDriftSnapshot && typeof data.lastDriftSnapshot === 'object'
1457
1597
  ? {
1458
- capturedAt: Number((data.lastDriftSnapshot as any).capturedAt || 0),
1598
+ capturedAt: finiteNumberOr((data.lastDriftSnapshot as any).capturedAt, 0),
1459
1599
  registerCount: Number.isFinite(Number((data.lastDriftSnapshot as any).registerCount))
1460
1600
  ? Number((data.lastDriftSnapshot as any).registerCount)
1461
1601
  : null,
@@ -1478,7 +1618,7 @@ class BncrBridgeRuntime {
1478
1618
  typeof (data.lastDriftSnapshot as any).sourceBuckets === 'object'
1479
1619
  ? { ...((data.lastDriftSnapshot as any).sourceBuckets as Record<string, number>) }
1480
1620
  : {},
1481
- traceWindowSize: Number((data.lastDriftSnapshot as any).traceWindowSize || 0),
1621
+ traceWindowSize: finiteNumberOr((data.lastDriftSnapshot as any).traceWindowSize, 0),
1482
1622
  traceRecent: Array.isArray((data.lastDriftSnapshot as any).traceRecent)
1483
1623
  ? [...((data.lastDriftSnapshot as any).traceRecent as Array<Record<string, unknown>>)]
1484
1624
  : [],
@@ -1489,8 +1629,8 @@ class BncrBridgeRuntime {
1489
1629
  if (this.lastSessionByAccount.size === 0 && this.sessionRoutes.size > 0) {
1490
1630
  for (const [sessionKey, info] of this.sessionRoutes.entries()) {
1491
1631
  const acc = normalizeAccountId(info.accountId);
1492
- const updatedAt = Number(info.updatedAt || 0);
1493
- if (!Number.isFinite(updatedAt) || updatedAt <= 0) continue;
1632
+ const updatedAt = finiteNumberOr(info.updatedAt, 0);
1633
+ if (updatedAt <= 0) continue;
1494
1634
 
1495
1635
  const current = this.lastSessionByAccount.get(acc);
1496
1636
  if (!current || updatedAt >= current.updatedAt) {
@@ -1521,38 +1661,38 @@ class BncrBridgeRuntime {
1521
1661
  route: v.route,
1522
1662
  updatedAt: v.updatedAt,
1523
1663
  }))
1524
- .slice(-1000);
1664
+ .slice(-MAX_SESSION_ROUTE_ENTRIES);
1525
1665
 
1526
1666
  const data: PersistedState = {
1527
1667
  outbox: Array.from(this.outbox.values()),
1528
- deadLetter: this.deadLetter.slice(-1000),
1668
+ deadLetter: this.deadLetter.slice(-MAX_DEAD_LETTER_ENTRIES),
1529
1669
  sessionRoutes,
1530
- lastSessionByAccount: Array.from(this.lastSessionByAccount.entries()).map(
1531
- ([accountId, v]) => ({
1670
+ lastSessionByAccount: Array.from(this.lastSessionByAccount.entries())
1671
+ .map(([accountId, v]) => ({
1532
1672
  accountId,
1533
1673
  sessionKey: v.sessionKey,
1534
1674
  scope: v.scope,
1535
1675
  updatedAt: v.updatedAt,
1536
- }),
1537
- ),
1538
- lastActivityByAccount: Array.from(this.lastActivityByAccount.entries()).map(
1539
- ([accountId, updatedAt]) => ({
1676
+ }))
1677
+ .slice(-MAX_ACCOUNT_ACTIVITY_ENTRIES),
1678
+ lastActivityByAccount: Array.from(this.lastActivityByAccount.entries())
1679
+ .map(([accountId, updatedAt]) => ({
1540
1680
  accountId,
1541
1681
  updatedAt,
1542
- }),
1543
- ),
1544
- lastInboundByAccount: Array.from(this.lastInboundByAccount.entries()).map(
1545
- ([accountId, updatedAt]) => ({
1682
+ }))
1683
+ .slice(-MAX_ACCOUNT_ACTIVITY_ENTRIES),
1684
+ lastInboundByAccount: Array.from(this.lastInboundByAccount.entries())
1685
+ .map(([accountId, updatedAt]) => ({
1546
1686
  accountId,
1547
1687
  updatedAt,
1548
- }),
1549
- ),
1550
- lastOutboundByAccount: Array.from(this.lastOutboundByAccount.entries()).map(
1551
- ([accountId, updatedAt]) => ({
1688
+ }))
1689
+ .slice(-MAX_ACCOUNT_ACTIVITY_ENTRIES),
1690
+ lastOutboundByAccount: Array.from(this.lastOutboundByAccount.entries())
1691
+ .map(([accountId, updatedAt]) => ({
1552
1692
  accountId,
1553
1693
  updatedAt,
1554
- }),
1555
- ),
1694
+ }))
1695
+ .slice(-MAX_ACCOUNT_ACTIVITY_ENTRIES),
1556
1696
  lastDriftSnapshot: this.lastDriftSnapshot
1557
1697
  ? {
1558
1698
  capturedAt: this.lastDriftSnapshot.capturedAt,
@@ -1606,11 +1746,11 @@ class BncrBridgeRuntime {
1606
1746
 
1607
1747
  const recentInboundConnIds = this.resolveRecentInboundConnIds(acc);
1608
1748
  const candidateScore = (conn: BncrConnection) => {
1609
- const preferredForOutboundUntil = Number((conn as any).preferredForOutboundUntil || 0);
1610
- const outboundReadyUntil = Number((conn as any).outboundReadyUntil || 0);
1611
- const lastPushTimeoutAt = Number((conn as any).lastPushTimeoutAt || 0);
1612
- const lastAckOkAt = Number((conn as any).lastAckOkAt || 0);
1613
- const pushFailureScore = Number((conn as any).pushFailureScore || 0);
1749
+ const preferredForOutboundUntil = finiteNumberOr((conn as any).preferredForOutboundUntil, 0);
1750
+ const outboundReadyUntil = finiteNumberOr((conn as any).outboundReadyUntil, 0);
1751
+ const lastPushTimeoutAt = finiteNumberOr((conn as any).lastPushTimeoutAt, 0);
1752
+ const lastAckOkAt = finiteNumberOr((conn as any).lastAckOkAt, 0);
1753
+ const pushFailureScore = finiteNumberOr((conn as any).pushFailureScore, 0);
1614
1754
  const recentTimeoutPenalty = lastPushTimeoutAt > 0 && t - lastPushTimeoutAt <= 30_000 ? 1 : 0;
1615
1755
  return {
1616
1756
  preferred: preferredForOutboundUntil > t ? 1 : 0,
@@ -1702,11 +1842,11 @@ class BncrBridgeRuntime {
1702
1842
 
1703
1843
  const recentInboundConnIds = this.resolveRecentInboundConnIds(acc);
1704
1844
  const candidateScore = (conn: BncrConnection) => {
1705
- const preferredForOutboundUntil = Number((conn as any).preferredForOutboundUntil || 0);
1706
- const outboundReadyUntil = Number((conn as any).outboundReadyUntil || 0);
1707
- const lastPushTimeoutAt = Number((conn as any).lastPushTimeoutAt || 0);
1708
- const lastAckOkAt = Number((conn as any).lastAckOkAt || 0);
1709
- const pushFailureScore = Number((conn as any).pushFailureScore || 0);
1845
+ const preferredForOutboundUntil = finiteNumberOr((conn as any).preferredForOutboundUntil, 0);
1846
+ const outboundReadyUntil = finiteNumberOr((conn as any).outboundReadyUntil, 0);
1847
+ const lastPushTimeoutAt = finiteNumberOr((conn as any).lastPushTimeoutAt, 0);
1848
+ const lastAckOkAt = finiteNumberOr((conn as any).lastAckOkAt, 0);
1849
+ const pushFailureScore = finiteNumberOr((conn as any).pushFailureScore, 0);
1710
1850
  const recentTimeoutPenalty = lastPushTimeoutAt > 0 && t - lastPushTimeoutAt <= 30_000 ? 1 : 0;
1711
1851
  return {
1712
1852
  preferred: preferredForOutboundUntil > t ? 1 : 0,
@@ -2356,17 +2496,26 @@ class BncrBridgeRuntime {
2356
2496
  }
2357
2497
 
2358
2498
  private logOutboxAckSummary(
2359
- scope: 'outbox ack ok' | 'outbox ack retry' | 'outbox ack timeout' | 'outbox ack fatal',
2499
+ scope:
2500
+ | 'outbox ack ok'
2501
+ | 'outbox ack ok late'
2502
+ | 'outbox ack retry'
2503
+ | 'outbox ack timeout'
2504
+ | 'outbox ack fatal',
2360
2505
  args: {
2361
2506
  messageId: string;
2362
2507
  connId?: string;
2363
2508
  clientId?: string;
2364
2509
  err?: string;
2510
+ queueMs?: number | null;
2511
+ pushMs?: number | null;
2512
+ waitMs?: number | null;
2365
2513
  },
2366
2514
  ) {
2367
2515
  const parts = [`mid=${args.messageId}`, `q=${this.outbox.size}`];
2368
- if (args.connId) parts.push(`conn=${args.connId}`);
2369
- if (args.clientId) parts.push(`client=${args.clientId}`);
2516
+ if (typeof args.queueMs === 'number') parts.push(`queueMs=${args.queueMs}`);
2517
+ if (typeof args.pushMs === 'number') parts.push(`pushMs=${args.pushMs}`);
2518
+ if (typeof args.waitMs === 'number') parts.push(`waitMs=${args.waitMs}`);
2370
2519
  if (args.err) parts.push(`err=${args.err}`);
2371
2520
  this.logInfo(scope, parts.join('|'));
2372
2521
  }
@@ -2377,6 +2526,7 @@ class BncrBridgeRuntime {
2377
2526
  ackResult: 'acked' | 'timeout';
2378
2527
  onlineNow: boolean;
2379
2528
  recentInboundReachable: boolean;
2529
+ ackTimeoutMs?: number | null;
2380
2530
  }) {
2381
2531
  this.logInfo(
2382
2532
  'outbox',
@@ -2384,12 +2534,19 @@ class BncrBridgeRuntime {
2384
2534
  buildOutboxAckDebugInfo({
2385
2535
  messageId: args.entry.messageId,
2386
2536
  accountId: args.entry.accountId,
2537
+ sessionKey: args.entry.sessionKey,
2538
+ to: formatDisplayScope(args.entry.route),
2387
2539
  kind:
2388
2540
  isPlainObject(args.entry.payload?._meta) && args.entry.payload?._meta?.kind === 'file-transfer'
2389
2541
  ? 'file-transfer'
2390
2542
  : undefined,
2391
2543
  requireAck: args.requireAck,
2392
2544
  ackResult: args.ackResult,
2545
+ ackStage: 'message',
2546
+ ackOutcome: args.ackResult,
2547
+ reason: args.ackResult === 'timeout' ? OUTBOUND_TERMINAL_REASON.PUSH_ACK_TIMEOUT : 'message-acked',
2548
+ ackTimeoutMs: typeof args.ackTimeoutMs === 'number' ? args.ackTimeoutMs : undefined,
2549
+ adaptiveAckTimeoutEnabled: ADAPTIVE_ACK_TIMEOUT_DEFAULT_ENABLED,
2393
2550
  onlineNow: args.onlineNow,
2394
2551
  recentInboundReachable: args.recentInboundReachable,
2395
2552
  connIds: args.entry.lastPushConnId ? [args.entry.lastPushConnId] : [],
@@ -2410,6 +2567,7 @@ class BncrBridgeRuntime {
2410
2567
  availableConnIds: string[];
2411
2568
  decision: ReturnType<typeof computeRetryRerouteDecision>;
2412
2569
  localNextDelay: number | null;
2570
+ ackTimeoutMs?: number | null;
2413
2571
  }) {
2414
2572
  this.logOutboxAckSummary(
2415
2573
  args.requireAck ? 'outbox ack timeout' : 'outbox ack retry',
@@ -2418,6 +2576,7 @@ class BncrBridgeRuntime {
2418
2576
  connId: args.entry.lastPushConnId,
2419
2577
  clientId: args.entry.lastPushClientId,
2420
2578
  err: args.requireAck ? undefined : args.entry.lastError,
2579
+ waitMs: args.requireAck ? args.ackTimeoutMs : undefined,
2421
2580
  },
2422
2581
  );
2423
2582
  this.logInfo(
@@ -2500,6 +2659,11 @@ class BncrBridgeRuntime {
2500
2659
  return null;
2501
2660
  }
2502
2661
 
2662
+ if (this.stopped) {
2663
+ respond(true, { ok: true, ignored: true, reason: 'service-stopped' });
2664
+ return null;
2665
+ }
2666
+
2503
2667
  const entry = this.outbox.get(messageId);
2504
2668
  if (!entry) {
2505
2669
  respond(true, { ok: true, message: 'already-acked-or-missing', stale: staleObserved.stale });
@@ -2548,6 +2712,7 @@ class BncrBridgeRuntime {
2548
2712
  connId: string;
2549
2713
  clientId?: string;
2550
2714
  stale: boolean;
2715
+ entry: OutboxEntry;
2551
2716
  }) {
2552
2717
  this.markOutboundCapability({
2553
2718
  accountId: args.accountId,
@@ -2556,14 +2721,47 @@ class BncrBridgeRuntime {
2556
2721
  outboundReady: true,
2557
2722
  preferredForOutbound: true,
2558
2723
  });
2559
- this.lastAckOkByAccount.set(args.accountId, now());
2724
+ const ackAt = now();
2725
+ this.lastAckOkByAccount.set(args.accountId, ackAt);
2726
+ const ackQueueLatencyMs = Math.max(0, ackAt - finiteNumberOr(args.entry.createdAt, ackAt));
2727
+ const ackPushLatencyMs =
2728
+ typeof args.entry.lastPushAt === 'number'
2729
+ ? Math.max(0, ackAt - args.entry.lastPushAt)
2730
+ : null;
2731
+ this.lastAckQueueLatencyMsByAccount.set(args.accountId, ackQueueLatencyMs);
2732
+ if (typeof ackPushLatencyMs === 'number') {
2733
+ this.lastAckPushLatencyMsByAccount.set(args.accountId, ackPushLatencyMs);
2734
+ }
2735
+ const lateAccepted = args.entry.awaitingRetryPush === true;
2736
+ if (lateAccepted) {
2737
+ this.adaptiveAckRecoveryOkCountByAccount.set(args.accountId, 0);
2738
+ this.lateAckOkCountByAccount.set(
2739
+ args.accountId,
2740
+ this.getCounter(this.lateAckOkCountByAccount, args.accountId) + 1,
2741
+ );
2742
+ this.lastLateAckOkByAccount.set(args.accountId, ackAt);
2743
+ this.lastLateAckQueueLatencyMsByAccount.set(args.accountId, ackQueueLatencyMs);
2744
+ if (typeof ackPushLatencyMs === 'number') {
2745
+ this.lastLateAckPushLatencyMsByAccount.set(args.accountId, ackPushLatencyMs);
2746
+ }
2747
+ args.entry.awaitingRetryPush = false;
2748
+ args.entry.lastError = undefined;
2749
+ } else if (typeof ackPushLatencyMs === 'number' && ackPushLatencyMs <= PUSH_ACK_TIMEOUT_MS) {
2750
+ this.adaptiveAckRecoveryOkCountByAccount.set(
2751
+ args.accountId,
2752
+ this.getCounter(this.adaptiveAckRecoveryOkCountByAccount, args.accountId) + 1,
2753
+ );
2754
+ }
2560
2755
  this.outbox.delete(args.messageId);
2561
2756
  this.scheduleSave();
2562
2757
  this.resolveMessageAck(args.messageId, 'acked');
2563
- this.logOutboxAckSummary('outbox ack ok', {
2758
+ this.logOutboxAckSummary(lateAccepted ? 'outbox ack ok late' : 'outbox ack ok', {
2564
2759
  messageId: args.messageId,
2565
2760
  connId: args.connId,
2566
2761
  clientId: args.clientId,
2762
+ queueMs: ackQueueLatencyMs,
2763
+ pushMs: ackPushLatencyMs,
2764
+ err: lateAccepted ? 'accepted-after-timeout' : undefined,
2567
2765
  });
2568
2766
  }
2569
2767
 
@@ -2592,6 +2790,7 @@ class BncrBridgeRuntime {
2592
2790
  }) {
2593
2791
  args.entry.nextAttemptAt = now() + 1_000;
2594
2792
  args.entry.lastError = args.error;
2793
+ args.entry.awaitingRetryPush = true;
2595
2794
  this.outbox.set(args.messageId, args.entry);
2596
2795
  this.scheduleSave();
2597
2796
  this.logOutboxAckSummary('outbox ack retry', {
@@ -2623,6 +2822,7 @@ class BncrBridgeRuntime {
2623
2822
  connId,
2624
2823
  clientId,
2625
2824
  stale: staleObserved.stale,
2825
+ entry,
2626
2826
  });
2627
2827
  this.respondAckResult(respond, staleObserved.stale, { ok: true });
2628
2828
  this.flushPushQueue({
@@ -2842,6 +3042,7 @@ class BncrBridgeRuntime {
2842
3042
  args.entry.lastPushConnId =
2843
3043
  args.ownerConnId || (connIds.length === 1 ? connIds[0] : undefined);
2844
3044
  args.entry.lastPushClientId = args.ownerClientId;
3045
+ args.entry.awaitingRetryPush = false;
2845
3046
  if (!Array.isArray(args.entry.routeAttemptConnIds)) args.entry.routeAttemptConnIds = [];
2846
3047
  if (
2847
3048
  args.entry.lastPushConnId &&
@@ -2857,6 +3058,7 @@ class BncrBridgeRuntime {
2857
3058
  }
2858
3059
 
2859
3060
  private schedulePushDrain(delayMs = 0) {
3061
+ if (this.stopped) return;
2860
3062
  // Structure note (drain scheduler):
2861
3063
  // This is the single-timer gate for outbound retry scheduling. It intentionally coalesces
2862
3064
  // multiple nudges into one pending timer and delegates all actual decision-making to
@@ -2877,6 +3079,7 @@ class BncrBridgeRuntime {
2877
3079
  );
2878
3080
  this.pushTimer = setTimeout(() => {
2879
3081
  this.pushTimer = null;
3082
+ if (this.stopped) return;
2880
3083
  void this.flushPushQueue({
2881
3084
  trigger: OUTBOUND_FLUSH_TRIGGER.TIMER,
2882
3085
  reason: OUTBOUND_FLUSH_REASON.SCHEDULED_DRAIN,
@@ -2914,7 +3117,9 @@ class BncrBridgeRuntime {
2914
3117
  return {
2915
3118
  outboundRequireAck: this.isOutboundAckRequired(accountId),
2916
3119
  ackPolicySource,
2917
- messageAckTimeoutMs: PUSH_ACK_TIMEOUT_MS,
3120
+ messageAckTimeoutMs: this.resolveMessageAckTimeoutMs(accountId),
3121
+ adaptiveAckTimeoutEnabled: ADAPTIVE_ACK_TIMEOUT_DEFAULT_ENABLED,
3122
+ defaultMessageAckTimeoutMs: PUSH_ACK_TIMEOUT_MS,
2918
3123
  fileAckTimeoutMs: FILE_ACK_TIMEOUT_MS,
2919
3124
  debugVerbose: BNCR_DEBUG_VERBOSE,
2920
3125
  };
@@ -2925,6 +3130,7 @@ class BncrBridgeRuntime {
2925
3130
  trigger?: string;
2926
3131
  reason?: string;
2927
3132
  }): Promise<void> {
3133
+ if (this.stopped) return;
2928
3134
  // Structure guide for future safe extraction:
2929
3135
  // - pre-check: choose target accounts, skip accounts already draining, emit flush context logs
2930
3136
  // - tryPush: pick one due entry per account and attempt actual outbound delivery
@@ -2995,8 +3201,45 @@ class BncrBridgeRuntime {
2995
3201
  this.pushDrainRunningAccounts.add(acc);
2996
3202
  try {
2997
3203
  let localNextDelay: number | null = null;
3204
+ let processedThisRun = 0;
3205
+ const accountDrainStartedAt = now();
2998
3206
 
2999
3207
  while (true) {
3208
+ if (this.stopped) break;
3209
+ if (processedThisRun > 0 && now() - accountDrainStartedAt >= PUSH_DRAIN_ACCOUNT_TIME_BUDGET_MS) {
3210
+ localNextDelay = updateMinOutboxDelay(localNextDelay, 0);
3211
+ this.logInfo(
3212
+ 'outbox',
3213
+ `schedule ${JSON.stringify(
3214
+ buildOutboxScheduleDebugInfo({
3215
+ bridgeId: this.bridgeId,
3216
+ accountId: acc,
3217
+ source: OUTBOUND_SCHEDULE_SOURCE.ACCOUNT_TIME_BUDGET_YIELD,
3218
+ wait: 0,
3219
+ localNextDelay,
3220
+ }),
3221
+ )}`,
3222
+ { debugOnly: true },
3223
+ );
3224
+ break;
3225
+ }
3226
+ if (processedThisRun >= PUSH_DRAIN_ACCOUNT_BUDGET) {
3227
+ localNextDelay = updateMinOutboxDelay(localNextDelay, 0);
3228
+ this.logInfo(
3229
+ 'outbox',
3230
+ `schedule ${JSON.stringify(
3231
+ buildOutboxScheduleDebugInfo({
3232
+ bridgeId: this.bridgeId,
3233
+ accountId: acc,
3234
+ source: OUTBOUND_SCHEDULE_SOURCE.ACCOUNT_BUDGET_YIELD,
3235
+ wait: 0,
3236
+ localNextDelay,
3237
+ }),
3238
+ )}`,
3239
+ { debugOnly: true },
3240
+ );
3241
+ break;
3242
+ }
3000
3243
  const t = now();
3001
3244
  const entries = listAccountOutboxEntries({
3002
3245
  accountId: acc,
@@ -3031,11 +3274,13 @@ class BncrBridgeRuntime {
3031
3274
  const onlineNow = this.isOnline(acc);
3032
3275
  const recentInboundReachable = this.hasRecentInboundReachability(acc);
3033
3276
  const pushed = await this.tryPushEntry(entry);
3277
+ processedThisRun += 1;
3034
3278
  if (pushed) {
3035
3279
  const requireAck = this.isOutboundAckRequired(acc);
3280
+ const ackTimeoutMs = requireAck ? this.resolveMessageAckTimeoutMs(acc) : null;
3036
3281
  let ackResult: 'acked' | 'timeout' = requireAck ? 'timeout' : 'acked';
3037
3282
  if (onlineNow && requireAck) {
3038
- ackResult = await this.waitForMessageAck(entry.messageId, PUSH_ACK_TIMEOUT_MS);
3283
+ ackResult = await this.waitForMessageAck(entry.messageId, ackTimeoutMs || PUSH_ACK_TIMEOUT_MS);
3039
3284
  }
3040
3285
 
3041
3286
  this.logOutboxAckWait({
@@ -3044,6 +3289,7 @@ class BncrBridgeRuntime {
3044
3289
  ackResult,
3045
3290
  onlineNow,
3046
3291
  recentInboundReachable,
3292
+ ackTimeoutMs,
3047
3293
  });
3048
3294
 
3049
3295
  if (!this.outbox.has(entry.messageId)) {
@@ -3112,6 +3358,7 @@ class BncrBridgeRuntime {
3112
3358
  acc,
3113
3359
  this.getCounter(this.ackTimeoutCountByAccount, acc) + 1,
3114
3360
  );
3361
+ this.adaptiveAckRecoveryOkCountByAccount.set(acc, 0);
3115
3362
  }
3116
3363
  const wait = computeOutboxRetryWait(decision.nextAttemptAt, now());
3117
3364
  localNextDelay = updateMinOutboxDelay(localNextDelay, wait);
@@ -3123,6 +3370,7 @@ class BncrBridgeRuntime {
3123
3370
  availableConnIds,
3124
3371
  decision,
3125
3372
  localNextDelay,
3373
+ ackTimeoutMs,
3126
3374
  });
3127
3375
  await this.sleepMs(PUSH_DRAIN_INTERVAL_MS);
3128
3376
  break;
@@ -3213,17 +3461,36 @@ class BncrBridgeRuntime {
3213
3461
 
3214
3462
  private async waitForMessageAck(messageId: string, waitMs: number): Promise<'acked' | 'timeout'> {
3215
3463
  const key = asString(messageId).trim();
3216
- const timeoutMs = Math.max(0, Math.min(waitMs, 25_000));
3464
+ const timeoutMs = clampFiniteNumber(
3465
+ waitMs,
3466
+ 0,
3467
+ 0,
3468
+ RECOMMENDED_ACK_TIMEOUT_MAX_MS,
3469
+ );
3217
3470
  if (!key || !timeoutMs) return 'timeout';
3218
3471
 
3219
- return await new Promise<'acked' | 'timeout'>((resolve) => {
3220
- const timer = setTimeout(() => {
3472
+ const existing = this.messageAckWaiters.get(key);
3473
+ if (existing) {
3474
+ this.logWarn(
3475
+ 'outbox',
3476
+ `message-ack-waiter-reuse ${JSON.stringify({ bridge: this.bridgeId, messageId: key })}`,
3477
+ { debugOnly: true },
3478
+ );
3479
+ return await existing.promise;
3480
+ }
3481
+
3482
+ let timer: NodeJS.Timeout;
3483
+ let resolveWaiter!: (result: 'acked' | 'timeout') => void;
3484
+ const promise = new Promise<'acked' | 'timeout'>((resolve) => {
3485
+ resolveWaiter = resolve;
3486
+ timer = setTimeout(() => {
3221
3487
  this.messageAckWaiters.delete(key);
3222
3488
  resolve('timeout');
3223
3489
  }, timeoutMs);
3224
-
3225
- this.messageAckWaiters.set(key, { resolve, timer });
3226
3490
  });
3491
+
3492
+ this.messageAckWaiters.set(key, { promise, resolve: resolveWaiter, timer: timer! });
3493
+ return await promise;
3227
3494
  }
3228
3495
 
3229
3496
  private connectionKey(accountId: string, clientId?: string): string {
@@ -3253,6 +3520,9 @@ class BncrBridgeRuntime {
3253
3520
  { debugOnly: true },
3254
3521
  );
3255
3522
  this.connections.delete(key);
3523
+ if (this.activeConnectionByAccount.get(c.accountId) === key) {
3524
+ this.activeConnectionByAccount.delete(c.accountId);
3525
+ }
3256
3526
  }
3257
3527
  }
3258
3528
 
@@ -3267,11 +3537,23 @@ class BncrBridgeRuntime {
3267
3537
 
3268
3538
  private cleanupFileTransfers() {
3269
3539
  const t = now();
3540
+ const keepMsForTransfer = (st: { status: string; startedAt: number; terminalAt?: number }) => {
3541
+ const startedAt = finiteNumberOr(st.startedAt, t);
3542
+ if (st.status === 'completed' || st.status === 'aborted') {
3543
+ return {
3544
+ since: finiteNumberOr(st.terminalAt, startedAt),
3545
+ keepMs: FILE_TRANSFER_TERMINAL_KEEP_MS,
3546
+ };
3547
+ }
3548
+ return { since: startedAt, keepMs: FILE_TRANSFER_KEEP_MS };
3549
+ };
3270
3550
  for (const [id, st] of this.fileSendTransfers.entries()) {
3271
- if (t - st.startedAt > FILE_TRANSFER_KEEP_MS) this.fileSendTransfers.delete(id);
3551
+ const keep = keepMsForTransfer(st);
3552
+ if (t - keep.since > keep.keepMs) this.fileSendTransfers.delete(id);
3272
3553
  }
3273
3554
  for (const [id, st] of this.fileRecvTransfers.entries()) {
3274
- if (t - st.startedAt > FILE_TRANSFER_KEEP_MS) this.fileRecvTransfers.delete(id);
3555
+ const keep = keepMsForTransfer(st);
3556
+ if (t - keep.since > keep.keepMs) this.fileRecvTransfers.delete(id);
3275
3557
  }
3276
3558
  for (const [key, ack] of this.earlyFileAcks.entries()) {
3277
3559
  if (t - ack.at > FILE_TRANSFER_ACK_TTL_MS) this.earlyFileAcks.delete(key);
@@ -3671,10 +3953,19 @@ class BncrBridgeRuntime {
3671
3953
  }
3672
3954
 
3673
3955
  private fileAckKey(transferId: string, stage: string, chunkIndex?: number): string {
3674
- const idx = Number.isFinite(Number(chunkIndex)) ? String(Number(chunkIndex)) : '-';
3956
+ const n = Number(chunkIndex);
3957
+ const idx = Number.isInteger(n) && n >= 0 ? String(n) : '-';
3675
3958
  return `${transferId}|${stage}|${idx}`;
3676
3959
  }
3677
3960
 
3961
+ private fileAckOwnerInfo(transferId: string) {
3962
+ const st = this.fileSendTransfers.get(transferId);
3963
+ return {
3964
+ ...(st?.ownerConnId ? { ownerConnId: st.ownerConnId } : {}),
3965
+ ...(st?.ownerClientId ? { ownerClientId: st.ownerClientId } : {}),
3966
+ };
3967
+ }
3968
+
3678
3969
  private waitForFileAck(params: {
3679
3970
  transferId: string;
3680
3971
  stage: string;
@@ -3684,10 +3975,8 @@ class BncrBridgeRuntime {
3684
3975
  const transferId = asString(params.transferId).trim();
3685
3976
  const stage = asString(params.stage).trim();
3686
3977
  const key = this.fileAckKey(transferId, stage, params.chunkIndex);
3687
- const timeoutMs = Math.max(
3688
- 1_000,
3689
- Math.min(Number(params.timeoutMs || FILE_ACK_TIMEOUT_MS), 120_000),
3690
- );
3978
+ const timeoutMs = clampFiniteNumber(params.timeoutMs, FILE_ACK_TIMEOUT_MS, 1_000, 120_000);
3979
+ const ownerInfo = this.fileAckOwnerInfo(transferId);
3691
3980
 
3692
3981
  const cached = this.earlyFileAcks.get(key);
3693
3982
  if (cached) {
@@ -3698,9 +3987,13 @@ class BncrBridgeRuntime {
3698
3987
  bridge: this.bridgeId,
3699
3988
  transferId,
3700
3989
  stage,
3990
+ ackStage: stage,
3991
+ ackOutcome: cached.ok ? 'acked' : 'failed',
3992
+ waiterReused: false,
3701
3993
  chunkIndex:
3702
3994
  Number.isFinite(Number(params.chunkIndex)) ? Number(params.chunkIndex) : undefined,
3703
3995
  key,
3996
+ ...ownerInfo,
3704
3997
  ok: cached.ok,
3705
3998
  payload: cached.payload,
3706
3999
  }),
@@ -3714,22 +4007,52 @@ class BncrBridgeRuntime {
3714
4007
  );
3715
4008
  }
3716
4009
 
4010
+ const existing = this.fileAckWaiters.get(key);
4011
+ if (existing) {
4012
+ this.logWarn(
4013
+ 'file-ack-waiter-reuse',
4014
+ JSON.stringify({
4015
+ bridge: this.bridgeId,
4016
+ transferId,
4017
+ stage,
4018
+ ackStage: stage,
4019
+ ackOutcome: 'waiter-reused',
4020
+ waiterReused: true,
4021
+ chunkIndex:
4022
+ Number.isFinite(Number(params.chunkIndex)) ? Number(params.chunkIndex) : undefined,
4023
+ key,
4024
+ ...ownerInfo,
4025
+ }),
4026
+ { debugOnly: true },
4027
+ );
4028
+ return existing.promise;
4029
+ }
4030
+
3717
4031
  this.logInfo(
3718
4032
  'file-ack-wait',
3719
4033
  JSON.stringify({
3720
4034
  bridge: this.bridgeId,
3721
4035
  transferId,
3722
4036
  stage,
4037
+ ackStage: stage,
4038
+ ackOutcome: 'waiting',
4039
+ waiterReused: false,
3723
4040
  chunkIndex:
3724
4041
  Number.isFinite(Number(params.chunkIndex)) ? Number(params.chunkIndex) : undefined,
3725
4042
  key,
4043
+ ...ownerInfo,
3726
4044
  timeoutMs,
3727
4045
  }),
3728
4046
  { debugOnly: true },
3729
4047
  );
3730
4048
 
3731
- return new Promise<Record<string, unknown>>((resolve, reject) => {
3732
- const timer = setTimeout(() => {
4049
+ let timer: NodeJS.Timeout;
4050
+ let resolveWaiter!: (payload: Record<string, unknown>) => void;
4051
+ let rejectWaiter!: (err: Error) => void;
4052
+ const promise = new Promise<Record<string, unknown>>((resolve, reject) => {
4053
+ resolveWaiter = resolve;
4054
+ rejectWaiter = reject;
4055
+ timer = setTimeout(() => {
3733
4056
  this.fileAckWaiters.delete(key);
3734
4057
  this.logWarn(
3735
4058
  OUTBOUND_TERMINAL_REASON.FILE_ACK_TIMEOUT,
@@ -3737,17 +4060,27 @@ class BncrBridgeRuntime {
3737
4060
  bridge: this.bridgeId,
3738
4061
  transferId,
3739
4062
  stage,
4063
+ ackStage: stage,
4064
+ ackOutcome: 'timeout',
4065
+ waiterReused: false,
3740
4066
  chunkIndex:
3741
4067
  Number.isFinite(Number(params.chunkIndex)) ? Number(params.chunkIndex) : undefined,
3742
4068
  key,
4069
+ ...ownerInfo,
3743
4070
  timeoutMs,
3744
4071
  }),
3745
4072
  { debugOnly: true },
3746
4073
  );
3747
4074
  reject(new Error(`file ack timeout: ${key}`));
3748
4075
  }, timeoutMs);
3749
- this.fileAckWaiters.set(key, { resolve, reject, timer });
3750
4076
  });
4077
+ this.fileAckWaiters.set(key, {
4078
+ promise,
4079
+ resolve: resolveWaiter,
4080
+ reject: rejectWaiter,
4081
+ timer: timer!,
4082
+ });
4083
+ return promise;
3751
4084
  }
3752
4085
 
3753
4086
  private resolveFileAck(params: {
@@ -3760,9 +4093,10 @@ class BncrBridgeRuntime {
3760
4093
  const transferId = asString(params.transferId).trim();
3761
4094
  const stage = asString(params.stage).trim();
3762
4095
  const key = this.fileAckKey(transferId, stage, params.chunkIndex);
4096
+ const ownerInfo = this.fileAckOwnerInfo(transferId);
3763
4097
  const waiter = this.fileAckWaiters.get(key);
3764
4098
  if (!waiter) {
3765
- this.earlyFileAcks.set(key, {
4099
+ this.rememberEarlyFileAck(key, {
3766
4100
  payload: params.payload,
3767
4101
  ok: params.ok,
3768
4102
  at: now(),
@@ -3773,9 +4107,13 @@ class BncrBridgeRuntime {
3773
4107
  bridge: this.bridgeId,
3774
4108
  transferId,
3775
4109
  stage,
4110
+ ackStage: stage,
4111
+ ackOutcome: params.ok ? 'early-acked' : 'early-failed',
4112
+ waiterReused: false,
3776
4113
  chunkIndex:
3777
4114
  Number.isFinite(Number(params.chunkIndex)) ? Number(params.chunkIndex) : undefined,
3778
4115
  key,
4116
+ ...ownerInfo,
3779
4117
  ok: params.ok,
3780
4118
  payload: params.payload,
3781
4119
  cached: true,
@@ -3792,9 +4130,13 @@ class BncrBridgeRuntime {
3792
4130
  bridge: this.bridgeId,
3793
4131
  transferId,
3794
4132
  stage,
4133
+ ackStage: stage,
4134
+ ackOutcome: params.ok ? 'acked' : 'failed',
4135
+ waiterReused: false,
3795
4136
  chunkIndex:
3796
4137
  Number.isFinite(Number(params.chunkIndex)) ? Number(params.chunkIndex) : undefined,
3797
4138
  key,
4139
+ ...ownerInfo,
3798
4140
  ok: params.ok,
3799
4141
  payload: params.payload,
3800
4142
  }),
@@ -3841,38 +4183,6 @@ class BncrBridgeRuntime {
3841
4183
  return mt || 'file';
3842
4184
  }
3843
4185
 
3844
- private resolveInboundFilesDir(): string {
3845
- const dir = path.join(process.cwd(), '.openclaw', 'media', 'inbound', 'bncr');
3846
- fs.mkdirSync(dir, { recursive: true });
3847
- return dir;
3848
- }
3849
-
3850
- private async materializeRecvTransfer(
3851
- st: FileRecvTransferState,
3852
- ): Promise<{ path: string; fileSha256: string }> {
3853
- const dir = this.resolveInboundFilesDir();
3854
- const safeName = asString(st.fileName).trim() || `${st.transferId}.bin`;
3855
- const finalPath = path.join(dir, safeName);
3856
-
3857
- const ordered: Buffer[] = [];
3858
- for (let i = 0; i < st.totalChunks; i++) {
3859
- const chunk = st.bufferByChunk.get(i);
3860
- if (!chunk) throw new Error(`missing chunk ${i}`);
3861
- ordered.push(chunk);
3862
- }
3863
- const merged = Buffer.concat(ordered);
3864
- if (Number(st.fileSize || 0) > 0 && merged.length !== Number(st.fileSize || 0)) {
3865
- throw new Error(`size mismatch expected=${st.fileSize} got=${merged.length}`);
3866
- }
3867
-
3868
- const sha = createHash('sha256').update(merged).digest('hex');
3869
- if (st.fileSha256 && sha !== st.fileSha256) {
3870
- throw new Error(`sha256 mismatch expected=${st.fileSha256} got=${sha}`);
3871
- }
3872
-
3873
- fs.writeFileSync(finalPath, merged);
3874
- return { path: finalPath, fileSha256: sha };
3875
- }
3876
4186
 
3877
4187
  private buildRuntimeQueueSnapshot(accountId: string) {
3878
4188
  const pending = Array.from(this.outbox.values()).filter((v) => v.accountId === accountId).length;
@@ -3898,6 +4208,213 @@ class BncrBridgeRuntime {
3898
4208
  };
3899
4209
  }
3900
4210
 
4211
+ private computeRecommendedAckTimeoutReason(args: {
4212
+ lateAckOkCount: number;
4213
+ recentAckTimeoutCount: number;
4214
+ lastLateAckPushLatencyMs: number | null;
4215
+ lastLateAckOkAt?: number | null;
4216
+ adaptiveAckRecoveryOkCount?: number;
4217
+ recommendedAckTimeoutMs?: number;
4218
+ nowMs?: number;
4219
+ }) {
4220
+ if (args.recentAckTimeoutCount <= 0) return 'no-timeout-evidence';
4221
+ if (args.lateAckOkCount <= 0) return 'no-late-ack-evidence';
4222
+ if (typeof args.lastLateAckPushLatencyMs !== 'number') return 'missing-latency';
4223
+ const lastLateAckOkAt = typeof args.lastLateAckOkAt === 'number' ? args.lastLateAckOkAt : null;
4224
+ const nowMs = typeof args.nowMs === 'number' ? args.nowMs : now();
4225
+ if (
4226
+ typeof lastLateAckOkAt === 'number' &&
4227
+ lastLateAckOkAt > 0 &&
4228
+ nowMs - lastLateAckOkAt > ADAPTIVE_ACK_TIMEOUT_OBSERVATION_TTL_MS
4229
+ ) {
4230
+ return 'late-ack-expired';
4231
+ }
4232
+ if (
4233
+ typeof args.adaptiveAckRecoveryOkCount === 'number' &&
4234
+ args.adaptiveAckRecoveryOkCount >= ADAPTIVE_ACK_TIMEOUT_RECOVERY_OK_THRESHOLD
4235
+ ) {
4236
+ return 'recovered';
4237
+ }
4238
+ if (args.recommendedAckTimeoutMs === RECOMMENDED_ACK_TIMEOUT_MAX_MS) return 'capped-max';
4239
+ return 'late-ack-observed';
4240
+ }
4241
+
4242
+ private computeRecommendedAckTimeoutMs(args: {
4243
+ lateAckOkCount: number;
4244
+ recentAckTimeoutCount: number;
4245
+ lastLateAckPushLatencyMs: number | null;
4246
+ lastLateAckOkAt?: number | null;
4247
+ adaptiveAckRecoveryOkCount?: number;
4248
+ nowMs?: number;
4249
+ }) {
4250
+ const lastLateAckOkAt = typeof args.lastLateAckOkAt === 'number' ? args.lastLateAckOkAt : null;
4251
+ const nowMs = typeof args.nowMs === 'number' ? args.nowMs : now();
4252
+ const lateAckExpired =
4253
+ typeof lastLateAckOkAt === 'number' &&
4254
+ lastLateAckOkAt > 0 &&
4255
+ nowMs - lastLateAckOkAt > ADAPTIVE_ACK_TIMEOUT_OBSERVATION_TTL_MS;
4256
+ const recovered =
4257
+ typeof args.adaptiveAckRecoveryOkCount === 'number' &&
4258
+ args.adaptiveAckRecoveryOkCount >= ADAPTIVE_ACK_TIMEOUT_RECOVERY_OK_THRESHOLD;
4259
+ if (
4260
+ args.lateAckOkCount <= 0 ||
4261
+ args.recentAckTimeoutCount <= 0 ||
4262
+ typeof args.lastLateAckPushLatencyMs !== 'number' ||
4263
+ lateAckExpired ||
4264
+ recovered
4265
+ ) {
4266
+ return PUSH_ACK_TIMEOUT_MS;
4267
+ }
4268
+ const recommended = Math.ceil(args.lastLateAckPushLatencyMs * 1.25);
4269
+ return Math.min(
4270
+ RECOMMENDED_ACK_TIMEOUT_MAX_MS,
4271
+ Math.max(RECOMMENDED_ACK_TIMEOUT_MIN_MS, recommended),
4272
+ );
4273
+ }
4274
+
4275
+ private maybeLogAdaptiveAckTimeout(args: {
4276
+ accountId: string;
4277
+ timeoutMs: number;
4278
+ reason: string;
4279
+ lastLateAckPushLatencyMs: number | null;
4280
+ nowMs?: number;
4281
+ }) {
4282
+ if (args.timeoutMs <= PUSH_ACK_TIMEOUT_MS) return;
4283
+ const t = typeof args.nowMs === 'number' ? args.nowMs : now();
4284
+ const previous = this.adaptiveAckTimeoutLogStateByAccount.get(args.accountId);
4285
+ if (
4286
+ previous &&
4287
+ previous.timeoutMs === args.timeoutMs &&
4288
+ previous.reason === args.reason &&
4289
+ t - previous.at < ADAPTIVE_ACK_TIMEOUT_LOG_THROTTLE_MS
4290
+ ) {
4291
+ return;
4292
+ }
4293
+ this.adaptiveAckTimeoutLogStateByAccount.set(args.accountId, {
4294
+ at: t,
4295
+ timeoutMs: args.timeoutMs,
4296
+ reason: args.reason,
4297
+ });
4298
+ const parts = [
4299
+ args.accountId,
4300
+ `current=${args.timeoutMs}`,
4301
+ `default=${PUSH_ACK_TIMEOUT_MS}`,
4302
+ `reason=${args.reason}`,
4303
+ ];
4304
+ if (typeof args.lastLateAckPushLatencyMs === 'number') {
4305
+ parts.push(`latePushMs=${args.lastLateAckPushLatencyMs}`);
4306
+ }
4307
+ this.logInfo('outbox ack timeout-adaptive', parts.join('|'));
4308
+ }
4309
+
4310
+ private resolveMessageAckTimeoutMs(accountId?: string) {
4311
+ if (!ADAPTIVE_ACK_TIMEOUT_DEFAULT_ENABLED) return PUSH_ACK_TIMEOUT_MS;
4312
+ const acc = normalizeAccountId(accountId || BNCR_DEFAULT_ACCOUNT_ID);
4313
+ const lateAckOkCount = this.getCounter(this.lateAckOkCountByAccount, acc);
4314
+ const recentAckTimeoutCount = this.getCounter(this.ackTimeoutCountByAccount, acc);
4315
+ const lastLateAckPushLatencyMs = this.lastLateAckPushLatencyMsByAccount.get(acc) || null;
4316
+ const lastLateAckOkAt = this.lastLateAckOkByAccount.get(acc) || null;
4317
+ const adaptiveAckRecoveryOkCount = this.getCounter(this.adaptiveAckRecoveryOkCountByAccount, acc);
4318
+ const nowMs = now();
4319
+ const timeoutMs = this.computeRecommendedAckTimeoutMs({
4320
+ lateAckOkCount,
4321
+ recentAckTimeoutCount,
4322
+ lastLateAckPushLatencyMs,
4323
+ lastLateAckOkAt,
4324
+ adaptiveAckRecoveryOkCount,
4325
+ nowMs,
4326
+ });
4327
+ const reason = this.computeRecommendedAckTimeoutReason({
4328
+ lateAckOkCount,
4329
+ recentAckTimeoutCount,
4330
+ lastLateAckPushLatencyMs,
4331
+ lastLateAckOkAt,
4332
+ adaptiveAckRecoveryOkCount,
4333
+ recommendedAckTimeoutMs: timeoutMs,
4334
+ nowMs,
4335
+ });
4336
+ this.maybeLogAdaptiveAckTimeout({
4337
+ accountId: acc,
4338
+ timeoutMs,
4339
+ reason,
4340
+ lastLateAckPushLatencyMs,
4341
+ nowMs,
4342
+ });
4343
+ return timeoutMs;
4344
+ }
4345
+
4346
+ private buildRuntimeAckObservability(accountId: string) {
4347
+ const acc = normalizeAccountId(accountId);
4348
+ const recentAckTimeoutCount = this.getCounter(this.ackTimeoutCountByAccount, acc);
4349
+ const lateAckOkCount = this.getCounter(this.lateAckOkCountByAccount, acc);
4350
+ const lastLateAckPushLatencyMs = this.lastLateAckPushLatencyMsByAccount.get(acc) || null;
4351
+ const lastLateAckOkAt = this.lastLateAckOkByAccount.get(acc) || null;
4352
+ const nowMs = now();
4353
+ const lastLateAckAgeMs =
4354
+ typeof lastLateAckOkAt === 'number' && lastLateAckOkAt > 0 ? Math.max(0, nowMs - lastLateAckOkAt) : null;
4355
+ const lateAckObservationTtlMs = ADAPTIVE_ACK_TIMEOUT_OBSERVATION_TTL_MS;
4356
+ const lateAckObservationExpired =
4357
+ typeof lastLateAckAgeMs === 'number' && lastLateAckAgeMs > lateAckObservationTtlMs;
4358
+ const adaptiveAckRecoveryOkCount = this.getCounter(this.adaptiveAckRecoveryOkCountByAccount, acc);
4359
+ const adaptiveAckRecovered = adaptiveAckRecoveryOkCount >= ADAPTIVE_ACK_TIMEOUT_RECOVERY_OK_THRESHOLD;
4360
+ const recommendedAckTimeoutMs = this.computeRecommendedAckTimeoutMs({
4361
+ lateAckOkCount,
4362
+ recentAckTimeoutCount,
4363
+ lastLateAckPushLatencyMs,
4364
+ lastLateAckOkAt,
4365
+ adaptiveAckRecoveryOkCount,
4366
+ nowMs,
4367
+ });
4368
+ const currentAckTimeoutMs = this.resolveMessageAckTimeoutMs(acc);
4369
+ return {
4370
+ lastAckOkAt: this.lastAckOkByAccount.get(acc) || null,
4371
+ lastAckTimeoutAt: this.lastAckTimeoutByAccount.get(acc) || null,
4372
+ recentAckTimeoutCount,
4373
+ lateAckOkCount,
4374
+ lastLateAckOkAt,
4375
+ lastLateAckAgeMs,
4376
+ lateAckObservationTtlMs,
4377
+ lateAckObservationExpired,
4378
+ adaptiveAckRecoveryOkCount,
4379
+ adaptiveAckRecoveryOkThreshold: ADAPTIVE_ACK_TIMEOUT_RECOVERY_OK_THRESHOLD,
4380
+ adaptiveAckRecovered,
4381
+ lastAckQueueLatencyMs: this.lastAckQueueLatencyMsByAccount.get(acc) || null,
4382
+ lastAckPushLatencyMs: this.lastAckPushLatencyMsByAccount.get(acc) || null,
4383
+ lastLateAckQueueLatencyMs: this.lastLateAckQueueLatencyMsByAccount.get(acc) || null,
4384
+ lastLateAckPushLatencyMs,
4385
+ adaptiveAckTimeoutEnabled: ADAPTIVE_ACK_TIMEOUT_DEFAULT_ENABLED,
4386
+ defaultAckTimeoutMs: PUSH_ACK_TIMEOUT_MS,
4387
+ currentAckTimeoutMs,
4388
+ recommendedAckTimeoutMs,
4389
+ recommendedAckTimeoutReason: this.computeRecommendedAckTimeoutReason({
4390
+ lateAckOkCount,
4391
+ recentAckTimeoutCount,
4392
+ lastLateAckPushLatencyMs,
4393
+ lastLateAckOkAt,
4394
+ adaptiveAckRecoveryOkCount,
4395
+ recommendedAckTimeoutMs,
4396
+ nowMs,
4397
+ }),
4398
+ };
4399
+ }
4400
+
4401
+ private buildRuntimeAckStrategy(ackObservability: Record<string, any>) {
4402
+ const currentMs = finiteNumberOr(ackObservability.currentAckTimeoutMs, PUSH_ACK_TIMEOUT_MS);
4403
+ const defaultMs = finiteNumberOr(ackObservability.defaultAckTimeoutMs, PUSH_ACK_TIMEOUT_MS);
4404
+ const reason = asString(ackObservability.recommendedAckTimeoutReason || 'unknown') || 'unknown';
4405
+ return {
4406
+ mode: ackObservability.adaptiveAckTimeoutEnabled === true ? 'adaptive' : 'fixed',
4407
+ currentMs,
4408
+ defaultMs,
4409
+ maxMs: RECOMMENDED_ACK_TIMEOUT_MAX_MS,
4410
+ reason,
4411
+ active: currentMs > defaultMs,
4412
+ lastLateAckAgeMs: ackObservability.lastLateAckAgeMs ?? null,
4413
+ lateAckObservationTtlMs: ackObservability.lateAckObservationTtlMs ?? null,
4414
+ recovered: ackObservability.adaptiveAckRecovered === true,
4415
+ };
4416
+ }
4417
+
3901
4418
  private buildRuntimeActivitySnapshot(accountId: string) {
3902
4419
  return {
3903
4420
  activeConnections: this.activeConnectionCount(accountId),
@@ -3927,7 +4444,29 @@ class BncrBridgeRuntime {
3927
4444
  }
3928
4445
 
3929
4446
  getAccountRuntimeSnapshot(accountId: string) {
3930
- return buildAccountRuntimeSnapshot(this.buildRuntimeStatusInput(accountId, { running: true }));
4447
+ const snapshot = buildAccountRuntimeSnapshot(this.buildRuntimeStatusInput(accountId, { running: true }));
4448
+ const ackObservability = this.buildRuntimeAckObservability(accountId);
4449
+ const ackStrategy = this.buildRuntimeAckStrategy(ackObservability);
4450
+ return {
4451
+ ...snapshot,
4452
+ ackObservability,
4453
+ ackStrategy,
4454
+ diagnostics: {
4455
+ ...(snapshot.diagnostics || {}),
4456
+ ackObservability,
4457
+ ackStrategy,
4458
+ },
4459
+ meta: {
4460
+ ...(snapshot.meta || {}),
4461
+ ackObservability,
4462
+ ackStrategy,
4463
+ diagnostics: {
4464
+ ...(snapshot.meta?.diagnostics || {}),
4465
+ ackObservability,
4466
+ ackStrategy,
4467
+ },
4468
+ },
4469
+ };
3931
4470
  }
3932
4471
 
3933
4472
  private buildStatusHeadline(accountId: string): string {
@@ -3996,7 +4535,7 @@ class BncrBridgeRuntime {
3996
4535
  this.deadLetter = appendDeadLetter({
3997
4536
  deadLetter: this.deadLetter,
3998
4537
  entry: dead,
3999
- maxEntries: 1000,
4538
+ maxEntries: MAX_DEAD_LETTER_ENTRIES,
4000
4539
  });
4001
4540
  this.outbox.delete(entry.messageId);
4002
4541
  this.resolveMessageAck(entry.messageId, 'timeout');
@@ -4336,52 +4875,37 @@ class BncrBridgeRuntime {
4336
4875
  }
4337
4876
 
4338
4877
  private async sleepMs(ms: number): Promise<void> {
4339
- await new Promise<void>((resolve) => setTimeout(resolve, Math.max(0, Number(ms || 0))));
4878
+ await new Promise<void>((resolve) =>
4879
+ setTimeout(resolve, clampFiniteNumber(ms, 0, 0, INTERNAL_SLEEP_MAX_MS)),
4880
+ );
4340
4881
  }
4341
4882
 
4342
- private waitChunkAck(params: {
4883
+ private async waitChunkAck(params: {
4343
4884
  transferId: string;
4344
4885
  chunkIndex: number;
4345
4886
  timeoutMs?: number;
4346
4887
  }): Promise<void> {
4347
4888
  // Refactor boundary note (file-transfer / ACK coupling):
4348
4889
  // Chunk-level ACK waiting is part of the file-transfer sub-protocol, but it depends directly on
4349
- // mutable transfer runtime state in fileSendTransfers. If this is extracted later, preserve the
4350
- // current state ownership and timeout semantics before moving polling/wait logic out to another file.
4890
+ // mutable transfer runtime state in fileSendTransfers. Keep state prechecks here, while ACK wakeup
4891
+ // uses the shared event-style fileAckWaiters path instead of polling transfer state.
4351
4892
  const { transferId, chunkIndex } = params;
4352
- const timeoutMs = Math.max(
4353
- 1_000,
4354
- Math.min(Number(params.timeoutMs || FILE_TRANSFER_ACK_TTL_MS), 60_000),
4355
- );
4356
- const started = now();
4357
-
4358
- return new Promise<void>((resolve, reject) => {
4359
- const tick = async () => {
4360
- const st = this.fileSendTransfers.get(transferId);
4361
- if (!st) {
4362
- reject(new Error('transfer state missing'));
4363
- return;
4364
- }
4365
- if (st.failedChunks.has(chunkIndex)) {
4366
- reject(new Error(st.failedChunks.get(chunkIndex) || `chunk ${chunkIndex} failed`));
4367
- return;
4368
- }
4369
- if (st.ackedChunks.has(chunkIndex)) {
4370
- resolve();
4371
- return;
4372
- }
4373
- if (now() - started >= timeoutMs) {
4374
- reject(new Error(`chunk ack timeout index=${chunkIndex}`));
4375
- return;
4376
- }
4377
- await this.sleepMs(120);
4378
- void tick();
4379
- };
4380
- void tick();
4893
+ const st = this.fileSendTransfers.get(transferId);
4894
+ if (!st) throw new Error('transfer state missing');
4895
+ if (st.failedChunks.has(chunkIndex)) {
4896
+ throw new Error(st.failedChunks.get(chunkIndex) || `chunk ${chunkIndex} failed`);
4897
+ }
4898
+ if (st.ackedChunks.has(chunkIndex)) return;
4899
+
4900
+ await this.waitForFileAck({
4901
+ transferId,
4902
+ stage: 'chunk',
4903
+ chunkIndex,
4904
+ timeoutMs: clampFiniteNumber(params.timeoutMs, FILE_TRANSFER_ACK_TTL_MS, 1_000, 60_000),
4381
4905
  });
4382
4906
  }
4383
4907
 
4384
- private waitCompleteAck(params: {
4908
+ private async waitCompleteAck(params: {
4385
4909
  transferId: string;
4386
4910
  timeoutMs?: number;
4387
4911
  }): Promise<{ path: string }> {
@@ -4390,33 +4914,20 @@ class BncrBridgeRuntime {
4390
4914
  // transfer status transitions performed elsewhere in channel.ts. Keep completion wait behavior and
4391
4915
  // transfer-state mutation boundaries aligned if/when file-transfer pieces are moved out.
4392
4916
  const { transferId } = params;
4393
- const timeoutMs = Math.max(2_000, Math.min(Number(params.timeoutMs || 60_000), 120_000));
4394
- const started = now();
4395
-
4396
- return new Promise<{ path: string }>((resolve, reject) => {
4397
- const tick = async () => {
4398
- const st = this.fileSendTransfers.get(transferId);
4399
- if (!st) {
4400
- reject(new Error('transfer state missing'));
4401
- return;
4402
- }
4403
- if (st.status === 'aborted') {
4404
- reject(new Error(st.error || 'transfer aborted'));
4405
- return;
4406
- }
4407
- if (st.status === 'completed' && st.completedPath) {
4408
- resolve({ path: st.completedPath });
4409
- return;
4410
- }
4411
- if (now() - started >= timeoutMs) {
4412
- reject(new Error('complete ack timeout'));
4413
- return;
4414
- }
4415
- await this.sleepMs(150);
4416
- void tick();
4417
- };
4418
- void tick();
4917
+ const st = this.fileSendTransfers.get(transferId);
4918
+ if (!st) throw new Error('transfer state missing');
4919
+ if (st.status === 'aborted') throw new Error(st.error || 'transfer aborted');
4920
+ if (st.status === 'completed' && st.completedPath) return { path: st.completedPath };
4921
+
4922
+ const payload = await this.waitForFileAck({
4923
+ transferId,
4924
+ stage: 'complete',
4925
+ timeoutMs: clampFiniteNumber(params.timeoutMs, 60_000, 2_000, 120_000),
4419
4926
  });
4927
+ const updated = this.fileSendTransfers.get(transferId);
4928
+ const path = asString(payload?.path || updated?.completedPath || '').trim();
4929
+ if (!path) throw new Error('complete ack missing path');
4930
+ return { path };
4420
4931
  }
4421
4932
 
4422
4933
  private async transferMediaToBncrClient(params: {
@@ -4596,6 +5107,7 @@ class BncrBridgeRuntime {
4596
5107
 
4597
5108
  if (!ok) {
4598
5109
  st.status = 'aborted';
5110
+ st.terminalAt = now();
4599
5111
  st.error = String((lastErr as any)?.message || lastErr || `chunk-${idx}-failed`);
4600
5112
  this.fileSendTransfers.set(transferId, st);
4601
5113
  ctx.broadcastToConnIds(
@@ -4949,15 +5461,34 @@ class BncrBridgeRuntime {
4949
5461
  const sessionKey = asString(params?.sessionKey || '').trim();
4950
5462
  const fileName = asString(params?.fileName || '').trim() || 'file.bin';
4951
5463
  const mimeType = asString(params?.mimeType || '').trim() || 'application/octet-stream';
4952
- const fileSize = Number(params?.fileSize || 0);
4953
- const chunkSize = Number(params?.chunkSize || 256 * 1024);
4954
- const totalChunks = Number(params?.totalChunks || 0);
5464
+ const fileSize = finiteNonNegativeNumberOrNull(params?.fileSize);
5465
+ const chunkSize = finiteNonNegativeNumberOrNull(params?.chunkSize ?? 256 * 1024);
5466
+ const totalChunks = finiteNonNegativeNumberOrNull(params?.totalChunks);
4955
5467
  const fileSha256 = asString(params?.fileSha256 || '').trim();
4956
5468
 
4957
5469
  if (!transferId || !sessionKey || !fileSize || !chunkSize || !totalChunks) {
4958
5470
  respond(false, { error: 'transferId/sessionKey/fileSize/chunkSize/totalChunks required' });
4959
5471
  return;
4960
5472
  }
5473
+ if (fileSize > INBOUND_FILE_TRANSFER_MAX_BYTES) {
5474
+ respond(false, {
5475
+ error: `fileSize too large size=${fileSize} max=${INBOUND_FILE_TRANSFER_MAX_BYTES}`,
5476
+ });
5477
+ return;
5478
+ }
5479
+ if (totalChunks > INBOUND_FILE_TRANSFER_MAX_CHUNKS) {
5480
+ respond(false, {
5481
+ error: `totalChunks too large total=${totalChunks} max=${INBOUND_FILE_TRANSFER_MAX_CHUNKS}`,
5482
+ });
5483
+ return;
5484
+ }
5485
+ const expectedTotalChunks = Math.ceil(fileSize / chunkSize);
5486
+ if (totalChunks !== expectedTotalChunks) {
5487
+ respond(false, {
5488
+ error: `totalChunks mismatch total=${totalChunks} expected=${expectedTotalChunks}`,
5489
+ });
5490
+ return;
5491
+ }
4961
5492
 
4962
5493
  const normalized = normalizeStoredSessionKey(sessionKey);
4963
5494
  if (!normalized) {
@@ -5015,13 +5546,13 @@ class BncrBridgeRuntime {
5015
5546
  const clientId = asString((params as any)?.clientId || '').trim() || undefined;
5016
5547
 
5017
5548
  const transferId = asString(params?.transferId || '').trim();
5018
- const chunkIndex = Number(params?.chunkIndex ?? -1);
5019
- const offset = Number(params?.offset ?? 0);
5020
- const size = Number(params?.size ?? 0);
5549
+ const chunkIndex = finiteNonNegativeNumberOrNull(params?.chunkIndex);
5550
+ const offset = finiteNonNegativeNumberOrNull(params?.offset ?? 0);
5551
+ const size = finiteNonNegativeNumberOrNull(params?.size ?? 0);
5021
5552
  const chunkSha256 = asString(params?.chunkSha256 || '').trim();
5022
5553
  const base64 = asString(params?.base64 || '');
5023
5554
 
5024
- if (!transferId || chunkIndex < 0 || !base64) {
5555
+ if (!transferId || chunkIndex == null || !base64) {
5025
5556
  respond(false, { error: 'transferId/chunkIndex/base64 required' });
5026
5557
  return;
5027
5558
  }
@@ -5031,6 +5562,10 @@ class BncrBridgeRuntime {
5031
5562
  respond(false, { error: 'transfer not found' });
5032
5563
  return;
5033
5564
  }
5565
+ if (chunkIndex >= st.totalChunks) {
5566
+ respond(false, { error: `chunkIndex out of range index=${chunkIndex} total=${st.totalChunks}` });
5567
+ return;
5568
+ }
5034
5569
 
5035
5570
  const staleObserved = this.observeLease('file.chunk', params ?? {});
5036
5571
  if (staleObserved.stale) {
@@ -5061,7 +5596,7 @@ class BncrBridgeRuntime {
5061
5596
 
5062
5597
  try {
5063
5598
  const buf = Buffer.from(base64, 'base64');
5064
- if (size > 0 && buf.length !== size) {
5599
+ if (size != null && size > 0 && buf.length !== size) {
5065
5600
  throw new Error(`chunk size mismatch expected=${size} got=${buf.length}`);
5066
5601
  }
5067
5602
  if (chunkSha256) {
@@ -5177,6 +5712,7 @@ class BncrBridgeRuntime {
5177
5712
  );
5178
5713
  st.completedPath = saved.path;
5179
5714
  st.status = 'completed';
5715
+ st.terminalAt = now();
5180
5716
  this.fileRecvTransfers.set(transferId, st);
5181
5717
 
5182
5718
  respond(
@@ -5205,6 +5741,7 @@ class BncrBridgeRuntime {
5205
5741
  );
5206
5742
  } catch (error) {
5207
5743
  st.status = 'aborted';
5744
+ st.terminalAt = now();
5208
5745
  st.error = String((error as any)?.message || error || 'complete failed');
5209
5746
  this.fileRecvTransfers.set(transferId, st);
5210
5747
  respond(false, { error: st.error });
@@ -5256,6 +5793,7 @@ class BncrBridgeRuntime {
5256
5793
  }
5257
5794
 
5258
5795
  st.status = 'aborted';
5796
+ st.terminalAt = now();
5259
5797
  st.error = asString(params?.reason || 'aborted');
5260
5798
  this.fileRecvTransfers.set(transferId, st);
5261
5799
 
@@ -5285,7 +5823,7 @@ class BncrBridgeRuntime {
5285
5823
  const transferId = asString(params?.transferId || '').trim();
5286
5824
  const stage = asString(params?.stage || '').trim();
5287
5825
  const ok = params?.ok !== false;
5288
- const chunkIndex = Number(params?.chunkIndex ?? -1);
5826
+ const chunkIndex = finiteNonNegativeNumberOrNull(params?.chunkIndex);
5289
5827
 
5290
5828
  this.logInfo(
5291
5829
  'file-ack-inbound',
@@ -5296,8 +5834,10 @@ class BncrBridgeRuntime {
5296
5834
  clientId: clientId || null,
5297
5835
  transferId,
5298
5836
  stage,
5837
+ ackStage: stage,
5838
+ ackOutcome: ok ? 'acked' : 'failed',
5299
5839
  ok,
5300
- chunkIndex: chunkIndex >= 0 ? chunkIndex : undefined,
5840
+ chunkIndex: chunkIndex != null ? chunkIndex : undefined,
5301
5841
  errorCode: asString(params?.errorCode || ''),
5302
5842
  errorMessage: asString(params?.errorMessage || ''),
5303
5843
  path: asString(params?.path || '').trim(),
@@ -5355,15 +5895,19 @@ class BncrBridgeRuntime {
5355
5895
  const code = asString(params?.errorCode || 'ACK_FAILED');
5356
5896
  const msg = asString(params?.errorMessage || 'ack failed');
5357
5897
  st.error = `${code}:${msg}`;
5358
- if (stage === 'chunk' && chunkIndex >= 0) st.failedChunks.set(chunkIndex, st.error);
5359
- if (stage === 'complete') st.status = 'aborted';
5898
+ if (stage === 'chunk' && chunkIndex != null) st.failedChunks.set(chunkIndex, st.error);
5899
+ if (stage === 'complete') {
5900
+ st.status = 'aborted';
5901
+ st.terminalAt = now();
5902
+ }
5360
5903
  } else {
5361
- if (stage === 'chunk' && chunkIndex >= 0) {
5904
+ if (stage === 'chunk' && chunkIndex != null) {
5362
5905
  st.ackedChunks.add(chunkIndex);
5363
5906
  st.status = 'transferring';
5364
5907
  }
5365
5908
  if (stage === 'complete') {
5366
5909
  st.status = 'completed';
5910
+ st.terminalAt = now();
5367
5911
  st.completedPath = asString(params?.path || '').trim() || st.completedPath;
5368
5912
  }
5369
5913
  }
@@ -5374,7 +5918,7 @@ class BncrBridgeRuntime {
5374
5918
  this.resolveFileAck({
5375
5919
  transferId,
5376
5920
  stage,
5377
- chunkIndex: chunkIndex >= 0 ? chunkIndex : undefined,
5921
+ chunkIndex: chunkIndex != null ? chunkIndex : undefined,
5378
5922
  payload: {
5379
5923
  ok,
5380
5924
  transferId,
@@ -5633,20 +6177,19 @@ class BncrBridgeRuntime {
5633
6177
 
5634
6178
  tick();
5635
6179
  const timer = setInterval(tick, 5_000);
5636
- this.channelAccountTimers.set(accountId, timer);
5637
-
5638
- await new Promise<void>((resolve) => {
6180
+ let worker!: ChannelAccountWorkerHandle;
6181
+ const done = new Promise<void>((resolve) => {
5639
6182
  let settled = false;
5640
6183
  const finish = (reason: string) => {
5641
6184
  if (settled) return;
5642
6185
  settled = true;
5643
- const activeTimer = this.channelAccountTimers.get(accountId);
5644
- if (activeTimer === timer) {
5645
- clearInterval(timer);
5646
- this.channelAccountTimers.delete(accountId);
5647
- } else {
5648
- clearInterval(timer);
6186
+ const activeWorker = this.channelAccountWorkers.get(accountId);
6187
+ if (activeWorker === worker) {
6188
+ this.channelAccountWorkers.delete(accountId);
5649
6189
  }
6190
+ clearInterval(timer);
6191
+ worker.cleanupAbortListener?.();
6192
+ worker.cleanupAbortListener = undefined;
5650
6193
  this.logInfo(
5651
6194
  'health',
5652
6195
  `status-worker finished ${JSON.stringify({ bridge: this.bridgeId, accountId, reason })}`,
@@ -5656,15 +6199,23 @@ class BncrBridgeRuntime {
5656
6199
  resolve();
5657
6200
  };
5658
6201
 
6202
+ worker = { timer, finish };
6203
+ this.channelAccountWorkers.set(accountId, worker);
6204
+
5659
6205
  const onAbort = () => finish('abort');
6206
+ const abortSignal = ctx.abortSignal;
5660
6207
 
5661
- if (ctx.abortSignal?.aborted) {
6208
+ if (abortSignal?.aborted) {
5662
6209
  onAbort();
5663
6210
  return;
5664
6211
  }
5665
6212
 
5666
- ctx.abortSignal?.addEventListener?.('abort', onAbort, { once: true });
6213
+ abortSignal?.addEventListener?.('abort', onAbort, { once: true });
6214
+ if (abortSignal?.removeEventListener) {
6215
+ worker.cleanupAbortListener = () => abortSignal.removeEventListener('abort', onAbort);
6216
+ }
5667
6217
  });
6218
+ await done;
5668
6219
  };
5669
6220
 
5670
6221
  channelStopAccount = async (ctx: any) => {
@@ -5749,6 +6300,7 @@ class BncrBridgeRuntime {
5749
6300
  accountId,
5750
6301
  to,
5751
6302
  text: asString(ctx.text || ''),
6303
+ kind: ctx?.kind,
5752
6304
  replyToId,
5753
6305
  mediaLocalRoots: ctx.mediaLocalRoots,
5754
6306
  resolveVerifiedTarget: (to, accountId) => this.resolveVerifiedTarget(to, accountId),
@@ -5790,6 +6342,7 @@ class BncrBridgeRuntime {
5790
6342
  mediaUrls: Array.isArray(ctx?.mediaUrls) ? ctx.mediaUrls : undefined,
5791
6343
  asVoice,
5792
6344
  audioAsVoice,
6345
+ kind: ctx?.kind,
5793
6346
  replyToId,
5794
6347
  mediaLocalRoots: ctx.mediaLocalRoots,
5795
6348
  resolveVerifiedTarget: (to, accountId) => this.resolveVerifiedTarget(to, accountId),