@coclaw/openclaw-coclaw 0.13.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -13,6 +13,7 @@ import { generateTitle } from './src/topic-manager/title-gen.js';
13
13
  import { AutoUpgradeScheduler } from './src/auto-upgrade/updater.js';
14
14
  import { getPackageInfo } from './src/auto-upgrade/updater-check.js';
15
15
  import { createFileHandler } from './src/file-manager/handler.js';
16
+ import { abortAgentRun } from './src/agent-abort.js';
16
17
 
17
18
  import { getPluginVersion, __resetPluginVersion } from './src/plugin-version.js';
18
19
  export { getPluginVersion, __resetPluginVersion };
@@ -457,6 +458,27 @@ const plugin = {
457
458
  }
458
459
  });
459
460
 
461
+ // 取消正在执行的 embedded agent run(通过 OpenClaw 全局 symbol 侧门)
462
+ // 侧门不存在 / sessionId 未注册 / handle.abort 抛异常时返回 { ok:false, reason } —— UI 静默降级
463
+ api.registerGatewayMethod('coclaw.agent.abort', ({ params, respond }) => {
464
+ try {
465
+ const sessionId = params?.sessionId;
466
+ if (typeof sessionId !== 'string' || !sessionId) {
467
+ logger.warn?.(`[coclaw.agent.abort] invalid sessionId: ${JSON.stringify(sessionId)}`);
468
+ respondInvalid(respond, 'sessionId is required');
469
+ return;
470
+ }
471
+ logger.info?.(`[coclaw.agent.abort] request sessionId=${sessionId}`);
472
+ const result = abortAgentRun(sessionId, logger);
473
+ logger.info?.(`[coclaw.agent.abort] result sessionId=${sessionId} ok=${result.ok}${result.reason ? ` reason=${result.reason}` : ''}${result.error ? ` error=${result.error}` : ''}`);
474
+ respond(true, result);
475
+ }
476
+ catch (err) {
477
+ logger.error?.(`[coclaw.agent.abort] handler threw: ${String(err?.message ?? err)}`);
478
+ respondError(respond, err);
479
+ }
480
+ });
481
+
460
482
  api.registerGatewayMethod('coclaw.upgradeHealth', async ({ respond }) => {
461
483
  try {
462
484
  const { version } = await getPackageInfo();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@coclaw/openclaw-coclaw",
3
- "version": "0.13.1",
3
+ "version": "0.14.0",
4
4
  "type": "module",
5
5
  "license": "Apache-2.0",
6
6
  "description": "OpenClaw CoClaw channel plugin for remote chat",
@@ -60,7 +60,7 @@
60
60
  },
61
61
  "dependencies": {
62
62
  "node-datachannel": "0.32.2",
63
- "@coclaw/pion-node": "^0.1.1",
63
+ "@coclaw/pion-node": "^0.1.2",
64
64
  "werift": "^0.19.0",
65
65
  "ws": "^8.19.0"
66
66
  },
@@ -0,0 +1,106 @@
1
/**
 * agent-abort: wraps the side-door entry point used to cancel an OpenClaw
 * embedded agent run.
 *
 * Since v2026.3.12 OpenClaw exposes its `activeRuns` map through the global
 * symbol registry, which lets external code call `handle.abort()` for a given
 * sessionId and really terminate the running agent (LLM, tools and compaction
 * are all affected).
 *
 * This module is the only place where the CoClaw plugin touches that side
 * door, so it can be swapped in one spot once upstream ships an official API.
 */

const EMBEDDED_RUN_STATE_KEY = Symbol.for('openclaw.embeddedRunState');
const REPLY_RUN_STATE_KEY = Symbol.for('openclaw.replyRunRegistry');

/* c8 ignore start */ // temporary diagnostics; to be removed once the issue is located
/** Maximum number of map keys included in a diagnostic dump. */
const DIAG_KEY_SAMPLE_LIMIT = 10;

/**
 * Diagnostics only: collect up to `limit` keys from a Map-like object.
 * @param {{ keys?: Function }} mapLike - object that may expose `keys()`
 * @param {number} [limit] - maximum number of keys to collect
 * @returns {Array} sampled keys (empty when `keys()` is not available)
 */
function sampleKeys(mapLike, limit = DIAG_KEY_SAMPLE_LIMIT) {
	const sample = [];
	if (typeof mapLike?.keys !== 'function') return sample;
	for (const key of mapLike.keys()) {
		sample.push(key);
		if (sample.length >= limit) break;
	}
	return sample;
}

/**
 * Diagnostics only: summarize the reply-run-registry global singleton and the
 * sessionId → sessionKey mapping it holds for `sessionId`.
 * @param {string} sessionId
 * @returns {string} space-separated `key=value` summary
 */
function describeReplyRunRegistry(sessionId) {
	const state = globalThis[REPLY_RUN_STATE_KEY];
	if (!state) return 'reply.state=absent';
	const parts = [];

	const runs = state.activeRunsByKey;
	if (runs && typeof runs.size === 'number') {
		parts.push(`reply.activeRunsByKey.size=${runs.size}`);
		try {
			parts.push(`reply.keys=${JSON.stringify(sampleKeys(runs))}`);
		}
		catch (e) {
			parts.push(`reply.keysErr=${String(e?.message ?? e)}`);
		}
	}
	else {
		parts.push('reply.activeRunsByKey=absent');
	}

	const byId = state.activeKeysBySessionId;
	if (byId && typeof byId.get === 'function') {
		try {
			const mapped = byId.get(sessionId);
			parts.push(`reply.keyForSid=${mapped === undefined ? 'null' : JSON.stringify(mapped)}`);
		}
		catch (e) {
			parts.push(`reply.keyForSidErr=${String(e?.message ?? e)}`);
		}
	}
	else {
		parts.push('reply.activeKeysBySessionId=absent');
	}
	return parts.join(' ');
}

/**
 * Diagnostics only: dump an overview of the embedded and reply registries when
 * a sessionId is not registered. Never throws: a failing logger or registry
 * getter must not change the outcome reported to the caller.
 * @param {string} sessionId
 * @param {object} activeRuns - the embedded `activeRuns` map
 * @param {{ info?: Function }} [logger]
 */
function logNotFoundDiagnostics(sessionId, activeRuns, logger) {
	if (!logger?.info) return;
	try {
		let diag = `sessionId=${sessionId} embedded.size=${activeRuns.size ?? '?'}`;
		try {
			diag += ` embedded.keys=${JSON.stringify(sampleKeys(activeRuns))}`;
		}
		catch (e) {
			diag += ` embedded.keysErr=${String(e?.message ?? e)}`;
		}
		diag += ` ${describeReplyRunRegistry(sessionId)}`;
		logger.info(`[coclaw.agent.abort] not-found diag ${diag}`);
	}
	catch {
		// Best effort only: diagnostics must never mask the real `not-found` result.
	}
}
/* c8 ignore stop */

/**
 * Try to cancel the embedded agent run registered for `sessionId`.
 * @param {string} sessionId
 * @param {{ info?: Function }} [logger] - optional logger; when provided, the not-found branch dumps activeRuns diagnostics
 * @returns {{ ok: true } | { ok: false, reason: 'not-supported' | 'not-found' | 'abort-threw', error?: string }}
 */
export function abortAgentRun(sessionId, logger) {
	const state = globalThis[EMBEDDED_RUN_STATE_KEY];
	if (!state || !state.activeRuns || typeof state.activeRuns.get !== 'function') {
		return { ok: false, reason: 'not-supported' };
	}
	try {
		const handle = state.activeRuns.get(sessionId);
		if (!handle) {
			logNotFoundDiagnostics(sessionId, state.activeRuns, logger);
			return { ok: false, reason: 'not-found' };
		}
		// Shape guard: `abort` should be a function; if it is not, the OpenClaw handle
		// contract has changed (reported as not-supported so the UI can suggest an upgrade).
		if (typeof handle.abort !== 'function') return { ok: false, reason: 'not-supported' };
		handle.abort();
		return { ok: true };
	}
	catch (err) {
		// activeRuns.get() or handle.abort() threw (non-Map implementation / OpenClaw internal error)
		return { ok: false, reason: 'abort-threw', error: String(err?.message ?? err) };
	}
}
@@ -114,7 +114,8 @@ export async function generateTitle({ topicId, topicManager, agentRpc, logger })
114
114
  message: '请为这段对话生成标题',
115
115
  idempotencyKey: randomUUID(),
116
116
  }, {
117
- timeoutMs: 60_000,
117
+ // 题目生成需等待 LLM 完整响应;60s 在复杂对话/慢模型下易超时
118
+ timeoutMs: 300_000,
118
119
  acceptTimeoutMs: 10_000,
119
120
  });
120
121
 
@@ -22,20 +22,15 @@ const encoder = new TextEncoder();
22
22
  const decoder = new TextDecoder();
23
23
 
24
24
  /**
25
- * 按需分片并发送消息
26
- * 小于 maxMessageSize 直接发 string;否则切成 binary chunk 逐个发送
27
- * @param {object} dc - DataChannel(werift 或浏览器)
25
+ * 按需分片:小于等于 maxMessageSize 返回 null(调用方直发 string),否则返回 chunk 数组
28
26
  * @param {string} jsonStr - 已序列化的 JSON 字符串
29
27
  * @param {number} maxMessageSize - 对端声明的 maxMessageSize
30
28
  * @param {() => number} getNextMsgId - 获取下一个 msgId
29
+ * @returns {Buffer[]|null} null 表示不需要分片;否则为 chunk Buffer 数组
31
30
  */
32
- export function chunkAndSend(dc, jsonStr, maxMessageSize, getNextMsgId, logger) {
31
+ export function buildChunks(jsonStr, maxMessageSize, getNextMsgId) {
33
32
  const fullBytes = encoder.encode(jsonStr);
34
- // 快路径:不需要分片
35
- if (fullBytes.byteLength <= maxMessageSize) {
36
- dc.send(jsonStr);
37
- return;
38
- }
33
+ if (fullBytes.byteLength <= maxMessageSize) return null;
39
34
 
40
35
  const chunkPayloadSize = maxMessageSize - HEADER_SIZE;
41
36
  if (chunkPayloadSize <= 0) {
@@ -44,7 +39,7 @@ export function chunkAndSend(dc, jsonStr, maxMessageSize, getNextMsgId, logger)
44
39
 
45
40
  const msgId = getNextMsgId();
46
41
  const totalChunks = Math.ceil(fullBytes.byteLength / chunkPayloadSize);
47
- logger?.info?.(`[dc-chunking] chunking msgId=${msgId}: ${fullBytes.byteLength} bytes → ${totalChunks} chunks (maxMsgSize=${maxMessageSize})`);
42
+ const chunks = new Array(totalChunks);
48
43
 
49
44
  for (let i = 0; i < totalChunks; i++) {
50
45
  const start = i * chunkPayloadSize;
@@ -55,7 +50,30 @@ export function chunkAndSend(dc, jsonStr, maxMessageSize, getNextMsgId, logger)
55
50
  chunk[0] = flag;
56
51
  chunk.writeUInt32BE(msgId, 1);
57
52
  chunk.set(fullBytes.subarray(start, end), HEADER_SIZE);
53
+ chunks[i] = chunk;
54
+ }
55
+ return chunks;
56
+ }
58
57
 
58
/**
 * Send a message over a DataChannel, chunking it only when required
 * (thin wrapper: buildChunks + dc.send).
 * Note: there is no application-level flow control here; production paths
 * should go through RpcSendQueue instead.
 * @param {object} dc - DataChannel
 * @param {string} jsonStr - already-serialized JSON string
 * @param {number} maxMessageSize - max message size declared by the remote peer
 * @param {() => number} getNextMsgId - supplies the next msgId
 * @param {object} [logger] - optional logger
 */
export function chunkAndSend(dc, jsonStr, maxMessageSize, getNextMsgId, logger) {
	const pieces = buildChunks(jsonStr, maxMessageSize, getNextMsgId);
	if (pieces) {
		// msgId lives in the chunk header right after the 1-byte flag
		const msgId = pieces[0].readUInt32BE(1);
		let totalBytes = 0;
		for (const piece of pieces) {
			totalBytes += piece.length - HEADER_SIZE;
		}
		logger?.info?.(`[dc-chunking] chunking msgId=${msgId}: ${totalBytes} bytes → ${pieces.length} chunks (maxMsgSize=${maxMessageSize})`);
		for (let idx = 0; idx < pieces.length; idx += 1) {
			dc.send(pieces[idx]);
		}
		return;
	}
	// Small enough to go out as a single string frame
	dc.send(jsonStr);
}
@@ -0,0 +1,182 @@
1
+ /**
2
+ * rpc DataChannel 发送流控队列
3
+ *
4
+ * 针对 plugin 侧 rpc DC 的应用层流控:与 UI 侧 `webrtc-connection.js` 语义对齐,
5
+ * 但因插件运行在 gateway 进程内,必须对队列大小设硬/软上限,避免 OOM。
6
+ *
7
+ * 使用方式:每条 rpc DC 一个实例,绑定到 session.rpcSendQueue。
8
+ * - send(jsonStr):同步入口,fire-and-forget;返回 accepted/dropped
9
+ * - onBufferedAmountLow():由 DC `bufferedamountlow` 事件转调,触发 drain
10
+ * - close():DC 关闭时调用,清空队列并汇总 drop 统计
11
+ *
12
+ * 不做:Promise 送达保证;单条消息硬上限内的背压;自动重试。
13
+ */
14
+
15
+ import { buildChunks } from './dc-chunking.js';
16
+ import { remoteLog } from '../remote-log.js';
17
+
18
+ /** 高水位:`dc.bufferedAmount >= HIGH` 时暂停 fast-path / drain */
19
+ export const DC_HIGH_WATER_MARK = 1024 * 1024; // 1 MB
20
+ /** 低水位:设置 `dc.bufferedAmountLowThreshold`,触发 `bufferedamountlow` 事件 */
21
+ export const DC_LOW_WATER_MARK = 256 * 1024; // 256 KB
22
+ /** 应用层队列软上限:`queueBytes >= MAX_QUEUE_BYTES` 时新消息被 drop */
23
+ export const MAX_QUEUE_BYTES = 10 * 1024 * 1024; // 10 MB
24
+ /** 单条消息硬上限(对齐 dc-chunking.js MAX_REASSEMBLY_BYTES,接收端重组不了也无意义) */
25
+ export const MAX_SINGLE_MSG_BYTES = 50 * 1024 * 1024; // 50 MB
26
+
27
export class RpcSendQueue {
	/**
	 * Application-level send queue with flow control for one rpc DataChannel.
	 * @param {object} opts
	 * @param {object} opts.dc - DataChannel instance (must support send / bufferedAmount / readyState)
	 * @param {number} opts.maxMessageSize - a=max-message-size declared in the remote SDP
	 * @param {() => number} opts.getNextMsgId - msgId generator used for chunked messages
	 * @param {object} [opts.logger] - pino-style logger (defaults to console)
	 * @param {string} [opts.tag] - diagnostic tag (usually the connId)
	 * @throws {Error} when `dc` is missing
	 */
	constructor({ dc, maxMessageSize, getNextMsgId, logger, tag }) {
		if (!dc) throw new Error('RpcSendQueue: dc is required');
		this.dc = dc;
		this.maxMessageSize = maxMessageSize;
		this.getNextMsgId = getNextMsgId;
		this.logger = logger ?? console;
		this.tag = tag ?? '';

		/** @type {Buffer[]} pending chunks, or whole string messages converted to Buffers */
		this.queue = [];
		this.queueBytes = 0;
		this.closed = false;

		// Drop statistics (accumulated and reported when the queue is closed)
		this.droppedCount = 0;
		this.droppedBytes = 0;
		// "Queue full" state: only a queue-full drop sets it; draining below MAX resets it.
		// A single-msg-oversize drop does not touch it (that is an application bug,
		// not a sign of queue pressure).
		this.queueOverflowActive = false;
	}

	/**
	 * Synchronously send one JSON string (fire-and-forget).
	 *
	 * Size limits are checked against the UTF-8 payload size BEFORE the message is
	 * chunked, so a message that is going to be dropped neither allocates chunk
	 * buffers nor consumes a msgId.
	 * @param {string} jsonStr
	 * @returns {boolean} true = accepted (sent directly or queued), false = dropped
	 */
	send(jsonStr) {
		if (this.closed || this.dc.readyState !== 'open') return false;

		const payloadBytes = Buffer.byteLength(jsonStr, 'utf8');

		// Hard cap: a single message that is too large
		if (payloadBytes > MAX_SINGLE_MSG_BYTES) {
			this.droppedCount += 1;
			this.droppedBytes += payloadBytes;
			this.logger.warn?.(`[rpc-queue${this.__tagSuffix()}] drop reason=single-msg-oversize size=${payloadBytes} cap=${MAX_SINGLE_MSG_BYTES}`);
			return false;
		}

		// Soft cap: the backlog already reached MAX (an earlier message may have
		// overshot it, but new messages are rejected from this point on)
		if (this.queueBytes >= MAX_QUEUE_BYTES) {
			this.droppedCount += 1;
			this.droppedBytes += payloadBytes;
			this.logger.warn?.(`[rpc-queue${this.__tagSuffix()}] drop reason=queue-full size=${payloadBytes} queueBytes=${this.queueBytes}`);
			if (!this.queueOverflowActive) {
				this.queueOverflowActive = true;
				remoteLog(`rpc-queue.overflow-start${this.__tagSuffix()} queueBytes=${this.queueBytes}`);
			}
			return false;
		}

		const chunks = buildChunks(jsonStr, this.maxMessageSize, this.getNextMsgId);

		// Not chunked: send the string directly, or queue it as a Buffer
		if (!chunks) {
			if (this.queue.length === 0 && this.__canSendNow()) {
				try {
					this.dc.send(jsonStr);
					return true;
				} catch (err) {
					this.logger.warn?.(`[rpc-queue${this.__tagSuffix()}] fast-path send failed: ${err?.message}`);
					return false;
				}
			}
			const buf = Buffer.from(jsonStr, 'utf8');
			this.queue.push(buf);
			this.queueBytes += buf.length;
			return true;
		}

		// Chunked: the fast path pushes as many chunks as possible synchronously,
		// using the same loop condition as __drain.
		let i = 0;
		if (this.queue.length === 0) {
			while (i < chunks.length && this.__canSendNow()) {
				try {
					this.dc.send(chunks[i]);
					i += 1;
				} catch (err) {
					this.logger.warn?.(`[rpc-queue${this.__tagSuffix()}] fast-path send failed at chunk ${i}/${chunks.length}: ${err?.message}`);
					return false;
				}
			}
		}
		// Queue the remaining chunks in one go so the chunks of a message stay
		// contiguous and no other message can be interleaved.
		for (; i < chunks.length; i += 1) {
			this.queue.push(chunks[i]);
			this.queueBytes += chunks[i].length;
		}
		return true;
	}

	/** Called from the DataChannel's `bufferedamountlow` event to resume draining. */
	onBufferedAmountLow() {
		this.__drain();
	}

	/**
	 * Close the queue: discard pending chunks and report the drop summary via
	 * remoteLog. Idempotent.
	 */
	close() {
		if (this.closed) return;
		this.closed = true;
		const residual = this.queue.length;
		const residualBytes = this.queueBytes;
		this.queue.length = 0;
		this.queueBytes = 0;
		this.queueOverflowActive = false;
		if (this.droppedCount > 0 || residual > 0) {
			remoteLog(`rpc-queue.close${this.__tagSuffix()} dropped=${this.droppedCount} droppedBytes=${this.droppedBytes} residualChunks=${residual} residualBytes=${residualBytes}`);
		}
	}

	/** @private Keep sending queued chunks until the HIGH water mark is hit or the queue is empty. */
	__drain() {
		if (this.closed) return;
		while (this.queue.length > 0 && this.__canSendNow()) {
			const chunk = this.queue[0];
			try {
				this.dc.send(chunk);
			} catch (err) {
				this.logger.warn?.(`[rpc-queue${this.__tagSuffix()}] drain send failed: ${err?.message}`);
				return; // keep the queue; close() cleans it up when the channel closes
			}
			this.queue.shift();
			this.queueBytes -= chunk.length;
			// full → not-full transition
			if (this.queueOverflowActive && this.queueBytes < MAX_QUEUE_BYTES) {
				this.queueOverflowActive = false;
				remoteLog(`rpc-queue.overflow-end${this.__tagSuffix()} dropped=${this.droppedCount} droppedBytes=${this.droppedBytes}`);
			}
		}
	}

	/**
	 * @private
	 * True while the channel is open and below the HIGH water mark.
	 * A missing `bufferedAmount` is treated as 0: `undefined < HIGH` is false, which
	 * would otherwise park every message in the queue with nothing to drain it.
	 * NOTE(review): which DataChannel implementations omit `bufferedAmount` is not
	 * visible from here — confirm against the supported backends.
	 * @returns {boolean}
	 */
	__canSendNow() {
		const dc = this.dc;
		return dc.readyState === 'open' && (dc.bufferedAmount ?? 0) < DC_HIGH_WATER_MARK;
	}

	/** @private Tag suffix for log lines (empty when no tag was given). */
	__tagSuffix() {
		return this.tag ? ` ${this.tag}` : '';
	}
}
@@ -1,10 +1,19 @@
1
- import { chunkAndSend, createReassembler } from './dc-chunking.js';
1
+ import { createReassembler } from './dc-chunking.js';
2
+ import { RpcSendQueue, DC_LOW_WATER_MARK } from './rpc-send-queue.js';
2
3
  import { remoteLog } from '../remote-log.js';
3
4
 
4
5
  // 单个 session 内 file DC 历史快照的容量上限(满后按 FIFO 淘汰最老条目)。
5
6
  // 用于诊断 dump:过大会撑爆 remoteLog 单帧,20 足以覆盖典型多文件传输会话。
6
7
  const FILE_CHANNEL_HISTORY_LIMIT = 20;
7
8
 
9
+ // Failed session 保留 24 小时,支持 Capacitor 长时间后台恢复后 ICE restart。
10
+ // 超时后 session 被回收释放 IPC listeners 和 Go 侧资源。
11
+ const FAILED_SESSION_TTL_MS = 24 * 60 * 60 * 1000;
12
+
13
+ // Session 总数上限(活跃 + failed)。溢出时淘汰最旧的 failed session。
14
+ // 20 足以覆盖多 UI 实例(浏览器多标签 + 移动端)的典型场景。
15
+ const MAX_SESSIONS = 20;
16
+
8
17
  /**
9
18
  * 管理多个 WebRTC PeerConnection(以 connId 为粒度)。
10
19
  * Plugin 作为被叫方:收到 UI 的 offer → 回复 answer。
@@ -32,7 +41,7 @@ export class WebRtcPeer {
32
41
  this.__PeerConnection = PeerConnection;
33
42
  this.__impl = impl ?? null;
34
43
  this.__rtcTag = impl ? `[coclaw/rtc:${impl}]` : '[coclaw/rtc]';
35
- /** @type {Map<string, { pc: object, rpcChannel: object|null, remoteMaxMessageSize: number, nextMsgId: number }>} */
44
+ /** @type {Map<string, { pc: object, rpcChannel: object|null, rpcSendQueue: RpcSendQueue|null, fileChannels: Set, remoteMaxMessageSize: number, nextMsgId: number }>} */
36
45
  this.__sessions = new Map();
37
46
  }
38
47
 
@@ -55,7 +64,19 @@ export class WebRtcPeer {
55
64
  async closeByConnId(connId) {
56
65
  const session = this.__sessions.get(connId);
57
66
  if (!session) return;
67
+ // 清理 failed TTL 定时器
68
+ if (session.__failedTimer) {
69
+ clearTimeout(session.__failedTimer);
70
+ session.__failedTimer = null;
71
+ }
58
72
  this.__sessions.delete(connId);
73
+ // 显式关闭 rpc 发送队列:dc.onclose 路径中 `sessions.get(connId)` 已返回 undefined 而短路,
74
+ // 此处不主动 close 会丢失 drop 汇总 remoteLog 诊断
75
+ if (session.rpcSendQueue) {
76
+ session.rpcSendQueue.close();
77
+ session.rpcSendQueue = null;
78
+ session.rpcChannel = null;
79
+ }
59
80
  // 先 detach 事件,防止 pc.close() 异步触发 onconnectionstatechange 删除新 session
60
81
  session.pc.onconnectionstatechange = null;
61
82
  session.pc.onicecandidate = null;
@@ -73,15 +94,16 @@ export class WebRtcPeer {
73
94
  await Promise.all(closing);
74
95
  }
75
96
 
76
- /** 向所有已打开的 rpcChannel 广播(大消息自动分片) */
97
+ /** 向所有已打开的 rpcChannel 广播(大消息自动分片,经由 RpcSendQueue 流控) */
77
98
  broadcast(payload) {
78
99
  const jsonStr = JSON.stringify(payload);
79
100
  for (const [connId, session] of this.__sessions) {
80
- const dc = session.rpcChannel;
81
- if (dc?.readyState === 'open') {
101
+ const q = session.rpcSendQueue;
102
+ if (q && session.rpcChannel?.readyState === 'open') {
82
103
  try {
83
- chunkAndSend(dc, jsonStr, session.remoteMaxMessageSize, () => session.nextMsgId++, this.logger);
104
+ q.send(jsonStr);
84
105
  } catch (err) {
106
+ // buildChunks 抛(maxMessageSize 配置错)等罕见情况
85
107
  this.__logDebug(`[${connId}] broadcast send failed: ${err.message}`);
86
108
  }
87
109
  }
@@ -105,12 +127,24 @@ export class WebRtcPeer {
105
127
  toConnId: connId,
106
128
  payload: { reason: 'impl_unsupported' },
107
129
  });
108
- return;
130
+ return; // TTL timer 保持不变(reject 是同步的,不影响 timer 正常工作)
131
+ }
132
+ // 暂停 failed TTL timer:pion restart 涉及异步协商,期间不应被回收
133
+ if (existing.__failedTimer) {
134
+ clearTimeout(existing.__failedTimer);
135
+ existing.__failedTimer = null;
109
136
  }
110
137
  this.__remoteLog(`rtc.ice-restart conn=${connId}`);
111
138
  this.logger.info?.(`${this.__rtcTag} ICE restart offer from ${connId}, renegotiating`);
112
139
  try {
113
140
  await existing.pc.setRemoteDescription({ type: 'offer', sdp: msg.payload.sdp });
141
+ // 重协商 SDP 可能变更 a=max-message-size,同步刷新 queue 分片阈值;
142
+ // queue 中已入队的 chunks 按旧值分片保留,新消息用新值
143
+ const newMMS = this.__resolveMaxMessageSize(existing.pc, msg.payload.sdp);
144
+ if (newMMS !== existing.remoteMaxMessageSize) {
145
+ existing.remoteMaxMessageSize = newMMS;
146
+ if (existing.rpcSendQueue) existing.rpcSendQueue.maxMessageSize = newMMS;
147
+ }
114
148
  const answer = await existing.pc.createAnswer();
115
149
  await existing.pc.setLocalDescription(answer);
116
150
  this.__onSend({
@@ -155,6 +189,11 @@ export class WebRtcPeer {
155
189
  await this.closeByConnId(connId);
156
190
  }
157
191
 
192
+ // session 总数限制:溢出时淘汰最旧的 failed session
193
+ if (this.__sessions.size >= MAX_SESSIONS) {
194
+ this.__evictOldestFailed();
195
+ }
196
+
158
197
  // 从 Server 注入的 turnCreds 构建 iceServers
159
198
  // werift 的 urls 必须是单个 string,每个 URL 独立一个对象
160
199
  const iceServers = [];
@@ -177,15 +216,9 @@ export class WebRtcPeer {
177
216
 
178
217
  const pc = new this.__PeerConnection({ iceServers });
179
218
 
180
- // 分片阈值 = min(远端能接收, 本地能发送)
181
- // 远端:从 offer SDP 的 a=max-message-size 解析(缺失则 RFC 8841 默认 65536)
182
- // 本地:pc.maxMessageSize(pion 为 65536,ndc/werift 无此属性则不限制)
183
- const mmsMatch = msg.payload.sdp?.match(/a=max-message-size:(\d+)/);
184
- const remoteMMS = mmsMatch ? parseInt(mmsMatch[1], 10) : 65536;
185
- const localMMS = pc.maxMessageSize ?? remoteMMS;
186
- const remoteMaxMessageSize = Math.min(remoteMMS, localMMS);
219
+ const remoteMaxMessageSize = this.__resolveMaxMessageSize(pc, msg.payload.sdp);
187
220
 
188
- const session = { pc, rpcChannel: null, fileChannels: new Set(), remoteMaxMessageSize, nextMsgId: 1 };
221
+ const session = { pc, rpcChannel: null, rpcSendQueue: null, fileChannels: new Set(), remoteMaxMessageSize, nextMsgId: 1 };
189
222
  this.__sessions.set(connId, session);
190
223
 
191
224
  // ICE candidate → 发给 UI,并统计各类型 candidate 数量
@@ -222,6 +255,12 @@ export class WebRtcPeer {
222
255
  const cur = this.__sessions.get(connId);
223
256
  if (!cur || cur.pc !== pc) return;
224
257
 
258
+ // 离开 failed 状态时清理 TTL timer(ICE restart 恢复、自然关闭等)
259
+ if (state !== 'failed' && cur.__failedTimer) {
260
+ clearTimeout(cur.__failedTimer);
261
+ cur.__failedTimer = null;
262
+ }
263
+
225
264
  if (state === 'connected') {
226
265
  // 重置 dump 去重水位(disconnected → connected → disconnected 仍能再 dump)
227
266
  cur.__lastDumpState = null;
@@ -238,16 +277,25 @@ export class WebRtcPeer {
238
277
  // pion: pair 通过独立的 selectedcandidatepairchange 事件上报
239
278
  } else if (state === 'disconnected' || state === 'failed' || state === 'closed') {
240
279
  // 诊断 dump:失败/断连/关闭时输出当前 PC 上 DC 状态,定位"PC 假活/DC 死"现象
241
- // - closed 多由本地主动关闭触发,dump 收敛诊断噪声但仍清理 session
280
+ // - closed 由 closeByConnId 接管清理,dump 收敛诊断噪声
242
281
  // - disconnected 可能反复触发,去重避免噪声
243
282
  if (state !== 'closed' && cur.__lastDumpState !== state) {
244
283
  cur.__lastDumpState = state;
245
284
  this.__dumpSessionState(connId, cur, state);
246
285
  }
247
- // closed 删除 session;failed 保留以支持 ICE restart 恢复
248
- // (如 app 后台冻结 pion ICE failed 前台恢复后 restart)
249
- if (state === 'closed') {
250
- this.__sessions.delete(connId);
286
+ if (state === 'failed') {
287
+ // 启动 TTL 定时器:超时后回收 session 释放 IPC listeners Go 侧资源。
288
+ // unref() 确保定时器不阻止进程退出(gateway 由其他连接保活)。
289
+ if (cur.__failedTimer) clearTimeout(cur.__failedTimer);
290
+ cur.__failedTimer = setTimeout(() => {
291
+ this.__remoteLog(`rtc.session-expired conn=${connId} ttl=${FAILED_SESSION_TTL_MS / 1000}s`);
292
+ this.logger.info?.(`${this.__rtcTag} [${connId}] session TTL expired, closing`);
293
+ this.closeByConnId(connId).catch(() => {});
294
+ }, FAILED_SESSION_TTL_MS);
295
+ cur.__failedTimer.unref?.();
296
+ } else if (state === 'closed') {
297
+ // 自然进入 closed 时也需通过 closeByConnId 释放 IPC listeners 和 Go 资源
298
+ this.closeByConnId(connId).catch(() => {});
251
299
  }
252
300
  }
253
301
  };
@@ -298,6 +346,10 @@ export class WebRtcPeer {
298
346
  // SDP 协商失败 → 清理已入 Map 的 session,避免泄漏
299
347
  const cur = this.__sessions.get(connId);
300
348
  if (cur && cur.pc === pc) {
349
+ if (cur.__failedTimer) {
350
+ clearTimeout(cur.__failedTimer);
351
+ cur.__failedTimer = null;
352
+ }
301
353
  this.__sessions.delete(connId);
302
354
  }
303
355
  await pc.close().catch(() => {});
@@ -321,9 +373,29 @@ export class WebRtcPeer {
321
373
  }
322
374
 
323
375
  __setupDataChannel(connId, dc) {
376
+ // rpc DC 发送流控:每条 rpc DC 绑定一个 RpcSendQueue,广播与 files RPC 响应均经此出口
377
+ const session = this.__sessions.get(connId);
378
+ if (session && dc.label === 'rpc') {
379
+ if ('bufferedAmountLowThreshold' in dc) {
380
+ dc.bufferedAmountLowThreshold = DC_LOW_WATER_MARK;
381
+ }
382
+ session.rpcSendQueue = new RpcSendQueue({
383
+ dc,
384
+ maxMessageSize: session.remoteMaxMessageSize,
385
+ getNextMsgId: () => session.nextMsgId++,
386
+ logger: this.logger,
387
+ tag: `conn=${connId}`,
388
+ });
389
+ dc.onbufferedamountlow = () => {
390
+ session.rpcSendQueue?.onBufferedAmountLow();
391
+ };
392
+ }
393
+
324
394
  const reassembler = createReassembler((jsonStr) => {
325
395
  const payload = JSON.parse(jsonStr);
326
396
  // DC 探测:立即回复,不走 gateway
397
+ // 故意绕过 RpcSendQueue:probe-ack 仅用于测量传输层(SCTP/DTLS)健康,
398
+ // 走 queue 会把应用层积压压力错误地映射到"DC 不通"上。
327
399
  if (payload.type === 'probe') {
328
400
  try { dc.send(JSON.stringify({ type: 'probe-ack' })); }
329
401
  catch { /* DC 已关闭,忽略 */ }
@@ -332,15 +404,10 @@ export class WebRtcPeer {
332
404
  if (payload.type === 'req') {
333
405
  // coclaw.files.* 方法本地处理,不转发 gateway
334
406
  if (payload.method?.startsWith('coclaw.files.') && this.__onFileRpc) {
335
- const session = this.__sessions.get(connId);
407
+ const sess = this.__sessions.get(connId);
336
408
  const sendFn = (response) => {
337
409
  try {
338
- chunkAndSend(
339
- dc, JSON.stringify(response),
340
- session?.remoteMaxMessageSize ?? 65536,
341
- () => session.nextMsgId++,
342
- this.logger,
343
- );
410
+ sess?.rpcSendQueue?.send(JSON.stringify(response));
344
411
  } catch (err) {
345
412
  this.__logDebug(`[${connId}] sendFn failed: ${err.message}`);
346
413
  }
@@ -362,8 +429,12 @@ export class WebRtcPeer {
362
429
  this.__remoteLog(`dc.closed conn=${connId} label=${dc.label}`);
363
430
  this.logger.info?.(`${this.__rtcTag} [${connId}] DataChannel "${dc.label}" closed`);
364
431
  reassembler.reset();
365
- const session = this.__sessions.get(connId);
366
- if (session && dc.label === 'rpc') session.rpcChannel = null;
432
+ const sess = this.__sessions.get(connId);
433
+ if (sess && dc.label === 'rpc') {
434
+ sess.rpcSendQueue?.close();
435
+ sess.rpcSendQueue = null;
436
+ sess.rpcChannel = null;
437
+ }
367
438
  };
368
439
  dc.onerror = (err) => {
369
440
  this.__remoteLog(`dc.error conn=${connId} label=${dc.label}`);
@@ -389,8 +460,24 @@ export class WebRtcPeer {
389
460
  ? 'none'
390
461
  /* c8 ignore next -- ?? fallback for missing readyState */
391
462
  : [...session.fileChannels].map((dc) => `${dc.label}=${dc.readyState ?? '?'}`).join(',');
392
- this.__remoteLog(`rtc.dump conn=${connId} state=${state} sessions=${this.__sessions.size} rpc=${rpcState} fileCount=${session.fileChannels.size} files=[${fileSummary}]`);
393
- this.logger.info?.(`${this.__rtcTag} [${connId}] dump state=${state} rpc=${rpcState} fileCount=${session.fileChannels.size} files=${fileSummary}`);
463
+ const q = session.rpcSendQueue;
464
+ const queueInfo = q
465
+ ? `queueLen=${q.queue.length} queueBytes=${q.queueBytes} dropped=${q.droppedCount}`
466
+ : 'queue=none';
467
+ this.__remoteLog(`rtc.dump conn=${connId} state=${state} sessions=${this.__sessions.size} rpc=${rpcState} ${queueInfo} fileCount=${session.fileChannels.size} files=[${fileSummary}]`);
468
+ this.logger.info?.(`${this.__rtcTag} [${connId}] dump state=${state} rpc=${rpcState} ${queueInfo} fileCount=${session.fileChannels.size} files=${fileSummary}`);
469
+ }
470
+
471
+ /**
472
+ * 分片阈值 = min(远端能接收, 本地能发送)
473
+ * 远端:从 SDP 的 a=max-message-size 解析(缺失则 RFC 8841 默认 65536)
474
+ * 本地:pc.maxMessageSize(pion 为 65536,ndc/werift 无此属性则不限制)
475
+ */
476
+ __resolveMaxMessageSize(pc, sdp) {
477
+ const mmsMatch = sdp?.match(/a=max-message-size:(\d+)/);
478
+ const remoteMMS = mmsMatch ? parseInt(mmsMatch[1], 10) : 65536;
479
+ const localMMS = pc.maxMessageSize ?? remoteMMS;
480
+ return Math.min(remoteMMS, localMMS);
394
481
  }
395
482
 
396
483
  __logNominatedPair(connId, pair) {
@@ -404,9 +491,25 @@ export class WebRtcPeer {
404
491
  remoteLog(this.__impl ? `${msg} rtc=${this.__impl}` : msg);
405
492
  }
406
493
 
494
+ /** 淘汰最旧的 failed session(Map 迭代序 ≈ 创建时间序),用于 queue length 限制 */
495
+ __evictOldestFailed() {
496
+ for (const [connId, session] of this.__sessions) {
497
+ if (session.pc.connectionState === 'failed') {
498
+ this.__remoteLog(`rtc.session-evicted conn=${connId} sessions=${this.__sessions.size}`);
499
+ this.logger.info?.(`${this.__rtcTag} [${connId}] session evicted (limit ${MAX_SESSIONS}), closing`);
500
+ this.closeByConnId(connId).catch(() => {});
501
+ return true;
502
+ }
503
+ }
504
+ this.logger.warn?.(`${this.__rtcTag} session limit (${MAX_SESSIONS}) reached, no failed sessions to evict`);
505
+ return false;
506
+ }
507
+
407
508
  __logDebug(message) {
408
509
  if (typeof this.logger?.debug === 'function') {
409
510
  this.logger.debug(`${this.__rtcTag} ${message}`);
410
511
  }
411
512
  }
412
513
  }
514
+
515
+ export { FAILED_SESSION_TTL_MS, MAX_SESSIONS };