@coclaw/openclaw-coclaw 0.16.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -13
- package/package.json +1 -3
- package/src/auto-upgrade/registry-fallback.js +61 -0
- package/src/auto-upgrade/updater.js +4 -1
- package/src/auto-upgrade/worker.js +42 -10
- package/src/realtime-bridge.js +8 -0
- package/src/webrtc/ndc-preloader.js +2 -0
- package/src/webrtc/rpc-send-queue.js +6 -0
- package/src/webrtc/webrtc-peer.js +156 -10
- package/vendor/ndc-prebuilds/darwin-arm64/node_datachannel.node +0 -0
- package/vendor/ndc-prebuilds/darwin-x64/node_datachannel.node +0 -0
- package/vendor/ndc-prebuilds/linux-arm64/node_datachannel.node +0 -0
- package/vendor/ndc-prebuilds/linux-x64/node_datachannel.node +0 -0
- package/vendor/ndc-prebuilds/win32-x64/node_datachannel.node +0 -0
package/README.md
CHANGED
|
@@ -172,22 +172,14 @@ openclaw gateway call coclaw.upgradeHealth --json
|
|
|
172
172
|
|
|
173
173
|
## WebRTC 实现
|
|
174
174
|
|
|
175
|
-
|
|
175
|
+
插件在运行时按优先级选择 WebRTC 实现:
|
|
176
176
|
|
|
177
|
-
1. **node
|
|
178
|
-
2. **werift**(回退)— 纯 JavaScript 实现,作为
|
|
177
|
+
1. **pion**(主力)— 通过 `@coclaw/pion-node` SDK 驱动 Go 侧 pion-ipc 进程,实现完整 WebRTC 能力。
|
|
178
|
+
2. **werift**(回退)— 纯 JavaScript 实现,作为 pion 加载失败时的兜底。
|
|
179
179
|
|
|
180
|
-
选择结果通过
|
|
180
|
+
选择结果通过 `bridge.started` / `coclaw.env impl=...` 日志上报。
|
|
181
181
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
由于 OpenClaw 使用 `--ignore-scripts` 安装插件,node-datachannel 的 native binary 需通过 vendor 预编译包提供:
|
|
185
|
-
|
|
186
|
-
```bash
|
|
187
|
-
bash scripts/download-ndc-prebuilds.sh # 下载 5 平台预编译包到 vendor/ndc-prebuilds/
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
支持的平台:linux-x64、linux-arm64、darwin-x64、darwin-arm64、win32-x64。vendor 目录不入 git,通过 npm publish 的 `files` 字段包含在发布包中。
|
|
182
|
+
> `ndc-preloader.js`(node-datachannel 路径)的代码仍保留但已摘除 `node-datachannel` 依赖和 vendor 预编译包(2026-04-19)——运行时必然走 fallback 到 werift,待 pion 在全部线上平台稳定观察期结束后与 werift 一并移除。
|
|
191
183
|
|
|
192
184
|
## 运行与排障日志
|
|
193
185
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@coclaw/openclaw-coclaw",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.17.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"description": "OpenClaw CoClaw channel plugin for remote chat",
|
|
@@ -32,7 +32,6 @@
|
|
|
32
32
|
"!src/**/*.test.js",
|
|
33
33
|
"!src/mock-server.helper.js",
|
|
34
34
|
"openclaw.plugin.json",
|
|
35
|
-
"vendor/ndc-prebuilds/**",
|
|
36
35
|
"LICENSE"
|
|
37
36
|
],
|
|
38
37
|
"main": "index.js",
|
|
@@ -59,7 +58,6 @@
|
|
|
59
58
|
"release:versions": "npm view @coclaw/openclaw-coclaw versions --json --registry=https://registry.npmjs.org/ && npm view @coclaw/openclaw-coclaw versions --json"
|
|
60
59
|
},
|
|
61
60
|
"dependencies": {
|
|
62
|
-
"node-datachannel": "0.32.2",
|
|
63
61
|
"@coclaw/pion-node": "^0.1.3",
|
|
64
62
|
"werift": "^0.19.0",
|
|
65
63
|
"ws": "^8.19.0"
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* registry-fallback.js — npm registry 反向兜底
|
|
3
|
+
*
|
|
4
|
+
* 升级首次失败(timeout/429/网络异常等)后,按当前用户的 registry 选反方向源
|
|
5
|
+
* 再试一次:用户原本走 npmmirror 卡住时切到 npmjs;走 npmjs 卡住(如 IP 段被
|
|
6
|
+
* 限流)时切到 npmmirror。两侧任一可用即能脱困。
|
|
7
|
+
*/
|
|
8
|
+
import { execFile as nodeExecFile } from 'node:child_process';
|
|
9
|
+
|
|
10
|
+
export const NPMJS_REGISTRY = 'https://registry.npmjs.org/';
|
|
11
|
+
export const NPMMIRROR_REGISTRY = 'https://registry.npmmirror.com/';
|
|
12
|
+
|
|
13
|
+
const DEFAULT_TIMEOUT_MS = 10_000;
|
|
14
|
+
|
|
15
|
+
/**
 * Read the npm registry currently in effect by running `npm config get registry`
 * (no env is passed, so the child inherits the user's .npmrc and environment).
 *
 * Never rejects: an exec error, a synchronous throw from the exec function, and
 * empty/missing output all resolve to the npmjs URL. pickFallbackRegistry then
 * maps that to npmmirror, i.e. "blindly pick npmmirror when the npm command
 * itself is broken" — an acceptable cost for a worker that retries only once.
 *
 * Callers should pass execFileFn in tests to avoid spawning a real npm process.
 * @param {object} [opts]
 * @param {Function} [opts.execFileFn] - execFile-compatible function; defaults to node:child_process execFile
 * @param {number} [opts.timeoutMs] - timeout for the npm call; defaults to DEFAULT_TIMEOUT_MS
 * @returns {Promise<string>} the trimmed registry URL, or NPMJS_REGISTRY on any failure
 */
export function getCurrentNpmRegistry(opts) {
	/* c8 ignore next -- ?./?? fallback */
	const doExecFile = opts?.execFileFn ?? nodeExecFile;
	/* c8 ignore next -- ?? fallback */
	const timeout = opts?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
	return new Promise((resolve) => {
		try {
			doExecFile('npm', ['config', 'get', 'registry'], {
				timeout,
				// npm is a .cmd shim on Windows and needs a shell to be launched
				shell: process.platform === 'win32',
			}, (err, stdout) => {
				if (err) { resolve(NPMJS_REGISTRY); return; }
				// `?? ''` keeps a missing stdout from turning into the string "undefined"
				const raw = String(stdout ?? '').trim();
				resolve(raw || NPMJS_REGISTRY);
			});
		}
		catch {
			// A synchronous throw inside the executor would reject the promise;
			// honour the "failure falls back to npmjs" contract instead.
			resolve(NPMJS_REGISTRY);
		}
	});
}
|
|
44
|
+
|
|
45
|
+
/**
 * Choose the fallback registry to retry against, based on the current one:
 * - contains `npmmirror.com` → switch to npmjs
 * - anything else (npmjs, cnpmjs.org, self-hosted, or abnormal input such as a
 *   non-string) → switch to npmmirror
 *
 * Only npmmirror is treated as strictly "opposite". Other mirrors are sent to
 * npmmirror too, which is a change of source rather than a true reversal; this
 * simplification is deliberate.
 * @param {string} current - registry URL currently in use
 * @returns {string} registry URL to use for the retry
 */
export function pickFallbackRegistry(current) {
	const isOnNpmmirror = typeof current === 'string' && /npmmirror\.com/i.test(current);
	return isOnNpmmirror ? NPMJS_REGISTRY : NPMMIRROR_REGISTRY;
}
|
|
@@ -7,7 +7,10 @@ import { readState, resolveStateDir, writeState } from './state.js';
|
|
|
7
7
|
import { getRuntime } from '../runtime.js';
|
|
8
8
|
import { remoteLog } from '../remote-log.js';
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
// 首次检查延迟较长:失败时由 worker 触发 gateway restart,scheduler 重启后会重新计时;
|
|
11
|
+
// 60 分钟基线(实际随机 60-120 分钟)能把"失败→重启→再次检查"的循环周期拉长,
|
|
12
|
+
// 避免连续升级失败时 gateway 在短时间内反复被打扰。
|
|
13
|
+
const INITIAL_DELAY_MS = 60 * 60 * 1000; // 60 分钟
|
|
11
14
|
const CHECK_INTERVAL_MS = 60 * 60 * 1000; // 1 小时
|
|
12
15
|
const CHANNEL_ID = 'coclaw';
|
|
13
16
|
const LOCK_FILENAME = 'upgrade.lock';
|
|
@@ -17,26 +17,35 @@ import { parseArgs } from 'node:util';
|
|
|
17
17
|
import { createBackup, restoreFromBackup, removeBackup } from './worker-backup.js';
|
|
18
18
|
import { verifyUpgrade, waitForGateway } from './worker-verify.js';
|
|
19
19
|
import { addSkippedVersion, updateLastUpgrade, appendLog } from './state.js';
|
|
20
|
+
import { getCurrentNpmRegistry, pickFallbackRegistry } from './registry-fallback.js';
|
|
20
21
|
|
|
21
22
|
const SEMVER_RE = /^\d+\.\d+\.\d+(-[\w.-]+)?$/;
|
|
23
|
+
// 单次 plugins update 上限:包含 npm install 大型 native deps,慢网络 + 弱机器需较长时间
|
|
24
|
+
const UPDATE_TIMEOUT_MS = 10 * 60 * 1000;
|
|
22
25
|
|
|
23
26
|
/**
|
|
24
27
|
* 执行 openclaw plugins update
|
|
28
|
+
*
|
|
29
|
+
* 仅支持 source === "npm" 的安装(updater 已做前置过滤)。
|
|
30
|
+
* env 由调用方决定:缺省时子进程继承当前 process.env(含用户 .npmrc 自动生效);
|
|
31
|
+
* 显式传入时用于覆盖 registry 等 npm 配置以做兜底重试。
|
|
25
32
|
* @param {string} pluginId - 插件 ID
|
|
26
33
|
* @param {object} [opts]
|
|
27
34
|
* @param {Function} [opts.execFileFn]
|
|
35
|
+
* @param {NodeJS.ProcessEnv} [opts.env]
|
|
28
36
|
* @returns {Promise<void>}
|
|
29
37
|
*/
|
|
30
|
-
// openclaw plugins update 内部实现为 staged backup-and-replace,
|
|
31
|
-
// 仅支持 source === "npm" 的安装(updater 已做前置过滤)
|
|
32
38
|
function runPluginUpdate(pluginId, opts) {
|
|
33
39
|
/* c8 ignore next -- ?./?? fallback */
|
|
34
40
|
const doExecFile = opts?.execFileFn ?? nodeExecFile;
|
|
35
41
|
return new Promise((resolve, reject) => {
|
|
36
|
-
|
|
37
|
-
timeout:
|
|
42
|
+
const execOpts = {
|
|
43
|
+
timeout: UPDATE_TIMEOUT_MS,
|
|
38
44
|
shell: process.platform === 'win32',
|
|
39
|
-
}
|
|
45
|
+
};
|
|
46
|
+
// 不传 env 时让 Node 默认继承父进程;显式 env 才覆盖
|
|
47
|
+
if (opts?.env) execOpts.env = opts.env;
|
|
48
|
+
doExecFile('openclaw', ['plugins', 'update', pluginId], execOpts, (err) => {
|
|
40
49
|
if (err) reject(new Error(`plugins update failed: ${err.message}`));
|
|
41
50
|
else resolve();
|
|
42
51
|
});
|
|
@@ -110,21 +119,44 @@ export async function runUpgrade({ pluginDir, fromVersion, toVersion, pluginId,
|
|
|
110
119
|
await createBackup(pluginDir);
|
|
111
120
|
log('[upgrade-worker] Backup created');
|
|
112
121
|
|
|
113
|
-
// 2.
|
|
122
|
+
// 2. 执行升级(首次按用户原 env,失败后用反向 mirror 重试一次)
|
|
114
123
|
log('[upgrade-worker] Running plugins update...');
|
|
124
|
+
let updateErr = null;
|
|
115
125
|
try {
|
|
116
126
|
await runPluginUpdate(pluginId, opts);
|
|
127
|
+
log('[upgrade-worker] Update command completed');
|
|
128
|
+
}
|
|
129
|
+
catch (firstErr) {
|
|
130
|
+
log(`[upgrade-worker] Update command failed: ${firstErr.message}`);
|
|
131
|
+
updateErr = firstErr;
|
|
132
|
+
try {
|
|
133
|
+
const current = await getCurrentNpmRegistry(opts);
|
|
134
|
+
const fallback = pickFallbackRegistry(current);
|
|
135
|
+
log(`[upgrade-worker] Retrying with fallback registry: ${fallback}`);
|
|
136
|
+
// npm 同时认 npm_config_X 与 NPM_CONFIG_X 两种 env 命名,
|
|
137
|
+
// 若用户已 export 大写版(国内常见),仅 set 小写不足以覆盖,
|
|
138
|
+
// 显式 delete 大写避免 retry 仍走原 registry。
|
|
139
|
+
const retryEnv = { ...process.env };
|
|
140
|
+
delete retryEnv.NPM_CONFIG_REGISTRY;
|
|
141
|
+
retryEnv.npm_config_registry = fallback;
|
|
142
|
+
await runPluginUpdate(pluginId, { ...opts, env: retryEnv });
|
|
143
|
+
log('[upgrade-worker] Update command completed on retry');
|
|
144
|
+
updateErr = null;
|
|
145
|
+
}
|
|
146
|
+
catch (retryErr) {
|
|
147
|
+
log(`[upgrade-worker] Retry with fallback registry failed: ${retryErr.message}`);
|
|
148
|
+
updateErr = retryErr;
|
|
149
|
+
}
|
|
117
150
|
}
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
151
|
+
|
|
152
|
+
if (updateErr) {
|
|
153
|
+
// 两次都失败仍按瞬态故障处理(保留原 skipVersion: false 设计意图)
|
|
121
154
|
await handleRollback({
|
|
122
155
|
pluginDir, fromVersion, toVersion, pluginId, pkgName,
|
|
123
156
|
error: updateErr.message, skipVersion: false, opts, log,
|
|
124
157
|
});
|
|
125
158
|
return;
|
|
126
159
|
}
|
|
127
|
-
log('[upgrade-worker] Update command completed');
|
|
128
160
|
|
|
129
161
|
// 3. 等待 gateway 重启并验证
|
|
130
162
|
log('[upgrade-worker] Verifying upgrade...');
|
package/src/realtime-bridge.js
CHANGED
|
@@ -659,6 +659,14 @@ export class RealtimeBridge {
|
|
|
659
659
|
return;
|
|
660
660
|
}
|
|
661
661
|
if (payload.type === 'res' || payload.type === 'event') {
|
|
662
|
+
// 过滤 gateway 的管理层广播事件,这些对 WebChat / plugin 客户端无意义:
|
|
663
|
+
// - health: 全量状态快照(~3KB, ~60s 一次 + RPC 触发),给 Admin UI 的监控仪表盘用
|
|
664
|
+
// - tick: gateway WS 保活心跳(30s 一次),UI 隔着 DC 不需要,DC 自己有 probe 机制
|
|
665
|
+
// 不转发可避免后台时 rpc DC 队列被灌满。上游支持按需订阅前先在插件侧拦截。
|
|
666
|
+
if (payload.type === 'event'
|
|
667
|
+
&& (payload.event === 'health' || payload.event === 'tick')) {
|
|
668
|
+
return;
|
|
669
|
+
}
|
|
662
670
|
this.webrtcPeer?.broadcast(payload);
|
|
663
671
|
}
|
|
664
672
|
});
|
|
@@ -46,6 +46,7 @@ export function defaultResolvePaths(platformKey, pluginRoot) {
|
|
|
46
46
|
// 定位 node-datachannel 包根:从入口路径向上查找 package.json
|
|
47
47
|
const require = createRequire(nodePath.join(pluginRoot, 'package.json'));
|
|
48
48
|
const entryPath = require.resolve('node-datachannel');
|
|
49
|
+
/* c8 ignore start -- node-datachannel 依赖已于 2026-04-19 摘除,以下路径仅在 ndc 实际安装时命中;代码保留作为过渡期 fallback 自然失败锚点,待 ndc-preloader 整体清理时一并删除 */
|
|
49
50
|
let pkgRoot = nodePath.dirname(entryPath);
|
|
50
51
|
while (pkgRoot !== nodePath.dirname(pkgRoot)) {
|
|
51
52
|
try {
|
|
@@ -58,6 +59,7 @@ export function defaultResolvePaths(platformKey, pluginRoot) {
|
|
|
58
59
|
const dest = nodePath.join(destDir, 'node_datachannel.node');
|
|
59
60
|
|
|
60
61
|
return { src, dest, destDir };
|
|
62
|
+
/* c8 ignore stop */
|
|
61
63
|
}
|
|
62
64
|
|
|
63
65
|
/**
|
|
@@ -62,6 +62,10 @@ export class RpcSendQueue {
|
|
|
62
62
|
send(jsonStr) {
|
|
63
63
|
if (this.closed || this.dc.readyState !== 'open') return false;
|
|
64
64
|
|
|
65
|
+
// 诊断日志:打印每次入队的事件,跟踪 gateway 还会推哪些事件
|
|
66
|
+
// 需要时临时打开,平时保持注释避免日志噪音
|
|
67
|
+
// this.logger.info?.(`[rpc-queue${this.__tagSuffix()}] send-payload ${jsonStr}`);
|
|
68
|
+
|
|
65
69
|
const chunks = buildChunks(jsonStr, this.maxMessageSize, this.getNextMsgId);
|
|
66
70
|
const totalBytes = chunks
|
|
67
71
|
? chunks.reduce((n, c) => n + c.length, 0)
|
|
@@ -80,6 +84,8 @@ export class RpcSendQueue {
|
|
|
80
84
|
this.droppedCount += 1;
|
|
81
85
|
this.droppedBytes += totalBytes;
|
|
82
86
|
this.logger.warn?.(`[rpc-queue${this.__tagSuffix()}] drop reason=queue-full size=${totalBytes} queueBytes=${this.queueBytes}`);
|
|
87
|
+
// 诊断日志:定位后台长时间占队的事件来源。需要时临时打开
|
|
88
|
+
// this.logger.info?.(`[rpc-queue${this.__tagSuffix()}] dropped-payload ${jsonStr}`);
|
|
83
89
|
if (!this.queueOverflowActive) {
|
|
84
90
|
this.queueOverflowActive = true;
|
|
85
91
|
remoteLog(`rpc-queue.overflow-start${this.__tagSuffix()} queueBytes=${this.queueBytes}`);
|
|
@@ -6,13 +6,13 @@ import { remoteLog } from '../remote-log.js';
|
|
|
6
6
|
// 用于诊断 dump:过大会撑爆 remoteLog 单帧,20 足以覆盖典型多文件传输会话。
|
|
7
7
|
const FILE_CHANNEL_HISTORY_LIMIT = 20;
|
|
8
8
|
|
|
9
|
-
// Failed session 保留
|
|
9
|
+
// Failed session 保留 12 小时,支持 Capacitor 后台恢复后 ICE restart。
|
|
10
10
|
// 超时后 session 被回收释放 IPC listeners 和 Go 侧资源。
|
|
11
|
-
const FAILED_SESSION_TTL_MS =
|
|
11
|
+
const FAILED_SESSION_TTL_MS = 12 * 60 * 60 * 1000;
|
|
12
12
|
|
|
13
13
|
// Session 总数上限(活跃 + failed)。溢出时淘汰最旧的 failed session。
|
|
14
|
-
//
|
|
15
|
-
const MAX_SESSIONS =
|
|
14
|
+
// 10 足以覆盖多 UI 实例(浏览器多标签 + 移动端)的典型场景。
|
|
15
|
+
const MAX_SESSIONS = 10;
|
|
16
16
|
|
|
17
17
|
/**
|
|
18
18
|
* 管理多个 WebRTC PeerConnection(以 connId 为粒度)。
|
|
@@ -69,6 +69,17 @@ export class WebRtcPeer {
|
|
|
69
69
|
clearTimeout(session.__failedTimer);
|
|
70
70
|
session.__failedTimer = null;
|
|
71
71
|
}
|
|
72
|
+
// 清理 plugin-probe 定时器(避免 session 已关闭仍触发 timeout 日志,
|
|
73
|
+
// 或 500ms 调度窗口内 session 被替换时对着新 session 误发探针)
|
|
74
|
+
if (session.__pluginProbeSchedTimer) {
|
|
75
|
+
clearTimeout(session.__pluginProbeSchedTimer);
|
|
76
|
+
session.__pluginProbeSchedTimer = null;
|
|
77
|
+
}
|
|
78
|
+
if (session.__pluginProbeTimer) {
|
|
79
|
+
clearTimeout(session.__pluginProbeTimer);
|
|
80
|
+
session.__pluginProbeTimer = null;
|
|
81
|
+
session.__pluginProbeInFlight = null;
|
|
82
|
+
}
|
|
72
83
|
this.__sessions.delete(connId);
|
|
73
84
|
// 显式关闭 rpc 发送队列:dc.onclose 路径中 `sessions.get(connId)` 已返回 undefined 而短路,
|
|
74
85
|
// 此处不主动 close 会丢失 drop 汇总 remoteLog 诊断
|
|
@@ -83,6 +94,9 @@ export class WebRtcPeer {
|
|
|
83
94
|
if ('onselectedcandidatepairchange' in session.pc) {
|
|
84
95
|
session.pc.onselectedcandidatepairchange = null;
|
|
85
96
|
}
|
|
97
|
+
if ('oniceconnectionstatechange' in session.pc) {
|
|
98
|
+
session.pc.oniceconnectionstatechange = null;
|
|
99
|
+
}
|
|
86
100
|
await session.pc.close();
|
|
87
101
|
this.__remoteLog(`rtc.closed conn=${connId}`);
|
|
88
102
|
this.logger.info?.(`${this.__rtcTag} [${connId}] closed`);
|
|
@@ -134,6 +148,8 @@ export class WebRtcPeer {
|
|
|
134
148
|
async __handleOffer(msg) {
|
|
135
149
|
const connId = msg.fromConnId;
|
|
136
150
|
const isIceRestart = !!msg.payload?.iceRestart;
|
|
151
|
+
const credRemain = this.__credRemainSec(msg.turnCreds);
|
|
152
|
+
const credRemainStr = credRemain ?? 'none';
|
|
137
153
|
|
|
138
154
|
// ICE restart:在现有 PC 上重新协商,保持 DTLS session
|
|
139
155
|
if (isIceRestart) {
|
|
@@ -141,7 +157,7 @@ export class WebRtcPeer {
|
|
|
141
157
|
if (existing) {
|
|
142
158
|
// 仅已验证支持 ICE restart 的 impl 放行,其余立即 reject 让 UI 走 rebuild
|
|
143
159
|
if (this.__impl !== 'pion') {
|
|
144
|
-
this.__remoteLog(`rtc.ice-restart-unsupported conn=${connId} impl=${this.__impl}`);
|
|
160
|
+
this.__remoteLog(`rtc.ice-restart-unsupported conn=${connId} impl=${this.__impl} credRemain=${credRemainStr}`);
|
|
145
161
|
this.logger.info?.(`${this.__rtcTag} ICE restart rejected: impl=${this.__impl} not verified`);
|
|
146
162
|
this.__onSend({
|
|
147
163
|
type: 'rtc:restart-rejected',
|
|
@@ -155,7 +171,7 @@ export class WebRtcPeer {
|
|
|
155
171
|
clearTimeout(existing.__failedTimer);
|
|
156
172
|
existing.__failedTimer = null;
|
|
157
173
|
}
|
|
158
|
-
this.__remoteLog(`rtc.ice-restart conn=${connId}`);
|
|
174
|
+
this.__remoteLog(`rtc.ice-restart conn=${connId} credRemain=${credRemainStr}`);
|
|
159
175
|
this.logger.info?.(`${this.__rtcTag} ICE restart offer from ${connId}, renegotiating`);
|
|
160
176
|
try {
|
|
161
177
|
await existing.pc.setRemoteDescription({ type: 'offer', sdp: msg.payload.sdp });
|
|
@@ -173,11 +189,12 @@ export class WebRtcPeer {
|
|
|
173
189
|
toConnId: connId,
|
|
174
190
|
payload: { sdp: answer.sdp },
|
|
175
191
|
});
|
|
192
|
+
this.__remoteLog(`rtc.restart-answer-sent conn=${connId}`);
|
|
176
193
|
this.logger.info?.(`${this.__rtcTag} ICE restart answer sent to ${connId}`);
|
|
177
194
|
return;
|
|
178
195
|
} catch (err) {
|
|
179
196
|
// ICE restart 协商失败 → reject,不 fall through
|
|
180
|
-
this.__remoteLog(`rtc.ice-restart-failed conn=${connId}`);
|
|
197
|
+
this.__remoteLog(`rtc.ice-restart-failed conn=${connId} credRemain=${credRemainStr}`);
|
|
181
198
|
this.logger.warn?.(`${this.__rtcTag} ICE restart failed for ${connId}: ${err?.message}`);
|
|
182
199
|
this.__onSend({
|
|
183
200
|
type: 'rtc:restart-rejected',
|
|
@@ -192,7 +209,7 @@ export class WebRtcPeer {
|
|
|
192
209
|
}
|
|
193
210
|
}
|
|
194
211
|
// 无 session → reject(plugin 可能已重启)
|
|
195
|
-
this.__remoteLog(`rtc.ice-restart-no-session conn=${connId}`);
|
|
212
|
+
this.__remoteLog(`rtc.ice-restart-no-session conn=${connId} credRemain=${credRemainStr}`);
|
|
196
213
|
this.logger.warn?.(`${this.__rtcTag} ICE restart from ${connId} but no session, rejecting`);
|
|
197
214
|
this.__onSend({
|
|
198
215
|
type: 'rtc:restart-rejected',
|
|
@@ -243,11 +260,24 @@ export class WebRtcPeer {
|
|
|
243
260
|
this.__sessions.set(connId, session);
|
|
244
261
|
|
|
245
262
|
// ICE candidate → 发给 UI,并统计各类型 candidate 数量
|
|
263
|
+
// gather complete 时一并输出 host 候选的 IP:port 列表(诊断 docker/vbridge 误 gather)
|
|
246
264
|
const candidateCounts = { host: 0, srflx: 0, relay: 0 };
|
|
265
|
+
const hostAddrs = [];
|
|
266
|
+
let gatheringEmitted = false;
|
|
267
|
+
const flushGatherDiag = () => {
|
|
268
|
+
if (gatheringEmitted) return;
|
|
269
|
+
gatheringEmitted = true;
|
|
270
|
+
const hostInfo = hostAddrs.length ? ` hosts=${hostAddrs.join(',')}` : '';
|
|
271
|
+
this.__remoteLog(`rtc.ice-gathered conn=${connId} host=${candidateCounts.host} srflx=${candidateCounts.srflx} relay=${candidateCounts.relay}${hostInfo}`);
|
|
272
|
+
candidateCounts.host = 0;
|
|
273
|
+
candidateCounts.srflx = 0;
|
|
274
|
+
candidateCounts.relay = 0;
|
|
275
|
+
hostAddrs.length = 0;
|
|
276
|
+
};
|
|
247
277
|
pc.onicecandidate = ({ candidate }) => {
|
|
248
278
|
if (!candidate) {
|
|
249
|
-
// gathering
|
|
250
|
-
|
|
279
|
+
// 浏览器路径:gathering 完成通过 null candidate 通知
|
|
280
|
+
flushGatherDiag();
|
|
251
281
|
return;
|
|
252
282
|
}
|
|
253
283
|
// 从 candidate 字符串中提取类型(typ host / typ srflx / typ relay)
|
|
@@ -255,6 +285,14 @@ export class WebRtcPeer {
|
|
|
255
285
|
if (typMatch && candidateCounts[typMatch[1]] !== undefined) {
|
|
256
286
|
candidateCounts[typMatch[1]]++;
|
|
257
287
|
}
|
|
288
|
+
// host 候选记录 addr:port,用于观察 pion 是否把 docker0 / br-* / loopback 等接口当成 host
|
|
289
|
+
// candidate 格式: "candidate:<foundation> <comp> <proto> <prio> <ADDR> <PORT> typ host ..."
|
|
290
|
+
if (typMatch?.[1] === 'host') {
|
|
291
|
+
const parts = candidate.candidate.split(' ');
|
|
292
|
+
if (parts.length >= 6) {
|
|
293
|
+
hostAddrs.push(`${parts[4]}:${parts[5]}`);
|
|
294
|
+
}
|
|
295
|
+
}
|
|
258
296
|
this.__onSend({
|
|
259
297
|
type: 'rtc:ice',
|
|
260
298
|
toConnId: connId,
|
|
@@ -265,6 +303,29 @@ export class WebRtcPeer {
|
|
|
265
303
|
},
|
|
266
304
|
});
|
|
267
305
|
};
|
|
306
|
+
// pion-node 不会在 gather complete 时 fire onicecandidate(null),用 icegatheringstatechange 兜底。
|
|
307
|
+
// gathering→ 重置 flag 支持 ICE restart;complete→ flush 汇总
|
|
308
|
+
if ('onicegatheringstatechange' in pc) {
|
|
309
|
+
pc.onicegatheringstatechange = () => {
|
|
310
|
+
const state = pc.iceGatheringState;
|
|
311
|
+
if (state === 'gathering') {
|
|
312
|
+
gatheringEmitted = false;
|
|
313
|
+
} else if (state === 'complete') {
|
|
314
|
+
flushGatherDiag();
|
|
315
|
+
}
|
|
316
|
+
};
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
// ICE agent 状态(pion 暴露的独立事件):能看到 checking / connected / failed 等纯 ICE 侧跳转,
|
|
320
|
+
// 与复合 connectionState 互补。对诊断"pion 说 connected 但 UI 看不到数据"非常关键。
|
|
321
|
+
// 仅在 pion-node 实现中可用;其他实现赋值是 no-op。
|
|
322
|
+
if ('oniceconnectionstatechange' in pc) {
|
|
323
|
+
pc.oniceconnectionstatechange = () => {
|
|
324
|
+
const cur = this.__sessions.get(connId);
|
|
325
|
+
if (!cur || cur.pc !== pc) return;
|
|
326
|
+
this.__remoteLog(`rtc.iceState conn=${connId} ${pc.iceConnectionState ?? '?'}`);
|
|
327
|
+
};
|
|
328
|
+
}
|
|
268
329
|
|
|
269
330
|
// 连接状态变更(校验 pc 归属,防止旧 PC 异步回调删除新 session)
|
|
270
331
|
pc.onconnectionstatechange = () => {
|
|
@@ -283,6 +344,7 @@ export class WebRtcPeer {
|
|
|
283
344
|
}
|
|
284
345
|
|
|
285
346
|
if (state === 'connected') {
|
|
347
|
+
const prevDumpState = cur.__lastDumpState;
|
|
286
348
|
// 重置 dump 去重水位(disconnected → connected → disconnected 仍能再 dump)
|
|
287
349
|
cur.__lastDumpState = null;
|
|
288
350
|
// werift: iceTransports[0].connection.nominated
|
|
@@ -296,6 +358,22 @@ export class WebRtcPeer {
|
|
|
296
358
|
this.logger.info?.(`${this.__rtcTag} [${connId}] ICE nominated: local=${localInfo} remote=${remoteInfo}`);
|
|
297
359
|
}
|
|
298
360
|
// pion: pair 通过独立的 selectedcandidatepairchange 事件上报
|
|
361
|
+
// ICE restart 恢复(disconnected/failed → connected)时做诊断动作:
|
|
362
|
+
// - dump 当前 session DC 状态,对照"UI 看不到 connected 时 plugin 侧看到什么"
|
|
363
|
+
// - 发一次 plugin-probe,实测 DC 是否双向可用
|
|
364
|
+
// 只对 pion 生效:werift/ndc 为兼容路径,不涉及本次调查的病态场景。
|
|
365
|
+
if (this.__impl === 'pion' && (prevDumpState === 'disconnected' || prevDumpState === 'failed')) {
|
|
366
|
+
this.__dumpSessionState(connId, cur, 'connected');
|
|
367
|
+
// 挂到 session 上,使 closeByConnId 能在 500ms 窗口内取消;
|
|
368
|
+
// 否则 session 被替换(同 connId 新 offer)时会对着新 session 误发探针。
|
|
369
|
+
if (cur.__pluginProbeSchedTimer) clearTimeout(cur.__pluginProbeSchedTimer);
|
|
370
|
+
cur.__pluginProbeSchedTimer = setTimeout(() => {
|
|
371
|
+
cur.__pluginProbeSchedTimer = null;
|
|
372
|
+
this.__sendPluginProbe(connId);
|
|
373
|
+
}, 500);
|
|
374
|
+
// unref() 避免定时器阻塞 gateway 进程退出(gateway 由其他连接保活)。
|
|
375
|
+
cur.__pluginProbeSchedTimer.unref?.();
|
|
376
|
+
}
|
|
299
377
|
} else if (state === 'disconnected' || state === 'failed' || state === 'closed') {
|
|
300
378
|
// 诊断 dump:失败/断连/关闭时输出当前 PC 上 DC 状态,定位"PC 假活/DC 死"现象
|
|
301
379
|
// - closed 由 closeByConnId 接管清理,dump 收敛诊断噪声
|
|
@@ -425,6 +503,11 @@ export class WebRtcPeer {
|
|
|
425
503
|
catch { /* DC 已关闭,忽略 */ }
|
|
426
504
|
return;
|
|
427
505
|
}
|
|
506
|
+
// 来自 UI 的 plugin-probe 回复:验证 plugin → UI 方向确实传达并被回传
|
|
507
|
+
if (payload.type === 'plugin-probe-ack') {
|
|
508
|
+
this.__handlePluginProbeAck(connId, payload.id);
|
|
509
|
+
return;
|
|
510
|
+
}
|
|
428
511
|
if (payload.type === 'req') {
|
|
429
512
|
// coclaw.files.* 方法本地处理,不转发 gateway
|
|
430
513
|
if (payload.method?.startsWith('coclaw.files.') && this.__onFileRpc) {
|
|
@@ -556,10 +639,73 @@ export class WebRtcPeer {
|
|
|
556
639
|
this.__remoteLog(`rtc.peer-transport conn=${connId} type=${payload.candidateType} proto=${payload.protocol} relay=${payload.relayProtocol ?? '-'}`);
|
|
557
640
|
}
|
|
558
641
|
|
|
642
|
+
/**
|
|
643
|
+
* 主动探针:在 rpc DC 上发一个 plugin-probe,期待 UI 回 plugin-probe-ack。
|
|
644
|
+
* 用于区分"pion 报告 connected 但 UI 其实没收到数据"与"UI 真的收到了但没记录事件"。
|
|
645
|
+
* 绕过 RpcSendQueue(与 probe-ack 对称),仅测量传输层,不受应用层积压影响。
|
|
646
|
+
* 同一 session 同时只保留一条 in-flight 探针;超时仅打日志,不影响业务恢复。
|
|
647
|
+
*/
|
|
648
|
+
__sendPluginProbe(connId) {
|
|
649
|
+
const session = this.__sessions.get(connId);
|
|
650
|
+
if (!session) return;
|
|
651
|
+
const dc = session.rpcChannel;
|
|
652
|
+
if (!dc || dc.readyState !== 'open') return;
|
|
653
|
+
// 已有 in-flight 则跳过(避免重复)
|
|
654
|
+
if (session.__pluginProbeInFlight) return;
|
|
655
|
+
|
|
656
|
+
const id = (session.__pluginProbeIdSeq = (session.__pluginProbeIdSeq ?? 0) + 1);
|
|
657
|
+
const startMs = Date.now();
|
|
658
|
+
const timer = setTimeout(() => {
|
|
659
|
+
if (session.__pluginProbeInFlight?.id === id) {
|
|
660
|
+
session.__pluginProbeInFlight = null;
|
|
661
|
+
session.__pluginProbeTimer = null;
|
|
662
|
+
this.__remoteLog(`rtc.plugin-probe conn=${connId} id=${id} timeout`);
|
|
663
|
+
}
|
|
664
|
+
}, 5000);
|
|
665
|
+
timer.unref?.();
|
|
666
|
+
session.__pluginProbeInFlight = { id, startMs };
|
|
667
|
+
session.__pluginProbeTimer = timer;
|
|
668
|
+
|
|
669
|
+
try {
|
|
670
|
+
dc.send(JSON.stringify({ type: 'plugin-probe', id }));
|
|
671
|
+
this.__remoteLog(`rtc.plugin-probe conn=${connId} id=${id} sent`);
|
|
672
|
+
} catch (err) {
|
|
673
|
+
clearTimeout(timer);
|
|
674
|
+
session.__pluginProbeInFlight = null;
|
|
675
|
+
session.__pluginProbeTimer = null;
|
|
676
|
+
this.__remoteLog(`rtc.plugin-probe conn=${connId} id=${id} send-failed msg=${err?.message ?? err}`);
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
/** 收到 UI 的 plugin-probe-ack:计算 RTT 并释放 in-flight 槽位 */
|
|
681
|
+
__handlePluginProbeAck(connId, id) {
|
|
682
|
+
const session = this.__sessions.get(connId);
|
|
683
|
+
if (!session) return;
|
|
684
|
+
const inFlight = session.__pluginProbeInFlight;
|
|
685
|
+
if (!inFlight || inFlight.id !== id) return; // 过期 ack,忽略
|
|
686
|
+
const rtt = Date.now() - inFlight.startMs;
|
|
687
|
+
if (session.__pluginProbeTimer) {
|
|
688
|
+
clearTimeout(session.__pluginProbeTimer);
|
|
689
|
+
session.__pluginProbeTimer = null;
|
|
690
|
+
}
|
|
691
|
+
session.__pluginProbeInFlight = null;
|
|
692
|
+
this.__remoteLog(`rtc.plugin-probe conn=${connId} id=${id} acked rtt=${rtt}`);
|
|
693
|
+
}
|
|
694
|
+
|
|
559
695
|
__remoteLog(msg) {
|
|
560
696
|
remoteLog(this.__impl ? `${msg} rtc=${this.__impl}` : msg);
|
|
561
697
|
}
|
|
562
698
|
|
|
699
|
+
// 解析 HMAC turnCreds 中的剩余秒数(username 形如 "<expireAt>:<userId>");
|
|
700
|
+
// 负值表示已过期;解析失败或 turnCreds 缺失返回 null。仅用于 ICE restart 日志诊断。
|
|
701
|
+
__credRemainSec(turnCreds) {
|
|
702
|
+
const username = turnCreds?.username;
|
|
703
|
+
if (typeof username !== 'string') return null;
|
|
704
|
+
const expireAt = Number(username.split(':')[0]);
|
|
705
|
+
if (!Number.isFinite(expireAt)) return null;
|
|
706
|
+
return expireAt - Math.floor(Date.now() / 1000);
|
|
707
|
+
}
|
|
708
|
+
|
|
563
709
|
/** 淘汰最旧的 failed session(Map 迭代序 ≈ 创建时间序),用于 queue length 限制 */
|
|
564
710
|
__evictOldestFailed() {
|
|
565
711
|
for (const [connId, session] of this.__sessions) {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|