@coclaw/openclaw-coclaw 0.17.5 → 0.17.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +23 -4
- package/package.json +1 -1
- package/src/realtime-bridge.js +99 -0
- package/src/webrtc/rpc-send-queue.js +15 -12
package/index.js
CHANGED
|
@@ -125,11 +125,34 @@ function respondError(respond, err) {
|
|
|
125
125
|
/**
 * Reply through `respond` with an INVALID_INPUT error.
 *
 * Invokes `respond(false, undefined, { code: 'INVALID_INPUT', message })`.
 *
 * @param {Function} respond - reply callback supplied by the caller
 * @param {*} message - value placed in the error object's `message` field (presumably a human-readable string — confirm against callers)
 */
function respondInvalid(respond, message) {
|
|
126
126
|
respond(false, undefined, { code: 'INVALID_INPUT', message });
|
|
127
127
|
}
|
|
128
|
+
/* c8 ignore stop */
|
|
129
|
+
|
|
128
130
|
const plugin = {
|
|
129
131
|
id: 'openclaw-coclaw',
|
|
130
132
|
name: 'CoClaw',
|
|
131
133
|
description: 'OpenClaw CoClaw channel plugin for remote chat',
|
|
132
134
|
register(api) {
|
|
135
|
+
// 按 OpenClaw SDK 入口模式分叉(参照 defineChannelPluginEntry,见上游 plugin-sdk/core.ts 的
|
|
136
|
+
// defineChannelPluginEntry 实现 与 docs/plugins/sdk-entrypoints.md):
|
|
137
|
+
// - cli-metadata 模式:仅声明根命令名供根 CLI 解析使用
|
|
138
|
+
// - 其他模式:注册 channel + CLI 元信息(discovery 下两者由 captured-registration 采集)
|
|
139
|
+
// - 仅 full 模式跑完整副作用(service / RPC / hook / command / managers / 磁盘 IO)
|
|
140
|
+
//
|
|
141
|
+
// 与上游 helper 的刻意偏差:上游 helper 在所有非 cli-metadata 模式下都调
|
|
142
|
+
// setRuntime?.(api.runtime),但 discovery 传入的 api.runtime 是空对象 {},每 14s
|
|
143
|
+
// 一次会把全局 runtime 单例擦掉。本实现把 setRuntime 严格限定在 full 模式,避免擦除。
|
|
144
|
+
const mode = api.registrationMode;
|
|
145
|
+
if (mode === 'cli-metadata') {
|
|
146
|
+
api.registerCli(registerCoclawCli, { commands: ['coclaw'] });
|
|
147
|
+
return;
|
|
148
|
+
}
|
|
149
|
+
api.registerChannel({ plugin: coclawChannelPlugin });
|
|
150
|
+
api.registerCli(registerCoclawCli, { commands: ['coclaw'] });
|
|
151
|
+
// 本插件 package.json 无 setupEntry,setup-only/setup-runtime 实际不会到达主 register;
|
|
152
|
+
// 保留兜底防御上游模型变化。`mode !== 'full'` 也覆盖 discovery(每 14s 一次)
|
|
153
|
+
if (mode !== 'full') return;
|
|
154
|
+
|
|
155
|
+
/* c8 ignore start */
|
|
133
156
|
setRuntime(api.runtime);
|
|
134
157
|
const logger = api?.logger ?? console;
|
|
135
158
|
installAbortRegistryDiag(logger);
|
|
@@ -145,8 +168,6 @@ const plugin = {
|
|
|
145
168
|
logger.warn?.(`[coclaw] chat history manager load failed: ${String(err?.message ?? err)}`);
|
|
146
169
|
});
|
|
147
170
|
|
|
148
|
-
api.registerChannel({ plugin: coclawChannelPlugin });
|
|
149
|
-
|
|
150
171
|
// 追踪 chat 因 reset 产生的孤儿 session
|
|
151
172
|
if (typeof api.on === 'function') {
|
|
152
173
|
api.on('session_start', async (event, ctx) => {
|
|
@@ -622,8 +643,6 @@ const plugin = {
|
|
|
622
643
|
stop() { scheduler.stop(); },
|
|
623
644
|
});
|
|
624
645
|
|
|
625
|
-
api.registerCli(registerCoclawCli, { commands: ['coclaw'] });
|
|
626
|
-
|
|
627
646
|
api.registerCommand({
|
|
628
647
|
name: 'coclaw',
|
|
629
648
|
description: 'CoClaw bind/unbind command',
|
package/package.json
CHANGED
package/src/realtime-bridge.js
CHANGED
|
@@ -29,6 +29,29 @@ const GATEWAY_RETRY_DELAYS_MS = [5_000, 10_000, 20_000, 20_000, 20_000];
|
|
|
29
29
|
// 严格限定在"签名/设备/scope/协议"相关错误,避免对网络/内部错误做无意义的降级尝试。
|
|
30
30
|
const GATEWAY_HANDSHAKE_FALLBACK_PATTERN = /signature|device|scope|protocol/i;
|
|
31
31
|
|
|
32
|
+
// Event-loop lag probe parameters used during an agent run: main-thread drift is sampled every 200ms, and drift above 100ms counts as a spike.
|
|
33
|
+
// A 60s cap is the fallback; normally the probe is stopped explicitly at the phase-2 terminal state. Used to keep observing whether the OpenClaw gateway main thread is blocked by synchronous code.
|
|
34
|
+
const LAG_PROBE_PERIOD_MS = 200;
|
|
35
|
+
const LAG_PROBE_THRESHOLD_MS = 100;
|
|
36
|
+
const LAG_PROBE_MAX_DURATION_MS = 60_000;
|
|
37
|
+
|
|
38
|
+
/**
 * Decide whether an outbound `res` payload means an agent RPC has reached its
 * phase-2 terminal state.
 *
 * Terminal = a `res` frame whose `payload.status` is not 'accepted'. This covers:
 *   - status 'ok': success
 *   - status 'error': execution failed
 *   - parameter validation failure: `ok === false` with no status
 *     (the "special case" in the protocol documentation)
 *
 * The returned reason is always a string: a status that is missing, empty, or
 * not a string is never echoed back; the reason is then derived from `ok`.
 *
 * @param {object} payload - message to classify
 * @returns {string | null} reason string for the lag.summary log when the
 *   frame is terminal, otherwise null
 */
export function classifyAgentLagStop(payload) {
	// Only `res` frames that carry a string id can match a tracked probe.
	if (payload?.type !== 'res' || typeof payload?.id !== 'string') return null;
	const status = payload.payload?.status;
	// 'accepted' is the phase-1 acknowledgement: the run is still in flight.
	if (status === 'accepted') return null;
	if (typeof status === 'string' && status !== '') return status;
	// No usable status: fall back to the frame-level ok flag.
	return payload.ok === false ? 'error' : 'ok';
}
|
|
54
|
+
|
|
32
55
|
function toServerWsUrl(baseUrl, token) {
|
|
33
56
|
const url = new URL(baseUrl);
|
|
34
57
|
url.protocol = url.protocol === 'https:' ? 'wss:' : 'ws:';
|
|
@@ -124,6 +147,8 @@ export class RealtimeBridge {
|
|
|
124
147
|
this.__gatewayGaveUp = false; // 重试次数耗尽 → 终态,不再自动尝试
|
|
125
148
|
this.__gatewayLegacyMode = false; // 学到"本 gateway 不接受带 device 的 v3"
|
|
126
149
|
this.__gatewayLastReason = null; // 最近一次失败原因(用于 gave-up 上报)
|
|
150
|
+
// agent RPC 进 in-flight 时建探针、phase-2 终态时移除:id -> { interval, timeout, stats }
|
|
151
|
+
this.__agentLagProbes = new Map();
|
|
127
152
|
}
|
|
128
153
|
|
|
129
154
|
__resolveWebSocket() {
|
|
@@ -761,6 +786,11 @@ export class RealtimeBridge {
|
|
|
761
786
|
&& (payload.event === 'health' || payload.event === 'tick')) {
|
|
762
787
|
return;
|
|
763
788
|
}
|
|
789
|
+
// agent RPC 进入 phase-2 终态时停 lag 探针(详见 classifyAgentLagStop)
|
|
790
|
+
const lagReason = classifyAgentLagStop(payload);
|
|
791
|
+
if (lagReason !== null) {
|
|
792
|
+
this.__stopLagProbe(payload.id, lagReason);
|
|
793
|
+
}
|
|
764
794
|
this.webrtcPeer?.broadcast(payload);
|
|
765
795
|
}
|
|
766
796
|
});
|
|
@@ -775,6 +805,9 @@ export class RealtimeBridge {
|
|
|
775
805
|
remoteLog(`ws.disconnected peer=gateway code=${ev?.code ?? '?'}`);
|
|
776
806
|
}
|
|
777
807
|
this.logger.info?.(`[coclaw] gateway ws closed (code=${ev?.code ?? '?'} reason=${ev?.reason ?? 'n/a'})`);
|
|
808
|
+
// gateway WS 一断,正在跑的 agent RPC 不会再有 phase-2 res,主动结算所有 lag 探针,
|
|
809
|
+
// 避免它们空跑到 60s 兜底,期间还会持续打 spike 噪声。
|
|
810
|
+
this.__clearAllLagProbes();
|
|
778
811
|
if (this.gatewayWs === ws) {
|
|
779
812
|
this.gatewayWs = null;
|
|
780
813
|
this.gatewayReady = false;
|
|
@@ -880,6 +913,10 @@ export class RealtimeBridge {
|
|
|
880
913
|
method: payload.method,
|
|
881
914
|
params: payload.params ?? {},
|
|
882
915
|
}));
|
|
916
|
+
// 仅 agent RPC 启动 lag 探针(覆盖发送 → phase-2 终态全程)。
|
|
917
|
+
if (payload.method === 'agent') {
|
|
918
|
+
this.__startLagProbe(payload.id);
|
|
919
|
+
}
|
|
883
920
|
}
|
|
884
921
|
catch {
|
|
885
922
|
this.webrtcPeer?.broadcast({
|
|
@@ -902,6 +939,66 @@ export class RealtimeBridge {
|
|
|
902
939
|
this.connectTimer = null;
|
|
903
940
|
}
|
|
904
941
|
|
|
942
|
+
// agent run 期间监测 gateway 主线程是否被同步代码阻塞。
|
|
943
|
+
// 设计动机:上游 OpenClaw 同步路径上有重活(详见 docs/openclaw-upstream-issues.md),
|
|
944
|
+
// 修复前作为持续诊断信号保留——主线程一旦被同步阻塞,agent send 路径会出现几十秒的卡顿。
|
|
945
|
+
//
|
|
946
|
+
// 异常隔离:插件运行在 gateway 进程内,timer 回调任何同步抛出都会让进程崩溃
|
|
947
|
+
// (CLAUDE.md 第 123 行明确禁止全局异常兜底),因此 interval/timeout 回调都用 try/catch 局部包裹。
|
|
948
|
+
__startLagProbe(id) {
|
|
949
|
+
if (this.__agentLagProbes.has(id)) return;
|
|
950
|
+
let lastTick = Date.now();
|
|
951
|
+
const stats = { ticks: 0, max: 0, sumOver: 0, over: 0, startedAt: lastTick };
|
|
952
|
+
const interval = setInterval(() => {
|
|
953
|
+
try {
|
|
954
|
+
const now = Date.now();
|
|
955
|
+
const lag = now - lastTick - LAG_PROBE_PERIOD_MS;
|
|
956
|
+
lastTick = now;
|
|
957
|
+
stats.ticks += 1;
|
|
958
|
+
if (lag > stats.max) stats.max = lag;
|
|
959
|
+
if (lag > LAG_PROBE_THRESHOLD_MS) {
|
|
960
|
+
stats.over += 1;
|
|
961
|
+
stats.sumOver += lag;
|
|
962
|
+
this.logger.warn?.(`[coclaw] lag.spike id=${id} +${lag}ms`);
|
|
963
|
+
}
|
|
964
|
+
}
|
|
965
|
+
catch {
|
|
966
|
+
// 探针自身异常静默吞掉,避免拖垮 gateway。
|
|
967
|
+
}
|
|
968
|
+
}, LAG_PROBE_PERIOD_MS);
|
|
969
|
+
interval.unref?.();
|
|
970
|
+
const timeout = setTimeout(() => this.__stopLagProbe(id, 'timeout'), LAG_PROBE_MAX_DURATION_MS);
|
|
971
|
+
timeout.unref?.();
|
|
972
|
+
this.__agentLagProbes.set(id, { interval, timeout, stats });
|
|
973
|
+
// lag.start 日志即便抛异常也不能影响调用方(在 __handleGatewayRequestFromDc 的 try/catch 内,
|
|
974
|
+
// 抛出会被误判为 send 失败 → 错发 GATEWAY_SEND_FAILED)。
|
|
975
|
+
try { this.logger.info?.(`[coclaw] lag.start id=${id}`); }
|
|
976
|
+
catch { /* 诊断日志失败不影响主流程 */ }
|
|
977
|
+
}
|
|
978
|
+
|
|
979
|
+
__stopLagProbe(id, reason) {
|
|
980
|
+
const probe = this.__agentLagProbes.get(id);
|
|
981
|
+
if (!probe) return;
|
|
982
|
+
clearInterval(probe.interval);
|
|
983
|
+
clearTimeout(probe.timeout);
|
|
984
|
+
this.__agentLagProbes.delete(id);
|
|
985
|
+
try {
|
|
986
|
+
const stats = probe.stats;
|
|
987
|
+
const dur = Date.now() - stats.startedAt;
|
|
988
|
+
this.logger.info?.(`[coclaw] lag.summary id=${id} reason=${reason} dur=${dur}ms ticks=${stats.ticks} max=${stats.max}ms over100=${stats.over} sumOver=${stats.sumOver}ms`);
|
|
989
|
+
}
|
|
990
|
+
catch {
|
|
991
|
+
// summary 输出失败不应阻断后续 RPC 广播——清理已完成,吞掉异常即可。
|
|
992
|
+
}
|
|
993
|
+
}
|
|
994
|
+
|
|
995
|
+
__clearAllLagProbes() {
|
|
996
|
+
const ids = Array.from(this.__agentLagProbes.keys());
|
|
997
|
+
for (const id of ids) {
|
|
998
|
+
this.__stopLagProbe(id, 'cleanup');
|
|
999
|
+
}
|
|
1000
|
+
}
|
|
1001
|
+
|
|
905
1002
|
__scheduleReconnect() {
|
|
906
1003
|
if (!this.started || this.reconnectTimer) {
|
|
907
1004
|
return;
|
|
@@ -1164,6 +1261,8 @@ export class RealtimeBridge {
|
|
|
1164
1261
|
setRemoteLogSender(null);
|
|
1165
1262
|
this.__clearServerHeartbeat();
|
|
1166
1263
|
this.__clearConnectTimer();
|
|
1264
|
+
// stop() / refresh() 兜底回收 lag 探针的 timer,防 unref 仍残留。
|
|
1265
|
+
this.__clearAllLagProbes();
|
|
1167
1266
|
if (this.reconnectTimer) {
|
|
1168
1267
|
clearTimeout(this.reconnectTimer);
|
|
1169
1268
|
this.reconnectTimer = null;
|
|
package/src/webrtc/rpc-send-queue.js
CHANGED
|
@@ -41,7 +41,9 @@ export class RpcSendQueue {
|
|
|
41
41
|
this.logger = logger ?? console;
|
|
42
42
|
this.tag = tag ?? '';
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
// 队列元素显式记录原始类型:drain 出口按 isString=true → string 帧,false → binary 帧。
|
|
45
|
+
// 早期实现统一存 Buffer,导致 string 帧被对端当分片残片静默丢弃
|
|
46
|
+
/** @type {{data: string|Buffer, isString: boolean, bytes: number}[]} */
|
|
45
47
|
this.queue = [];
|
|
46
48
|
this.queueBytes = 0;
|
|
47
49
|
this.closed = false;
|
|
@@ -83,11 +85,11 @@ export class RpcSendQueue {
|
|
|
83
85
|
if (this.queueBytes >= MAX_QUEUE_BYTES) {
|
|
84
86
|
this.droppedCount += 1;
|
|
85
87
|
this.droppedBytes += totalBytes;
|
|
86
|
-
|
|
87
|
-
//
|
|
88
|
-
// this.logger.info?.(`[rpc-queue${this.__tagSuffix()}] dropped-payload ${jsonStr}`);
|
|
88
|
+
// 仅状态翻转点打 log(warn + remoteLog 各一次);overflow 持续期间所有 drop 静默累加,
|
|
89
|
+
// 避免 UI 离线 + ICE 失败导致 DC 永远不 drain 时的日志刷屏
|
|
89
90
|
if (!this.queueOverflowActive) {
|
|
90
91
|
this.queueOverflowActive = true;
|
|
92
|
+
this.logger.warn?.(`[rpc-queue${this.__tagSuffix()}] overflow-start queueBytes=${this.queueBytes}`);
|
|
91
93
|
remoteLog(`rpc-queue.overflow-start${this.__tagSuffix()} queueBytes=${this.queueBytes}`);
|
|
92
94
|
}
|
|
93
95
|
return false;
|
|
@@ -106,9 +108,9 @@ export class RpcSendQueue {
|
|
|
106
108
|
return false;
|
|
107
109
|
}
|
|
108
110
|
}
|
|
109
|
-
const
|
|
110
|
-
this.queue.push(
|
|
111
|
-
this.queueBytes +=
|
|
111
|
+
const bytes = Buffer.byteLength(jsonStr, 'utf8');
|
|
112
|
+
this.queue.push({ data: jsonStr, isString: true, bytes });
|
|
113
|
+
this.queueBytes += bytes;
|
|
112
114
|
return true;
|
|
113
115
|
}
|
|
114
116
|
|
|
@@ -130,7 +132,7 @@ export class RpcSendQueue {
|
|
|
130
132
|
}
|
|
131
133
|
// 剩余 chunk 原子性入队(保证同一消息分片连续,不被其他消息插入)
|
|
132
134
|
for (; i < chunks.length; i += 1) {
|
|
133
|
-
this.queue.push(chunks[i]);
|
|
135
|
+
this.queue.push({ data: chunks[i], isString: false, bytes: chunks[i].length });
|
|
134
136
|
this.queueBytes += chunks[i].length;
|
|
135
137
|
}
|
|
136
138
|
return true;
|
|
@@ -164,18 +166,19 @@ export class RpcSendQueue {
|
|
|
164
166
|
while (this.queue.length > 0
|
|
165
167
|
&& dc.readyState === 'open'
|
|
166
168
|
&& dc.bufferedAmount < DC_HIGH_WATER_MARK) {
|
|
167
|
-
const
|
|
169
|
+
const item = this.queue[0];
|
|
168
170
|
try {
|
|
169
|
-
dc.send(
|
|
171
|
+
dc.send(item.data);
|
|
170
172
|
} catch (err) {
|
|
171
173
|
this.logger.warn?.(`[rpc-queue${this.__tagSuffix()}] drain send failed: ${err?.message}`);
|
|
172
174
|
return; // 保留队列,等 onclose 统一清理
|
|
173
175
|
}
|
|
174
176
|
this.queue.shift();
|
|
175
|
-
this.queueBytes -=
|
|
176
|
-
// 满 → 未满
|
|
177
|
+
this.queueBytes -= item.bytes;
|
|
178
|
+
// 满 → 未满 状态转换:打一条带累计数的 log,与 overflow-start 对称
|
|
177
179
|
if (this.queueOverflowActive && this.queueBytes < MAX_QUEUE_BYTES) {
|
|
178
180
|
this.queueOverflowActive = false;
|
|
181
|
+
this.logger.info?.(`[rpc-queue${this.__tagSuffix()}] overflow-end dropped=${this.droppedCount} droppedBytes=${this.droppedBytes}`);
|
|
179
182
|
remoteLog(`rpc-queue.overflow-end${this.__tagSuffix()} dropped=${this.droppedCount} droppedBytes=${this.droppedBytes}`);
|
|
180
183
|
}
|
|
181
184
|
}
|