@ozaiya/openclaw-channel 0.10.29 → 0.10.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,11 +47,52 @@ const CAPTURE_TIMEOUT_MS = 1200;
47
47
  const MAX_CONSECUTIVE_TIMEOUTS = 3;
48
48
  // Backoff between CDP reconnect attempts.
49
49
  const RECONNECT_DELAY_MS = 1000;
50
- // Pause capturing while the relay's outbound buffer exceeds this (≈ a frame or two).
51
- // Bounds in-flight frames to keep the view real-time on a slow uplink.
52
- const BACKPRESSURE_LIMIT_BYTES = 96 * 1024;
50
+ // Pause capturing while the relay's outbound buffer exceeds this (≈ one frame).
51
+ // Bounds in-flight frames to keep the view real-time on a slow uplink — and,
52
+ // just as important, keeps the socket queue shallow so TEXT (takeover control,
53
+ // WebRTC signaling) sharing the pipe isn't stuck behind a frame backlog.
54
+ const BACKPRESSURE_LIMIT_BYTES = 24 * 1024;
53
55
  // While paused for backpressure, re-check this often (ms).
54
56
  const BACKPRESSURE_RECHECK_MS = 25;
57
+ // Page-side pointer probe. Synthetic CDP input (Input.dispatchMouseEvent — from
58
+ // the agent's own automation OR viewer takeover) fires real DOM events, so a
59
+ // window-level listener sees every pointer move without touching the page's
60
+ // layout. Reports throttled coords through a CDP binding; clicks always report. Copy/cut report the selected text through the same binding.
61
+ const CURSOR_BINDING = "__ozaiyaCursorReport";
62
+ const CURSOR_PROBE_JS = `(() => {
63
+ if (window.__ozaiyaCursorHooked) return; window.__ozaiyaCursorHooked = 1;
64
+ let last = 0, lastTarget = null, lastShape = '';
65
+ const emit = (obj) => {
66
+ const fn = window.${CURSOR_BINDING};
67
+ if (typeof fn === 'function') { try { fn(JSON.stringify(obj)); } catch {} }
68
+ };
69
+ const report = (e, click) => {
70
+ const now = Date.now();
71
+ if (!click && now - last < 33) return;
72
+ last = now;
73
+ let shape;
74
+ if (e.target !== lastTarget) {
75
+ lastTarget = e.target;
76
+ try {
77
+ let s = getComputedStyle(e.target).cursor;
78
+ if (s === 'auto') s = (e.target.closest && e.target.closest('input,textarea,[contenteditable]')) ? 'text' : 'default';
79
+ if (s !== lastShape) { lastShape = s; shape = s; }
80
+ } catch {}
81
+ }
82
+ emit(shape ? { x: Math.round(e.clientX), y: Math.round(e.clientY), c: click ? 1 : 0, s: shape }
83
+ : { x: Math.round(e.clientX), y: Math.round(e.clientY), c: click ? 1 : 0 });
84
+ };
85
+ window.addEventListener('mousemove', (e) => report(e, false), { capture: true, passive: true });
86
+ window.addEventListener('mousedown', (e) => report(e, true), { capture: true, passive: true });
87
+ const clip = () => {
88
+ try {
89
+ const t = String(document.getSelection() || '').slice(0, 65536);
90
+ if (t) emit({ clip: t });
91
+ } catch {}
92
+ };
93
+ window.addEventListener('copy', clip, { capture: true, passive: true });
94
+ window.addEventListener('cut', clip, { capture: true, passive: true });
95
+ })();`;
55
96
  export async function createCdpScreencast(opts) {
56
97
  const { host, port } = opts;
57
98
  const quality = opts.quality ?? 55;
@@ -67,6 +108,12 @@ export async function createCdpScreencast(opts) {
67
108
  let consecutiveTimeouts = 0;
68
109
  let pendingTimer = null;
69
110
  let reconnectTimer = null;
111
+ let paceTimer = null; // pacing/backpressure wait
112
+ let nudged = false; // input just landed — skip the pacing wait once
113
+ let currentTargetId = ""; // page target this socket streams
114
+ let knownPageIds = new Set(); // page targets that existed at attach
115
+ let pendingTargetPath = null; // tab-follow: dial this next connect
116
+ let avoidTargetId = ""; // target whose captures just kept timing out
70
117
  const send = (method, params) => {
71
118
  const id = cmdId++;
72
119
  if (!closed && ws && ws.readyState === 1) {
@@ -98,16 +145,33 @@ export async function createCdpScreencast(opts) {
98
145
  const scheduleNext = () => {
99
146
  if (closed)
100
147
  return;
148
+ if (paceTimer) {
149
+ clearTimeout(paceTimer);
150
+ paceTimer = null;
151
+ }
101
152
  // Flow control: if the relay can't drain what we've already sent, don't capture
102
153
  // more — keep checking until it clears. This adapts fps to uplink bandwidth and
103
154
  // bounds latency (instead of buffering frames the network can't ship in time).
104
155
  const bp = opts.getBackpressure?.() ?? 0;
105
156
  if (bp > BACKPRESSURE_LIMIT_BYTES) {
106
- setTimeout(scheduleNext, BACKPRESSURE_RECHECK_MS);
157
+ paceTimer = setTimeout(scheduleNext, BACKPRESSURE_RECHECK_MS);
107
158
  return;
108
159
  }
109
- const interval = opts.getMinIntervalMs?.() ?? minIntervalMs;
110
- setTimeout(poll, Math.max(0, interval - (Date.now() - shotStartedAt)));
160
+ const interval = nudged ? 0 : (opts.getMinIntervalMs?.() ?? minIntervalMs);
161
+ nudged = false;
162
+ paceTimer = setTimeout(poll, Math.max(0, interval - (Date.now() - shotStartedAt)));
163
+ };
164
+ // Takeover input just changed the page — capture its effect NOW instead of
165
+ // letting click-to-photon eat the remaining pacing wait. If a shot is already
166
+ // inflight, the flag makes the FOLLOWING shot immediate instead (the inflight
167
+ // one raced the input and may predate its effect). Backpressure still applies
168
+ // (scheduleNext re-checks it): capturing sooner can't help a jammed uplink.
169
+ const nudge = () => {
170
+ if (closed)
171
+ return;
172
+ nudged = true;
173
+ if (!inflight)
174
+ scheduleNext();
111
175
  };
112
176
  // A capture resolved (frame arrived) or was abandoned (watchdog) — advance the loop.
113
177
  const finishShot = () => {
@@ -126,6 +190,9 @@ export async function createCdpScreencast(opts) {
126
190
  consecutiveTimeouts = 0;
127
191
  inflight = false;
128
192
  clearPending();
193
+ // This target won't render (e.g. a window.open popup stuck in limbo) —
194
+ // steer the reconnect's discovery toward a different page if one exists.
195
+ avoidTargetId = currentTargetId;
129
196
  const dead = ws;
130
197
  ws = null;
131
198
  try {
@@ -147,37 +214,116 @@ export async function createCdpScreencast(opts) {
147
214
  catch {
148
215
  return;
149
216
  }
217
+ if (msg.method === "Runtime.bindingCalled" && msg.params?.name === CURSOR_BINDING) {
218
+ try {
219
+ const p = JSON.parse(String(msg.params.payload ?? ""));
220
+ if (typeof p.clip === "string")
221
+ opts.onClipboard?.(p.clip);
222
+ else if (typeof p.x === "number" && typeof p.y === "number") {
223
+ opts.onCursor?.(p.x, p.y, !!p.c, typeof p.s === "string" ? p.s : undefined);
224
+ }
225
+ }
226
+ catch { /* malformed report — skip */ }
227
+ return;
228
+ }
229
+ if (msg.method === "Page.javascriptDialogOpening") {
230
+ // A JS dialog BLOCKS the renderer: captureScreenshot hangs (endless
231
+ // watchdog reconnects) and the dialog itself is browser chrome — invisible
232
+ // in screenshots and unreachable by Input.*. Auto-resolve: accept alerts
233
+ // (only option) and beforeunload (let navigation continue); dismiss
234
+ // confirm/prompt (never answer "yes" on the page's behalf).
235
+ const type = String(msg.params?.type ?? "");
236
+ const accept = type === "alert" || type === "beforeunload";
237
+ send("Page.handleJavaScriptDialog", { accept });
238
+ opts.log?.info?.(`[sandbox-cdp] auto-${accept ? "accepted" : "dismissed"} ${type || "dialog"}: ${String(msg.params?.message ?? "").slice(0, 120)}`);
239
+ return;
240
+ }
241
+ if (msg.method === "Target.targetCreated") {
242
+ // A tab created AFTER attach (agent automation or a takeover click on a
243
+ // target=_blank link) is where the action moves — follow it. The initial
244
+ // setDiscoverTargets burst re-announces existing tabs; knownPageIds
245
+ // (seeded from /json at connect) filters those out.
246
+ const info = msg.params?.targetInfo;
247
+ if (info?.type === "page" && info.targetId && !knownPageIds.has(info.targetId)) {
248
+ knownPageIds.add(info.targetId);
249
+ followTarget(info.targetId);
250
+ }
251
+ return;
252
+ }
253
+ if (msg.method === "Target.targetDestroyed") {
254
+ const tid = msg.params?.targetId;
255
+ if (typeof tid === "string")
256
+ knownPageIds.delete(tid);
257
+ // Our own target closing also closes this ws → the reconnect path
258
+ // re-discovers whatever page remains.
259
+ return;
260
+ }
150
261
  if (msg.id === shotId) {
151
262
  const data = msg.result?.data;
152
263
  if (data && !closed) {
153
264
  consecutiveTimeouts = 0;
265
+ avoidTargetId = ""; // captures flow again — stop steering discovery
154
266
  opts.onFrame(Buffer.from(data, "base64"));
155
267
  }
156
268
  finishShot(); // keep the stream alive whether or not this shot returned data
157
269
  }
158
270
  };
159
- /** Discover the page target's ws path (Host spoofed to localhost). */
160
- async function discoverPagePath() {
271
+ /** List page targets (Host spoofed to localhost). webSocketDebuggerUrl is
272
+ * ws://localhost/devtools/page/<id> — keep only the path and dial the real
273
+ * container host. */
274
+ async function discoverPages() {
161
275
  const json = await cdpGet(host, port, "/json");
162
276
  const targets = JSON.parse(json);
163
- const page = targets.find((t) => t.type === "page" && t.webSocketDebuggerUrl);
164
- if (!page?.webSocketDebuggerUrl)
165
- return null;
166
- // webSocketDebuggerUrl is ws://localhost/devtools/page/<id> (Host we spoofed) —
167
- // keep only the path and dial the real container host.
168
- return new URL(page.webSocketDebuggerUrl).pathname;
277
+ return targets
278
+ .filter((t) => t.type === "page" && t.webSocketDebuggerUrl)
279
+ .map((t) => ({ id: t.id ?? "", path: new URL(t.webSocketDebuggerUrl).pathname, url: t.url ?? "" }));
280
+ }
281
+ /** Pick the page to attach: prefer targets with a committed document (an
282
+ * empty url = a popup stuck in limbo whose captures never answer), and skip
283
+ * the target whose captures just kept timing out when there's an alternative. */
284
+ function pickPage(pages) {
285
+ const usable = pages.filter((p) => p.id !== avoidTargetId);
286
+ const pool = usable.length > 0 ? usable : pages;
287
+ return pool.find((p) => p.url !== "") ?? pool[0];
169
288
  }
289
+ /** A new tab opened — the action moves there, so the view follows: tear down
290
+ * the current socket and dial the new target. If it isn't connectable yet,
291
+ * the next reconnect attempt falls back to regular page discovery (pickPage). */
292
+ const followTarget = (targetId) => {
293
+ if (closed || targetId === currentTargetId)
294
+ return;
295
+ opts.log?.info?.(`[sandbox-cdp] new tab ${targetId} — following`);
296
+ pendingTargetPath = `/devtools/page/${targetId}`;
297
+ const dead = ws;
298
+ ws = null;
299
+ clearPending();
300
+ if (paceTimer) {
301
+ clearTimeout(paceTimer);
302
+ paceTimer = null;
303
+ }
304
+ inflight = false;
305
+ try {
306
+ dead?.close();
307
+ }
308
+ catch { /* ignore */ }
309
+ scheduleReconnect();
310
+ };
170
311
  /** Open (or re-open) the CDP socket and resume the frame loop. Returns false on failure. */
171
312
  async function connect() {
172
313
  if (closed)
173
314
  return false;
174
- let pageWsPath;
315
+ let pageWsPath = pendingTargetPath;
316
+ pendingTargetPath = null;
175
317
  try {
176
- pageWsPath = await discoverPagePath();
318
+ const pages = await discoverPages();
319
+ knownPageIds = new Set(pages.map((p) => p.id));
320
+ if (!pageWsPath)
321
+ pageWsPath = pickPage(pages)?.path ?? null;
177
322
  }
178
323
  catch (err) {
179
324
  opts.log?.warn?.(`[sandbox-cdp] target discovery failed: ${String(err)}`);
180
- return false;
325
+ if (!pageWsPath)
326
+ return false; // a tab-follow dial can proceed without /json
181
327
  }
182
328
  if (!pageWsPath) {
183
329
  opts.log?.warn?.("[sandbox-cdp] no page target");
@@ -185,6 +331,7 @@ export async function createCdpScreencast(opts) {
185
331
  }
186
332
  if (closed)
187
333
  return false;
334
+ currentTargetId = pageWsPath.split("/").pop() ?? "";
188
335
  const sock = new WSImpl(`ws://${host}:${port}${pageWsPath}`, { headers: { Host: "localhost" } });
189
336
  sock.binaryType = "arraybuffer";
190
337
  ws = sock;
@@ -200,7 +347,28 @@ export async function createCdpScreencast(opts) {
200
347
  }
201
348
  opts.log?.info?.("[sandbox-cdp] CDP connected — capturing frames");
202
349
  send("Page.enable");
350
+ // Activate the attached tab: captureScreenshot won't render a background
351
+ // target, so a followed (or rediscovered) tab must be brought to front.
352
+ send("Page.bringToFront");
353
+ // Tab-follow: get targetCreated/Destroyed events (browser-wide, delivered
354
+ // to this session). The initial re-announce burst is filtered by knownPageIds.
355
+ send("Target.setDiscoverTargets", { discover: true });
356
+ if (opts.onCursor || opts.onClipboard) {
357
+ // Pointer/clipboard probe: binding + install on future documents + on
358
+ // the one that's already loaded (addScriptToEvaluateOnNewDocument alone
359
+ // only covers navigations). Runtime.enable is required for bindingCalled.
360
+ send("Runtime.enable");
361
+ send("Runtime.addBinding", { name: CURSOR_BINDING });
362
+ send("Page.addScriptToEvaluateOnNewDocument", { source: CURSOR_PROBE_JS });
363
+ send("Runtime.evaluate", { expression: CURSOR_PROBE_JS });
364
+ }
203
365
  clearPending();
366
+ // A pacing timer from the previous socket's loop may still be pending —
367
+ // cancel it so reconnect doesn't leave two interleaved poll loops.
368
+ if (paceTimer) {
369
+ clearTimeout(paceTimer);
370
+ paceTimer = null;
371
+ }
204
372
  inflight = false;
205
373
  poll();
206
374
  });
@@ -237,6 +405,59 @@ export async function createCdpScreencast(opts) {
237
405
  return null;
238
406
  const MOUSE_BUTTON = ["left", "middle", "right"];
239
407
  const BUTTONS = { left: 1, right: 2, middle: 4 };
408
+ // CDP bitmask of buttons the viewer currently holds. CDP defaults `buttons`
409
+ // to 0 on mouseMoved, so a press-drag-release used to reach the page as moves
410
+ // with NO button held (e.buttons === 0) — drag logic that checks button state
411
+ // saw the handle released the moment it moved. Threading the held state
412
+ // through every event makes dragging real for those pages too.
413
+ let heldButtons = 0;
414
+ // ── Timed gesture replay ───────────────────────────────────────────────
415
+ // The viewer stamps drag events (mousedown → moves → mouseup) with the ms
416
+ // offset from the gesture start. The network BUNCHES events (relay queues,
417
+ // congestion), so dispatching on arrival hands the page a robotic burst —
418
+ // mousedown + every move + mouseup nearly simultaneously. Slider captchas
419
+ // (闲鱼/淘宝 滑块) score trajectory TIMING and reject exactly that; a drag
420
+ // only ever succeeded when the network happened to deliver events smoothly.
421
+ // Replaying on the sender's original timeline reproduces the human gesture
422
+ // regardless of how the events traveled. If delivery falls behind, the
423
+ // anchor shifts forward so the NEXT events keep their original spacing
424
+ // (shape preserved, total delay = network delay).
425
+ const dragQ = [];
426
+ let dragTimer = null;
427
+ let dragBase = 0; // wallclock anchor: event with stamp t fires at dragBase + t
428
+ const pumpDrag = () => {
429
+ dragTimer = null;
430
+ while (dragQ.length) {
431
+ const head = dragQ[0];
432
+ const now = Date.now();
433
+ const due = dragBase + head.t;
434
+ if (due <= now) {
435
+ dragQ.shift();
436
+ head.fire();
437
+ continue;
438
+ }
439
+ dragTimer = setTimeout(pumpDrag, due - now);
440
+ return;
441
+ }
442
+ };
443
+ /** Dispatch a viewer-stamped event on the sender's timeline. */
444
+ const fireTimed = (t, isGestureStart, fire) => {
445
+ const now = Date.now();
446
+ // (Re-)anchor on a new gesture, or when we've fallen behind with nothing
447
+ // queued — this event plays immediately and later ones keep their spacing.
448
+ if (isGestureStart || (dragQ.length === 0 && dragBase + t <= now))
449
+ dragBase = now - t;
450
+ dragQ.push({ t, fire });
451
+ if (!dragTimer)
452
+ pumpDrag();
453
+ };
454
+ /** Unstamped event: keep ordering — tail-queue behind any pending gesture. */
455
+ const fireOrdered = (fire) => {
456
+ if (dragQ.length)
457
+ dragQ.push({ t: dragQ[dragQ.length - 1].t, fire });
458
+ else
459
+ fire();
460
+ };
240
461
  // app modifier byte (shift 0x01 / ctrl 0x02 / alt 0x04 / cmd 0x08) → CDP modifiers
241
462
  // (Alt 1 / Ctrl 2 / Meta 4 / Shift 8).
242
463
  const toCdpMods = (m) => (m & 0x01 ? 8 : 0) | (m & 0x02 ? 2 : 0) | (m & 0x04 ? 1 : 0) | (m & 0x08 ? 4 : 0);
@@ -245,61 +466,140 @@ export async function createCdpScreencast(opts) {
245
466
  Enter: ["Enter", 13], Backspace: ["Backspace", 8], Tab: ["Tab", 9], Escape: ["Escape", 27],
246
467
  Delete: ["Delete", 46], " ": ["Space", 32], ArrowLeft: ["ArrowLeft", 37], ArrowUp: ["ArrowUp", 38],
247
468
  ArrowRight: ["ArrowRight", 39], ArrowDown: ["ArrowDown", 40], Home: ["Home", 36], End: ["End", 35],
248
- PageUp: ["PageUp", 33], PageDown: ["PageDown", 34],
469
+ PageUp: ["PageUp", 33], PageDown: ["PageDown", 34], Insert: ["Insert", 45],
470
+ F1: ["F1", 112], F2: ["F2", 113], F3: ["F3", 114], F4: ["F4", 115], F5: ["F5", 116], F6: ["F6", 117],
471
+ F7: ["F7", 118], F8: ["F8", 119], F9: ["F9", 120], F10: ["F10", 121], F11: ["F11", 122], F12: ["F12", 123],
249
472
  };
250
473
  const mouseClick = (x, y, button) => {
251
474
  const b = BUTTONS[button] ?? 1;
252
475
  send("Input.dispatchMouseEvent", { type: "mousePressed", x, y, button, buttons: b, clickCount: 1 });
253
476
  send("Input.dispatchMouseEvent", { type: "mouseReleased", x, y, button, buttons: 0, clickCount: 1 });
254
477
  };
478
+ // Pinch (the only multitouch gesture meaningful on a desktop page) → ctrl+wheel,
479
+ // which Chromium treats as zoom. The viewer's multitouch stream has no
480
+ // reconstructable touch phases, but spread deltas map cleanly.
481
+ let lastPinchSpread = 0;
482
+ let lastPinchAt = 0;
483
+ const handlePinch = (touches) => {
484
+ if (touches.length < 2) {
485
+ lastPinchSpread = 0;
486
+ return;
487
+ }
488
+ const x0 = Number(touches[0].x), y0 = Number(touches[0].y);
489
+ const x1 = Number(touches[1].x), y1 = Number(touches[1].y);
490
+ if (!isFinite(x0) || !isFinite(x1))
491
+ return;
492
+ const spread = Math.hypot(x1 - x0, y1 - y0);
493
+ const now = Date.now();
494
+ if (lastPinchSpread > 0 && now - lastPinchAt < 300) {
495
+ const delta = spread - lastPinchSpread;
496
+ if (Math.abs(delta) > 8) {
497
+ send("Input.dispatchMouseEvent", {
498
+ type: "mouseWheel", x: (x0 + x1) / 2, y: (y0 + y1) / 2,
499
+ deltaX: 0, deltaY: delta > 0 ? -53 : 53, modifiers: 2 /* Ctrl */,
500
+ });
501
+ lastPinchSpread = spread;
502
+ }
503
+ }
504
+ else {
505
+ lastPinchSpread = spread;
506
+ }
507
+ lastPinchAt = now;
508
+ };
255
509
  return {
256
510
  handleInput: (data) => {
257
511
  if (closed)
258
512
  return;
259
513
  const action = String(data.action ?? data.type ?? "");
260
514
  const x = Number(data.x ?? 0), y = Number(data.y ?? 0);
515
+ // Page-changing input → fast-track the next capture. Bare moves are too
516
+ // frequent and rarely change pixels — EXCEPT while a button is held
517
+ // (dragging a slider handle), when every move visibly moves the page and
518
+ // the viewer needs prompt frames to steer by.
519
+ if ((action !== "mousemove" && action !== "move") || heldButtons !== 0)
520
+ nudge();
521
+ const ts = Number(data.t);
522
+ const timed = Number.isFinite(ts);
523
+ const viaTimeline = (isStart, fire) => {
524
+ if (timed)
525
+ fireTimed(ts, isStart, fire);
526
+ else
527
+ fireOrdered(fire);
528
+ };
261
529
  if (action === "mousemove" || action === "move")
262
- send("Input.dispatchMouseEvent", { type: "mouseMoved", x, y });
530
+ viaTimeline(false, () => send("Input.dispatchMouseEvent", { type: "mouseMoved", x, y, buttons: heldButtons }));
263
531
  else if (action === "click")
264
- mouseClick(x, y, "left");
532
+ fireOrdered(() => mouseClick(x, y, "left"));
265
533
  else if (action === "rightclick")
266
- mouseClick(x, y, "right");
534
+ fireOrdered(() => mouseClick(x, y, "right"));
267
535
  else if (action === "middleclick")
268
- mouseClick(x, y, "middle");
536
+ fireOrdered(() => mouseClick(x, y, "middle"));
269
537
  else if (action === "mousedown" || action === "down") {
270
538
  const b = MOUSE_BUTTON[Number(data.button ?? 0)] ?? "left";
271
- send("Input.dispatchMouseEvent", { type: "mousePressed", x, y, button: b, buttons: BUTTONS[b], clickCount: 1 });
539
+ viaTimeline(true, () => { heldButtons |= BUTTONS[b]; send("Input.dispatchMouseEvent", { type: "mousePressed", x, y, button: b, buttons: heldButtons, clickCount: 1 }); });
272
540
  }
273
541
  else if (action === "mouseup" || action === "up") {
274
542
  const b = MOUSE_BUTTON[Number(data.button ?? 0)] ?? "left";
275
- send("Input.dispatchMouseEvent", { type: "mouseReleased", x, y, button: b, buttons: 0, clickCount: 1 });
543
+ viaTimeline(false, () => { heldButtons &= ~BUTTONS[b]; send("Input.dispatchMouseEvent", { type: "mouseReleased", x, y, button: b, buttons: heldButtons, clickCount: 1 }); });
276
544
  }
277
545
  else if (action === "scroll" || action === "wheel")
278
- send("Input.dispatchMouseEvent", { type: "mouseWheel", x, y, deltaX: 0, deltaY: -Number(data.delta ?? data.deltaY ?? data.dy ?? 0) * 30 });
546
+ fireOrdered(() => send("Input.dispatchMouseEvent", { type: "mouseWheel", x, y, deltaX: 0, deltaY: -Number(data.delta ?? data.deltaY ?? data.dy ?? 0) * 30 }));
547
+ // Paste from the viewer: real text insertion into the focused element —
548
+ // handles IME/emoji/CJK that per-key synthesis can't.
549
+ else if (action === "insertText" && typeof data.text === "string")
550
+ send("Input.insertText", { text: data.text.slice(0, 65536) });
551
+ else if (action === "multitouch" && Array.isArray(data.touches))
552
+ handlePinch(data.touches);
279
553
  },
280
554
  handleBinaryInput: (buf) => {
281
555
  if (closed || buf.length < 1)
282
556
  return;
283
557
  const code = buf[0];
558
+ // Same fast-track as handleInput: anything but a bare move (0x01) / relative
559
+ // move (0x02, unmapped) is about to change the page — and a move WITH a
560
+ // button held is a drag, which changes the page too.
561
+ if ((code !== 0x01 && code !== 0x02) || heldButtons !== 0)
562
+ nudge();
284
563
  const rx = () => buf.readInt16LE(1), ry = () => buf.readInt16LE(3);
285
- if (code === 0x01 && buf.length >= 5)
286
- send("Input.dispatchMouseEvent", { type: "mouseMoved", x: rx(), y: ry() });
287
- else if (code === 0x03 && buf.length >= 5)
288
- mouseClick(rx(), ry(), "left");
289
- else if (code === 0x04 && buf.length >= 5)
290
- mouseClick(rx(), ry(), "right");
291
- else if (code === 0x0A && buf.length >= 5)
292
- mouseClick(rx(), ry(), "middle");
564
+ // Optional trailing u16: ms offset from the gesture start (drag events).
565
+ // Pointer events (move/click/press/release) are 5 bytes; 7+ means the viewer stamped a timeline. Wheel (0x07) keeps its delta in byte 5 and is never stamped.
566
+ const isMouse = code === 0x01 || (code >= 0x03 && code <= 0x06) || (code >= 0x08 && code <= 0x0C);
567
+ const ts = isMouse && code !== 0x07 && buf.length >= 7 ? buf.readUInt16LE(5) : null;
568
+ const viaTimeline = (isStart, fire) => {
569
+ if (ts !== null)
570
+ fireTimed(ts, isStart, fire);
571
+ else
572
+ fireOrdered(fire);
573
+ };
574
+ if (code === 0x01 && buf.length >= 5) {
575
+ const x = rx(), y = ry();
576
+ viaTimeline(false, () => send("Input.dispatchMouseEvent", { type: "mouseMoved", x, y, buttons: heldButtons }));
577
+ }
578
+ else if (code === 0x03 && buf.length >= 5) {
579
+ const x = rx(), y = ry();
580
+ fireOrdered(() => mouseClick(x, y, "left"));
581
+ }
582
+ else if (code === 0x04 && buf.length >= 5) {
583
+ const x = rx(), y = ry();
584
+ fireOrdered(() => mouseClick(x, y, "right"));
585
+ }
586
+ else if (code === 0x0A && buf.length >= 5) {
587
+ const x = rx(), y = ry();
588
+ fireOrdered(() => mouseClick(x, y, "middle"));
589
+ }
293
590
  else if ((code === 0x05 || code === 0x08 || code === 0x0B) && buf.length >= 5) {
294
591
  const b = code === 0x08 ? "right" : code === 0x0B ? "middle" : "left";
295
- send("Input.dispatchMouseEvent", { type: "mousePressed", x: rx(), y: ry(), button: b, buttons: BUTTONS[b], clickCount: 1 });
592
+ const x = rx(), y = ry();
593
+ viaTimeline(true, () => { heldButtons |= BUTTONS[b]; send("Input.dispatchMouseEvent", { type: "mousePressed", x, y, button: b, buttons: heldButtons, clickCount: 1 }); });
296
594
  }
297
595
  else if ((code === 0x06 || code === 0x09 || code === 0x0C) && buf.length >= 5) {
298
596
  const b = code === 0x09 ? "right" : code === 0x0C ? "middle" : "left";
299
- send("Input.dispatchMouseEvent", { type: "mouseReleased", x: rx(), y: ry(), button: b, buttons: 0, clickCount: 1 });
597
+ const x = rx(), y = ry();
598
+ viaTimeline(false, () => { heldButtons &= ~BUTTONS[b]; send("Input.dispatchMouseEvent", { type: "mouseReleased", x, y, button: b, buttons: heldButtons, clickCount: 1 }); });
300
599
  }
301
600
  else if (code === 0x07 && buf.length >= 6) {
302
- send("Input.dispatchMouseEvent", { type: "mouseWheel", x: rx(), y: ry(), deltaX: 0, deltaY: -buf.readInt8(5) * 30 });
601
+ const x = rx(), y = ry(), d = buf.readInt8(5);
602
+ fireOrdered(() => send("Input.dispatchMouseEvent", { type: "mouseWheel", x, y, deltaX: 0, deltaY: -d * 30 }));
303
603
  }
304
604
  else if ((code === 0x10 || code === 0x11 || code === 0x12) && buf.length >= 3) {
305
605
  const mod = buf[1], keyLen = buf[2];
@@ -308,7 +608,9 @@ export async function createCdpScreencast(opts) {
308
608
  const isDown = code === 0x10 || code === 0x12;
309
609
  const hasCombo = (mod & 0x0e) !== 0; // ctrl/alt/cmd held
310
610
  const named = KEYMAP[key];
311
- if (!hasCombo && key.length === 1) {
611
+ // Code-POINT count, not UTF-16 length: emoji/astral CJK are length 2 in
612
+ // JS and would otherwise fall into the named-key branch as garbage.
613
+ if (!hasCombo && Array.from(key).length === 1) {
312
614
  // Printable char → 'char' inserts the text (already reflects shift).
313
615
  if (isDown)
314
616
  send("Input.dispatchKeyEvent", { type: "char", text: key, key, modifiers: mods });
@@ -333,6 +635,15 @@ export async function createCdpScreencast(opts) {
333
635
  clearTimeout(reconnectTimer);
334
636
  reconnectTimer = null;
335
637
  }
638
+ if (paceTimer) {
639
+ clearTimeout(paceTimer);
640
+ paceTimer = null;
641
+ }
642
+ if (dragTimer) {
643
+ clearTimeout(dragTimer);
644
+ dragTimer = null;
645
+ }
646
+ dragQ.length = 0;
336
647
  clearPending();
337
648
  try {
338
649
  send("Page.stopScreencast");
@@ -346,4 +657,73 @@ export async function createCdpScreencast(opts) {
346
657
  },
347
658
  };
348
659
  }
660
+ const sharedCasts = new Map();
661
+ export async function acquireCdpScreencast(opts, viewerId, hooks) {
662
+ const key = `${opts.host}:${opts.port}`;
663
+ let entry = sharedCasts.get(key);
664
+ if (!entry) {
665
+ entry = { viewers: new Map(), session: null, starting: null };
666
+ sharedCasts.set(key, entry);
667
+ }
668
+ entry.viewers.set(viewerId, hooks);
669
+ if (!entry.session && !entry.starting) {
670
+ const e = entry;
671
+ const each = (f) => { for (const v of e.viewers.values())
672
+ f(v); };
673
+ const agg = (pickValue, pick, dflt) => {
674
+ let acc;
675
+ for (const v of e.viewers.values()) {
676
+ const x = pickValue(v);
677
+ if (x !== undefined)
678
+ acc = acc === undefined ? x : pick(acc, x);
679
+ }
680
+ return acc ?? dflt;
681
+ };
682
+ e.starting = createCdpScreencast({
683
+ host: opts.host,
684
+ port: opts.port,
685
+ quality: opts.quality,
686
+ minIntervalMs: opts.minIntervalMs,
687
+ log: opts.log,
688
+ onFrame: (jpeg) => each((v) => v.onFrame(jpeg)),
689
+ onCursor: (x, y, c, s) => each((v) => v.onCursor?.(x, y, c, s)),
690
+ onClipboard: (t) => each((v) => v.onClipboard?.(t)),
691
+ getQuality: () => agg((v) => v.getQuality?.(), Math.max, opts.quality ?? 55),
692
+ getMinIntervalMs: () => agg((v) => v.getMinIntervalMs?.(), Math.min, opts.minIntervalMs ?? 60),
693
+ getBackpressure: () => agg((v) => v.getBackpressure?.(), Math.min, 0),
694
+ }).then((s) => {
695
+ e.session = s;
696
+ e.starting = null;
697
+ if (!s)
698
+ sharedCasts.delete(key);
699
+ return s;
700
+ });
701
+ }
702
+ const session = entry.session ?? (await entry.starting);
703
+ // Released (or init failed) while we were starting up.
704
+ if (!session || !entry.viewers.has(viewerId)) {
705
+ if (!session)
706
+ entry.viewers.delete(viewerId);
707
+ return null;
708
+ }
709
+ return session;
710
+ }
711
+ export function releaseCdpScreencast(host, port, viewerId) {
712
+ const key = `${host}:${port}`;
713
+ const entry = sharedCasts.get(key);
714
+ if (!entry)
715
+ return;
716
+ entry.viewers.delete(viewerId);
717
+ if (entry.viewers.size > 0)
718
+ return;
719
+ sharedCasts.delete(key);
720
+ const close = (s) => { try {
721
+ s?.close();
722
+ }
723
+ catch { /* ignore */ } };
724
+ if (entry.session)
725
+ close(entry.session);
726
+ else if (entry.starting)
727
+ void entry.starting.then(close);
728
+ }
349
729
  //# sourceMappingURL=sandboxScreenCdp.js.map