@limrun/ui 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,9 @@ import {
18
18
  createSetClipboardMessage,
19
19
  createTwoFingerTouchControlMessage,
20
20
  } from '../core/webrtc-messages';
21
+ import { AxFetcher, AxStatus } from '../core/ax-fetcher';
22
+ import { AxElement, AxSnapshot, axElementAtPoint, axSnapshotsEqual } from '../core/ax-tree';
23
+ import { InspectOverlay, InspectOverlayGeometry, InspectMode } from './inspect-overlay';
21
24
 
22
25
  declare global {
23
26
  interface Window {
@@ -51,6 +54,92 @@ interface RemoteControlProps {
51
54
  // showFrame controls whether to display the device frame
52
55
  // around the video. Defaults to true.
53
56
  showFrame?: boolean;
57
+
58
+ // When true, a connection that drops after a session was established is
58
+ // re-established automatically instead of surfacing the manual "Retry" button. Defaults to false.
60
+ autoReconnect?: boolean;
61
+
62
+ /**
63
+ * Enable the inspect overlay. When set, the component starts polling the
64
+ * accessibility tree and draws boxes over each element on top of the
65
+ * video stream.
66
+ *
67
+ * - `true` — Select mode. Boxes are clickable, click pins a selection
68
+ * with action buttons (Tap / Copy selector / Copy id), ESC clears.
69
+ * Device input is blocked while in this mode.
70
+ * - `'hover-only'` — Boxes follow the cursor as a visual preview. Device
71
+ * input still passes through, so you can drive the simulator while
72
+ * inspecting.
73
+ * - `undefined` / `false` (default) — overlay disabled, no polling.
74
+ */
75
+ inspectMode?: boolean | 'hover-only';
76
+
77
+ /**
78
+ * Fires whenever a fresh accessibility snapshot is delivered.
79
+ *
80
+ * Customers use this to drive their own side panels, agent prompts,
81
+ * analytics, etc. The built-in overlay does not require this callback —
82
+ * it renders from internal state regardless.
83
+ *
84
+ * Identical-to-previous snapshots (per `axSnapshotsEqual`) are NOT
85
+ * re-emitted, so a stable UI doesn't generate callback noise.
86
+ *
87
+ * Invoked in a microtask so customer code doesn't run synchronously
88
+ * inside React's commit phase.
89
+ */
90
+ onAxSnapshotChange?: (snapshot: AxSnapshot | null) => void;
91
+
92
+ /**
93
+ * Fires when the user clicks an overlay element (only emitted when
94
+ * `inspectMode === true`). `null` indicates a deselection (ESC, click
95
+ * outside any box, or programmatic clear).
96
+ *
97
+ * The `snapshot` field is the snapshot active at the moment of the
98
+ * click — useful for capturing context without races against the next
99
+ * poll cycle.
100
+ */
101
+ onInspectSelectionChange?: (selection: { element: AxElement; snapshot: AxSnapshot } | null) => void;
102
+
103
+ /**
104
+ * Fires whenever the accessibility subsystem changes coarse-grained
105
+ * status. Useful for rendering readiness indicators or error banners in
106
+ * a customer-built side panel.
107
+ *
108
+ * Transitions are deduplicated; no self-loops are emitted. The `error`
109
+ * argument is populated when status is `error` or `unavailable`.
110
+ *
111
+ * Lifecycle: `idle` → `starting` → `ready` (or `unavailable` / `error`).
112
+ * Recovery from `error` / `unavailable` is automatic — the fetcher
113
+ * keeps polling and transitions back to `ready` on the next success.
114
+ */
115
+ onAxStatusChange?: (status: AxStatus, error?: string) => void;
116
+
117
+ /**
118
+ * Base interval (ms) between successful AX-tree fetches.
119
+ *
120
+ * The fetcher will:
121
+ * - Wait `axPollIntervalMs` after a successful fetch with NEW data.
122
+ * - Double the wait (up to `axMaxBackoffMs`) when consecutive snapshots
123
+ * are byte-identical (e.g. static screen).
124
+ * - Wait 5 s when the server reports AX is unavailable.
125
+ *
126
+ * In addition, after user input (taps, scrolls, keypresses, openUrl,
127
+ * terminateApp, orientation flips), the fetcher enters a short
128
+ * "activity boost" window (~1.2 s) during which fetches happen at
129
+ * ~250 ms regardless of this setting. This captures mid-animation UI
130
+ * changes without you having to manually call `refreshAxTree`.
131
+ *
132
+ * @default 500
133
+ */
134
+ axPollIntervalMs?: number;
135
+
136
+ /**
137
+ * Maximum backoff (ms) for the AX-tree polling loop when consecutive
138
+ * snapshots are unchanged.
139
+ *
140
+ * @default 2000
141
+ */
142
+ axMaxBackoffMs?: number;
54
143
  }
55
144
 
56
145
  interface ScreenshotData {
@@ -71,6 +160,28 @@ export interface RemoteControlHandle {
71
160
  sendKeyEvent: (event: ImperativeKeyboardEvent) => void;
72
161
  screenshot: () => Promise<ScreenshotData>;
73
162
  terminateApp: (bundleId: string) => Promise<void>;
163
+ reconnect: () => void;
164
+
165
+ // Inspect-mode helpers. These are no-ops when inspect mode is disabled or
166
+ // the WebSocket isn't open.
167
+
168
+ // Force a fresh accessibility-tree fetch outside the normal poll cadence.
169
+ refreshAxTree: () => Promise<AxSnapshot>;
170
+
171
+ // Pull-based access to the most recent snapshot (the same one passed to
172
+ // onAxSnapshotChange). Returns null when no snapshot has arrived yet or
173
+ // when inspect mode is off.
174
+ getAxSnapshot: () => AxSnapshot | null;
175
+
176
+ // Programmatically drive the overlay highlight/selection — useful when a
177
+ // customer's own side panel wants to cross-highlight with the overlay.
178
+ // Pass `null` to clear.
179
+ setInspectHighlight: (element: AxElement | null) => void;
180
+ setInspectSelection: (element: AxElement | null) => void;
181
+
182
+ // Pull-based access to the current AX subsystem status. Mirrors what
183
+ // onAxStatusChange reports, for customers that don't want to subscribe.
184
+ getAxStatus: () => AxStatus;
74
185
  }
75
186
 
76
187
  const debugLog = (...args: any[]) => {
@@ -85,6 +196,26 @@ const debugWarn = (...args: any[]) => {
85
196
  }
86
197
  };
87
198
 
199
/**
 * Invokes a customer-provided callback in isolation.
 *
 * A failure in the customer's code must NOT propagate back into our
 * state-update flow — that would risk corrupting React reconciliation.
 * Both a synchronous throw and a rejection of a returned promise/thenable
 * (e.g. an `async` handler) are logged to the console so the customer can
 * still debug, and are otherwise swallowed.
 *
 * @param label - Callback name included in the log message.
 * @param fn - Customer callback; when `undefined` the call is a no-op.
 * @param args - Arguments forwarded to `fn` unchanged.
 */
const safeInvoke = <Args extends unknown[]>(
  label: string,
  fn: ((...args: Args) => unknown) | undefined,
  ...args: Args
): void => {
  if (!fn) return;

  // Surface to the developer regardless of debug flag — this is a bug in
  // the customer's handler and they'll want to see it.
  const report = (err: unknown): void => {
    // eslint-disable-next-line no-console
    console.error(`[RemoteControl] customer callback "${label}" threw:`, err);
  };

  try {
    const result = fn(...args);
    // An async handler reports failure through its returned promise rather
    // than by throwing; attach a rejection handler so the error is logged
    // here instead of escaping as an unhandled rejection.
    if (
      result !== null &&
      (typeof result === 'object' || typeof result === 'function') &&
      typeof (result as { then?: unknown }).then === 'function'
    ) {
      (result as PromiseLike<unknown>).then(undefined, report);
    }
  } catch (err) {
    report(err);
  }
};
218
+
88
219
  const motionActionToString = (action: number): string => {
89
220
  // AMOTION_EVENT is a constants object; find the matching ACTION_* key if present
90
221
  const match = Object.entries(AMOTION_EVENT).find(
@@ -108,20 +239,21 @@ type DeviceConfig = {
108
239
  loadingLogo: string;
109
240
  loadingLogoSize: string;
110
241
  videoPosition: {
111
- portrait: { heightMultiplier?: number; widthMultiplier?: number; };
112
- landscape: { heightMultiplier?: number; widthMultiplier?: number; };
242
+ portrait: { heightMultiplier?: number; widthMultiplier?: number };
243
+ landscape: { heightMultiplier?: number; widthMultiplier?: number };
113
244
  };
114
245
  frame: {
115
246
  image: string;
116
247
  imageLandscape: string;
117
- }
118
- }
248
+ };
249
+ };
119
250
 
120
251
  const ANDROID_TABLET_VIDEO_WIDTH = 1920;
121
252
  const ANDROID_TABLET_VIDEO_HEIGHT = 1200;
122
253
  const MAX_CONNECTION_ATTEMPTS = 3;
123
254
  const CONNECTION_RETRY_DELAY_MS = 1000;
124
255
  const CONNECTION_SUCCESS_TIMEOUT_MS = 15000;
256
+ const ICE_DISCONNECTED_GRACE_MS = 3000;
125
257
 
126
258
  const isAndroidTabletVideo = (width: number, height: number): boolean =>
127
259
  (width === ANDROID_TABLET_VIDEO_WIDTH && height === ANDROID_TABLET_VIDEO_HEIGHT) ||
@@ -191,7 +323,24 @@ function getAndroidKeycodeAndMeta(event: React.KeyboardEvent): { keycode: number
191
323
  }
192
324
 
193
325
  export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>(
194
- ({ className, url, token, sessionId: propSessionId, openUrl, showFrame = true }: RemoteControlProps, ref) => {
326
+ (
327
+ {
328
+ className,
329
+ url,
330
+ token,
331
+ sessionId: propSessionId,
332
+ openUrl,
333
+ showFrame = true,
334
+ autoReconnect = false,
335
+ inspectMode,
336
+ onAxSnapshotChange,
337
+ onInspectSelectionChange,
338
+ onAxStatusChange,
339
+ axPollIntervalMs,
340
+ axMaxBackoffMs,
341
+ }: RemoteControlProps,
342
+ ref,
343
+ ) => {
195
344
  const containerRef = useRef<HTMLDivElement>(null);
196
345
  const videoRef = useRef<HTMLVideoElement>(null);
197
346
  const frameRef = useRef<HTMLImageElement>(null);
@@ -207,9 +356,13 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
207
356
  const retryTimeoutRef = useRef<number | undefined>(undefined);
208
357
  const connectionSuccessTimeoutRef = useRef<number | undefined>(undefined);
209
358
  const requestFrameIntervalRef = useRef<number | undefined>(undefined);
359
+ const iceDisconnectedGraceRef = useRef<number | undefined>(undefined);
210
360
  const connectionGenerationRef = useRef(0);
211
361
  const connectionAttemptRef = useRef(0);
212
362
  const controlChannelOpenedRef = useRef(false);
363
+ // Mirrored to a ref so stale closures in event handlers see the latest value.
364
+ const autoReconnectRef = useRef(autoReconnect);
365
+ autoReconnectRef.current = autoReconnect;
213
366
  const firstFrameShownRef = useRef(false);
214
367
  const pendingScreenshotResolversRef = useRef<
215
368
  Map<string, (value: ScreenshotData | PromiseLike<ScreenshotData>) => void>
@@ -258,6 +411,62 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
258
411
  };
259
412
  const [hoverPoint, setHoverPoint] = useState<HoverPoint | null>(null);
260
413
 
414
+ // Inspect-mode state.
415
+ //
416
+ // Lifecycle of `axFetcherRef`:
417
+ // - Created in dataChannel.onopen (last step of WebRTC handshake)
418
+ // so we know the signaling WS is healthy and the device is
419
+ // responsive to control messages.
420
+ // - Started immediately if `inspectMode` is already enabled, or
421
+ // started later via the sibling useEffect when inspectMode flips on.
422
+ // - Stopped + nulled in teardownConnection (WS close / unmount).
423
+ //
424
+ // Customers can observe readiness via the `onAxStatusChange` callback:
425
+ // `starting` fires when start() runs but no snapshot has landed yet;
426
+ // `ready` once the first snapshot arrives. Status falls back to
427
+ // `unavailable` / `error` if the server can't satisfy AX requests.
428
+ const axFetcherRef = useRef<AxFetcher | null>(null);
429
+ const [axSnapshot, setAxSnapshot] = useState<AxSnapshot | null>(null);
430
+ const [axHighlightedId, setAxHighlightedId] = useState<string | null>(null);
431
+ const [axSelectedId, setAxSelectedId] = useState<string | null>(null);
432
+ const [overlayGeometry, setOverlayGeometry] = useState<InspectOverlayGeometry | null>(null);
433
+ // Viewport-space cursor position used to anchor the inspect InfoCard.
434
+ // Throttled to one update per animation frame to avoid React reconciling
435
+ // on every native mousemove (~60–120Hz).
436
+ const [axCursorPosition, setAxCursorPosition] = useState<{ x: number; y: number } | null>(null);
437
+ const cursorPositionRef = useRef<{ x: number; y: number } | null>(null);
438
+ const cursorRafIdRef = useRef<number | undefined>(undefined);
439
// Records the newest cursor position and makes sure exactly one
// animation-frame callback is pending to publish it into React state.
const scheduleCursorFlush = (next: { x: number; y: number } | null) => {
  // Always keep the latest value; a pending frame reads it when it fires.
  cursorPositionRef.current = next;
  const flushAlreadyQueued = cursorRafIdRef.current !== undefined;
  if (!flushAlreadyQueued) {
    cursorRafIdRef.current = window.requestAnimationFrame(() => {
      // Clear the handle first so a later call can queue the next frame.
      cursorRafIdRef.current = undefined;
      setAxCursorPosition(cursorPositionRef.current);
    });
  }
};
447
+ // Position captured at click-time so the InfoCard "freezes" near where
448
+ // the user clicked, even as they move the cursor around afterward. The
449
+ // action buttons (Tap / Copy) stay reachable because the card no longer
450
+ // chases the cursor while the click target is the active selection.
451
+ const [axFrozenCursorPosition, setAxFrozenCursorPosition] = useState<{
452
+ x: number;
453
+ y: number;
454
+ } | null>(null);
455
+ // Mirrors for synchronous access from event handlers without stale closures.
456
+ const inspectModeRef = useRef<boolean | 'hover-only' | undefined>(inspectMode);
457
+ inspectModeRef.current = inspectMode;
458
+ const axSnapshotRef = useRef<AxSnapshot | null>(null);
459
+ axSnapshotRef.current = axSnapshot;
460
+ const onAxSnapshotChangeRef = useRef(onAxSnapshotChange);
461
+ onAxSnapshotChangeRef.current = onAxSnapshotChange;
462
+ const onInspectSelectionChangeRef = useRef(onInspectSelectionChange);
463
+ onInspectSelectionChangeRef.current = onInspectSelectionChange;
464
+ const onAxStatusChangeRef = useRef(onAxStatusChange);
465
+ onAxStatusChangeRef.current = onAxStatusChange;
466
+
467
+ const inspectActive = inspectMode === true || inspectMode === 'hover-only';
468
+ const inspectModeResolved: InspectMode = inspectMode === 'hover-only' ? 'hover-only' : 'select';
469
+
261
470
  const sessionId = useMemo(
262
471
  () =>
263
472
  propSessionId ||
@@ -278,6 +487,72 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
278
487
  return;
279
488
  }
280
489
  dataChannelRef.current.send(data);
490
+ // Any binary control message is an input event. Bump the AX poller so
491
+ // we get a fresh snapshot quickly — the UI almost certainly changed.
492
+ axFetcherRef.current?.bumpActivity();
493
+ };
494
+
495
+ // Pointer ID used by inspect-driven taps. Distinct from human pointers
496
+ // (-1 mouse, -2 alt-mirror) and our touch identifiers so they never
497
+ // interfere with an in-progress drag.
498
+ const AX_TAP_POINTER_ID = -10;
499
+
500
// Issues a synthetic tap (touch down, then touch up 60 ms later) at a
// viewport-space (clientX/Y) point. The point is first mapped through the
// current video mapping context so the device receives in-stream
// coordinates; nothing is sent when the mapping is unavailable.
const sendTapAtClient = (clientX: number, clientY: number) => {
  const mappingCtx = computeVideoMappingContext();
  if (!mappingCtx) return;
  const mapped = mapClientPointToVideo(mappingCtx, clientX, clientY);
  if (!mapped) return;

  // Both halves of the tap share everything except the action and pressure.
  const buildTapMessage = (action: number, pressure: number) =>
    createTouchControlMessage(
      action,
      AX_TAP_POINTER_ID,
      mapped.videoWidth,
      mapped.videoHeight,
      mapped.videoX,
      mapped.videoY,
      pressure,
      AMOTION_EVENT.BUTTON_PRIMARY,
      AMOTION_EVENT.BUTTON_PRIMARY,
    );

  const pressMessage = buildTapMessage(AMOTION_EVENT.ACTION_DOWN, 1.0);
  if (pressMessage) sendBinaryControlMessage(pressMessage);

  // The release is built and sent after a short delay, at the same point.
  window.setTimeout(() => {
    const releaseMessage = buildTapMessage(AMOTION_EVENT.ACTION_UP, 0);
    if (releaseMessage) sendBinaryControlMessage(releaseMessage);
  }, 60);
};
537
+
538
+ // Center-of-bounds fallback for programmatic taps when there's no
539
+ // user-aimed click position (e.g. customer calls `setInspectSelection`
540
+ // followed by their own "tap selected" handler without forwarding a
541
+ // pointer position). Maps the element's frame center through the AX
542
+ // screen-coordinate space to viewport coords, then delegates to
543
+ // sendTapAtClient.
544
+ const sendTapAtElementCenter = (element: AxElement, snapshot: AxSnapshot) => {
545
+ const ctx = computeVideoMappingContext();
546
+ if (!ctx) return;
547
+ if (snapshot.screen.width <= 0 || snapshot.screen.height <= 0) return;
548
+ const cxAx = element.frame.x + element.frame.width / 2;
549
+ const cyAx = element.frame.y + element.frame.height / 2;
550
+ // AX screen-fraction → in-video pixel offset → viewport client coord.
551
+ const inVideoX = (cxAx / snapshot.screen.width) * ctx.actualWidth;
552
+ const inVideoY = (cyAx / snapshot.screen.height) * ctx.actualHeight;
553
+ const clientX = ctx.videoRect.left + ctx.offsetX + inVideoX;
554
+ const clientY = ctx.videoRect.top + ctx.offsetY + inVideoY;
555
+ sendTapAtClient(clientX, clientY);
281
556
  };
282
557
 
283
558
  // Fixed pointer IDs for Alt-simulated two-finger gestures
@@ -342,10 +617,7 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
342
617
  case 'down':
343
618
  // For multi-touch: use ACTION_DOWN for first pointer, ACTION_POINTER_DOWN for additional pointers
344
619
  const currentPointerCount = activePointers.current.size;
345
- action =
346
- currentPointerCount === 0
347
- ? AMOTION_EVENT.ACTION_DOWN
348
- : AMOTION_EVENT.ACTION_POINTER_DOWN;
620
+ action = currentPointerCount === 0 ? AMOTION_EVENT.ACTION_DOWN : AMOTION_EVENT.ACTION_POINTER_DOWN;
349
621
  positionToSend = { x: videoX, y: videoY };
350
622
  activePointers.current.set(pointerId, positionToSend);
351
623
  if (pointerId === -1) {
@@ -376,9 +648,7 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
376
648
  // For multi-touch: use ACTION_UP for last pointer, ACTION_POINTER_UP for non-last pointers
377
649
  const remainingPointerCount = activePointers.current.size;
378
650
  action =
379
- remainingPointerCount === 0
380
- ? AMOTION_EVENT.ACTION_UP
381
- : AMOTION_EVENT.ACTION_POINTER_UP;
651
+ remainingPointerCount === 0 ? AMOTION_EVENT.ACTION_UP : AMOTION_EVENT.ACTION_POINTER_UP;
382
652
  }
383
653
  }
384
654
  break;
@@ -386,20 +656,20 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
386
656
 
387
657
  // Send message if action and position determined
388
658
  if (action !== null && positionToSend !== null) {
389
- debugLog('[rc-touch][mouse->touch] sending', {
390
- pointerId,
391
- eventType,
392
- action,
393
- actionName: motionActionToString(action),
394
- positionToSend,
395
- video: { width: videoWidth, height: videoHeight },
396
- altHeld: isAltHeldRef.current,
397
- activePointersAfter: Array.from(activePointers.current.entries()).map(([id, pos]) => ({
398
- id,
399
- x: pos.x,
400
- y: pos.y,
401
- })),
402
- });
659
+ debugLog('[rc-touch][mouse->touch] sending', {
660
+ pointerId,
661
+ eventType,
662
+ action,
663
+ actionName: motionActionToString(action),
664
+ positionToSend,
665
+ video: { width: videoWidth, height: videoHeight },
666
+ altHeld: isAltHeldRef.current,
667
+ activePointersAfter: Array.from(activePointers.current.entries()).map(([id, pos]) => ({
668
+ id,
669
+ x: pos.x,
670
+ y: pos.y,
671
+ })),
672
+ });
403
673
  const message = createTouchControlMessage(
404
674
  action,
405
675
  pointerId,
@@ -412,11 +682,11 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
412
682
  buttons,
413
683
  );
414
684
  if (message) {
415
- debugLog('[rc-touch][mouse->touch] buffer', {
416
- pointerId,
417
- actionName: motionActionToString(action),
418
- byteLength: message.byteLength,
419
- });
685
+ debugLog('[rc-touch][mouse->touch] buffer', {
686
+ pointerId,
687
+ actionName: motionActionToString(action),
688
+ byteLength: message.byteLength,
689
+ });
420
690
  sendBinaryControlMessage(message);
421
691
  }
422
692
  } else if (eventType === 'up' || eventType === 'cancel') {
@@ -448,7 +718,12 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
448
718
  // This is iOS-specific; Android doesn't use this modifier injection.
449
719
  if (platform === 'ios' && dataChannelRef.current && dataChannelRef.current.readyState === 'open') {
450
720
  const action = nextHeld ? ANDROID_KEYS.ACTION_DOWN : ANDROID_KEYS.ACTION_UP;
451
- const message = createInjectKeycodeMessage(action, ANDROID_KEYS.KEYCODE_ALT_LEFT, 0, ANDROID_KEYS.META_NONE);
721
+ const message = createInjectKeycodeMessage(
722
+ action,
723
+ ANDROID_KEYS.KEYCODE_ALT_LEFT,
724
+ 0,
725
+ ANDROID_KEYS.META_NONE,
726
+ );
452
727
  debugLog('[rc-touch][alt] sending Indigo modifier keycode', {
453
728
  action,
454
729
  keycode: ANDROID_KEYS.KEYCODE_ALT_LEFT,
@@ -524,8 +799,14 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
524
799
 
525
800
  const clampedRelativeX = Math.max(0, Math.min(ctx.actualWidth, relativeX));
526
801
  const clampedRelativeY = Math.max(0, Math.min(ctx.actualHeight, relativeY));
527
- const videoX = Math.max(0, Math.min(ctx.videoWidth, (clampedRelativeX / ctx.actualWidth) * ctx.videoWidth));
528
- const videoY = Math.max(0, Math.min(ctx.videoHeight, (clampedRelativeY / ctx.actualHeight) * ctx.videoHeight));
802
+ const videoX = Math.max(
803
+ 0,
804
+ Math.min(ctx.videoWidth, (clampedRelativeX / ctx.actualWidth) * ctx.videoWidth),
805
+ );
806
+ const videoY = Math.max(
807
+ 0,
808
+ Math.min(ctx.videoHeight, (clampedRelativeY / ctx.actualHeight) * ctx.videoHeight),
809
+ );
529
810
 
530
811
  return {
531
812
  videoX,
@@ -547,8 +828,14 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
547
828
 
548
829
  const clampedRelativeX = Math.max(0, Math.min(ctx.actualWidth, relativeX));
549
830
  const clampedRelativeY = Math.max(0, Math.min(ctx.actualHeight, relativeY));
550
- const videoX = Math.max(0, Math.min(ctx.videoWidth, (clampedRelativeX / ctx.actualWidth) * ctx.videoWidth));
551
- const videoY = Math.max(0, Math.min(ctx.videoHeight, (clampedRelativeY / ctx.actualHeight) * ctx.videoHeight));
831
+ const videoX = Math.max(
832
+ 0,
833
+ Math.min(ctx.videoWidth, (clampedRelativeX / ctx.actualWidth) * ctx.videoWidth),
834
+ );
835
+ const videoY = Math.max(
836
+ 0,
837
+ Math.min(ctx.videoHeight, (clampedRelativeY / ctx.actualHeight) * ctx.videoHeight),
838
+ );
552
839
  const mirrorVideoX = ctx.videoWidth - videoX;
553
840
  const mirrorVideoY = ctx.videoHeight - videoY;
554
841
 
@@ -583,15 +870,7 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
583
870
  x1: number,
584
871
  y1: number,
585
872
  ) => {
586
- const msg = createTwoFingerTouchControlMessage(
587
- action,
588
- videoWidth,
589
- videoHeight,
590
- x0,
591
- y0,
592
- x1,
593
- y1,
594
- );
873
+ const msg = createTwoFingerTouchControlMessage(action, videoWidth, videoHeight, x0, y0, x1, y1);
595
874
  debugLog('[rc-touch2] sendTwoFingerMessage (iOS)', {
596
875
  actionName: motionActionToString(action),
597
876
  video: { width: videoWidth, height: videoHeight },
@@ -626,9 +905,10 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
626
905
 
627
906
  if (platform === 'ios') {
628
907
  // iOS: use special two-finger message (type=18)
629
- const action = eventType === 'down' ? AMOTION_EVENT.ACTION_DOWN
630
- : eventType === 'move' ? AMOTION_EVENT.ACTION_MOVE
631
- : AMOTION_EVENT.ACTION_UP;
908
+ const action =
909
+ eventType === 'down' ? AMOTION_EVENT.ACTION_DOWN
910
+ : eventType === 'move' ? AMOTION_EVENT.ACTION_MOVE
911
+ : AMOTION_EVENT.ACTION_UP;
632
912
  sendTwoFingerMessage(action, videoWidth, videoHeight, x0, y0, x1, y1);
633
913
  } else {
634
914
  // Android: send two separate single-touch messages with proper action codes
@@ -662,6 +942,23 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
662
942
  setHoverPoint(fullPoint);
663
943
  };
664
944
 
945
// Hit-tests a viewport (clientX/Y) point against the latest AX snapshot.
// The point is converted to AX screen coordinates using the supplied video
// mapping context. Yields null when no usable snapshot exists or the point
// lies outside the displayed video area.
const hitTestAxAtClient = (
  ctx: VideoMappingContext,
  clientX: number,
  clientY: number,
): AxElement | null => {
  const latest = axSnapshotRef.current;
  if (!latest) return null;
  if (latest.screen.width <= 0 || latest.screen.height <= 0) return null;

  // Position relative to the top-left corner of the displayed video.
  const localX = clientX - ctx.videoRect.left - ctx.offsetX;
  const localY = clientY - ctx.videoRect.top - ctx.offsetY;
  const outsideVideo =
    localX < 0 || localY < 0 || localX > ctx.actualWidth || localY > ctx.actualHeight;
  if (outsideVideo) return null;

  // Scale the in-video offset up to AX screen units before the lookup.
  const axX = (localX / ctx.actualWidth) * latest.screen.width;
  const axY = (localY / ctx.actualHeight) * latest.screen.height;
  return axElementAtPoint(latest, axX, axY);
};
961
+
665
962
  // Unified handler for both mouse and touch interactions
666
963
  const handleInteraction = (event: React.MouseEvent | React.TouchEvent) => {
667
964
  event.preventDefault();
@@ -670,6 +967,36 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
670
967
  // Compute mapping context once per event (reused for all pointers)
671
968
  const ctx = computeVideoMappingContext();
672
969
 
970
+ // Inspect-mode handling.
971
+ //
972
+ // We use JS hit-testing (not box-level onMouseEnter/Leave) as the
973
+ // single source of truth for which element is under the cursor — it
974
+ // handles overlapping rectangles deterministically by picking the
975
+ // smallest matching box. The overlay's InspectBox children no longer
976
+ // attach hover handlers; they just paint themselves based on the
977
+ // `highlightedId` prop driven from here.
978
+ //
979
+ // Cursor position is tracked in both modes so the cursor-anchored
980
+ // InfoCard can follow the pointer.
981
+ const isInspecting = inspectModeRef.current === true || inspectModeRef.current === 'hover-only';
982
+ if (isInspecting && !('touches' in event)) {
983
+ if (event.type === 'mousemove') {
984
+ scheduleCursorFlush({ x: event.clientX, y: event.clientY });
985
+ if (ctx) {
986
+ const hit = hitTestAxAtClient(ctx, event.clientX, event.clientY);
987
+ setAxHighlightedId(hit?.id ?? null);
988
+ }
989
+ } else if (event.type === 'mouseleave') {
990
+ scheduleCursorFlush(null);
991
+ setAxHighlightedId(null);
992
+ }
993
+ }
994
+ // Select mode blocks device input — clicks/drags don't reach the
995
+ // simulator. Hover-only mode falls through to the regular path.
996
+ if (inspectModeRef.current === true) {
997
+ return;
998
+ }
999
+
673
1000
  // Handle hover point updates for mouse events (only when Alt is held)
674
1001
  if (!('touches' in event) && ctx) {
675
1002
  if (event.type === 'mousemove') {
@@ -681,7 +1008,12 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
681
1008
  // to ensure consistent behavior across focus transitions.
682
1009
  }
683
1010
 
684
- if (!dataChannelRef.current || dataChannelRef.current.readyState !== 'open' || !videoRef.current || !ctx) {
1011
+ if (
1012
+ !dataChannelRef.current ||
1013
+ dataChannelRef.current.readyState !== 'open' ||
1014
+ !videoRef.current ||
1015
+ !ctx
1016
+ ) {
685
1017
  return;
686
1018
  }
687
1019
 
@@ -729,25 +1061,47 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
729
1061
  pointerId0: t0.identifier,
730
1062
  pointerId1: t1.identifier,
731
1063
  };
732
- applyTwoFingerEvent('down', g0.videoWidth, g0.videoHeight,
733
- g0.videoX, g0.videoY, g1.videoX, g1.videoY,
734
- t0.identifier, t1.identifier);
1064
+ applyTwoFingerEvent(
1065
+ 'down',
1066
+ g0.videoWidth,
1067
+ g0.videoHeight,
1068
+ g0.videoX,
1069
+ g0.videoY,
1070
+ g1.videoX,
1071
+ g1.videoY,
1072
+ t0.identifier,
1073
+ t1.identifier,
1074
+ );
735
1075
  } else if (twoFingerStateRef.current.source === 'real-touch') {
736
1076
  // Continuing two-finger gesture (move)
737
1077
  twoFingerStateRef.current.finger0 = { x: g0.videoX, y: g0.videoY };
738
1078
  twoFingerStateRef.current.finger1 = { x: g1.videoX, y: g1.videoY };
739
- applyTwoFingerEvent('move', g0.videoWidth, g0.videoHeight,
740
- g0.videoX, g0.videoY, g1.videoX, g1.videoY,
741
- twoFingerStateRef.current.pointerId0,
742
- twoFingerStateRef.current.pointerId1);
1079
+ applyTwoFingerEvent(
1080
+ 'move',
1081
+ g0.videoWidth,
1082
+ g0.videoHeight,
1083
+ g0.videoX,
1084
+ g0.videoY,
1085
+ g1.videoX,
1086
+ g1.videoY,
1087
+ twoFingerStateRef.current.pointerId0,
1088
+ twoFingerStateRef.current.pointerId1,
1089
+ );
743
1090
  }
744
1091
  } else if (allTouches.length < 2 && twoFingerStateRef.current?.source === 'real-touch') {
745
1092
  // Finger lifted - end two-finger gesture using last known state
746
1093
  const state = twoFingerStateRef.current;
747
- applyTwoFingerEvent('up', state.videoSize.width, state.videoSize.height,
748
- state.finger0.x, state.finger0.y,
749
- state.finger1.x, state.finger1.y,
750
- state.pointerId0, state.pointerId1);
1094
+ applyTwoFingerEvent(
1095
+ 'up',
1096
+ state.videoSize.width,
1097
+ state.videoSize.height,
1098
+ state.finger0.x,
1099
+ state.finger0.y,
1100
+ state.finger1.x,
1101
+ state.finger1.y,
1102
+ state.pointerId0,
1103
+ state.pointerId1,
1104
+ );
751
1105
  twoFingerStateRef.current = null;
752
1106
  // Don't process remaining finger - gesture ended
753
1107
  return;
@@ -848,8 +1202,17 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
848
1202
  pointerId1: ALT_POINTER_ID_MIRROR,
849
1203
  };
850
1204
  videoRef.current?.focus();
851
- applyTwoFingerEvent('down', videoWidth, videoHeight, videoX, videoY, mirrorX, mirrorY,
852
- ALT_POINTER_ID_PRIMARY, ALT_POINTER_ID_MIRROR);
1205
+ applyTwoFingerEvent(
1206
+ 'down',
1207
+ videoWidth,
1208
+ videoHeight,
1209
+ videoX,
1210
+ videoY,
1211
+ mirrorX,
1212
+ mirrorY,
1213
+ ALT_POINTER_ID_PRIMARY,
1214
+ ALT_POINTER_ID_MIRROR,
1215
+ );
853
1216
  return;
854
1217
  }
855
1218
 
@@ -858,8 +1221,17 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
858
1221
  // Update positions
859
1222
  twoFingerStateRef.current.finger0 = { x: videoX, y: videoY };
860
1223
  twoFingerStateRef.current.finger1 = { x: mirrorX, y: mirrorY };
861
- applyTwoFingerEvent('move', videoWidth, videoHeight, videoX, videoY, mirrorX, mirrorY,
862
- ALT_POINTER_ID_PRIMARY, ALT_POINTER_ID_MIRROR);
1224
+ applyTwoFingerEvent(
1225
+ 'move',
1226
+ videoWidth,
1227
+ videoHeight,
1228
+ videoX,
1229
+ videoY,
1230
+ mirrorX,
1231
+ mirrorY,
1232
+ ALT_POINTER_ID_PRIMARY,
1233
+ ALT_POINTER_ID_MIRROR,
1234
+ );
863
1235
  }
864
1236
  return;
865
1237
  }
@@ -869,9 +1241,17 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
869
1241
  if (state?.source === 'alt-mouse') {
870
1242
  // End gesture at last known positions
871
1243
  const { finger0, finger1, videoSize } = state;
872
- applyTwoFingerEvent('up', videoSize.width, videoSize.height,
873
- finger0.x, finger0.y, finger1.x, finger1.y,
874
- ALT_POINTER_ID_PRIMARY, ALT_POINTER_ID_MIRROR);
1244
+ applyTwoFingerEvent(
1245
+ 'up',
1246
+ videoSize.width,
1247
+ videoSize.height,
1248
+ finger0.x,
1249
+ finger0.y,
1250
+ finger1.x,
1251
+ finger1.y,
1252
+ ALT_POINTER_ID_PRIMARY,
1253
+ ALT_POINTER_ID_MIRROR,
1254
+ );
875
1255
  twoFingerStateRef.current = null;
876
1256
  }
877
1257
  return;
@@ -1039,6 +1419,13 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1039
1419
  }
1040
1420
  };
1041
1421
 
1422
// Cancels the pending ICE-disconnected grace timer, if one is scheduled,
// and forgets its handle.
const clearIceDisconnectedGrace = () => {
  const pendingTimer = iceDisconnectedGraceRef.current;
  if (pendingTimer === undefined) return;
  window.clearTimeout(pendingTimer);
  iceDisconnectedGraceRef.current = undefined;
};
1428
+
1042
1429
  const markFirstFrameShown = () => {
1043
1430
  if (firstFrameShownRef.current) {
1044
1431
  return;
@@ -1050,7 +1437,18 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1050
1437
 
1051
1438
  const teardownConnection = () => {
1052
1439
  clearConnectionSuccessTimeout();
1440
+ clearIceDisconnectedGrace();
1053
1441
  stopRequestFrameLoop();
1442
+ if (axFetcherRef.current) {
1443
+ axFetcherRef.current.stop();
1444
+ axFetcherRef.current = null;
1445
+ }
1446
+ // A scheduled cursor flush would otherwise call setState on a
1447
+ // torn-down component once the next frame runs.
1448
+ if (cursorRafIdRef.current !== undefined) {
1449
+ window.cancelAnimationFrame(cursorRafIdRef.current);
1450
+ cursorRafIdRef.current = undefined;
1451
+ }
1054
1452
  if (wsRef.current) {
1055
1453
  wsRef.current.onopen = null;
1056
1454
  wsRef.current.onmessage = null;
@@ -1093,10 +1491,16 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1093
1491
  }
1094
1492
 
1095
1493
  if (controlChannelOpenedRef.current) {
1096
- updateStatus(`Connection failed after it was established: ${reason}`);
1097
- setRetryExhausted(true);
1098
- teardownConnection();
1099
- return;
1494
+ if (!autoReconnectRef.current) {
1495
+ updateStatus(`Connection failed after it was established: ${reason}`);
1496
+ setRetryExhausted(true);
1497
+ teardownConnection();
1498
+ return;
1499
+ }
1500
+ // Reset so the upcoming retry gets a fresh MAX_CONNECTION_ATTEMPTS budget.
1501
+ updateStatus(`Reconnecting after established session dropped: ${reason}`);
1502
+ controlChannelOpenedRef.current = false;
1503
+ connectionAttemptRef.current = -1;
1100
1504
  }
1101
1505
 
1102
1506
  clearScheduledRetry();
@@ -1133,8 +1537,7 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1133
1537
  setVideoLoaded(false);
1134
1538
  teardownConnection();
1135
1539
 
1136
- const isCurrentAttempt = () =>
1137
- generation === connectionGenerationRef.current;
1540
+ const isCurrentAttempt = () => generation === connectionGenerationRef.current;
1138
1541
 
1139
1542
  connectionSuccessTimeoutRef.current = window.setTimeout(() => {
1140
1543
  connectionSuccessTimeoutRef.current = undefined;
@@ -1262,7 +1665,7 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1262
1665
  return getCodecPriority(a) - getCodecPriority(b);
1263
1666
  });
1264
1667
  videoTransceiver.setCodecPreferences(sortedCodecs);
1265
- debugLog('Set codec preferences:', sortedCodecs.map(c => c.mimeType).join(', '));
1668
+ debugLog('Set codec preferences:', sortedCodecs.map((c) => c.mimeType).join(', '));
1266
1669
  }
1267
1670
  }
1268
1671
 
@@ -1280,6 +1683,44 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1280
1683
  controlChannelOpenedRef.current = true;
1281
1684
  clearConnectionSuccessTimeout();
1282
1685
  updateStatus('Control channel opened');
1686
+
1687
+ // Spin up the AX fetcher now that we have a stable WS + control
1688
+ // channel. The fetcher's send function reuses this WS; it stops
1689
+ // sending if the WS dies. start() runs right below when inspect mode
1690
+ // is already on; otherwise a sibling useEffect starts it on toggle.
1691
+ if (!axFetcherRef.current) {
1692
+ axFetcherRef.current = new AxFetcher({
1693
+ platform,
1694
+ baseIntervalMs: axPollIntervalMs,
1695
+ maxBackoffMs: axMaxBackoffMs,
1696
+ send: (payload) => {
1697
+ if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return false;
1698
+ try {
1699
+ wsRef.current.send(JSON.stringify(payload));
1700
+ return true;
1701
+ } catch {
1702
+ return false;
1703
+ }
1704
+ },
1705
+ onSnapshot: (snapshot) => {
1706
+ setAxSnapshot((prev) => (axSnapshotsEqual(prev, snapshot) ? prev : snapshot));
1707
+ // Defer to a microtask so customer code (which may DOM-write,
1708
+ // start expensive work, or itself call back into ref
1709
+ // methods) doesn't run synchronously inside our state-setter
1710
+ // path. React then has a chance to schedule its render before
1711
+ // the customer handler kicks off side-effects.
1712
+ queueMicrotask(() => {
1713
+ safeInvoke('onAxSnapshotChange', onAxSnapshotChangeRef.current, snapshot);
1714
+ });
1715
+ },
1716
+ onStatusChange: (status, error) => {
1717
+ safeInvoke('onAxStatusChange', onAxStatusChangeRef.current, status, error);
1718
+ },
1719
+ });
1720
+ if (inspectModeRef.current === true || inspectModeRef.current === 'hover-only') {
1721
+ axFetcherRef.current.start();
1722
+ }
1723
+ }
1283
1724
  const sendRequestFrame = () => {
1284
1725
  if (
1285
1726
  !isCurrentAttempt() ||
@@ -1331,6 +1772,9 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1331
1772
  }),
1332
1773
  );
1333
1774
  }
1775
+ // openUrl can take a moment to load the destination — boost
1776
+ // AX polling so the overlay refreshes through the transition.
1777
+ axFetcherRef.current?.bumpActivity();
1334
1778
  }
1335
1779
  };
1336
1780
 
@@ -1364,9 +1808,32 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1364
1808
  if (!isCurrentAttempt() || peerConnectionRef.current !== peerConnection) {
1365
1809
  return;
1366
1810
  }
1367
- updateStatus('ICE state: ' + peerConnection.iceConnectionState);
1368
- if (peerConnection.iceConnectionState === 'failed') {
1811
+ const iceState = peerConnection.iceConnectionState;
1812
+ updateStatus('ICE state: ' + iceState);
1813
+ if (iceState === 'connected' || iceState === 'completed') {
1814
+ clearIceDisconnectedGrace();
1815
+ return;
1816
+ }
1817
+ if (iceState === 'failed') {
1818
+ clearIceDisconnectedGrace();
1369
1819
  scheduleRetry('ICE connection entered failed state', generation);
1820
+ return;
1821
+ }
1822
+ if (
1823
+ iceState === 'disconnected' &&
1824
+ autoReconnectRef.current &&
1825
+ iceDisconnectedGraceRef.current === undefined
1826
+ ) {
1827
+ // Cap the browser's natural disconnected→failed escalation to recover faster.
1828
+ iceDisconnectedGraceRef.current = window.setTimeout(() => {
1829
+ iceDisconnectedGraceRef.current = undefined;
1830
+ if (!isCurrentAttempt() || peerConnectionRef.current !== peerConnection) {
1831
+ return;
1832
+ }
1833
+ if (peerConnection.iceConnectionState === 'disconnected') {
1834
+ scheduleRetry('ICE stayed disconnected past grace period', generation);
1835
+ }
1836
+ }, ICE_DISCONNECTED_GRACE_MS);
1370
1837
  }
1371
1838
  };
1372
1839
 
@@ -1414,6 +1881,12 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1414
1881
  debugWarn('Error parsing message:', e);
1415
1882
  return;
1416
1883
  }
1884
+ // Inspect-mode responses are routed to the fetcher first so it
1885
+ // can resolve in-flight requests regardless of which platform's
1886
+ // protocol is in use.
1887
+ if (axFetcherRef.current?.handleMessage(message)) {
1888
+ return;
1889
+ }
1417
1890
  updateStatus('Received: ' + message.type);
1418
1891
  switch (message.type) {
1419
1892
  case 'answer':
@@ -1576,6 +2049,7 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1576
2049
  connectionAttemptRef.current = 0;
1577
2050
  controlChannelOpenedRef.current = false;
1578
2051
  clearScheduledRetry();
2052
+ clearIceDisconnectedGrace();
1579
2053
  teardownConnection();
1580
2054
  updateStatus('Stopped');
1581
2055
  };
@@ -1608,32 +2082,64 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1608
2082
  };
1609
2083
  }, [url, token, propSessionId]);
1610
2084
 
2085
+ // Recompute the inspect-overlay geometry (container-local pixel rect of
2086
+ // the actually-rendered video content) from the current mapping context.
2087
+ // The InfoCard places itself in viewport coordinates from pointer events
2088
+ // directly, so no viewport-space origin is needed in the geometry.
2089
+ const recomputeOverlayGeometry = () => {
2090
+ const ctx = computeVideoMappingContext();
2091
+ if (!ctx) {
2092
+ setOverlayGeometry(null);
2093
+ return;
2094
+ }
2095
+ const next: InspectOverlayGeometry = {
2096
+ left: ctx.videoRect.left - ctx.containerRect.left + ctx.offsetX,
2097
+ top: ctx.videoRect.top - ctx.containerRect.top + ctx.offsetY,
2098
+ width: ctx.actualWidth,
2099
+ height: ctx.actualHeight,
2100
+ };
2101
+ setOverlayGeometry((prev) =>
2102
+ (
2103
+ prev &&
2104
+ prev.left === next.left &&
2105
+ prev.top === next.top &&
2106
+ prev.width === next.width &&
2107
+ prev.height === next.height
2108
+ ) ?
2109
+ prev
2110
+ : next,
2111
+ );
2112
+ };
2113
+
1611
2114
  // Calculate video position and border-radius based on frame dimensions
1612
2115
  useEffect(() => {
1613
2116
  const video = videoRef.current;
1614
2117
  const frame = frameRef.current;
1615
-
2118
+ const container = containerRef.current;
2119
+
1616
2120
  if (!video) return;
1617
-
1618
- // If no frame, no positioning needed
1619
- if (!showFrame || !frame) {
1620
- setVideoStyle({});
1621
- return;
1622
- }
1623
2121
 
1624
2122
  const updateVideoPosition = () => {
2123
+ // If no frame, just refresh overlay geometry; no inset/letterbox math
2124
+ // is needed since the video element is its own size.
2125
+ if (!showFrame || !frame) {
2126
+ setVideoStyle({});
2127
+ recomputeOverlayGeometry();
2128
+ return;
2129
+ }
2130
+
1625
2131
  const frameWidth = frame.clientWidth;
1626
2132
  const frameHeight = frame.clientHeight;
1627
-
2133
+
1628
2134
  if (frameWidth === 0 || frameHeight === 0) return;
1629
-
2135
+
1630
2136
  // Determine landscape based on video's intrinsic dimensions
1631
2137
  const landscape = video.videoWidth > video.videoHeight;
1632
2138
  setIsLandscape(landscape);
1633
2139
  setUseAndroidTabletFrame(
1634
2140
  platform === 'android' && isAndroidTabletVideo(video.videoWidth, video.videoHeight),
1635
2141
  );
1636
-
2142
+
1637
2143
  const pos = landscape ? config.videoPosition.landscape : config.videoPosition.portrait;
1638
2144
  let newStyle: React.CSSProperties = {};
1639
2145
  if (pos.heightMultiplier) {
@@ -1645,20 +2151,26 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1645
2151
  // Let the other dimension follow the video stream's intrinsic aspect ratio.
1646
2152
  newStyle.height = 'auto';
1647
2153
  }
1648
- newStyle.borderRadius = `${landscape ? frameHeight * config.videoBorderRadiusMultiplier : frameWidth * config.videoBorderRadiusMultiplier}px`;
2154
+ newStyle.borderRadius = `${
2155
+ landscape ?
2156
+ frameHeight * config.videoBorderRadiusMultiplier
2157
+ : frameWidth * config.videoBorderRadiusMultiplier
2158
+ }px`;
1649
2159
  setVideoStyle(newStyle);
2160
+ recomputeOverlayGeometry();
1650
2161
  };
1651
2162
 
1652
2163
  const resizeObserver = new ResizeObserver(() => {
1653
2164
  updateVideoPosition();
1654
2165
  });
1655
2166
 
1656
- resizeObserver.observe(frame);
2167
+ if (frame) resizeObserver.observe(frame);
1657
2168
  resizeObserver.observe(video);
1658
-
2169
+ if (container) resizeObserver.observe(container);
2170
+
1659
2171
  // Also update when the frame image loads
1660
- frame.addEventListener('load', updateVideoPosition);
1661
-
2172
+ if (frame) frame.addEventListener('load', updateVideoPosition);
2173
+
1662
2174
  // Update when video metadata loads (to get correct intrinsic dimensions)
1663
2175
  video.addEventListener('loadedmetadata', updateVideoPosition);
1664
2176
 
@@ -1666,6 +2178,12 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1666
2178
  // (videoWidth/videoHeight) can change without re-firing 'loadedmetadata'.
1667
2179
  // The <video> element emits 'resize' in that case.
1668
2180
  video.addEventListener('resize', updateVideoPosition);
2181
+ // Orientation flips also mean every element's AX frame just changed
2182
+ // (portrait↔landscape rotates the layout). Bump so the overlay
2183
+ // refreshes immediately rather than waiting out the current poll
2184
+ // cycle in a layout that no longer matches the boxes.
2185
+ const bumpOnResize = () => axFetcherRef.current?.bumpActivity();
2186
+ video.addEventListener('resize', bumpOnResize);
1669
2187
 
1670
2188
  // Initial calculation
1671
2189
  updateVideoPosition();
@@ -1674,10 +2192,60 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1674
2192
  resizeObserver.disconnect();
1675
2193
  video.removeEventListener('loadedmetadata', updateVideoPosition);
1676
2194
  video.removeEventListener('resize', updateVideoPosition);
1677
- frame.removeEventListener('load', updateVideoPosition);
2195
+ video.removeEventListener('resize', bumpOnResize);
2196
+ if (frame) frame.removeEventListener('load', updateVideoPosition);
1678
2197
  };
1679
2198
  }, [config, showFrame]);
1680
2199
 
2200
+ // Start/stop the AX poller and reset inspect state when inspect mode
2201
+ // toggles. Connection state is independent: the fetcher gets created on
2202
+ // dataChannel.onopen and destroyed on teardown.
2203
+ useEffect(() => {
2204
+ const fetcher = axFetcherRef.current;
2205
+ if (inspectActive) {
2206
+ fetcher?.start();
2207
+ } else {
2208
+ fetcher?.stop();
2209
+ setAxSnapshot(null);
2210
+ setAxHighlightedId(null);
2211
+ setAxSelectedId(null);
2212
+ setAxCursorPosition(null);
2213
+ setAxFrozenCursorPosition(null);
2214
+ cursorPositionRef.current = null;
2215
+ if (cursorRafIdRef.current !== undefined) {
2216
+ window.cancelAnimationFrame(cursorRafIdRef.current);
2217
+ cursorRafIdRef.current = undefined;
2218
+ }
2219
+ safeInvoke('onAxSnapshotChange', onAxSnapshotChangeRef.current, null);
2220
+ }
2221
+ }, [inspectActive]);
2222
+
2223
+ // Cancel any pending cursor-rAF on unmount so we don't setState on a
2224
+ // dead component.
2225
+ useEffect(() => {
2226
+ return () => {
2227
+ if (cursorRafIdRef.current !== undefined) {
2228
+ window.cancelAnimationFrame(cursorRafIdRef.current);
2229
+ cursorRafIdRef.current = undefined;
2230
+ }
2231
+ };
2232
+ }, []);
2233
+
2234
+ // ESC clears overlay selection (Chrome DevTools behavior).
2235
+ useEffect(() => {
2236
+ if (!inspectActive) return;
2237
+ const handleEsc = (e: KeyboardEvent) => {
2238
+ if (e.key === 'Escape' && (axSelectedId || axHighlightedId)) {
2239
+ setAxSelectedId(null);
2240
+ setAxHighlightedId(null);
2241
+ setAxFrozenCursorPosition(null);
2242
+ safeInvoke('onInspectSelectionChange', onInspectSelectionChangeRef.current, null);
2243
+ }
2244
+ };
2245
+ window.addEventListener('keydown', handleEsc);
2246
+ return () => window.removeEventListener('keydown', handleEsc);
2247
+ }, [inspectActive, axSelectedId, axHighlightedId]);
2248
+
1681
2249
  const handleVideoClick = () => {
1682
2250
  if (videoRef.current) {
1683
2251
  videoRef.current.focus();
@@ -1711,6 +2279,7 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1711
2279
  }),
1712
2280
  );
1713
2281
  }
2282
+ axFetcherRef.current?.bumpActivity();
1714
2283
  },
1715
2284
 
1716
2285
  sendKeyEvent: (event: ImperativeKeyboardEvent) => {
@@ -1810,6 +2379,10 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1810
2379
  reject(err);
1811
2380
  return;
1812
2381
  }
2382
+ // Terminating the foreground app drops the user back to the home
2383
+ // screen — bump so the overlay reflects the post-terminate state
2384
+ // through the SpringBoard transition.
2385
+ axFetcherRef.current?.bumpActivity();
1813
2386
 
1814
2387
  setTimeout(() => {
1815
2388
  if (pendingTerminateAppResolversRef.current.has(id)) {
@@ -1821,22 +2394,62 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1821
2394
  }, 30000);
1822
2395
  });
1823
2396
  },
2397
+ reconnect: () => start(),
2398
+
2399
+ refreshAxTree: async (): Promise<AxSnapshot> => {
2400
+ const fetcher = axFetcherRef.current;
2401
+ if (!fetcher) {
2402
+ throw new Error('Inspect mode is not active');
2403
+ }
2404
+ // The fetcher's refresh() runs the result through the same
2405
+ // change-detect path as the poll loop (via deliver()), which calls
2406
+ // back into onSnapshot — already wired to setAxSnapshot +
2407
+ // onAxSnapshotChange (with safe-invoke). We just return the fetched
2408
+ // payload for callers that want it.
2409
+ return fetcher.refresh();
2410
+ },
2411
+
2412
+ getAxSnapshot: () => axSnapshotRef.current,
2413
+
2414
+ setInspectHighlight: (element: AxElement | null) => {
2415
+ setAxHighlightedId(element?.id ?? null);
2416
+ },
2417
+
2418
+ setInspectSelection: (element: AxElement | null) => {
2419
+ setAxSelectedId(element?.id ?? null);
2420
+ // Programmatic selection has no click position — anchor the card at
2421
+ // the last known cursor position (if any), otherwise clear.
2422
+ // Customer-facing UIs that drive selection from their own panels can
2423
+ // call setInspectHighlight separately to move the cursor visual.
2424
+ if (element) {
2425
+ setAxFrozenCursorPosition(cursorPositionRef.current);
2426
+ } else {
2427
+ setAxFrozenCursorPosition(null);
2428
+ }
2429
+ const snapshot = axSnapshotRef.current;
2430
+ if (element && snapshot) {
2431
+ safeInvoke('onInspectSelectionChange', onInspectSelectionChangeRef.current, { element, snapshot });
2432
+ } else {
2433
+ safeInvoke('onInspectSelectionChange', onInspectSelectionChangeRef.current, null);
2434
+ }
2435
+ },
2436
+
2437
+ getAxStatus: () => axFetcherRef.current?.getStatus() ?? 'idle',
1824
2438
  }));
1825
2439
 
1826
2440
  // Show indicators when Alt is held and we have a valid hover point (null when outside)
1827
2441
  const showAltIndicators = isAltHeld && hoverPoint !== null;
1828
2442
  const frameImageSrc =
1829
- platform === 'android' && useAndroidTabletFrame
1830
- ? (isLandscape ? pixelTabletFrameImageLandscape : pixelTabletFrameImage)
1831
- : (isLandscape ? config.frame.imageLandscape : config.frame.image);
2443
+ platform === 'android' && useAndroidTabletFrame ?
2444
+ isLandscape ? pixelTabletFrameImageLandscape
2445
+ : pixelTabletFrameImage
2446
+ : isLandscape ? config.frame.imageLandscape
2447
+ : config.frame.image;
1832
2448
 
1833
2449
  return (
1834
2450
  <div
1835
2451
  ref={containerRef}
1836
- className={clsx(
1837
- 'rc-container',
1838
- className,
1839
- )}
2452
+ className={clsx('rc-container', className)}
1840
2453
  style={{ touchAction: 'none' }} // Keep touchAction none for the container
1841
2454
  // Attach unified handler to all interaction events on the container
1842
2455
  // This helps capture mouseleave correctly even if the video element itself isn't hovered
@@ -1878,21 +2491,17 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1878
2491
  )}
1879
2492
  <video
1880
2493
  ref={videoRef}
1881
- className={clsx(
1882
- 'rc-video',
1883
- !showFrame && 'rc-video-frameless',
1884
- !videoLoaded && 'rc-video-loading',
1885
- )}
2494
+ className={clsx('rc-video', !showFrame && 'rc-video-frameless', !videoLoaded && 'rc-video-loading')}
1886
2495
  style={{
1887
2496
  ...videoStyle,
1888
- ...(config.loadingLogo
1889
- ? {
1890
- backgroundImage: `url("${config.loadingLogo}")`,
1891
- backgroundRepeat: 'no-repeat',
1892
- backgroundPosition: 'center',
1893
- backgroundSize: config.loadingLogoSize,
1894
- }
1895
- : {}),
2497
+ ...(config.loadingLogo ?
2498
+ {
2499
+ backgroundImage: `url("${config.loadingLogo}")`,
2500
+ backgroundRepeat: 'no-repeat',
2501
+ backgroundPosition: 'center',
2502
+ backgroundSize: config.loadingLogoSize,
2503
+ }
2504
+ : {}),
1896
2505
  }}
1897
2506
  autoPlay
1898
2507
  playsInline
@@ -1913,12 +2522,53 @@ export const RemoteControl = forwardRef<RemoteControlHandle, RemoteControlProps>
1913
2522
  }
1914
2523
  }}
1915
2524
  />
2525
+ {inspectActive && (
2526
+ <InspectOverlay
2527
+ snapshot={axSnapshot}
2528
+ geometry={overlayGeometry}
2529
+ highlightedId={axHighlightedId}
2530
+ selectedId={axSelectedId}
2531
+ mode={inspectModeResolved}
2532
+ cursorPosition={axCursorPosition}
2533
+ frozenCursorPosition={axFrozenCursorPosition}
2534
+ onSelectChange={(element, clickPosition) => {
2535
+ setAxSelectedId(element?.id ?? null);
2536
+ if (element && clickPosition) {
2537
+ setAxFrozenCursorPosition(clickPosition);
2538
+ } else if (!element) {
2539
+ setAxFrozenCursorPosition(null);
2540
+ }
2541
+ const snapshot = axSnapshotRef.current;
2542
+ if (element && snapshot) {
2543
+ safeInvoke('onInspectSelectionChange', onInspectSelectionChangeRef.current, {
2544
+ element,
2545
+ snapshot,
2546
+ });
2547
+ } else {
2548
+ safeInvoke('onInspectSelectionChange', onInspectSelectionChangeRef.current, null);
2549
+ }
2550
+ }}
2551
+ onTapElement={(element, tapAt) => {
2552
+ // Use the viewport-space position the user originally aimed at
2553
+ // (the frozen click position). For containers whose children
2554
+ // are absent from the accessibility tree — e.g. iOS UITabBar's
2555
+ // home/diagnostics/settings buttons — this taps the specific
2556
+ // button the user pointed at instead of the container's
2557
+ // averaged center.
2558
+ if (tapAt) {
2559
+ sendTapAtClient(tapAt.x, tapAt.y);
2560
+ return;
2561
+ }
2562
+ // Fallback (defensive): center of element. Should be
2563
+ // unreachable from the InfoCard since it always passes anchor.
2564
+ const snapshot = axSnapshotRef.current;
2565
+ if (!snapshot) return;
2566
+ sendTapAtElementCenter(element, snapshot);
2567
+ }}
2568
+ />
2569
+ )}
1916
2570
  {retryExhausted && (
1917
- <button
1918
- type="button"
1919
- className="rc-retry-button"
1920
- onClick={handleManualRetry}
1921
- >
2571
+ <button type="button" className="rc-retry-button" onClick={handleManualRetry}>
1922
2572
  Retry
1923
2573
  </button>
1924
2574
  )}
@@ -1954,4 +2604,4 @@ const toScreenshotData = (message: any): ScreenshotData | null => {
1954
2604
  }
1955
2605
 
1956
2606
  return null;
1957
- };
2607
+ };