mobile-debug-mcp 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ export { AndroidInteract, iOSInteract };
5
5
 
6
6
  import { resolveTargetDevice } from '../utils/resolve-device.js'
7
7
  import { ToolsObserve } from '../observe/index.js'
8
+ import { computeSnapshotSignature } from '../observe/snapshot-metadata.js'
8
9
  import { nextActionId } from '../server/common.js'
9
10
  import type {
10
11
  ActionFailureCode,
@@ -12,6 +13,7 @@ import type {
12
13
  ExpectElementVisibleResponse,
13
14
  ExpectStateResponse,
14
15
  ExpectScreenResponse,
16
+ WaitForUIChangeResponse,
15
17
  UIElementState,
16
18
  TapElementResponse
17
19
  } from '../types.js'
@@ -40,6 +42,11 @@ interface UiElement {
40
42
  _interactable?: boolean
41
43
  _sliderLike?: boolean
42
44
  state?: UIElementState | null
45
+ stable_id?: string | null
46
+ role?: string | null
47
+ test_tag?: string | null
48
+ selector?: { value: string | null, confidence: { score: number, reason: string } | null } | null
49
+ semantic?: { is_clickable: boolean, is_container: boolean } | null
43
50
  }
44
51
 
45
52
  interface ResolvedUiElementContext {
@@ -55,9 +62,16 @@ interface UiResolution {
55
62
  height?: number
56
63
  }
57
64
 
65
/**
 * Digests describing one UI tree snapshot, one per kind of change that can be
 * waited on. Two snapshots are considered different for a given kind when the
 * corresponding digests differ.
 */
interface UiChangeSignatureSet {
  /** Whole-tree digest from `computeSnapshotSignature`; `null` when that helper yields none. */
  hierarchy: string | null
  /** Digest over the screen name and each element's text, content description and resource id. */
  text: string | null
  /** Digest over the screen name and each element's state fields. */
  state: string | null
}
70
+
58
71
 
59
72
  export class ToolsInteract {
60
73
  private static readonly _maxResolvedUiElements = 256
74
+ private static readonly _uiChangeKinds: Array<'hierarchy_diff' | 'text_change' | 'state_change'> = ['hierarchy_diff', 'text_change', 'state_change']
61
75
  private static readonly _sliderSearchLookahead = 8
62
76
  private static readonly _sliderNegativeGapTolerancePx = 32
63
77
  private static readonly _sliderPositiveGapLimitPx = 640
@@ -80,6 +94,10 @@ export class ToolsInteract {
80
94
  return normalized as [number, number, number, number]
81
95
  }
82
96
 
97
/**
 * Returns the hex-encoded SHA-256 digest of the JSON serialization of `value`.
 *
 * @param value - Any JSON-serializable value.
 * @returns Lowercase hex digest string.
 */
private static _hash(value: unknown): string {
  const serialized = JSON.stringify(value)
  const hasher = createHash('sha256')
  hasher.update(serialized)
  return hasher.digest('hex')
}
100
+
83
101
  private static _matchesSelector(el: UiElement, selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }): boolean {
84
102
  if (!selector) return false
85
103
  const normalize = ToolsInteract._normalize
@@ -157,7 +175,13 @@ export class ToolsInteract {
157
175
  class: el.type ?? el.class ?? null,
158
176
  bounds,
159
177
  index,
160
- elementId
178
+ elementId,
179
+ state: el.state ?? null,
180
+ stable_id: el.stable_id ?? null,
181
+ role: el.role ?? null,
182
+ test_tag: el.test_tag ?? null,
183
+ selector: el.selector ?? null,
184
+ semantic: el.semantic ?? null
161
185
  }
162
186
  }
163
187
 
@@ -184,6 +208,66 @@ export class ToolsInteract {
184
208
  }
185
209
  }
186
210
 
211
/**
 * Builds the three change-detection digests for a UI tree snapshot.
 *
 * - `hierarchy` is delegated to `computeSnapshotSignature`.
 * - `text` hashes the normalized screen name plus, per element, its text,
 *   content description and resource id (each with its alias fallbacks).
 * - `state` hashes the normalized screen name plus, per element, its state
 *   fields, with missing values recorded as `null`.
 *
 * Property order inside the hashed payloads is significant because the
 * payloads are serialized with `JSON.stringify` before hashing.
 *
 * @param tree - UI tree response; a missing or non-array `elements` is treated as empty.
 * @returns The digest set for this snapshot.
 */
private static _buildUiChangeSignatures(tree: any): UiChangeSignatureSet {
  type TextEntry = { text: string, contentDescription: string, resourceId: string }
  type StateEntry = {
    checked: boolean | null
    selected: boolean | string | { id: string; label?: string } | null
    focused: boolean | null
    expanded: boolean | null
    enabled: boolean | null
    text_value: string | null
    value: number | string | null
    raw_value: number | string | null
    value_range: UIElementState['value_range']
  }

  const nodes: UiElement[] = Array.isArray(tree?.elements) ? tree.elements : []
  const textEntries: TextEntry[] = []
  const stateEntries: StateEntry[] = []

  nodes.forEach((node) => {
    const rawText = node?.text ?? node?.label ?? node?.value ?? ''
    const rawDescription = node?.contentDescription ?? node?.contentDesc ?? node?.accessibilityLabel ?? ''
    const rawResourceId = node?.resourceId ?? node?.resourceID ?? node?.id ?? ''
    textEntries.push({
      text: ToolsInteract._normalize(rawText),
      contentDescription: ToolsInteract._normalize(rawDescription),
      resourceId: ToolsInteract._normalize(rawResourceId)
    })

    const nodeState = node?.state
    stateEntries.push({
      checked: nodeState?.checked ?? null,
      selected: nodeState?.selected ?? null,
      focused: nodeState?.focused ?? null,
      expanded: nodeState?.expanded ?? null,
      enabled: nodeState?.enabled ?? null,
      text_value: nodeState?.text_value ?? null,
      value: nodeState?.value ?? null,
      raw_value: nodeState?.raw_value ?? null,
      value_range: nodeState?.value_range ?? null
    })
  })

  const hierarchy = computeSnapshotSignature(tree)
  const text = ToolsInteract._hash({
    screen: ToolsInteract._normalize(tree?.screen),
    elements: textEntries
  })
  const state = ToolsInteract._hash({
    screen: ToolsInteract._normalize(tree?.screen),
    elements: stateEntries
  })

  return { hierarchy, text, state }
}
258
+
259
/**
 * Reports which kind of UI change, if any, separates two digest sets.
 *
 * @param expected - When given, only this kind is checked; otherwise every
 *   kind in `_uiChangeKinds` is checked in that array's order.
 * @param initial - Digests of the reference snapshot.
 * @param current - Digests of the snapshot being compared.
 * @returns The first checked kind whose digests differ, or `null` if none do.
 */
private static _matchesUiChange(expected: 'hierarchy_diff' | 'text_change' | 'state_change' | undefined, initial: UiChangeSignatureSet, current: UiChangeSignatureSet): 'hierarchy_diff' | 'text_change' | 'state_change' | null {
  const differs: Record<'hierarchy_diff' | 'text_change' | 'state_change', boolean> = {
    hierarchy_diff: initial.hierarchy !== current.hierarchy,
    text_change: initial.text !== current.text,
    state_change: initial.state !== current.state
  }

  const kindsToCheck = expected ? [expected] : ToolsInteract._uiChangeKinds
  const firstChanged = kindsToCheck.find((kind) => differs[kind])

  return firstChanged ?? null
}
270
+
187
271
  private static _resolvedTargetFromElement(
188
272
  elementId: string,
189
273
  element: UiElement,
@@ -197,7 +281,12 @@ export class ToolsInteract {
197
281
  class: element.type ?? element.class ?? null,
198
282
  bounds: ToolsInteract._normalizeBounds(element.bounds),
199
283
  index,
200
- state: element.state ?? null
284
+ state: element.state ?? null,
285
+ stable_id: element.stable_id ?? null,
286
+ role: element.role ?? null,
287
+ test_tag: element.test_tag ?? null,
288
+ selector: element.selector ?? null,
289
+ semantic: element.semantic ?? null
201
290
  }
202
291
  }
203
292
 
@@ -621,6 +710,11 @@ export class ToolsInteract {
621
710
  bounds: boundsObj,
622
711
  clickable: !!best.clickable,
623
712
  enabled: !!best.enabled,
713
+ stable_id: best.stable_id ?? null,
714
+ role: best.role ?? null,
715
+ test_tag: best.test_tag ?? null,
716
+ selector: best.selector ?? null,
717
+ semantic: best.semantic ?? null,
624
718
  tapCoordinates,
625
719
  telemetry: {
626
720
  matchedIndex: best?._index ?? null,
@@ -934,6 +1028,84 @@ export class ToolsInteract {
934
1028
  }
935
1029
  }
936
1030
 
1031
/**
 * Polls the UI tree until it differs from the first successfully captured
 * snapshot, or until `timeout_ms` elapses.
 *
 * The first usable snapshot becomes the baseline. Each later snapshot is
 * compared against it via `_matchesUiChange`. When a change is seen and
 * `stability_window_ms` is positive, the tree is read once more after that
 * delay and the change is only reported if the second read still differs from
 * the baseline and is identical to the first changed read.
 *
 * Snapshots that are missing or carry an `error` are treated like a failed
 * poll: a failed capture has no elements and no hierarchy signature, so
 * comparing it against a good baseline (or using it as the baseline) would
 * report a change that never happened on screen.
 *
 * @param platform - Target platform, forwarded to `ToolsObserve.getUITreeHandler`.
 * @param deviceId - Target device, forwarded to `ToolsObserve.getUITreeHandler`.
 * @param timeout_ms - Maximum time to keep polling (default 60000).
 * @param stability_window_ms - Delay before the confirmation read; values
 *   below 0 are clamped to 0 and non-numbers fall back to 250. 0 disables confirmation.
 * @param expected_change - Restricts detection to a single change kind.
 * @returns A success response naming the observed change, or a timeout
 *   response with `success: false` and `observed_change: null`. Both carry the
 *   last seen `snapshot_revision` and `loading_state`.
 */
static async waitForUIChangeHandler({
  platform,
  deviceId,
  timeout_ms = 60000,
  stability_window_ms = 250,
  expected_change
}: {
  platform?: 'android' | 'ios',
  deviceId?: string,
  timeout_ms?: number,
  stability_window_ms?: number,
  expected_change?: 'hierarchy_diff' | 'text_change' | 'state_change'
}): Promise<WaitForUIChangeResponse> {
  const start = Date.now()
  const pollIntervalMs = 300
  const stabilityWindow = Math.max(0, typeof stability_window_ms === 'number' ? stability_window_ms : 250)
  const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms))
  let baseline: UiChangeSignatureSet | null = null
  let lastObservedRevision: number | null = null
  let lastLoadingState: any = null

  while (Date.now() - start < timeout_ms) {
    try {
      const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId }) as any

      // Only usable captures take part in change detection; failed ones are skipped.
      if (tree && !tree.error) {
        const signatures = ToolsInteract._buildUiChangeSignatures(tree)
        lastObservedRevision = typeof tree.snapshot_revision === 'number' ? tree.snapshot_revision : lastObservedRevision
        lastLoadingState = tree.loading_state ?? lastLoadingState

        if (!baseline) {
          baseline = signatures
        } else {
          const observedChange = ToolsInteract._matchesUiChange(expected_change, baseline, signatures)
          if (observedChange) {
            let confirmed = true

            if (stabilityWindow > 0) {
              await sleep(stabilityWindow)
              const confirmTree = await ToolsObserve.getUITreeHandler({ platform, deviceId }) as any

              if (!confirmTree || confirmTree.error) {
                // A failed confirmation read proves nothing; keep polling.
                confirmed = false
              } else {
                lastObservedRevision = typeof confirmTree.snapshot_revision === 'number' ? confirmTree.snapshot_revision : lastObservedRevision
                lastLoadingState = confirmTree.loading_state ?? lastLoadingState

                const confirmSignatures = ToolsInteract._buildUiChangeSignatures(confirmTree)
                const stillChanged = ToolsInteract._matchesUiChange(expected_change, baseline, confirmSignatures) !== null
                const settled = confirmSignatures.hierarchy === signatures.hierarchy
                  && confirmSignatures.text === signatures.text
                  && confirmSignatures.state === signatures.state
                confirmed = stillChanged && settled
              }
            }

            if (confirmed) {
              return {
                success: true,
                observed_change: observedChange,
                snapshot_revision: lastObservedRevision ?? undefined,
                timeout: false,
                elapsed_ms: Date.now() - start,
                expected_change,
                loading_state: lastLoadingState ?? null,
                reason: 'UI change observed'
              }
            }
          }
        }
      }
    } catch {
      // Keep polling until timeout; the observable surface should be best-effort.
    }

    await sleep(pollIntervalMs)
  }

  return {
    success: false,
    observed_change: null,
    snapshot_revision: lastObservedRevision ?? undefined,
    timeout: true,
    elapsed_ms: Date.now() - start,
    expected_change,
    loading_state: lastLoadingState ?? null,
    reason: 'timeout'
  }
}
1108
+
937
1109
  static async expectScreenHandler({
938
1110
  platform,
939
1111
  fingerprint,
@@ -1040,7 +1212,12 @@ export class ToolsInteract {
1040
1212
  class: result.element.class ?? null,
1041
1213
  bounds: result.element.bounds ?? null,
1042
1214
  index: typeof result.element.index === 'number' ? result.element.index : null,
1043
- state: (result.element as any).state ?? null
1215
+ state: (result.element as any).state ?? null,
1216
+ stable_id: (result.element as any).stable_id ?? null,
1217
+ role: (result.element as any).role ?? null,
1218
+ test_tag: (result.element as any).test_tag ?? null,
1219
+ selector: (result.element as any).selector ?? null,
1220
+ semantic: (result.element as any).semantic ?? null
1044
1221
  },
1045
1222
  observed: {
1046
1223
  status: result.status,
@@ -1055,7 +1232,12 @@ export class ToolsInteract {
1055
1232
  class: result.element.class ?? null,
1056
1233
  bounds: result.element.bounds ?? null,
1057
1234
  index: typeof result.element.index === 'number' ? result.element.index : null,
1058
- state: (result.element as any).state ?? null
1235
+ state: (result.element as any).state ?? null,
1236
+ stable_id: (result.element as any).stable_id ?? null,
1237
+ role: (result.element as any).role ?? null,
1238
+ test_tag: (result.element as any).test_tag ?? null,
1239
+ selector: (result.element as any).selector ?? null,
1240
+ semantic: (result.element as any).semantic ?? null
1059
1241
  }
1060
1242
  },
1061
1243
  reason: 'selector is visible'
@@ -7,6 +7,7 @@ import { promises as fsPromises } from "fs"
7
7
  import path from "path"
8
8
  import { computeScreenFingerprint } from "../utils/ui/index.js"
9
9
  import { parsePngSize } from "../utils/image.js"
10
+ import { deriveSnapshotMetadata } from "./snapshot-metadata.js"
10
11
 
11
12
  const activeLogStreams: Map<string, { proc: any, file: string }> = new Map()
12
13
 
@@ -74,20 +75,29 @@ export class AndroidObserve {
74
75
  }
75
76
  }
76
77
 
78
+ const snapshotMetadata = deriveSnapshotMetadata(`android:${deviceInfo.id}`, {
79
+ screen: "",
80
+ resolution,
81
+ elements
82
+ }, 'ui_tree')
83
+
77
84
  return {
78
85
  device: deviceInfo,
79
86
  screen: "",
80
87
  resolution,
81
- elements
88
+ elements,
89
+ ...snapshotMetadata
82
90
  };
83
91
  } catch (e) {
84
92
  const errorMessage = `Failed to get UI tree. ADB Path: '${getAdbCmd()}'. Error: ${e instanceof Error ? e.message : String(e)}`;
85
93
  console.error(errorMessage);
94
+ const snapshotMetadata = deriveSnapshotMetadata(`android:${deviceInfo.id}`, null, 'ui_tree')
86
95
  return {
87
96
  device: deviceInfo,
88
97
  screen: "",
89
98
  resolution: { width: 0, height: 0 },
90
99
  elements: [],
100
+ ...snapshotMetadata,
91
101
  error: errorMessage
92
102
  };
93
103
  }
@@ -5,6 +5,7 @@ import type {
5
5
  CaptureDebugSnapshotRawResponse,
6
6
  SnapshotSemanticResponse
7
7
  } from '../types.js'
8
+ import { deriveSnapshotMetadata } from './snapshot-metadata.js'
8
9
 
9
10
  export { AndroidObserve } from './android.js'
10
11
  export { iOSObserve } from './ios.js'
@@ -21,6 +22,12 @@ interface SnapshotTreeElementLike {
21
22
  clickable?: boolean
22
23
  enabled?: boolean
23
24
  visible?: boolean
25
+ state?: unknown
26
+ stable_id?: string | null
27
+ role?: string | null
28
+ test_tag?: string | null
29
+ selector?: unknown
30
+ semantic?: unknown
24
31
  }
25
32
 
26
33
  interface SnapshotTreeLike {
@@ -239,7 +246,17 @@ export class ToolsObserve {
239
246
 
240
247
  static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId }: { reason?: string; includeLogs?: boolean; logLines?: number; platform?: 'android' | 'ios'; appId?: string; deviceId?: string; sessionId?: string } = {}) {
241
248
  const timestamp = Date.now()
242
- const raw: CaptureDebugSnapshotRawResponse = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] }
249
+ const raw: CaptureDebugSnapshotRawResponse = {
250
+ timestamp,
251
+ snapshot_revision: 0,
252
+ captured_at_ms: timestamp,
253
+ reason: reason || '',
254
+ activity: null,
255
+ fingerprint: null,
256
+ screenshot: null,
257
+ ui_tree: null,
258
+ logs: []
259
+ }
243
260
 
244
261
  // Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
245
262
  const sid = sessionId || 'default'
@@ -329,6 +346,20 @@ export class ToolsObserve {
329
346
  }
330
347
  }
331
348
 
349
+ const snapshotDeviceKey = raw.ui_tree?.device
350
+ ? `${raw.ui_tree.device.platform}:${raw.ui_tree.device.id}`
351
+ : `${platform || 'unknown'}:${deviceId || 'default'}`
352
+ const snapshotMetadata = deriveSnapshotMetadata(
353
+ snapshotDeviceKey,
354
+ raw.ui_tree,
355
+ 'snapshot',
356
+ raw.ui_tree?.snapshot_revision ? null : (raw.fingerprint || raw.activity || null)
357
+ )
358
+
359
+ raw.snapshot_revision = raw.ui_tree?.snapshot_revision ?? snapshotMetadata.snapshot_revision
360
+ raw.captured_at_ms = raw.ui_tree?.captured_at_ms ?? snapshotMetadata.captured_at_ms
361
+ raw.loading_state = raw.ui_tree?.loading_state ?? snapshotMetadata.loading_state
362
+
332
363
  const semantic = deriveSnapshotSemantic(raw)
333
364
  return semantic ? { raw, semantic } : { raw }
334
365
  }
@@ -1,12 +1,13 @@
1
1
  import { spawn } from "child_process"
2
2
  import { promises as fs } from "fs"
3
- import { GetLogsResponse, CaptureIOSScreenshotResponse, GetUITreeResponse, UIElement, DeviceInfo, UIElementState } from "../types.js"
3
+ import { GetLogsResponse, CaptureIOSScreenshotResponse, GetUITreeResponse, UIElement, DeviceInfo, UIElementSemanticMetadata, UIElementState, UIResolutionSelector, SelectorConfidence } from "../types.js"
4
4
  import { execCommand, getIOSDeviceMetadata, validateBundleId, getIdbCmd, getXcrunCmd, isIDBInstalled } from "../utils/ios/utils.js"
5
5
  import { createWriteStream, promises as fsPromises } from 'fs'
6
6
  import path from 'path'
7
7
  import { parseLogLine } from '../utils/android/utils.js'
8
8
  import { computeScreenFingerprint } from '../utils/ui/index.js'
9
9
  import { parsePngSize } from '../utils/image.js'
10
+ import { deriveSnapshotMetadata } from './snapshot-metadata.js'
10
11
 
11
12
  const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
12
13
  let iosExecCommand = execCommand
@@ -22,6 +23,9 @@ export function _resetIOSExecCommandForTests() {
22
23
  interface IDBElement {
23
24
  AXFrame?: { x: number | string, y: number | string, width: number | string, height: number | string, w?: number | string, h?: number | string };
24
25
  frame?: { x: number | string, y: number | string, width: number | string, height: number | string, w?: number | string, h?: number | string };
26
+ AXIdentifier?: string;
27
+ accessibilityIdentifier?: string;
28
+ identifier?: string;
25
29
  AXUniqueId?: string;
26
30
  AXLabel?: string;
27
31
  AXValue?: string;
@@ -63,6 +67,59 @@ function parseIOSNumber(value: unknown): number | null {
63
67
  return Number.isFinite(parsed) ? parsed : null
64
68
  }
65
69
 
70
/**
 * Canonicalizes an element type for pattern matching.
 *
 * @param value - Raw type value; anything that is not a string is treated as absent.
 * @returns The trimmed, lowercased type, or `''` for non-string input.
 */
function normalizeIOSType(value: unknown): string {
  if (typeof value !== 'string') return ''
  const trimmed = value.trim()
  return trimmed.toLowerCase()
}
73
+
74
/**
 * Maps an iOS element type and its accessibility traits to a coarse role.
 *
 * Checks run in priority order: slider, button, cell, switch, text field,
 * image, container. All patterns are case-insensitive because traits arrive
 * in mixed case (for example `UIAccessibilityTraitButton`), so a lowercase-only
 * pattern would never match them.
 *
 * @param type - Element type; matched case-insensitively.
 * @param traits - Accessibility trait names of the element.
 * @returns The inferred role, or `null` when nothing matches.
 */
function inferIOSRole(type: string, traits: string[]): string | null {
  const hasTrait = (pattern: RegExp): boolean => traits.some((trait) => pattern.test(trait))

  if (/slider|adjustable/i.test(type) || hasTrait(/adjustable|slider/i)) return 'slider'
  if (/button/i.test(type) || hasTrait(/button/i)) return 'button'
  if (/cell/i.test(type)) return 'cell'
  if (/switch/i.test(type)) return 'switch'
  if (/text field|textfield|search field/i.test(type)) return 'text_field'
  if (/image/i.test(type)) return 'image'
  if (/window|application|group|scroll view|collection view/i.test(type)) return 'container'
  return null
}
84
+
85
/**
 * Picks the identifier to use as an element's stable id.
 *
 * Candidates are tried in order: `AXIdentifier`, `accessibilityIdentifier`,
 * `identifier`, `AXUniqueId`. A candidate qualifies when it is a string that
 * is non-empty after trimming; the value is returned as-is (untrimmed).
 *
 * @param node - Raw element node.
 * @returns The first qualifying identifier, or `null` if there is none.
 */
function getIOSStableId(node: IDBElement): string | null {
  const isUsable = (candidate: unknown): boolean =>
    typeof candidate === 'string' && candidate.trim().length > 0

  const chosen = [
    node.AXIdentifier,
    node.accessibilityIdentifier,
    node.identifier,
    node.AXUniqueId
  ].find(isUsable)

  return chosen ?? null
}
92
+
93
/**
 * Returns the confidence record for a selector derived from the given source.
 *
 * Scores: identifier 1, label 0.9, value 0.75, type 0.35. A fresh object is
 * returned on every call.
 *
 * @param source - Which element attribute the selector value came from.
 * @returns The confidence record, or `null` for `'none'`.
 */
function buildIOSSelectorConfidence(source: 'identifier' | 'label' | 'value' | 'type' | 'none'): SelectorConfidence | null {
  if (source === 'identifier') return { score: 1, reason: 'accessibility_identifier' }
  if (source === 'label') return { score: 0.9, reason: 'label_match' }
  if (source === 'value') return { score: 0.75, reason: 'value_match' }
  if (source === 'type') return { score: 0.35, reason: 'type_match' }
  return null
}
107
+
108
/**
 * Builds the selector for an element from the best attribute available.
 *
 * Preference order is stable id, label, value, then type; the first truthy
 * one becomes the selector value and is paired with the matching confidence
 * from `buildIOSSelectorConfidence`.
 *
 * @param type - Element type.
 * @param label - Element label, if any.
 * @param value - Element value, if any.
 * @param stableId - Stable identifier, if any.
 * @returns The selector, or `null` when every candidate is empty.
 */
function buildIOSSelector(type: string, label: string | null, value: string | null, stableId: string | null): UIResolutionSelector | null {
  const ranked: Array<[string | null, 'identifier' | 'label' | 'value' | 'type']> = [
    [stableId, 'identifier'],
    [label, 'label'],
    [value, 'value'],
    [type, 'type']
  ]

  for (const [candidate, source] of ranked) {
    if (candidate) {
      return { value: candidate, confidence: buildIOSSelectorConfidence(source) }
    }
  }

  return null
}
115
+
116
/**
 * Derives semantic flags for an iOS element.
 *
 * The type is trimmed and lowercased here before comparison, so raw types
 * (`"Button"`) and already-normalized ones (`"button"`) behave the same.
 * Comparing a lowercased type against capitalized literals would never match.
 *
 * @param type - Element type, raw or normalized.
 * @param traits - Accessibility trait names of the element.
 * @returns `is_clickable` when the element has the button trait, is a button
 *   or cell, or is slider/adjustable; `is_container` when the type names a
 *   window, application, group, scroll view or collection view.
 */
function buildIOSSemantic(type: string, traits: string[]): UIElementSemanticMetadata {
  const normalizedType = type.trim().toLowerCase()
  const isButtonLike = traits.includes("UIAccessibilityTraitButton")
    || normalizedType === 'button'
    || normalizedType === 'cell'

  return {
    is_clickable: isButtonLike || /adjustable|slider/.test(normalizedType),
    is_container: /window|application|group|scroll view|collection view/.test(normalizedType)
  }
}
122
+
66
123
  function isIOSAdjustable(node: IDBElement, type: string, traits: string[]): boolean {
67
124
  return /slider|adjustable|stepper|progress/i.test(type) || traits.some((trait) => /adjustable|slider|progress/i.test(trait))
68
125
  }
@@ -124,6 +181,11 @@ export function traverseIDBNode(node: IDBElement, elements: UIElement[], parentI
124
181
  const frame = node.AXFrame || node.frame;
125
182
  const traits = node.AXTraits || [];
126
183
  const state = extractIOSState(node, type, label, value, traits);
184
+ const normalizedType = normalizeIOSType(type)
185
+ const stableId = getIOSStableId(node)
186
+ const selector = buildIOSSelector(type, label, value, stableId)
187
+ const semantic = buildIOSSemantic(normalizedType, traits)
188
+ const role = inferIOSRole(normalizedType, traits)
127
189
 
128
190
  const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
129
191
 
@@ -135,14 +197,19 @@ export function traverseIDBNode(node: IDBElement, elements: UIElement[], parentI
135
197
  text: label,
136
198
  contentDescription: value,
137
199
  type: type,
138
- resourceId: node.AXUniqueId || null,
200
+ resourceId: stableId,
139
201
  clickable: clickable,
140
202
  enabled: true,
141
203
  visible: true,
142
204
  bounds: bounds,
143
205
  center: getCenter(bounds),
144
206
  depth: depth,
145
- state
207
+ state,
208
+ stable_id: stableId,
209
+ role,
210
+ test_tag: stableId,
211
+ selector,
212
+ semantic
146
213
  };
147
214
 
148
215
  if (parentIndex !== -1) {
@@ -419,16 +486,19 @@ export class iOSObserve {
419
486
 
420
487
  async getUITree(deviceId: string = "booted"): Promise<GetUITreeResponse> {
421
488
  const device = await getIOSDeviceMetadata(deviceId);
489
+ const deviceKey = `ios:${device.id}`
422
490
 
423
491
  const idbExists = await isIDBInstalled();
424
492
  if (!idbExists) {
493
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree')
425
494
  return {
426
495
  device,
427
496
  screen: "",
428
497
  resolution: { width: 0, height: 0 },
429
498
  elements: [],
499
+ ...snapshotMetadata,
430
500
  error: "iOS UI tree retrieval requires 'idb' (iOS Device Bridge). Please install it via Homebrew: `brew tap facebook/fb && brew install idb-companion` and `pip3 install fb-idb`."
431
- };
501
+ };
432
502
  }
433
503
 
434
504
  const targetUdid = (device.id && device.id !== 'booted') ? device.id : undefined;
@@ -474,15 +544,17 @@ export class iOSObserve {
474
544
  console.error(`Attempt ${attempts} failed: ${e}`);
475
545
  }
476
546
 
477
- if (attempts === maxAttempts) {
478
- return {
479
- device,
480
- screen: "",
481
- resolution: { width: 0, height: 0 },
482
- elements: [],
483
- error: `Failed to retrieve valid UI dump after ${maxAttempts} attempts.`
484
- };
485
- }
547
+ if (attempts === maxAttempts) {
548
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree')
549
+ return {
550
+ device,
551
+ screen: "",
552
+ resolution: { width: 0, height: 0 },
553
+ elements: [],
554
+ ...snapshotMetadata,
555
+ error: `Failed to retrieve valid UI dump after ${maxAttempts} attempts.`
556
+ };
557
+ }
486
558
  }
487
559
 
488
560
  try {
@@ -503,20 +575,29 @@ export class iOSObserve {
503
575
  height = rootBounds[3] - rootBounds[1];
504
576
  }
505
577
 
578
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, {
579
+ screen: "",
580
+ resolution: { width, height },
581
+ elements
582
+ }, 'ui_tree')
583
+
506
584
  return {
507
585
  device,
508
586
  screen: "",
509
587
  resolution: { width, height },
510
- elements
588
+ elements,
589
+ ...snapshotMetadata
511
590
  };
512
591
  } catch (e) {
513
- return {
592
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree')
593
+ return {
514
594
  device,
515
595
  screen: "",
516
596
  resolution: { width: 0, height: 0 },
517
597
  elements: [],
598
+ ...snapshotMetadata,
518
599
  error: `Failed to parse idb output: ${e instanceof Error ? e.message : String(e)}`
519
- };
600
+ };
520
601
  }
521
602
  }
522
603
 
@@ -0,0 +1,107 @@
1
+ import crypto from 'crypto'
2
+ import type { GetUITreeResponse, LoadingState, UIElement } from '../types.js'
3
+
4
/** Revision bookkeeping remembered for a single device. */
interface SnapshotState {
  /** Revision number most recently reported for the device. */
  revision: number
  /** Signature recorded with that revision; `null` when none was available. */
  signature: string | null
}

/** Module-level, in-memory record of the latest snapshot state, keyed by device key. */
const snapshotStateByDevice: Map<string, SnapshotState> = new Map()
10
+
11
/**
 * Converts any value to a canonical comparison string.
 *
 * @param value - Value to normalize.
 * @returns `''` for `null`/`undefined`; otherwise `String(value)` trimmed and lowercased.
 */
function normalize(value: unknown): string {
  return value == null ? '' : String(value).trim().toLowerCase()
}
15
+
16
/**
 * Coerces a bounds value into a four-number tuple.
 *
 * @param bounds - Candidate bounds; must be an array with at least four entries.
 * @returns The first four entries converted with `Number`, or `null` when the
 *   input is not such an array or any converted entry is `NaN`.
 */
function normalizeBounds(bounds: unknown): [number, number, number, number] | null {
  if (!Array.isArray(bounds)) return null
  if (bounds.length < 4) return null

  const firstFour = bounds.slice(0, 4)
  const coords = firstFour.map((entry) => Number(entry))
  const hasInvalid = coords.some((coord) => Number.isNaN(coord))

  return hasInvalid ? null : (coords as [number, number, number, number])
}
22
+
23
/**
 * Projects a UI element onto the fields that take part in the snapshot
 * signature, with string fields normalized, flags coerced to booleans and
 * bounds coerced to a numeric tuple.
 *
 * The property order of the returned object is significant: the object is
 * serialized with `JSON.stringify` before hashing.
 *
 * @param element - Element to summarize.
 * @returns The plain object that is hashed for this element.
 */
function stableElementSignature(element: UIElement) {
  const {
    text,
    contentDescription,
    resourceId,
    type,
    stable_id,
    role,
    test_tag,
    selector,
    clickable,
    enabled,
    visible,
    state,
    bounds
  } = element

  return {
    text: normalize(text),
    contentDescription: normalize(contentDescription),
    resourceId: normalize(resourceId),
    type: normalize(type),
    stable_id: normalize(stable_id),
    role: normalize(role),
    test_tag: normalize(test_tag),
    selector: normalize(selector?.value),
    clickable: Boolean(clickable),
    enabled: Boolean(enabled),
    visible: Boolean(visible),
    state: state ?? null,
    bounds: normalizeBounds(bounds)
  }
}
40
+
41
/**
 * Computes a SHA-256 digest identifying the content of a UI tree.
 *
 * The digest covers the normalized screen name, the resolution (defaulting to
 * 0x0) and the per-element signature of every element.
 *
 * @param tree - UI tree to fingerprint.
 * @returns Hex digest, or `null` when the tree is missing or carries an error.
 */
export function computeSnapshotSignature(tree: Pick<GetUITreeResponse, 'elements' | 'screen' | 'resolution' | 'error'> | null | undefined): string | null {
  if (!tree) return null
  if (tree.error) return null

  const screen = normalize(tree.screen)
  const resolution = tree.resolution || { width: 0, height: 0 }
  const elements = Array.isArray(tree.elements)
    ? tree.elements.map((entry) => stableElementSignature(entry))
    : []

  const hasher = crypto.createHash('sha256')
  hasher.update(JSON.stringify({ screen, resolution, elements }))
  return hasher.digest('hex')
}
52
+
53
/**
 * Scans a UI tree for a visible element that looks like a loading indicator.
 *
 * For each visible element the normalized type is combined with its
 * normalized text. When the text is empty or whitespace-only, the content
 * description is used instead: a plain `??` fallback would never reach it for
 * elements that report their text as `''`, hiding indicators that are only
 * described through their content description. The first element whose
 * combined string matches a loading keyword wins.
 *
 * @param tree - UI tree to inspect.
 * @param source - Label copied into the result to say where the tree came from.
 * @returns `{ active: true, signal, source }` for the first match, or `null`
 *   when the tree is missing, carries an error, has no element array, or
 *   nothing matches.
 */
export function detectLoadingState(tree: Pick<GetUITreeResponse, 'elements' | 'error'> | null | undefined, source: string): LoadingState | null {
  if (!tree || tree.error || !Array.isArray(tree.elements)) return null

  for (const element of tree.elements) {
    if (!element?.visible) continue

    const text = normalize(element?.text) || normalize(element?.contentDescription)
    const type = normalize(element?.type ?? '')
    const combined = `${type} ${text}`

    if (!/progress|spinner|loading|please wait|busy|loading indicator|skeleton|pending/.test(combined)) continue

    // Classify by the most specific keyword; anything else is a generic loading indicator.
    let signal = 'loading_indicator'
    if (/progress/.test(combined)) signal = 'progress_indicator'
    else if (/spinner/.test(combined)) signal = 'spinner'
    else if (/busy/.test(combined)) signal = 'busy_indicator'
    else if (/skeleton/.test(combined)) signal = 'skeleton'

    return { active: true, signal, source }
  }

  return null
}
77
+
78
/**
 * Produces revision, capture time and loading state for a snapshot, and
 * updates the per-device revision record.
 *
 * The revision starts at 1 for a device and is incremented whenever the
 * snapshot signature differs from the one last recorded for that device.
 * When no signature is available (for example a failed capture), the previous
 * revision is reported and the previously recorded signature is kept.
 * Overwriting it with `null` would make the next successful capture of the
 * same, unchanged UI look like a new revision.
 *
 * @param deviceKey - Key under which the device's revision state is stored.
 * @param tree - Captured UI tree, or `null`/`undefined` when capture failed.
 * @param source - Label passed through to `detectLoadingState`.
 * @param signatureOverride - Signature to use instead of computing one from `tree`.
 * @returns `snapshot_revision`, `captured_at_ms` (current time) and `loading_state`.
 */
export function deriveSnapshotMetadata(
  deviceKey: string,
  tree: Pick<GetUITreeResponse, 'elements' | 'screen' | 'resolution' | 'error'> | null | undefined,
  source: string,
  signatureOverride?: string | null
) {
  const signature = signatureOverride ?? computeSnapshotSignature(tree)
  const previous = snapshotStateByDevice.get(deviceKey)

  let revision = 1
  let signatureToStore = signature
  if (previous) {
    if (signature === null) {
      // Nothing comparable was captured: report and remember the last known state.
      revision = previous.revision
      signatureToStore = previous.signature
    } else {
      revision = previous.signature === signature ? previous.revision : previous.revision + 1
    }
  }

  snapshotStateByDevice.set(deviceKey, { revision, signature: signatureToStore })

  return {
    snapshot_revision: revision,
    captured_at_ms: Date.now(),
    loading_state: detectLoadingState(tree, source)
  }
}
104
+
105
/**
 * Test helper: forgets the recorded revision state of every device.
 */
export function resetSnapshotMetadataForTests(): void {
  snapshotStateByDevice.clear()
}