mobile-debug-mcp 0.25.1 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/dist/interact/classify.js +48 -11
  2. package/dist/interact/index.js +113 -0
  3. package/dist/observe/android.js +10 -1
  4. package/dist/observe/index.js +19 -1
  5. package/dist/observe/ios.js +15 -1
  6. package/dist/observe/snapshot-metadata.js +88 -0
  7. package/dist/server/tool-definitions.js +49 -14
  8. package/dist/server/tool-handlers.js +12 -0
  9. package/dist/server-core.js +1 -1
  10. package/docs/CHANGELOG.md +9 -0
  11. package/docs/ROADMAP.md +66 -38
  12. package/docs/rfcs/003-wait-and-synchronization-reliability.md +296 -0
  13. package/docs/rfcs/004-action-verification-routing.md +342 -0
  14. package/docs/specs/mcp-tooling-spec-v1.md +11 -3
  15. package/docs/tools/interact.md +31 -8
  16. package/docs/tools/observe.md +4 -2
  17. package/package.json +1 -1
  18. package/skills/rfc-review/SKILL.md +52 -0
  19. package/skills/rfc-review/references/rfc-review-checklist.md +12 -0
  20. package/skills/rfc-review/references/rfc-review-template.md +28 -0
  21. package/src/interact/classify.ts +53 -13
  22. package/src/interact/index.ts +151 -0
  23. package/src/observe/android.ts +11 -1
  24. package/src/observe/index.ts +26 -1
  25. package/src/observe/ios.ts +28 -13
  26. package/src/observe/snapshot-metadata.ts +107 -0
  27. package/src/server/tool-definitions.ts +49 -14
  28. package/src/server/tool-handlers.ts +13 -0
  29. package/src/server-core.ts +1 -1
  30. package/src/types.ts +23 -0
  31. package/test/unit/interact/classify_action_outcome.test.ts +44 -25
  32. package/test/unit/interact/wait_for_ui_change.test.ts +76 -0
  33. package/test/unit/server/contract.test.ts +8 -6
  34. package/test/unit/server/response_shapes.test.ts +37 -3
  35. package/docs/rfcs/003-wait-and-synchronization-reliability +0 -232
@@ -1,5 +1,6 @@
1
1
  export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown'
2
2
  export type NetworkRequestStatus = 'success' | 'failure' | 'retryable'
3
+ export type ActionCategory = 'local_state' | 'side_effect'
3
4
 
4
5
  export interface NetworkRequest {
5
6
  endpoint: string
@@ -9,6 +10,8 @@ export interface NetworkRequest {
9
10
  export interface ClassifyActionOutcomeInput {
10
11
  uiChanged: boolean
11
12
  expectedElementVisible?: boolean | null
13
+ /** Concrete action_type from the runtime action result (for example: tap, type_text, start_app). */
14
+ actionType?: string | null
12
15
  /** null = get_network_activity has not been called yet */
13
16
  networkRequests?: NetworkRequest[] | null
14
17
  hasLogErrors?: boolean | null
@@ -17,8 +20,29 @@ export interface ClassifyActionOutcomeInput {
17
20
  export interface ClassifyActionOutcomeResult {
18
21
  outcome: ActionOutcome
19
22
  reasoning: string
20
- /** Present when the caller must call get_network_activity before a final classification is possible */
21
- nextAction?: 'call_get_network_activity'
23
+ }
24
+
25
/**
 * Routing category for every action_type the classifier knows about.
 * Keys are lower-case; lookups normalize the incoming action type first.
 */
const ACTION_CATEGORY_BY_TYPE: Record<string, ActionCategory> = {
  tap: 'local_state',
  tap_element: 'local_state',
  swipe: 'local_state',
  scroll_to_element: 'local_state',
  type_text: 'local_state',
  press_back: 'local_state',
  start_app: 'side_effect',
  restart_app: 'side_effect',
  terminate_app: 'side_effect',
  reset_app_data: 'side_effect',
  install_app: 'side_effect',
  build_app: 'side_effect',
  build_and_install: 'side_effect'
}

/**
 * Maps a runtime action_type to its routing category.
 *
 * @param actionType - Concrete action_type; matched case-insensitively after trimming.
 * @returns `null` when no usable action type was supplied (non-string or blank),
 *   the mapped category for known types, and `'side_effect'` for any unrecognized type.
 */
function inferActionCategory(actionType?: string | null): ActionCategory | null {
  if (typeof actionType !== 'string') return null
  const normalized = actionType.trim().toLowerCase()
  if (!normalized) return null

  // Own-property check: a plain object literal also answers for inherited keys such as
  // "constructor" or "__proto__", which would otherwise slip a non-category value
  // (a function / the prototype object) past the `??` fallback.
  const known = Object.prototype.hasOwnProperty.call(ACTION_CATEGORY_BY_TYPE, normalized)
    ? ACTION_CATEGORY_BY_TYPE[normalized]
    : undefined
  return known ?? 'side_effect'
}
23
47
 
24
48
  /**
@@ -26,39 +50,55 @@ export interface ClassifyActionOutcomeResult {
26
50
  * Same inputs always produce the same output.
27
51
  */
28
52
  export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
29
- const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input
53
+ const { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors } = input
54
+ const actionCategory = inferActionCategory(actionType)
30
55
 
31
56
  // Step 1 — UI signal is positive
32
57
  if (uiChanged || expectedElementVisible === true) {
33
58
  return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' }
34
59
  }
35
60
 
36
- // Step 2 — UI did not change; network signal is required
37
- if (networkRequests === null || networkRequests === undefined) {
61
+ // Step 2 — no action type means we cannot choose a safe routing path
62
+ if (actionCategory === null) {
38
63
  return {
39
64
  outcome: 'unknown',
40
- reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
41
- nextAction: 'call_get_network_activity'
65
+ reasoning: 'actionType was not supplied; pass the runtime action_type so the classifier can distinguish local-state and side-effect routing'
42
66
  }
43
67
  }
44
68
 
45
- // Step 3 any network failure
46
- const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable')
69
+ const failedRequest = networkRequests?.find((r) => r.status === 'failure' || r.status === 'retryable')
47
70
  if (failedRequest) {
48
71
  return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` }
49
72
  }
50
73
 
51
- // Step 4no network requests at all
74
+ // Step 3local-state actions should be verified with state-specific signals first
75
+ if (actionCategory === 'local_state') {
76
+ const logNote = hasLogErrors ? ' (log errors present)' : ''
77
+ return {
78
+ outcome: 'no_op',
79
+ reasoning: `local-state action${logNote}; use expect_state, refreshed snapshot comparison, or expect_element_visible instead of defaulting to network inspection`
80
+ }
81
+ }
82
+
83
+ // Step 4 — side-effect actions may legitimately need network or log inspection
84
+ if (networkRequests === null || networkRequests === undefined) {
85
+ return {
86
+ outcome: 'unknown',
87
+ reasoning: 'side-effect action without network data; inspect network or log signals only if the outcome is still ambiguous'
88
+ }
89
+ }
90
+
91
+ // Step 5 — no network requests at all
52
92
  if (networkRequests.length === 0) {
53
93
  const logNote = hasLogErrors ? ' (log errors present)' : ''
54
- return { outcome: 'no_op', reasoning: `no UI change and no network activity${logNote}` }
94
+ return { outcome: 'no_op', reasoning: `side-effect action and no network activity${logNote}` }
55
95
  }
56
96
 
57
- // Step 5 — network requests exist and all succeeded
97
+ // Step 6 — network requests exist and all succeeded
58
98
  if (networkRequests.every((r) => r.status === 'success')) {
59
99
  return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
60
100
  }
61
101
 
62
- // Step 6 — fallback
102
+ // Step 7 — fallback
63
103
  return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
64
104
  }
@@ -5,6 +5,7 @@ export { AndroidInteract, iOSInteract };
5
5
 
6
6
  import { resolveTargetDevice } from '../utils/resolve-device.js'
7
7
  import { ToolsObserve } from '../observe/index.js'
8
+ import { computeSnapshotSignature } from '../observe/snapshot-metadata.js'
8
9
  import { nextActionId } from '../server/common.js'
9
10
  import type {
10
11
  ActionFailureCode,
@@ -12,6 +13,7 @@ import type {
12
13
  ExpectElementVisibleResponse,
13
14
  ExpectStateResponse,
14
15
  ExpectScreenResponse,
16
+ WaitForUIChangeResponse,
15
17
  UIElementState,
16
18
  TapElementResponse
17
19
  } from '../types.js'
@@ -60,9 +62,16 @@ interface UiResolution {
60
62
  height?: number
61
63
  }
62
64
 
65
+ interface UiChangeSignatureSet {
66
+ hierarchy: string | null
67
+ text: string | null
68
+ state: string | null
69
+ }
70
+
63
71
 
64
72
  export class ToolsInteract {
65
73
  private static readonly _maxResolvedUiElements = 256
74
+ private static readonly _uiChangeKinds: Array<'hierarchy_diff' | 'text_change' | 'state_change'> = ['hierarchy_diff', 'text_change', 'state_change']
66
75
  private static readonly _sliderSearchLookahead = 8
67
76
  private static readonly _sliderNegativeGapTolerancePx = 32
68
77
  private static readonly _sliderPositiveGapLimitPx = 640
@@ -85,6 +94,10 @@ export class ToolsInteract {
85
94
  return normalized as [number, number, number, number]
86
95
  }
87
96
 
97
/**
 * Returns the hex-encoded SHA-256 digest of a value's JSON serialization.
 *
 * @param value - Any JSON-serializable value.
 * @returns 64-character lowercase hex digest.
 */
private static _hash(value: unknown): string {
  // JSON.stringify yields undefined (not a string) for undefined, functions and symbols;
  // hash.update() rejects that, so fall back to the JSON literal for "no value".
  const serialized = JSON.stringify(value) ?? 'null'
  return createHash('sha256').update(serialized).digest('hex')
}
100
+
88
101
  private static _matchesSelector(el: UiElement, selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }): boolean {
89
102
  if (!selector) return false
90
103
  const normalize = ToolsInteract._normalize
@@ -195,6 +208,66 @@ export class ToolsInteract {
195
208
  }
196
209
  }
197
210
 
211
/**
 * Derives the three comparison signatures used to detect UI changes from a UI tree:
 * the structural snapshot signature, a hash of normalized text fields, and a hash of
 * per-element state fields. A tree without an `elements` array is treated as empty.
 *
 * @param tree - UI tree as returned by the observe layer.
 * @returns Signature set; `hierarchy` is whatever `computeSnapshotSignature` returns.
 */
private static _buildUiChangeSignatures(tree: any): UiChangeSignatureSet {
  const nodes: UiElement[] = Array.isArray(tree?.elements) ? tree.elements : []
  const screenName = ToolsInteract._normalize(tree?.screen)

  // Text-bearing fields only, with the first defined alias winning for each slot.
  const textEntries = nodes.map((node) => ({
    text: ToolsInteract._normalize(node?.text ?? node?.label ?? node?.value ?? ''),
    contentDescription: ToolsInteract._normalize(node?.contentDescription ?? node?.contentDesc ?? node?.accessibilityLabel ?? ''),
    resourceId: ToolsInteract._normalize(node?.resourceId ?? node?.resourceID ?? node?.id ?? '')
  }))

  // State fields only; anything missing is recorded as an explicit null so the
  // serialized shape is the same for every element.
  const stateEntries = nodes.map((node) => {
    const nodeState = node?.state
    return {
      checked: nodeState?.checked ?? null,
      selected: nodeState?.selected ?? null,
      focused: nodeState?.focused ?? null,
      expanded: nodeState?.expanded ?? null,
      enabled: nodeState?.enabled ?? null,
      text_value: nodeState?.text_value ?? null,
      value: nodeState?.value ?? null,
      raw_value: nodeState?.raw_value ?? null,
      value_range: nodeState?.value_range ?? null
    }
  })

  const hierarchy = computeSnapshotSignature(tree)
  const text = ToolsInteract._hash({ screen: screenName, elements: textEntries })
  const state = ToolsInteract._hash({ screen: screenName, elements: stateEntries })
  return { hierarchy, text, state }
}
258
+
259
/**
 * Reports which kind of UI change separates two signature sets.
 *
 * @param expected - Restrict the check to this change kind; when omitted, every kind in
 *   `_uiChangeKinds` is tried in order.
 * @param initial - Signatures of the reference snapshot.
 * @param current - Signatures of the snapshot being compared.
 * @returns The first change kind whose signature differs, or `null` when none does.
 */
private static _matchesUiChange(expected: 'hierarchy_diff' | 'text_change' | 'state_change' | undefined, initial: UiChangeSignatureSet, current: UiChangeSignatureSet): 'hierarchy_diff' | 'text_change' | 'state_change' | null {
  const kindsToCheck = expected ? [expected] : ToolsInteract._uiChangeKinds

  const hasChanged = (kind: 'hierarchy_diff' | 'text_change' | 'state_change'): boolean => {
    switch (kind) {
      case 'hierarchy_diff':
        return initial.hierarchy !== current.hierarchy
      case 'text_change':
        return initial.text !== current.text
      case 'state_change':
        return initial.state !== current.state
      default:
        return false
    }
  }

  return kindsToCheck.find(hasChanged) ?? null
}
270
+
198
271
  private static _resolvedTargetFromElement(
199
272
  elementId: string,
200
273
  element: UiElement,
@@ -955,6 +1028,84 @@ export class ToolsInteract {
955
1028
  }
956
1029
  }
957
1030
 
1031
/**
 * Polls the UI tree until it differs from the first successfully captured snapshot,
 * or until `timeout_ms` elapses.
 *
 * The first usable snapshot becomes the baseline. When a later snapshot differs, and
 * `stability_window_ms` is positive, the tree is captured once more after that delay;
 * the change is only reported if the confirmation snapshot still differs from the
 * baseline and is identical to the snapshot that triggered it.
 *
 * Snapshots that carry an `error` are failed captures, not UI states: they are never
 * used as the baseline and never count as (or confirm) a change. Exceptions thrown
 * while capturing are swallowed and polling continues until the timeout.
 *
 * @param platform - Target platform forwarded to the observe layer.
 * @param deviceId - Target device forwarded to the observe layer.
 * @param timeout_ms - Overall polling budget in milliseconds (default 60000).
 * @param stability_window_ms - Delay before the confirmation capture; 0 disables it
 *   (default 250, negative values are clamped to 0).
 * @param expected_change - Restrict detection to one change kind; any kind when omitted.
 * @returns A response with `success: true` and the observed change kind, or
 *   `success: false` with `timeout: true` when nothing changed in time.
 */
static async waitForUIChangeHandler({
  platform,
  deviceId,
  timeout_ms = 60000,
  stability_window_ms = 250,
  expected_change
}: {
  platform?: 'android' | 'ios',
  deviceId?: string,
  timeout_ms?: number,
  stability_window_ms?: number,
  expected_change?: 'hierarchy_diff' | 'text_change' | 'state_change'
}): Promise<WaitForUIChangeResponse> {
  const start = Date.now()
  const pollIntervalMs = 300
  const stabilityWindow = Math.max(0, typeof stability_window_ms === 'number' ? stability_window_ms : 250)
  const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms))
  let baseline: UiChangeSignatureSet | null = null
  let lastObservedRevision: number | null = null
  let lastLoadingState: any = null

  while (Date.now() - start < timeout_ms) {
    try {
      const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId }) as any

      // A tree with `error` has no elements and a null hierarchy signature; comparing it
      // against a real snapshot would look like a change, so skip it entirely.
      if (tree && !tree.error) {
        const signatures = ToolsInteract._buildUiChangeSignatures(tree)
        lastObservedRevision = typeof tree.snapshot_revision === 'number' ? tree.snapshot_revision : lastObservedRevision
        lastLoadingState = tree.loading_state ?? lastLoadingState

        if (!baseline) {
          baseline = signatures
        } else {
          const observedChange = ToolsInteract._matchesUiChange(expected_change, baseline, signatures)
          if (observedChange) {
            let confirmed = true

            if (stabilityWindow > 0) {
              await sleep(stabilityWindow)
              const confirmTree = await ToolsObserve.getUITreeHandler({ platform, deviceId }) as any

              if (!confirmTree || confirmTree.error) {
                // A failed confirmation capture cannot vouch for stability.
                confirmed = false
              } else {
                const confirmSignatures = ToolsInteract._buildUiChangeSignatures(confirmTree)
                lastObservedRevision = typeof confirmTree.snapshot_revision === 'number' ? confirmTree.snapshot_revision : lastObservedRevision
                lastLoadingState = confirmTree.loading_state ?? lastLoadingState

                // Stable means: still different from the baseline AND unchanged since
                // the snapshot that triggered the confirmation.
                const stillChanged = ToolsInteract._matchesUiChange(expected_change, baseline, confirmSignatures) !== null
                const settled =
                  confirmSignatures.hierarchy === signatures.hierarchy &&
                  confirmSignatures.text === signatures.text &&
                  confirmSignatures.state === signatures.state
                confirmed = stillChanged && settled
              }
            }

            if (confirmed) {
              return {
                success: true,
                observed_change: observedChange,
                snapshot_revision: lastObservedRevision ?? undefined,
                timeout: false,
                elapsed_ms: Date.now() - start,
                expected_change,
                loading_state: lastLoadingState ?? null,
                reason: 'UI change observed'
              }
            }
          }
        }
      }
    } catch {
      // Keep polling until timeout; the observable surface should be best-effort.
    }

    await sleep(pollIntervalMs)
  }

  return {
    success: false,
    observed_change: null,
    snapshot_revision: lastObservedRevision ?? undefined,
    timeout: true,
    elapsed_ms: Date.now() - start,
    expected_change,
    loading_state: lastLoadingState ?? null,
    reason: 'timeout'
  }
}
1108
+
958
1109
  static async expectScreenHandler({
959
1110
  platform,
960
1111
  fingerprint,
@@ -7,6 +7,7 @@ import { promises as fsPromises } from "fs"
7
7
  import path from "path"
8
8
  import { computeScreenFingerprint } from "../utils/ui/index.js"
9
9
  import { parsePngSize } from "../utils/image.js"
10
+ import { deriveSnapshotMetadata } from "./snapshot-metadata.js"
10
11
 
11
12
  const activeLogStreams: Map<string, { proc: any, file: string }> = new Map()
12
13
 
@@ -74,20 +75,29 @@ export class AndroidObserve {
74
75
  }
75
76
  }
76
77
 
78
+ const snapshotMetadata = deriveSnapshotMetadata(`android:${deviceInfo.id}`, {
79
+ screen: "",
80
+ resolution,
81
+ elements
82
+ }, 'ui_tree')
83
+
77
84
  return {
78
85
  device: deviceInfo,
79
86
  screen: "",
80
87
  resolution,
81
- elements
88
+ elements,
89
+ ...snapshotMetadata
82
90
  };
83
91
  } catch (e) {
84
92
  const errorMessage = `Failed to get UI tree. ADB Path: '${getAdbCmd()}'. Error: ${e instanceof Error ? e.message : String(e)}`;
85
93
  console.error(errorMessage);
94
+ const snapshotMetadata = deriveSnapshotMetadata(`android:${deviceInfo.id}`, null, 'ui_tree')
86
95
  return {
87
96
  device: deviceInfo,
88
97
  screen: "",
89
98
  resolution: { width: 0, height: 0 },
90
99
  elements: [],
100
+ ...snapshotMetadata,
91
101
  error: errorMessage
92
102
  };
93
103
  }
@@ -5,6 +5,7 @@ import type {
5
5
  CaptureDebugSnapshotRawResponse,
6
6
  SnapshotSemanticResponse
7
7
  } from '../types.js'
8
+ import { deriveSnapshotMetadata } from './snapshot-metadata.js'
8
9
 
9
10
  export { AndroidObserve } from './android.js'
10
11
  export { iOSObserve } from './ios.js'
@@ -245,7 +246,17 @@ export class ToolsObserve {
245
246
 
246
247
  static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId }: { reason?: string; includeLogs?: boolean; logLines?: number; platform?: 'android' | 'ios'; appId?: string; deviceId?: string; sessionId?: string } = {}) {
247
248
  const timestamp = Date.now()
248
- const raw: CaptureDebugSnapshotRawResponse = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] }
249
+ const raw: CaptureDebugSnapshotRawResponse = {
250
+ timestamp,
251
+ snapshot_revision: 0,
252
+ captured_at_ms: timestamp,
253
+ reason: reason || '',
254
+ activity: null,
255
+ fingerprint: null,
256
+ screenshot: null,
257
+ ui_tree: null,
258
+ logs: []
259
+ }
249
260
 
250
261
  // Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
251
262
  const sid = sessionId || 'default'
@@ -335,6 +346,20 @@ export class ToolsObserve {
335
346
  }
336
347
  }
337
348
 
349
+ const snapshotDeviceKey = raw.ui_tree?.device
350
+ ? `${raw.ui_tree.device.platform}:${raw.ui_tree.device.id}`
351
+ : `${platform || 'unknown'}:${deviceId || 'default'}`
352
+ const snapshotMetadata = deriveSnapshotMetadata(
353
+ snapshotDeviceKey,
354
+ raw.ui_tree,
355
+ 'snapshot',
356
+ raw.ui_tree?.snapshot_revision ? null : (raw.fingerprint || raw.activity || null)
357
+ )
358
+
359
+ raw.snapshot_revision = raw.ui_tree?.snapshot_revision ?? snapshotMetadata.snapshot_revision
360
+ raw.captured_at_ms = raw.ui_tree?.captured_at_ms ?? snapshotMetadata.captured_at_ms
361
+ raw.loading_state = raw.ui_tree?.loading_state ?? snapshotMetadata.loading_state
362
+
338
363
  const semantic = deriveSnapshotSemantic(raw)
339
364
  return semantic ? { raw, semantic } : { raw }
340
365
  }
@@ -7,6 +7,7 @@ import path from 'path'
7
7
  import { parseLogLine } from '../utils/android/utils.js'
8
8
  import { computeScreenFingerprint } from '../utils/ui/index.js'
9
9
  import { parsePngSize } from '../utils/image.js'
10
+ import { deriveSnapshotMetadata } from './snapshot-metadata.js'
10
11
 
11
12
  const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
12
13
  let iosExecCommand = execCommand
@@ -485,16 +486,19 @@ export class iOSObserve {
485
486
 
486
487
  async getUITree(deviceId: string = "booted"): Promise<GetUITreeResponse> {
487
488
  const device = await getIOSDeviceMetadata(deviceId);
489
+ const deviceKey = `ios:${device.id}`
488
490
 
489
491
  const idbExists = await isIDBInstalled();
490
492
  if (!idbExists) {
493
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree')
491
494
  return {
492
495
  device,
493
496
  screen: "",
494
497
  resolution: { width: 0, height: 0 },
495
498
  elements: [],
499
+ ...snapshotMetadata,
496
500
  error: "iOS UI tree retrieval requires 'idb' (iOS Device Bridge). Please install it via Homebrew: `brew tap facebook/fb && brew install idb-companion` and `pip3 install fb-idb`."
497
- };
501
+ };
498
502
  }
499
503
 
500
504
  const targetUdid = (device.id && device.id !== 'booted') ? device.id : undefined;
@@ -540,15 +544,17 @@ export class iOSObserve {
540
544
  console.error(`Attempt ${attempts} failed: ${e}`);
541
545
  }
542
546
 
543
- if (attempts === maxAttempts) {
544
- return {
545
- device,
546
- screen: "",
547
- resolution: { width: 0, height: 0 },
548
- elements: [],
549
- error: `Failed to retrieve valid UI dump after ${maxAttempts} attempts.`
550
- };
551
- }
547
+ if (attempts === maxAttempts) {
548
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree')
549
+ return {
550
+ device,
551
+ screen: "",
552
+ resolution: { width: 0, height: 0 },
553
+ elements: [],
554
+ ...snapshotMetadata,
555
+ error: `Failed to retrieve valid UI dump after ${maxAttempts} attempts.`
556
+ };
557
+ }
552
558
  }
553
559
 
554
560
  try {
@@ -569,20 +575,29 @@ export class iOSObserve {
569
575
  height = rootBounds[3] - rootBounds[1];
570
576
  }
571
577
 
578
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, {
579
+ screen: "",
580
+ resolution: { width, height },
581
+ elements
582
+ }, 'ui_tree')
583
+
572
584
  return {
573
585
  device,
574
586
  screen: "",
575
587
  resolution: { width, height },
576
- elements
588
+ elements,
589
+ ...snapshotMetadata
577
590
  };
578
591
  } catch (e) {
579
- return {
592
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree')
593
+ return {
580
594
  device,
581
595
  screen: "",
582
596
  resolution: { width: 0, height: 0 },
583
597
  elements: [],
598
+ ...snapshotMetadata,
584
599
  error: `Failed to parse idb output: ${e instanceof Error ? e.message : String(e)}`
585
- };
600
+ };
586
601
  }
587
602
  }
588
603
 
@@ -0,0 +1,107 @@
1
+ import crypto from 'crypto'
2
+ import type { GetUITreeResponse, LoadingState, UIElement } from '../types.js'
3
+
4
+ interface SnapshotState {
5
+ revision: number
6
+ signature: string | null
7
+ }
8
+
9
+ const snapshotStateByDevice = new Map<string, SnapshotState>()
10
+
11
/**
 * Canonicalizes a value for comparison: null/undefined become the empty string,
 * everything else is stringified, trimmed and lower-cased.
 */
function normalize(value: unknown): string {
  const raw = value === null || value === undefined ? '' : String(value)
  return raw.trim().toLowerCase()
}
15
+
16
/**
 * Coerces the first four entries of a bounds array to numbers.
 *
 * @returns The four numeric entries, or `null` when the input is not an array, has
 *   fewer than four entries, or any of the first four does not convert to a number.
 */
function normalizeBounds(bounds: unknown): [number, number, number, number] | null {
  if (!Array.isArray(bounds)) return null
  if (bounds.length < 4) return null

  const corners = bounds.slice(0, 4).map((entry) => Number(entry))
  const allNumeric = corners.every((corner) => !Number.isNaN(corner))
  return allNumeric ? (corners as [number, number, number, number]) : null
}
22
+
23
/**
 * Projects a UI element onto the fields that feed the snapshot signature.
 * String-like fields are normalized, flags are coerced to booleans, a missing
 * `state` becomes `null`, and bounds are reduced to four numbers (or `null`).
 * Property order is significant: the result is serialized with JSON.stringify.
 */
function stableElementSignature(element: UIElement) {
  const { text, contentDescription, resourceId, type, stable_id, role, test_tag, selector, state, bounds } = element

  return {
    text: normalize(text),
    contentDescription: normalize(contentDescription),
    resourceId: normalize(resourceId),
    type: normalize(type),
    stable_id: normalize(stable_id),
    role: normalize(role),
    test_tag: normalize(test_tag),
    selector: normalize(selector?.value),
    clickable: Boolean(element.clickable),
    enabled: Boolean(element.enabled),
    visible: Boolean(element.visible),
    state: state ?? null,
    bounds: normalizeBounds(bounds)
  }
}
40
+
41
/**
 * Computes a SHA-256 hex signature over a UI tree's normalized screen name,
 * resolution and per-element signatures.
 *
 * @param tree - UI tree (or the relevant subset of its fields).
 * @returns The hex digest, or `null` when there is no tree or the tree carries an error.
 */
export function computeSnapshotSignature(tree: Pick<GetUITreeResponse, 'elements' | 'screen' | 'resolution' | 'error'> | null | undefined): string | null {
  if (!tree) return null
  if (tree.error) return null

  const screen = normalize(tree.screen)
  const resolution = tree.resolution || { width: 0, height: 0 }
  const elements = Array.isArray(tree.elements)
    ? tree.elements.map((item) => stableElementSignature(item))
    : []

  // Key order (screen, resolution, elements) is part of the signature.
  const serialized = JSON.stringify({ screen, resolution, elements })

  const hasher = crypto.createHash('sha256')
  hasher.update(serialized)
  return hasher.digest('hex')
}
52
+
53
/**
 * Scans a UI tree for a visible element that looks like a loading indicator.
 *
 * An element matches when its normalized type, text or content description contains
 * one of the loading keywords. The first matching visible element wins.
 *
 * @param tree - UI tree (or the relevant subset of its fields).
 * @param source - Label copied into the result to identify where the tree came from.
 * @returns An active loading state describing the matched signal, or `null` when there
 *   is no tree, the tree carries an error, it has no element array, or nothing matches.
 */
export function detectLoadingState(tree: Pick<GetUITreeResponse, 'elements' | 'error'> | null | undefined, source: string): LoadingState | null {
  if (!tree || tree.error || !Array.isArray(tree.elements)) return null

  // Built once per call rather than once per element.
  const loadingPattern = /progress|spinner|loading|please wait|busy|loading indicator|skeleton|pending/

  for (const element of tree.elements) {
    if (!element?.visible) continue

    // Consider text AND content description: `text ?? contentDescription` never falls
    // back when text is the empty string, so an indicator labelled only through its
    // description (text === '') would be missed.
    const text = normalize(element?.text)
    const description = normalize(element?.contentDescription)
    const type = normalize(element?.type)
    const combined = `${type} ${text} ${description}`

    if (!loadingPattern.test(combined)) continue

    // Most specific keyword first; anything else that matched is a generic indicator.
    const signal = combined.includes('progress')
      ? 'progress_indicator'
      : combined.includes('spinner')
        ? 'spinner'
        : combined.includes('busy')
          ? 'busy_indicator'
          : combined.includes('skeleton')
            ? 'skeleton'
            : 'loading_indicator'
    return { active: true, signal, source }
  }

  return null
}
77
+
78
/**
 * Produces snapshot metadata for a capture and advances the per-device revision counter.
 *
 * The revision starts at 1 for a device and increases by one whenever the capture's
 * signature differs from the last signature recorded for that device. A capture without
 * a signature (failed capture and no override) reports the current revision and leaves
 * the recorded state untouched, so a transient failure between two identical snapshots
 * does not register as a UI change.
 *
 * @param deviceKey - Key identifying the device whose revision history is tracked.
 * @param tree - Captured UI tree, or null/undefined when capture failed.
 * @param source - Label forwarded to loading-state detection.
 * @param signatureOverride - Signature to use instead of one computed from `tree`;
 *   null/undefined means "compute from the tree".
 * @returns `snapshot_revision`, `captured_at_ms` (current time) and `loading_state`.
 */
export function deriveSnapshotMetadata(
  deviceKey: string,
  tree: Pick<GetUITreeResponse, 'elements' | 'screen' | 'resolution' | 'error'> | null | undefined,
  source: string,
  signatureOverride?: string | null
) {
  const signature = signatureOverride ?? computeSnapshotSignature(tree)
  const previous = snapshotStateByDevice.get(deviceKey)

  let revision = 1
  if (signature === null) {
    if (previous) {
      // Keep the last known signature: overwriting it with null would make the next
      // identical snapshot look new and bump the revision spuriously.
      revision = previous.revision
    } else {
      // Nothing recorded yet for this device: seed it so the revision is stable.
      snapshotStateByDevice.set(deviceKey, { revision, signature })
    }
  } else {
    if (previous) {
      revision = previous.signature === signature ? previous.revision : previous.revision + 1
    }
    snapshotStateByDevice.set(deviceKey, { revision, signature })
  }

  return {
    snapshot_revision: revision,
    captured_at_ms: Date.now(),
    loading_state: detectLoadingState(tree, source)
  }
}
104
+
105
/**
 * Clears the per-device snapshot revision state held by this module.
 * Intended for tests, as the name indicates, so each test starts without recorded revisions.
 */
export function resetSnapshotMetadataForTests() {
  snapshotStateByDevice.clear()
}