mobile-debug-mcp 0.24.8 → 0.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/types.ts CHANGED
@@ -79,6 +79,36 @@ export interface GetCrashResponse {
79
79
  crashes: string[];
80
80
  }
81
81
 
82
/**
 * Optional, platform-reported state attached to a UI element.
 *
 * Every member may be omitted or explicitly `null`; consumers should treat
 * both as "not reported".
 */
export interface UIElementState {
  /** Checked flag (e.g. for checkbox-like widgets). */
  checked?: boolean | null;
  /** Selection flag, a selected label, or an `{ id, label }` descriptor. */
  selected?: boolean | string | { id: string; label?: string } | null;
  /** Whether the element currently holds focus. */
  focused?: boolean | null;
  /** Whether the element is expanded. */
  expanded?: boolean | null;
  /** Whether the element is enabled. */
  enabled?: boolean | null;
  /** Text content of an editable field. */
  text_value?: string | null;
  /**
   * Primary value of the element.
   * NOTE(review): the Android extractor in this package stores a 0-100
   * percentage here when a positive max is known — confirm for other platforms.
   */
  value?: number | string | null;
  /** Unscaled value as read from the platform. */
  raw_value?: number | string | null;
  /** Inclusive bounds that `raw_value` is expected to fall within. */
  value_range?: { min: number; max: number } | null;
}
96
+
97
/**
 * How much trust to place in a resolved selector.
 */
export interface SelectorConfidence {
  /** Numeric confidence; higher means more trustworthy. */
  score: number;
  /** Short machine-readable label naming what the score is based on. */
  reason: string;
}
101
+
102
/**
 * The selector chosen to re-locate an element, paired with its confidence.
 */
export interface UIResolutionSelector {
  /** Selector string, or `null` when none is available. */
  value: string | null;
  /** Confidence attached to `value`, or `null` when none was computed. */
  confidence: SelectorConfidence | null;
}
106
+
107
/**
 * Coarse semantic hints about an element.
 */
export interface UIElementSemanticMetadata {
  /** True when the element is reported as clickable. */
  is_clickable: boolean;
  /** True when the element is treated as a container. */
  is_container: boolean;
}
111
+
82
112
  export interface CaptureAndroidScreenResponse {
83
113
  device: DeviceInfo;
84
114
  screenshot: string; // base64 encoded string
@@ -116,6 +146,12 @@ export interface UIElement {
116
146
  children?: number[];
117
147
  center?: [number, number];
118
148
  depth?: number;
149
+ state?: UIElementState | null;
150
+ stable_id?: string | null;
151
+ role?: string | null;
152
+ test_tag?: string | null;
153
+ selector?: UIResolutionSelector | null;
154
+ semantic?: UIElementSemanticMetadata | null;
119
155
  }
120
156
 
121
157
  export interface GetUITreeResponse {
@@ -151,7 +187,7 @@ export interface CaptureDebugSnapshotRawResponse {
151
187
  activity: string | null;
152
188
  fingerprint: string | null;
153
189
  screenshot: string | null;
154
- ui_tree: unknown | null;
190
+ ui_tree: GetUITreeResponse | null;
155
191
  logs: StructuredLogEntry[];
156
192
  device?: DeviceInfo;
157
193
  screenshot_error?: string;
@@ -198,6 +234,12 @@ export interface ActionTargetResolved {
198
234
  class: string | null;
199
235
  bounds: [number, number, number, number] | null;
200
236
  index: number | null;
237
+ state?: UIElementState | null;
238
+ stable_id?: string | null;
239
+ role?: string | null;
240
+ test_tag?: string | null;
241
+ selector?: UIResolutionSelector | null;
242
+ semantic?: UIElementSemanticMetadata | null;
201
243
  }
202
244
 
203
245
  export interface ActionExecutionResult {
@@ -260,6 +302,30 @@ export interface ExpectElementVisibleResponse {
260
302
  retryable?: boolean;
261
303
  }
262
304
 
305
/**
 * Result payload of a state expectation: which element was targeted, which
 * property was expected to hold which value, and what was observed.
 */
export interface ExpectStateResponse {
  /** Overall outcome reported for the check. */
  success: boolean;
  /** Selector the caller supplied to locate the element, when present. */
  selector?: {
    text?: string;
    resource_id?: string;
    accessibility_id?: string;
    contains?: boolean;
  };
  /** Identifier of the resolved element, or `null`. */
  element_id: string | null;
  /** The property under test and the value it was expected to have. */
  expected_state: {
    property: string;
    expected: boolean | number | string | Record<string, unknown>;
  };
  /**
   * The resolved element, if any. The intersection restates the optional
   * `state` member that `ActionTargetResolved` itself declares.
   */
  element?: (ActionTargetResolved & { state?: UIElementState | null }) | null;
  /** The value read from the element for the property under test. */
  observed_state?: {
    property: string;
    value: boolean | number | string | Record<string, unknown> | null;
    raw_value?: boolean | number | string | null;
  };
  /** Human-readable explanation of the outcome. */
  reason?: string;
  /** Coarse failure classification. */
  failure_code?: 'ELEMENT_NOT_FOUND' | 'UNKNOWN';
  /** Retry hint for the caller. */
  retryable?: boolean;
}
328
+
263
329
  export interface SwipeResponse {
264
330
  device: DeviceInfo;
265
331
  success: boolean;
@@ -1,4 +1,4 @@
1
- import { DeviceInfo, UIElement } from "../../types.js"
1
+ import { DeviceInfo, UIElement, UIElementSemanticMetadata, UIElementState, UIResolutionSelector, SelectorConfidence } from "../../types.js"
2
2
  import { promises as fsPromises, existsSync } from 'fs'
3
3
  import path from 'path'
4
4
  import { detectJavaHome } from '../java.js'
@@ -323,6 +323,114 @@ export function getCenter(bounds: [number, number, number, number]): [number, nu
323
323
  return [Math.floor((x1 + x2) / 2), Math.floor((y1 + y2) / 2)];
324
324
  }
325
325
 
326
/**
 * Reads an attribute as a tri-state boolean.
 *
 * @param value Raw attribute value.
 * @returns `true` for `true`/`'true'`, `false` for `false`/`'false'`,
 *   and `null` for anything else (including a missing attribute).
 */
function parseBooleanAttr(value: unknown): boolean | null {
  switch (value) {
    case true:
    case 'true':
      return true
    case false:
    case 'false':
      return false
    default:
      return null
  }
}
331
+
332
/**
 * Reads an attribute as a finite number.
 *
 * @param value Raw attribute value (number or numeric string).
 * @returns The finite number, or `null` when the value is missing, not a
 *   number/string, blank, or not parseable as a finite number.
 */
function parseNumberAttr(value: unknown): number | null {
  if (typeof value === 'number') return Number.isFinite(value) ? value : null
  if (typeof value !== 'string') return null

  // Number('') and Number('   ') evaluate to 0, which would turn an empty
  // attribute into a bogus zero reading; treat blank strings as "no value".
  const trimmed = value.trim()
  if (trimmed.length === 0) return null

  const parsed = Number(trimmed)
  return Number.isFinite(parsed) ? parsed : null
}
338
+
339
/**
 * Canonicalises a class name for pattern matching.
 *
 * @param value Raw class attribute.
 * @returns The trimmed, lower-cased string, or `''` when `value` is not a string.
 */
function normalizeClassName(value: unknown): string {
  if (typeof value !== 'string') return ''
  return value.trim().toLowerCase()
}
342
+
343
/**
 * Maps an Android widget class name onto a coarse role label.
 *
 * The patterns are lower-case and case-sensitive, so `className` is expected
 * to be lower-cased already. Rules are evaluated in order and the first match
 * wins (e.g. a toggle button resolves to `'switch'`, not `'button'`).
 *
 * @param className Lower-cased class name.
 * @returns The role label, or `null` when no pattern matches.
 */
function inferAndroidRole(className: string): string | null {
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/seekbar|slider|progress/, 'slider'],
    [/switch|toggle/, 'switch'],
    [/checkbox/, 'checkbox'],
    [/radiobutton|radio/, 'radio'],
    [/edittext|textfield|search/, 'text_field'],
    [/button|fab/, 'button'],
    [/imageview|icon/, 'image'],
    [/recyclerview|scroll|layout|viewgroup|frame/, 'container']
  ]

  for (const [pattern, role] of rules) {
    if (pattern.test(className)) return role
  }
  return null
}
354
+
355
/**
 * Produces the confidence record for a selector, based on which attribute the
 * selector value was taken from.
 *
 * @param source Origin of the selector value.
 * @returns A fresh `{ score, reason }` object, or `null` for `'none'` (and any
 *   unrecognised source).
 */
function buildAndroidSelectorConfidence(source: 'resource_id' | 'content_desc' | 'text' | 'class' | 'none'): SelectorConfidence | null {
  if (source === 'resource_id') return { score: 1, reason: 'resource_id' }
  if (source === 'content_desc') return { score: 0.9, reason: 'content_description' }
  if (source === 'text') return { score: 0.6, reason: 'text_match' }
  if (source === 'class') return { score: 0.35, reason: 'class_match' }
  return null
}
369
+
370
/**
 * Picks the selector for an Android element from the first non-empty
 * attribute, in priority order: resource id, content description, text,
 * class name.
 *
 * @param text Element text, if any.
 * @param contentDescription Content description, if any.
 * @param resourceId Resource id, if any.
 * @param className Class name (an empty string counts as absent).
 * @returns The chosen value with its confidence, or `null` when every
 *   candidate is empty.
 */
function buildAndroidSelector(text: string | null, contentDescription: string | null, resourceId: string | null, className: string): UIResolutionSelector | null {
  const candidates: Array<[string | null, 'resource_id' | 'content_desc' | 'text' | 'class']> = [
    [resourceId, 'resource_id'],
    [contentDescription, 'content_desc'],
    [text, 'text'],
    [className, 'class']
  ]

  for (const [candidate, source] of candidates) {
    if (candidate) {
      return { value: candidate, confidence: buildAndroidSelectorConfidence(source) }
    }
  }
  return null
}
377
+
378
/**
 * Derives semantic hints for an Android element.
 *
 * @param clickable Whether the node is clickable.
 * @param className Class name; matched case-sensitively against lower-case
 *   patterns, so it is expected to be lower-cased already.
 * @returns `is_clickable` mirrors `clickable`; `is_container` is true when the
 *   class name matches a container-like pattern.
 */
function buildAndroidSemantic(clickable: boolean, className: string): UIElementSemanticMetadata {
  const containerPattern = /recyclerview|scroll|layout|viewgroup|frame/
  const looksLikeContainer = containerPattern.test(className)
  return { is_clickable: clickable, is_container: looksLikeContainer }
}
384
+
385
/**
 * Tells whether a parsed hierarchy node looks like a slider/progress widget,
 * judging only by its `@_class` attribute.
 *
 * @param node Parsed XML node; a missing or falsy `@_class` counts as no match.
 * @returns True when the class name contains seekbar, slider, range or progress.
 */
function isSliderLikeAndroid(node: any): boolean {
  const rawClass = node['@_class'] || ''
  const lowered = String(rawClass).toLowerCase()
  return /seekbar|slider|range|progress/i.test(lowered)
}
389
+
390
/**
 * Builds the state record for a parsed Android hierarchy node from its
 * `@_`-prefixed attributes.
 *
 * - Boolean flags (`checked`, `focused`, `expanded`, `enabled`) are copied only
 *   when the attribute is present as true/false.
 * - `selected` is `false` when the node is explicitly unselected; when it is
 *   selected, the node's text, else its content description, else `true`.
 * - `text_value` is set for text-field-like classes with non-blank text.
 * - Slider-like nodes report `raw_value` (progress, falling back to value or a
 *   numeric content description), `value_range` (`0..max` when max is
 *   positive) and `value` (rounded percentage of max when max is positive,
 *   otherwise the raw number).
 * - Other nodes report a numeric `@_value` as `value`/`raw_value`, or fall back
 *   to the non-blank text as `value`.
 *
 * @param node Parsed XML node.
 * @returns The populated state, or `null` when nothing could be extracted.
 */
function extractAndroidState(node: any): UIElementState | null {
  const checked = parseBooleanAttr(node['@_checked'])
  const selectedFlag = parseBooleanAttr(node['@_selected'])
  const focused = parseBooleanAttr(node['@_focused'])
  const expanded = parseBooleanAttr(node['@_expanded'])
  const enabled = parseBooleanAttr(node['@_enabled'])
  const textValue = typeof node['@_text'] === 'string' && node['@_text'].trim().length > 0 ? node['@_text'] : null
  const state: UIElementState = {}

  if (checked !== null) state.checked = checked
  if (selectedFlag === true) {
    // Prefer a human-readable label for the selected item when one exists.
    state.selected = textValue || node['@_content-desc'] || true
  } else if (selectedFlag === false) {
    // An explicit selected="false" must not be reported as a truthy selection.
    state.selected = false
  }
  if (focused !== null) state.focused = focused
  if (expanded !== null) state.expanded = expanded
  if (enabled !== null) state.enabled = enabled

  if (textValue && /edittext|textfield|search/i.test(String(node['@_class'] || ''))) {
    state.text_value = textValue
  }

  if (isSliderLikeAndroid(node)) {
    const max = parseNumberAttr(node['@_max'])
    const numericValue =
      parseNumberAttr(node['@_progress']) ??
      parseNumberAttr(node['@_value']) ??
      parseNumberAttr(node['@_content-desc'])
    if (numericValue !== null) {
      const hasUsableMax = max !== null && max > 0
      state.raw_value = numericValue
      state.value_range = hasUsableMax ? { min: 0, max } : null
      // Normalise to a 0-100 percentage only when the scale is known.
      state.value = hasUsableMax ? Math.round((numericValue / max) * 100) : numericValue
    }
  } else {
    const numericValue = parseNumberAttr(node['@_value'])
    if (numericValue !== null) {
      state.value = numericValue
      state.raw_value = numericValue
    } else if (textValue) {
      state.value = textValue
    }
  }

  return Object.keys(state).length > 0 ? state : null
}
433
+
326
434
  export async function getScreenResolution(deviceId?: string): Promise<{ width: number; height: number }> {
327
435
  try {
328
436
  const output = await execAdb(['shell', 'wm', 'size'], deviceId);
@@ -343,7 +451,15 @@ export function traverseNode(node: any, elements: UIElement[], parentIndex: numb
343
451
  const text = node['@_text'] || null;
344
452
  const contentDescription = node['@_content-desc'] || null;
345
453
  const clickable = node['@_clickable'] === 'true';
454
+ const className = String(node['@_class'] || 'unknown');
346
455
  const bounds = parseBounds(node['@_bounds'] || '[0,0][0,0]');
456
+ const state = extractAndroidState(node);
457
+ const role = inferAndroidRole(normalizeClassName(className));
458
+ const resourceId = typeof node['@_resource-id'] === 'string' && node['@_resource-id'].trim().length > 0 ? node['@_resource-id'] : null
459
+ const stableId = resourceId ?? (typeof contentDescription === 'string' && contentDescription.trim().length > 0 ? contentDescription : null)
460
+ const testTag = stableId
461
+ const selector = buildAndroidSelector(text, contentDescription, resourceId, normalizeClassName(className))
462
+ const semantic = buildAndroidSemantic(clickable, normalizeClassName(className))
347
463
 
348
464
  const isUseful = clickable || (text && text.length > 0) || (contentDescription && contentDescription.length > 0);
349
465
 
@@ -351,14 +467,20 @@ export function traverseNode(node: any, elements: UIElement[], parentIndex: numb
351
467
  const element: UIElement = {
352
468
  text,
353
469
  contentDescription,
354
- type: node['@_class'] || 'unknown',
355
- resourceId: node['@_resource-id'] || null,
470
+ type: className,
471
+ resourceId,
356
472
  clickable,
357
473
  enabled: node['@_enabled'] === 'true',
358
474
  visible: true,
359
475
  bounds,
360
476
  center: getCenter(bounds),
361
- depth
477
+ depth,
478
+ state,
479
+ stable_id: stableId,
480
+ role,
481
+ test_tag: testTag,
482
+ selector,
483
+ semantic
362
484
  };
363
485
 
364
486
  if (parentIndex !== -1) {
@@ -0,0 +1,90 @@
1
+ import assert from 'assert'
2
+ import { traverseNode } from '../../../src/utils/android/utils.js'
3
+ import { traverseIDBNode } from '../../../src/observe/ios.js'
4
+
5
/**
 * Exercises element metadata extraction on hand-built nodes: an Android
 * slider with a resource id, an Android button that only has a content
 * description, an iOS slider, and an iOS button with a unique id.
 * Throws on the first failed assertion.
 */
async function run() {
  // Android slider: resource id present, progress 7 of 14.
  const sliderResults: any[] = []
  const androidSliderNode = {
    '@_class': 'android.widget.SeekBar',
    '@_text': '',
    '@_content-desc': 'Duration',
    '@_resource-id': 'com.example:id/duration',
    '@_clickable': 'true',
    '@_enabled': 'true',
    '@_selected': 'true',
    '@_progress': '7',
    '@_max': '14',
    '@_bounds': '[0,0][200,40]'
  }
  traverseNode(androidSliderNode, sliderResults)

  assert.strictEqual(sliderResults.length, 1)
  const androidSlider = sliderResults[0]
  assert.deepStrictEqual(androidSlider.state?.selected, 'Duration')
  assert.strictEqual(androidSlider.state?.raw_value, 7)
  assert.strictEqual(androidSlider.state?.value, 50)
  assert.deepStrictEqual(androidSlider.state?.value_range, { min: 0, max: 14 })
  assert.strictEqual(androidSlider.stable_id, 'com.example:id/duration')
  assert.strictEqual(androidSlider.role, 'slider')
  assert.strictEqual(androidSlider.test_tag, 'com.example:id/duration')
  assert.deepStrictEqual(androidSlider.selector, {
    value: 'com.example:id/duration',
    confidence: { score: 1, reason: 'resource_id' }
  })
  assert.deepStrictEqual(androidSlider.semantic, { is_clickable: true, is_container: false })

  // Android button without a resource id: content description is the fallback.
  const buttonResults: any[] = []
  const androidButtonNode = {
    '@_class': 'android.widget.Button',
    '@_text': '',
    '@_content-desc': 'Save',
    '@_clickable': 'true',
    '@_enabled': 'true',
    '@_bounds': '[0,0][100,50]'
  }
  traverseNode(androidButtonNode, buttonResults)

  assert.strictEqual(buttonResults.length, 1)
  const androidButton = buttonResults[0]
  assert.strictEqual(androidButton.resourceId, null)
  assert.strictEqual(androidButton.stable_id, 'Save')
  assert.deepStrictEqual(androidButton.selector, {
    value: 'Save',
    confidence: { score: 0.9, reason: 'content_description' }
  })

  // iOS slider with a fractional value.
  const iosSliderResults: any[] = []
  const iosSliderNode = {
    AXElementType: 'Slider',
    AXLabel: 'Playback speed',
    AXValue: '0.75',
    AXUniqueId: 'playback_speed_slider',
    AXTraits: ['UIAccessibilityTraitAdjustable']
  }
  traverseIDBNode(iosSliderNode, iosSliderResults)

  assert.strictEqual(iosSliderResults.length, 1)
  const iosSlider = iosSliderResults[0]
  assert.strictEqual(iosSlider.state?.value, 75)
  assert.strictEqual(iosSlider.state?.raw_value, 0.75)
  assert.strictEqual(iosSlider.stable_id, 'playback_speed_slider')
  assert.strictEqual(iosSlider.role, 'slider')
  assert.strictEqual(iosSlider.test_tag, 'playback_speed_slider')
  assert.deepStrictEqual(iosSlider.selector, {
    value: 'playback_speed_slider',
    confidence: { score: 1, reason: 'accessibility_identifier' }
  })
  assert.deepStrictEqual(iosSlider.semantic, { is_clickable: true, is_container: false })

  // iOS button: the unique id is used as the stable id.
  const iosButtonResults: any[] = []
  const iosButtonNode = {
    AXElementType: 'Button',
    AXLabel: 'Save',
    AXTraits: ['UIAccessibilityTraitButton'],
    AXUniqueId: 'fallback_unique_id'
  }
  traverseIDBNode(iosButtonNode, iosButtonResults)

  assert.strictEqual(iosButtonResults.length, 1)
  assert.strictEqual(iosButtonResults[0].stable_id, 'fallback_unique_id')

  console.log('state extraction tests passed')
}

// Report the failure and exit non-zero so the test runner sees it.
run().catch((failure) => {
  console.error(failure)
  process.exit(1)
})
@@ -12,6 +12,7 @@ async function run() {
12
12
  const originalTapHandler = (ToolsInteract as any).tapHandler
13
13
  const originalExpectScreenHandler = (ToolsInteract as any).expectScreenHandler
14
14
  const originalExpectElementVisibleHandler = (ToolsInteract as any).expectElementVisibleHandler
15
+ const originalExpectStateHandler = (ToolsInteract as any).expectStateHandler
15
16
  const originalStartApp = AndroidManage.prototype.startApp
16
17
  const originalCaptureScreenshotHandler = (ToolsObserve as any).captureScreenshotHandler
17
18
  const originalGetUITreeHandler = (ToolsObserve as any).getUITreeHandler
@@ -130,8 +131,8 @@ async function run() {
130
131
  selector: { text: 'Ready' },
131
132
  element_id: 'el_ready',
132
133
  expected_condition: 'visible',
133
- element: { elementId: 'el_ready', text: 'Ready', resource_id: null, accessibility_id: null, class: 'TextView', bounds: [0, 0, 10, 10], index: 0 },
134
- observed: { status: 'success', matched_count: 1, condition_satisfied: true, selected_index: 0, last_matched_element: { elementId: 'el_ready', text: 'Ready', resource_id: null, accessibility_id: null, class: 'TextView', bounds: [0, 0, 10, 10], index: 0 } },
134
+ element: { elementId: 'el_ready', text: 'Ready', resource_id: null, accessibility_id: null, class: 'TextView', bounds: [0, 0, 10, 10], index: 0, state: { enabled: true } },
135
+ observed: { status: 'success', matched_count: 1, condition_satisfied: true, selected_index: 0, last_matched_element: { elementId: 'el_ready', text: 'Ready', resource_id: null, accessibility_id: null, class: 'TextView', bounds: [0, 0, 10, 10], index: 0, state: { enabled: true } } },
135
136
  reason: 'selector is visible'
136
137
  })
137
138
 
@@ -141,6 +142,42 @@ async function run() {
141
142
  assert.strictEqual(expectElementPayload.element_id, 'el_ready')
142
143
  assert.strictEqual(expectElementPayload.expected_condition, 'visible')
143
144
 
145
+ ;(ToolsObserve as any).getUITreeHandler = async () => ({
146
+ device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
147
+ resolution: { width: 1080, height: 2400 },
148
+ elements: [{
149
+ text: 'Notifications',
150
+ depth: 0,
151
+ center: { x: 50, y: 20 },
152
+ state: { checked: true, selected: 'Notifications' }
153
+ }]
154
+ })
155
+
156
+ ;(ToolsInteract as any).expectStateHandler = async () => ({
157
+ success: true,
158
+ selector: { text: 'Notifications' },
159
+ element_id: 'el_notifications',
160
+ expected_state: { property: 'checked', expected: true },
161
+ element: {
162
+ elementId: 'el_notifications',
163
+ text: 'Notifications',
164
+ resource_id: null,
165
+ accessibility_id: null,
166
+ class: 'Switch',
167
+ bounds: [0, 0, 10, 10],
168
+ index: 0,
169
+ state: { checked: true, selected: 'Notifications' }
170
+ },
171
+ observed_state: { property: 'checked', value: true, raw_value: true },
172
+ reason: 'checked matches expected value'
173
+ })
174
+
175
+ const expectStateResponse = await handleToolCall('expect_state', { selector: { text: 'Notifications' }, property: 'checked', expected: true })
176
+ const expectStatePayload = JSON.parse((expectStateResponse as any).content[0].text)
177
+ assert.strictEqual(expectStatePayload.success, true)
178
+ assert.strictEqual(expectStatePayload.expected_state.property, 'checked')
179
+ assert.strictEqual(expectStatePayload.observed_state.value, true)
180
+
144
181
  ;(ToolsInteract as any).tapHandler = async () => {
145
182
  throw new Error('boom')
146
183
  }
@@ -234,6 +271,7 @@ async function run() {
234
271
  ;(ToolsInteract as any).tapHandler = originalTapHandler
235
272
  ;(ToolsInteract as any).expectScreenHandler = originalExpectScreenHandler
236
273
  ;(ToolsInteract as any).expectElementVisibleHandler = originalExpectElementVisibleHandler
274
+ ;(ToolsInteract as any).expectStateHandler = originalExpectStateHandler
237
275
  AndroidManage.prototype.startApp = originalStartApp
238
276
  ;(ToolsObserve as any).captureScreenshotHandler = originalCaptureScreenshotHandler
239
277
  ;(ToolsObserve as any).getUITreeHandler = originalGetUITreeHandler