mobile-debug-mcp 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -356,6 +356,59 @@ function parseNumberAttr(value) {
356
356
  const parsed = Number(value);
357
357
  return Number.isFinite(parsed) ? parsed : null;
358
358
  }
359
/**
 * Canonicalises a class-name attribute so it can be pattern-matched.
 *
 * @param {*} value - Raw attribute value; anything that is not a string is treated as absent.
 * @returns {string} The trimmed, lower-cased class name, or '' when `value` is not a string.
 */
function normalizeClassName(value) {
    if (typeof value !== 'string') {
        return '';
    }
    const trimmed = value.trim();
    return trimmed.toLowerCase();
}
362
/**
 * Infers a coarse semantic role from an Android widget class name.
 *
 * The name is matched by substring against an ordered rule list and the first
 * matching rule wins, so e.g. `ImageButton` resolves to 'button' (the button
 * rule precedes the image rule). Note that progress-style widgets are
 * reported as 'slider'.
 *
 * @param {*} className - Class name in any letter case; it is coerced to a
 *   string and lower-cased here, so callers need not normalise it first.
 * @returns {string|null} One of 'slider', 'switch', 'checkbox', 'radio',
 *   'text_field', 'button', 'image', 'container', or null when no rule matches.
 */
function inferAndroidRole(className) {
    // Lower-case internally: the patterns below are lowercase-only, so a raw
    // name such as "android.widget.Button" would otherwise never match.
    const name = String(className ?? '').toLowerCase();
    // Order matters: earlier rules take precedence over later ones.
    const rules = [
        [/seekbar|slider|progress/, 'slider'],
        [/switch|toggle/, 'switch'],
        [/checkbox/, 'checkbox'],
        [/radio/, 'radio'],
        [/edittext|textfield|search/, 'text_field'],
        [/button|fab/, 'button'],
        [/imageview|icon/, 'image'],
        [/recyclerview|scroll|layout|viewgroup|frame/, 'container'],
    ];
    for (const [pattern, role] of rules) {
        if (pattern.test(name)) {
            return role;
        }
    }
    return null;
}
381
/**
 * Maps the origin of a selector value to its confidence descriptor.
 *
 * @param {string} source - One of 'resource_id', 'content_desc', 'text' or 'class'.
 * @returns {{score: number, reason: string}|null} A freshly created descriptor
 *   for a recognised source, or null for any other value.
 */
function buildAndroidSelectorConfidence(source) {
    // Built per call so every caller receives its own descriptor object.
    const descriptors = new Map([
        ['resource_id', { score: 1, reason: 'resource_id' }],
        ['content_desc', { score: 0.9, reason: 'content_description' }],
        ['text', { score: 0.6, reason: 'text_match' }],
        ['class', { score: 0.35, reason: 'class_match' }],
    ]);
    const descriptor = descriptors.get(source);
    if (descriptor === undefined) {
        return null;
    }
    return descriptor;
}
395
/**
 * Picks the most reliable selector available for an Android node.
 *
 * Candidates are tried in descending order of reliability: resource id,
 * content description, visible text, then class name. The first usable
 * candidate is returned together with the confidence descriptor produced by
 * `buildAndroidSelectorConfidence` for its source.
 *
 * A candidate is skipped when it is falsy or a whitespace-only string, so a
 * blank attribute never becomes a selector value. The class candidate is also
 * skipped when it equals 'unknown', the placeholder `traverseNode` substitutes
 * for a node without a class attribute.
 *
 * @param {string|null} text - Visible text of the node.
 * @param {string|null} contentDescription - Accessibility description of the node.
 * @param {string|null} resourceId - Resource id of the node.
 * @param {string|null} className - Class name of the node.
 * @returns {{value: string, confidence: ({score: number, reason: string}|null)}|null}
 *   The chosen selector, or null when no candidate is usable.
 */
function buildAndroidSelector(text, contentDescription, resourceId, className) {
    const isBlank = (candidate) => !candidate || (typeof candidate === 'string' && candidate.trim().length === 0);
    // Ordered from most to least reliable; the first usable entry wins.
    const candidates = [
        [resourceId, 'resource_id'],
        [contentDescription, 'content_desc'],
        [text, 'text'],
        // 'unknown' is a placeholder for a missing class, not a real class name.
        [className === 'unknown' ? null : className, 'class'],
    ];
    for (const [value, source] of candidates) {
        if (!isBlank(value)) {
            return { value, confidence: buildAndroidSelectorConfidence(source) };
        }
    }
    return null;
}
406
/**
 * Builds the `semantic` descriptor attached to an Android element.
 *
 * @param {boolean} clickable - Clickable flag of the node; passed through unchanged.
 * @param {string} className - Class name of the node, in any letter case.
 * @returns {{is_clickable: boolean, is_container: boolean}} `is_container` is
 *   true when the class name contains one of the container keywords
 *   (recyclerview, scroll, layout, viewgroup, frame).
 */
function buildAndroidSemantic(clickable, className) {
    // Case-insensitive so raw names such as "android.widget.FrameLayout" are
    // recognised even when the caller has not lower-cased them.
    const containerPattern = /recyclerview|scroll|layout|viewgroup|frame/i;
    return {
        is_clickable: clickable,
        is_container: containerPattern.test(className),
    };
}
359
412
  function isSliderLikeAndroid(node) {
360
413
  const className = String(node['@_class'] || '').toLowerCase();
361
414
  return /seekbar|slider|range|progress/i.test(className);
@@ -426,22 +479,34 @@ export function traverseNode(node, elements, parentIndex = -1, depth = 0) {
426
479
  const text = node['@_text'] || null;
427
480
  const contentDescription = node['@_content-desc'] || null;
428
481
  const clickable = node['@_clickable'] === 'true';
482
+ const className = String(node['@_class'] || 'unknown');
429
483
  const bounds = parseBounds(node['@_bounds'] || '[0,0][0,0]');
430
484
  const state = extractAndroidState(node);
485
+ const role = inferAndroidRole(normalizeClassName(className));
486
+ const resourceId = typeof node['@_resource-id'] === 'string' && node['@_resource-id'].trim().length > 0 ? node['@_resource-id'] : null;
487
+ const stableId = resourceId ?? (typeof contentDescription === 'string' && contentDescription.trim().length > 0 ? contentDescription : null);
488
+ const testTag = stableId;
489
+ const selector = buildAndroidSelector(text, contentDescription, resourceId, normalizeClassName(className));
490
+ const semantic = buildAndroidSemantic(clickable, normalizeClassName(className));
431
491
  const isUseful = clickable || (text && text.length > 0) || (contentDescription && contentDescription.length > 0);
432
492
  if (isUseful) {
433
493
  const element = {
434
494
  text,
435
495
  contentDescription,
436
- type: node['@_class'] || 'unknown',
437
- resourceId: node['@_resource-id'] || null,
496
+ type: className,
497
+ resourceId,
438
498
  clickable,
439
499
  enabled: node['@_enabled'] === 'true',
440
500
  visible: true,
441
501
  bounds,
442
502
  center: getCenter(bounds),
443
503
  depth,
444
- state
504
+ state,
505
+ stable_id: stableId,
506
+ role,
507
+ test_tag: testTag,
508
+ selector,
509
+ semantic
445
510
  };
446
511
  if (parentIndex !== -1) {
447
512
  element.parentId = parentIndex;
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.26.0]
6
+ - Added the RFC-003 wait/synchronization contract with `snapshot_revision`, `captured_at_ms`, and `loading_state`
7
+ - Added `wait_for_ui_change` for stable in-place UI mutations
8
+ - Updated `get_ui_tree` and `capture_debug_snapshot` to surface snapshot metadata
9
+ - Emulator-validated the new UI-change flow against the Modul8 app
10
+
11
+ ## [0.25.1]
12
+ - Platform-native element identity metadata for UI targeting
13
+ - Hierarchy-independent element references
14
+ - Selector confidence metadata for reliability
15
+ - Structured fallback resolution strategy
16
+
5
17
  ## [0.25.0]
6
18
  - Introduces the `expect_state` tool and a standardized state object for UI elements across Android and iOS.
7
19
 
package/docs/ROADMAP.md CHANGED
@@ -26,11 +26,27 @@ Higher task success with fewer retries.
26
26
 
27
27
  ---
28
28
 
29
+ # Completed
30
+
31
+ These priorities are done and kept here for history:
32
+
33
+ - Priority 1 — Stronger State Verification
34
+ - Priority 2 — Richer Element Identity
35
+
36
+ Completion notes:
37
+
38
+ - State-aware verification is now implemented and wired through the tool surface.
39
+ - Platform-native element metadata and selector-confidence hints are now part of the runtime contract.
40
+
41
+ ---
42
+
29
43
  # Priority 1 — Stronger State Verification
30
44
 
31
45
  ## Why first
32
46
  Highest leverage improvement.
33
47
 
48
+ **Status:** Completed
49
+
34
50
  Most failures are not “can’t act,” they’re:
35
51
  - uncertain state
36
52
  - weak verification
@@ -68,6 +84,8 @@ Blocks or strengthens:
68
84
  ## Why second
69
85
  Directly reduces selector brittleness.
70
86
 
87
+ **Status:** Completed
88
+
71
89
  Improves:
72
90
  - targeting stability
73
91
  - repeatability
@@ -385,4 +403,4 @@ Still out of scope:
385
403
  - Recovery planning logic
386
404
  - Autonomous retry strategy
387
405
  - MCP-level agent orchestration
388
- - Autonomous recovery hinting (future consideration only)
406
+ - Autonomous recovery hinting (future consideration only)
@@ -0,0 +1,400 @@
1
+ # RFC-002: Platform-Native Element Metadata and Resolution Hints
2
+
3
+ Priority: 2
4
+ Depends on: RFC-001 (Stronger State Verification)
5
+
6
+ ---
7
+
8
+ # 1. Problem
9
+
10
+ Agents currently rely on brittle or inconsistent selectors when identifying UI elements.
11
+
12
+ This leads to:
13
+
14
+ - selector drift across UI updates
15
+ - failure to target correct elements in dynamic layouts
16
+ - retry loops due to ambiguous element matching
17
+ - inability to distinguish visually similar components
18
+
19
+ Current system limitations:
20
+
21
+ - weak or inconsistent element identifiers
22
+ - over-reliance on hierarchy position or text inference
23
+ - insufficient metadata for stable targeting
24
+
25
+ This RFC assumes stable identity may be derived from underlying platform accessibility or testing hooks, but does not assume a universal cross-platform stable identifier model.
26
+
27
+ This RFC does not assume that a universal or guaranteed stable_id exists across all platforms. Instead, it defines a best-effort model based on platform-native identifiers and developer-provided metadata, supplemented by resolution hints.
28
+
29
+ ---
30
+
31
+ # 2. Goals
32
+
33
+ This RFC introduces:
34
+
35
+ 1. Platform-native element identity metadata for UI targeting
36
+ 2. Hierarchy-independent element references
37
+ 3. Selector confidence metadata for reliability
38
+ 4. Structured fallback resolution strategy
39
+
40
+ Success goals:
41
+
42
+ - Increase element match success rate
43
+ - Reduce selector-related retries
44
+ - Improve robustness across UI updates and Compose-heavy layouts
45
+
46
+ ---
47
+
48
+ # 3. Non-Goals
49
+
50
+ This RFC does not:
51
+
52
+ - Modify state verification logic (RFC-001)
53
+ - Introduce gesture handling (future RFCs)
54
+ - Define synchronization/waiting behaviour (RFC-003)
55
+ - Add new interaction primitives beyond identification and selection
56
+
57
+ ---
58
+
59
+ # 4. Proposed Model
60
+
61
+ ## 4.1 Stable Element Identity
62
+
63
+ Each UI element SHOULD expose a stable identifier when available.
64
+
65
+ Preferred model:
66
+
67
+ ```json
68
+ {
69
+ "element_id": "wifi_toggle",
70
+ "stable_id": "settings_wifi_toggle",
71
+ "role": "switch"
72
+ }
73
+ ```
74
+
75
+ Rules:
76
+
77
+ - stable_id MUST be derived from platform-native or developer-provided identifiers when available (see Section 4.1.1)
78
+ - stable_id MAY remain consistent across UI renders where supported by the platform
79
+ - stable_id is a preferred targeting key when present, but not guaranteed to exist
80
+ - element_id is session-scoped and may change between snapshots
81
+
82
+ ---
83
+
84
+ ## 4.1.1 Stable ID Origin
85
+
86
+ stable_id MUST be derived from platform-provided or framework-provided identifiers when available.
87
+
88
+ Acceptable sources include:
89
+
90
+ - Android: resource-id, content-desc (when stable and explicitly set)
91
+ - iOS: accessibilityIdentifier
92
+ - Web: data-testid or equivalent testing attributes
93
+ - Compose: semantics properties or developer-assigned test tags
94
+
95
+ Rules:
96
+
97
+ - stable_id MUST NOT be heuristically generated from visual text alone
98
+ - stable_id SHOULD prefer developer-defined identifiers over inferred values
99
+ - If no reliable source exists, stable_id MUST be omitted (not fabricated)
100
+
101
+ ---
102
+
103
+ ## 4.1.2 Stable ID Collision Handling
104
+
105
+ If multiple elements share the same stable_id:
106
+
107
+ - system MUST treat this as a collision state
108
+ - all matching elements MUST be returned
109
+ - agent MUST disambiguate using role, label, or hierarchy context
110
+
111
+ Rules:
112
+
113
+ - collisions MUST NOT be silently resolved by system-level heuristics
114
+ - stable_id uniqueness is a best-effort constraint, not a guarantee
115
+
116
+ ---
117
+
118
+ ## 4.2 Selector Confidence Model
119
+
120
+ Each element MAY include confidence metadata for selection reliability.
121
+
122
+ ```json
123
+ {
124
+ "selector": "Text('WiFi')",
125
+ "confidence": 0.92
126
+ }
127
+ ```
128
+
129
+ Rules:
130
+
131
+ - Confidence reflects likelihood of correct element match
132
+ - Low confidence SHOULD trigger fallback resolution
133
+ - Confidence MUST NOT be treated as deterministic truth
134
+
135
+ ---
136
+
137
+ ## 4.2.1 Confidence API Exposure
138
+
139
+ Confidence metadata MUST be exposed as part of the element selector object.
140
+
141
+ Preferred shape:
142
+
143
+ ```json
144
+ {
145
+ "selector": {
146
+ "value": "Text('WiFi')",
147
+ "confidence": { "score": 0.92, "reason": "unique_text_match" }
148
+ }
150
+ }
151
+ ```
152
+
153
+ Rules:
154
+
155
+ - confidence.score MUST be a float between 0 and 1
156
+ - confidence.reason SHOULD indicate primary matching heuristic
157
+ - confidence MUST be attached to selector metadata, not state
158
+
159
+ This structure is expected to be present in both snapshot metadata and any downstream selector debugging output produced by the resolution engine.
160
+
161
+ ---
162
+
163
+ ## 4.3 Fallback Resolution Strategy
164
+
165
+ Resolution order MUST be:
166
+
167
+ 1. stable_id (if unique or disambiguated via collision handling)
168
+ 2. platform-native metadata match (role + label + test_tag)
169
+ 3. selector + confidence scoring
170
+ 4. structural hierarchy fallback
171
+ 5. text inference (last resort)
172
+
173
+ Agents MUST attempt these strategies in the order listed, falling back to the next strategy only when the current one fails to resolve the element.
174
+
175
+ ---
176
+
177
+ ## 4.4 Element Metadata Model (Platform-Aware)
178
+
179
+ For Compose and similar UI systems, elements MUST expose structured metadata rather than inferred semantic paths.
180
+
181
+ Preferred model:
182
+
183
+ ```json
184
+ {
185
+ "role": "button",
186
+ "label": "Save",
187
+ "text": "Save",
188
+ "test_tag": "settings_save_button",
189
+ "semantic": {
190
+ "is_clickable": true,
191
+ "is_container": false
192
+ }
193
+ }
194
+ ```
195
+
196
+ Rules:
197
+
198
+ - semantic_path MUST NOT be a required field
199
+ - platform-native metadata (test_tag / accessibility id) is preferred
200
+ - hierarchy information MAY be included but is not authoritative
201
+
202
+ ---
203
+
204
+ ## 4.5 Snapshot Response Contract (v1)
205
+
206
+ This RFC defines the expected structure of element metadata returned by the snapshot observation tool.
207
+
208
+ Each element in a snapshot MUST conform to the following shape:
209
+
210
+ ```json
211
+ {
212
+ "element_id": "string (session-scoped)",
213
+ "stable_id": "string (optional)",
214
+ "role": "string",
215
+ "label": "string (optional)",
216
+ "text": "string (optional)",
217
+ "test_tag": "string (optional)",
218
+ "selector": {
219
+ "value": "string",
220
+ "confidence": {
221
+ "score": "number (0.0-1.0)",
222
+ "reason": "string"
223
+ }
224
+ }
225
+ }
226
+ ```
227
+
228
+ Rules:
229
+
230
+ - element_id MUST be present and session-scoped
231
+ - stable_id MAY be present when provided by platform or developer metadata
232
+ - selector.confidence MUST be attached when selector is present
233
+ - test_tag SHOULD be preferred over inferred identifiers where available
234
+
235
+ Note:
236
+ This schema replaces ambiguous "snapshot response" references in prior sections and defines the canonical output contract for element identity and resolution metadata.
237
+
238
+ This contract defines the boundary between platform-derived metadata and resolution-engine-generated metadata, and is the single source of truth for all element identity fields used by downstream agents.
239
+
240
+
241
+ ## 4.6 API Surface Mapping
242
+
243
+ This section defines where each field in the Snapshot Response Contract is produced within the system.
244
+
245
+ ### 4.6.1 Snapshot Tool Responsibility
246
+
247
+ The snapshot observation tool (e.g. `observe_ui_snapshot`) is responsible for returning the raw UI tree enriched with platform-derived metadata.
248
+
249
+ It MUST return elements conforming to the Snapshot Response Contract (Section 4.5).
250
+
251
+ In the current codebase, this maps to the `observe_ui_snapshot` pipeline (or equivalent snapshot generation function), which MUST return data conforming to the SnapshotResponse TypeScript contract defined in Section 4.6.4.
252
+
253
+ ### 4.6.2 Field Origin Mapping
254
+
255
+ Each field in the element model has a defined source of truth:
256
+
257
+ - element_id:
258
+ - Origin: Snapshot session layer
259
+ - Responsibility: Generated per snapshot traversal
260
+ - Scope: Session-scoped only
261
+
262
+ - stable_id:
263
+ - Origin: Platform adapter layer (Android/iOS/Web/Compose)
264
+ - Responsibility: Extracted from platform-native identifiers
265
+ - Constraint: MUST NOT be generated by heuristics alone
266
+
267
+ - role:
268
+ - Origin: Accessibility tree / platform UI framework
269
+ - Responsibility: Semantic role mapping from native UI system
270
+
271
+ - label / text:
272
+ - Origin: Platform accessibility node
273
+ - Responsibility: Visible or accessible text content extraction
274
+
275
+ - test_tag:
276
+ - Origin: Developer-defined metadata (when available)
277
+ - Responsibility: Explicit testing identifiers (e.g. accessibilityIdentifier, data-testid)
278
+
279
+ - selector:
280
+ - Origin: Resolution engine (post-processing layer)
281
+ - Responsibility: Generated match expression for agent targeting
282
+
283
+ - selector.confidence:
284
+ - Origin: Resolution engine
285
+ - Responsibility: Heuristic confidence scoring of selector correctness
286
+
287
+ ### 4.6.3 Layer Separation Rule
288
+
289
+ The system MUST maintain strict separation between:
290
+
291
+ - Platform extraction layer (stable_id, role, label, test_tag)
292
+ - Resolution layer (selector, confidence)
293
+ - Session layer (element_id)
294
+
295
+ No layer is permitted to overwrite another layer's source of truth.
296
+
297
+ ---
298
+
299
+ ## 4.6.4 TypeScript Contract (Implementation Binding)
300
+
301
+ This section defines the concrete TypeScript-level contract used by the codebase for snapshot and element resolution.
302
+
303
+ These types represent the implementation binding for the Snapshot Response Contract (Section 4.5).
304
+
305
+ ```ts
306
+ export interface SelectorConfidence {
307
+ score: number; // 0.0 - 1.0
308
+ reason: string;
309
+ }
310
+
311
+ export interface ElementSelector {
312
+ value: string;
313
+ confidence: SelectorConfidence;
314
+ }
315
+
316
+ export interface ElementSnapshot {
317
+ element_id: string; // session-scoped
318
+ stable_id?: string;
319
+ role: string;
320
+ label?: string;
321
+ text?: string;
322
+ test_tag?: string;
323
+ selector?: ElementSelector;
324
+ }
325
+
326
+ export interface SnapshotResponse {
327
+ elements: ElementSnapshot[];
328
+ }
329
+ ```
330
+
331
+ Notes:
332
+
333
+ - This interface MUST align with the runtime snapshot implementation.
334
+ - This is the canonical mapping between RFC definition and codebase types.
335
+ - Any deviation in implementation MUST be reflected in a future RFC revision.
336
+
337
+ ---
338
+
339
+ # 5. Failure Modes
340
+
341
+ ## 5.1 Ambiguous match
342
+
343
+ If multiple elements match a selector:
344
+
345
+ - The snapshot MUST include all matching candidates in the underlying element tree or debug snapshot.
346
+ - Current action APIs (e.g. find_element / tap / wait_for_ui) MAY return a single best-effort match for compatibility.
347
+ - When ambiguity exists, systems SHOULD expose candidate alternatives via snapshot inspection or debug instrumentation.
348
+ - Future extensions MAY introduce explicit multi-candidate resolution APIs, but are not required for RFC-002 compliance.
349
+
350
+ ---
351
+
352
+ ## 5.2 Missing stable identity
353
+
354
+ If stable_id is unavailable:
355
+
356
+ - fallback hierarchy MUST be used
357
+ - selector confidence SHOULD reflect reduced certainty and include reason="no_stable_id"
358
+ - retries MAY be triggered
359
+
360
+ ---
361
+
362
+ ## 5.3 Layout drift
363
+
364
+ If UI structure changes:
365
+
366
+ - stable_id remains valid as long as the underlying platform identifier is preserved
367
+ - structural selectors may degrade
368
+ - confidence SHOULD reflect uncertainty
369
+
370
+ ---
371
+
372
+ # 6. Acceptance Criteria
373
+
374
+ RFC-002 is complete when:
375
+
376
+ - platform-native identity metadata (stable_id, test_tag, role) is present where available
377
+ - selector confidence metadata is present and conforms to Snapshot Response Contract (Section 4.5)
378
+ - fallback resolution strategy is implemented
379
+ - element match success rate improves on benchmark flows
380
+ - selector-related retries are reduced
381
+
382
+ ---
383
+
384
+ # 7. Success Metrics
385
+
386
+ - Higher element resolution match rate using platform-native metadata + confidence hints
387
+ - Reduced selector retries
388
+ - Lower failure rate on UI updates
389
+ - Improved stability in Compose UI trees
390
+
391
+ ---
392
+
393
+ # 8. Out of Scope
394
+
395
+ - State verification (RFC-001)
396
+ - Wait/synchronization (RFC-003)
397
+ - Gestures (future RFCs)
398
+ - Action tracing
399
+
400
+ This RFC is scoped as a metadata and resolution hint layer. It does not guarantee stable identity across all platforms, but standardises how identity signals are exposed and consumed.