mobile-debug-mcp 0.26.4 → 0.26.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -226,6 +226,13 @@ export class ToolsInteract {
226
226
  const role = ToolsInteract._normalize(el.role ?? '');
227
227
  return !!el.state?.value_range || /slider|seekbar|stepper|adjustable|range/.test(type) || /slider|seekbar|stepper|adjustable|range/.test(role);
228
228
  }
229
+ static _isSemanticActionable(el) {
230
+ if (!el?.semantic)
231
+ return false;
232
+ if (el.semantic.adjustable)
233
+ return true;
234
+ return Array.isArray(el.semantic.supported_actions) && el.semantic.supported_actions.length > 0;
235
+ }
229
236
  static _readNumericControlValue(el, property) {
230
237
  if (!el?.state)
231
238
  return null;
@@ -318,11 +325,11 @@ export class ToolsInteract {
318
325
  static _resolveActionableAncestor(elements, chosen) {
319
326
  if (!chosen)
320
327
  return null;
321
- if (chosen.el.clickable || chosen.el.focusable)
328
+ if (chosen.el.clickable || chosen.el.focusable || ToolsInteract._isSemanticActionable(chosen.el))
322
329
  return chosen;
323
330
  let current = chosen;
324
331
  let safety = 0;
325
- while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable) && current.el.parentId !== undefined && current.el.parentId !== null) {
332
+ while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) && current.el.parentId !== undefined && current.el.parentId !== null) {
326
333
  const parentId = current.el.parentId;
327
334
  let parentIndex = null;
328
335
  if (typeof parentId === 'number')
@@ -331,7 +338,7 @@ export class ToolsInteract {
331
338
  parentIndex = Number(parentId);
332
339
  if (parentIndex !== null && elements[parentIndex]) {
333
340
  current = { el: elements[parentIndex], idx: parentIndex };
334
- if (current.el.clickable || current.el.focusable)
341
+ if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el))
335
342
  return current;
336
343
  }
337
344
  else if (typeof parentId === 'string') {
@@ -339,7 +346,7 @@ export class ToolsInteract {
339
346
  if (foundIndex === -1)
340
347
  break;
341
348
  current = { el: elements[foundIndex], idx: foundIndex };
342
- if (current.el.clickable || current.el.focusable)
349
+ if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el))
343
350
  return current;
344
351
  }
345
352
  else {
@@ -355,7 +362,7 @@ export class ToolsInteract {
355
362
  let bestArea = Infinity;
356
363
  for (let i = 0; i < elements.length; i++) {
357
364
  const el = elements[i];
358
- if (!el || !(el.clickable || el.focusable))
365
+ if (!el || !(el.clickable || el.focusable || ToolsInteract._isSemanticActionable(el)))
359
366
  continue;
360
367
  const bounds = ToolsInteract._normalizeBounds(el.bounds);
361
368
  if (!bounds)
@@ -904,11 +911,13 @@ export class ToolsInteract {
904
911
  if (r <= l || b <= t)
905
912
  return null;
906
913
  // Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
907
- const interactable = !!(el.clickable || el.enabled || el.focusable);
914
+ const interactable = !!(el.clickable || el.enabled || el.focusable || ToolsInteract._isSemanticActionable(el));
908
915
  const text = normalize(el.text ?? el.label ?? el.value ?? '');
909
916
  const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '');
910
917
  const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '');
911
918
  const className = normalize(el.type ?? el.class ?? '');
919
+ const semanticRole = normalize(el.semantic?.semantic_role ?? '');
920
+ const semanticActions = Array.isArray(el.semantic?.supported_actions) ? el.semantic.supported_actions.map((action) => normalize(action)).filter(Boolean) : [];
912
921
  let score = 0;
913
922
  let reason = 'best_scoring_candidate';
914
923
  if (exact) {
@@ -959,6 +968,30 @@ export class ToolsInteract {
959
968
  reason = 'partial_class_match';
960
969
  }
961
970
  }
971
+ if (!exact) {
972
+ if (!score && semanticRole && semanticRole.includes(q)) {
973
+ score = 0.5;
974
+ reason = 'semantic_role_match';
975
+ }
976
+ if (semanticActions.some((action) => action.includes(q))) {
977
+ score = Math.max(score, score > 0 ? 0.65 : 0.6);
978
+ reason = 'semantic_action_match';
979
+ }
980
+ if (score === 0 && el.semantic?.adjustable && /slider|stepper|dropdown|segment|control|adjust/.test(q)) {
981
+ score = 0.45;
982
+ reason = 'semantic_control_match';
983
+ }
984
+ }
985
+ else {
986
+ if (!score && semanticRole && semanticRole === q) {
987
+ score = 0.5;
988
+ reason = 'semantic_role_match';
989
+ }
990
+ if (semanticActions.some((action) => action === q)) {
991
+ score = Math.max(score, score > 0 ? 0.65 : 0.6);
992
+ reason = 'semantic_action_match';
993
+ }
994
+ }
962
995
  if (score > 0 && interactable)
963
996
  score += 0.05;
964
997
  if (score <= 0)
@@ -1087,7 +1120,7 @@ export class ToolsInteract {
1087
1120
  interactable: true
1088
1121
  };
1089
1122
  }
1090
- if (best && !(best.el.clickable || best.el.focusable)) {
1123
+ if (best && !(best.el.clickable || best.el.focusable || ToolsInteract._isSemanticActionable(best.el))) {
1091
1124
  const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen);
1092
1125
  if (nearbyActionable) {
1093
1126
  best = {
@@ -52,6 +52,12 @@ function normalizeIOSType(value) {
52
52
  function inferIOSRole(type, traits) {
53
53
  if (/slider|adjustable/.test(type) || traits.some((trait) => /adjustable|slider/.test(trait)))
54
54
  return 'slider';
55
+ if (/stepper/.test(type))
56
+ return 'stepper';
57
+ if (/picker|pop up button|dropdown/.test(type))
58
+ return 'dropdown';
59
+ if (/segmented control/.test(type))
60
+ return 'segmented_control';
55
61
  if (/button/.test(type) || traits.some((trait) => /button/.test(trait)))
56
62
  return 'button';
57
63
  if (/cell/.test(type))
@@ -99,11 +105,49 @@ function buildIOSSelector(type, label, value, stableId) {
99
105
  return { value: type, confidence: buildIOSSelectorConfidence('type') };
100
106
  return null;
101
107
  }
102
- function buildIOSSemantic(type, traits) {
103
- return {
108
+ function buildIOSSemantic(type, traits, role, value) {
109
+ const semantic = {
104
110
  is_clickable: traits.includes("UIAccessibilityTraitButton") || /adjustable|slider/.test(type) || type === "Button" || type === "Cell",
105
111
  is_container: /window|application|group|scroll view|collection view/.test(type)
106
112
  };
113
+ if (role === 'slider') {
114
+ semantic.semantic_role = 'slider';
115
+ semantic.adjustable = true;
116
+ semantic.supported_actions = ['adjust'];
117
+ semantic.state_shape = 'continuous';
118
+ }
119
+ else if (role === 'stepper') {
120
+ semantic.semantic_role = 'stepper';
121
+ semantic.adjustable = true;
122
+ semantic.supported_actions = ['increment', 'decrement'];
123
+ semantic.state_shape = 'discrete';
124
+ }
125
+ else if (role === 'dropdown') {
126
+ semantic.semantic_role = 'dropdown';
127
+ semantic.supported_actions = ['tap', 'expand'];
128
+ semantic.state_shape = 'semantic';
129
+ }
130
+ else if (role === 'segmented_control') {
131
+ semantic.semantic_role = 'segmented_control';
132
+ semantic.supported_actions = ['tap'];
133
+ semantic.state_shape = 'discrete';
134
+ }
135
+ else if (traits.some((trait) => /adjustable|slider/i.test(trait)) || /adjustable|slider/.test(type)) {
136
+ semantic.semantic_role = 'custom_adjustable';
137
+ semantic.adjustable = true;
138
+ semantic.supported_actions = ['adjust'];
139
+ semantic.state_shape = 'continuous';
140
+ }
141
+ else if (semantic.is_clickable) {
142
+ semantic.supported_actions = ['tap'];
143
+ }
144
+ if (semantic.state_shape === undefined && semantic.adjustable && value !== null) {
145
+ const numericValue = parseIOSNumber(value);
146
+ if (numericValue !== null && numericValue >= 0 && numericValue <= 1) {
147
+ semantic.state_shape = 'continuous';
148
+ }
149
+ }
150
+ return semantic;
107
151
  }
108
152
  function isIOSAdjustable(node, type, traits) {
109
153
  return /slider|adjustable|stepper/i.test(type) || traits.some((trait) => /adjustable|slider/i.test(trait));
@@ -162,8 +206,8 @@ export function traverseIDBNode(node, elements, parentIndex = -1, depth = 0) {
162
206
  const normalizedType = normalizeIOSType(type);
163
207
  const stableId = getIOSStableId(node);
164
208
  const selector = buildIOSSelector(type, label, value, stableId);
165
- const semantic = buildIOSSemantic(normalizedType, traits);
166
209
  const role = inferIOSRole(normalizedType, traits);
210
+ const semantic = buildIOSSemantic(normalizedType, traits, role, value);
167
211
  const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
168
212
  const isUseful = clickable || (label && label.length > 0) || (value && value.length > 0) || type === "Application" || type === "Window";
169
213
  if (isUseful) {
@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
6
6
  export { wrapResponse, toolDefinitions, handleToolCall };
7
7
  export const serverInfo = {
8
8
  name: 'mobile-debug-mcp',
9
- version: '0.26.4'
9
+ version: '0.26.5'
10
10
  };
11
11
  export function createServer() {
12
12
  const server = new Server(serverInfo, {
@@ -362,6 +362,12 @@ function normalizeClassName(value) {
362
362
  function inferAndroidRole(className) {
363
363
  if (/seekbar|slider/.test(className))
364
364
  return 'slider';
365
+ if (/stepper|numberpicker/.test(className))
366
+ return 'stepper';
367
+ if (/spinner|dropdown/.test(className))
368
+ return 'dropdown';
369
+ if (/segment|tablayout/.test(className))
370
+ return 'segmented_control';
365
371
  if (/switch|toggle/.test(className))
366
372
  return 'switch';
367
373
  if (/checkbox/.test(className))
@@ -403,11 +409,37 @@ function buildAndroidSelector(text, contentDescription, resourceId, className) {
403
409
  return { value: className, confidence: buildAndroidSelectorConfidence('class') };
404
410
  return null;
405
411
  }
406
- function buildAndroidSemantic(clickable, className) {
407
- return {
412
+ function buildAndroidSemantic(clickable, className, role) {
413
+ const semantic = {
408
414
  is_clickable: clickable,
409
415
  is_container: /recyclerview|scroll|layout|viewgroup|frame/.test(className)
410
416
  };
417
+ if (role === 'slider') {
418
+ semantic.semantic_role = 'slider';
419
+ semantic.adjustable = true;
420
+ semantic.supported_actions = ['adjust'];
421
+ semantic.state_shape = 'continuous';
422
+ }
423
+ else if (role === 'stepper') {
424
+ semantic.semantic_role = 'stepper';
425
+ semantic.adjustable = true;
426
+ semantic.supported_actions = ['increment', 'decrement'];
427
+ semantic.state_shape = 'discrete';
428
+ }
429
+ else if (role === 'dropdown') {
430
+ semantic.semantic_role = 'dropdown';
431
+ semantic.supported_actions = ['tap', 'expand'];
432
+ semantic.state_shape = 'semantic';
433
+ }
434
+ else if (role === 'segmented_control') {
435
+ semantic.semantic_role = 'segmented_control';
436
+ semantic.supported_actions = ['tap'];
437
+ semantic.state_shape = 'discrete';
438
+ }
439
+ else if (clickable) {
440
+ semantic.supported_actions = ['tap'];
441
+ }
442
+ return semantic;
411
443
  }
412
444
  function isSliderLikeAndroid(node) {
413
445
  const className = String(node['@_class'] || '').toLowerCase();
@@ -487,7 +519,7 @@ export function traverseNode(node, elements, parentIndex = -1, depth = 0) {
487
519
  const stableId = resourceId ?? (typeof contentDescription === 'string' && contentDescription.trim().length > 0 ? contentDescription : null);
488
520
  const testTag = stableId;
489
521
  const selector = buildAndroidSelector(text, contentDescription, resourceId, normalizeClassName(className));
490
- const semantic = buildAndroidSemantic(clickable, normalizeClassName(className));
522
+ const semantic = buildAndroidSemantic(clickable, normalizeClassName(className), role);
491
523
  const isUseful = clickable || (text && text.length > 0) || (contentDescription && contentDescription.length > 0);
492
524
  if (isUseful) {
493
525
  const element = {
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.26.5]
6
+ - Introduces a semantic control model to improve the identification of, and interaction with, custom and composite UI controls.
7
+
5
8
  ## [0.26.4]
6
9
  - Improved slider accuracy
7
10
 
package/docs/ROADMAP.md CHANGED
@@ -44,6 +44,7 @@ Higher task success with fewer retries.
44
44
 
45
45
  - Stronger State Verification — Complete (Foundational verification layer shipped)
46
46
  - Richer Element Identity — Complete (Identity and selector confidence foundations shipped)
47
+ - Better Compose / Custom Control Semantics — Complete (Semantic role enrichment and custom-adjustable inference shipped)
47
48
 
48
49
  ## Current Focus
49
50
 
@@ -54,7 +55,6 @@ Higher task success with fewer retries.
54
55
 
55
56
  - Environment Auto-Configuration and Toolchain Discovery
56
57
  - Adjustable Control Support
57
- - Better Compose / Custom Control Semantics
58
58
  - Signal-Oriented Diagnostic Filtering
59
59
  - Long Press Gesture
60
60
  # Stronger State Verification
@@ -435,7 +435,7 @@ Strengthens:
435
435
  ## Rationale
436
436
  Higher priority after agent feedback exposed custom control semantics as a core reliability gap, not a later optimization.
437
437
 
438
- **Status:** Spec Ready
438
+ **Status:** Completed
439
439
 
440
440
  Semantics become more useful once:
441
441
  - identity is stronger
@@ -446,8 +446,8 @@ Semantics become more useful once:
446
446
 
447
447
  ## Scope
448
448
  - Composite control traits
449
- - Control role enrichment (adjustable, expandable, selectable_group)
450
- - Interaction contracts metadata
449
+ - Control role enrichment (`slider`, `stepper`, `dropdown`, `segmented_control`, `custom_adjustable`)
450
+ - Interaction contract metadata (`supported_actions`, `adjustable`, `state_shape`)
451
451
  - Custom widget gesture affordance hints
452
452
  - Semantic confidence annotations
453
453
  - Compose-aware selectors for waits (merged semantics and element relationships)
@@ -457,7 +457,7 @@ High.
457
457
 
458
458
  ## Exit Criteria
459
459
  - Semantic traits implemented for major custom control classes
460
- - Interaction contracts surfaced in snapshot model
460
+ - Interaction contracts surfaced in observation and resolution paths
461
461
  - Confidence model defined for derived semantics
462
462
  - Custom control manipulation success validated in benchmark flows
463
463
 
@@ -0,0 +1,238 @@
1
+ # RFC 009 — Semantic Control Modeling for Custom and Composite Controls
2
+
3
+ ## 1. Summary
4
+
5
+ This RFC defines a semantic control model for identifying, exposing, and interacting with custom and composite controls that are poorly represented through raw accessibility or platform UI trees.
6
+
7
+ It introduces semantic enrichment for controls such as:
8
+
9
+ - sliders
10
+ - steppers
11
+ - segmented controls
12
+ - dropdowns
13
+ - Compose/SwiftUI custom widgets
14
+ - composite gesture-driven controls
15
+
16
+ The goal is to improve target resolution, control interaction, and verification reliability for controls whose actionable semantics are not fully captured by raw snapshots.
17
+
18
+ ---
19
+
20
+ ## 2. Problem Statement
21
+
22
+ Current interaction logic works well when platform semantics are explicit.
23
+
24
+ It is weaker when controls appear as:
25
+
26
+ - generic container views
27
+ - unlabeled clickable wrappers
28
+ - nested composite controls
29
+ - custom Compose/SwiftUI components with weak accessibility exposure
30
+
31
+ Observed problems include:
32
+
33
+ - controls resolving as parent containers rather than actionable targets
34
+ - missing slider-like controls in snapshots
35
+ - weak distinction between discrete vs continuous controls
36
+ - inability to infer supported interactions from control structure
37
+ - unreliable verification of control state
38
+
39
+ This causes brittle automation and coordinate fallback behavior.
40
+
41
+ ---
42
+
43
+ ## 3. Goals
44
+
45
+ This RFC introduces a semantic layer that MUST:
46
+
47
+ - infer higher-level control semantics from raw UI structures
48
+ - enrich snapshots with semantic control metadata
49
+ - improve actionable target selection (RFC 007)
50
+ - improve adjustable control handling (RFC 008)
51
+ - improve verification for semantic control state
52
+ - reduce coordinate fallback usage
53
+
54
+ ---
55
+
56
+ ## 4. Non-Goals
57
+
58
+ This RFC does NOT define:
59
+
60
+ - replacement of raw accessibility trees
61
+ - ML-based semantic inference
62
+ - probabilistic control classification
63
+ - new gesture primitives
64
+ - autonomous planning behavior
65
+
66
+ Semantic modeling is deterministic enrichment layered over raw signals.
67
+
68
+ ---
69
+
70
+ ## 5. Runtime Surfaces
71
+
72
+ This RFC applies to existing runtime surfaces:
73
+
74
+ - findElementHandler
75
+ - _resolveActionableAncestor
76
+ - _buildResolvedElement
77
+ - tapElementHandler
78
+ - scrollToElementHandler
79
+
80
+ Semantic modeling augments these surfaces; it does not replace them.
81
+
82
+ ---
83
+
84
+ ## 6. Semantic Control Model
85
+
86
+ Controls MAY progressively expose semantic metadata such as:
87
+
88
+ ```ts
89
+ interface SemanticControl {
90
+ semantic_role:
91
+ | "slider"
92
+ | "stepper"
93
+ | "dropdown"
94
+ | "segmented_control"
95
+ | "custom_adjustable"
96
+ | "composite_control";
97
+
98
+ supported_actions: string[];
99
+
100
+ adjustable: boolean;
101
+
102
+ state_shape:
103
+ | "continuous"
104
+ | "discrete"
105
+ | "semantic";
106
+ }
107
+ ```
108
+
109
+ The control roles above represent an expected semantic model, not a claim that all such control classes are equally surfaced in the current runtime.
110
+
111
+ Current runtime support may initially expose simpler semantic signals such as:
112
+ - role hints
113
+ - semantic labels
114
+ - value_range metadata
115
+ - selector confidence or related resolution signals
116
+
117
+ Richer control roles are progressive extensions over time.
118
+
119
+ ---
120
+
121
+ ## 7. Semantic Inference Rules
122
+
123
+ Inference MAY use signals such as:
124
+
125
+ - accessibility role hints
126
+ - value_range metadata
127
+ - child composition patterns
128
+ - repeated selectable child structures
129
+ - platform traits (adjustable, selected, expanded)
130
+ - known control heuristics
131
+
132
+ Inference MUST be deterministic and explainable.
133
+
134
+ Raw signals always win on conflict.
135
+
136
+ Semantic inference confidence, where present, is advisory only and MUST NOT be treated as executable truth.
137
+
138
+ ---
139
+
140
+ ## 8. Resolution Integration (RFC 007)
141
+
142
+ Semantic metadata SHOULD improve target resolution by:
143
+
144
+ - preferring actionable child controls over generic containers
145
+ - promoting semantically actionable descendants
146
+ - disambiguating among multiple candidate matches
147
+
148
+ Semantic signals are advisory enrichment, not executable truth.
149
+
150
+ ---
151
+
152
+ ## 9. Adjustable Control Integration (RFC 008)
153
+
154
+ Where adjustable=true:
155
+
156
+ Semantic metadata MAY expose:
157
+
158
+ - supported adjustment mode
159
+ - discrete vs continuous state model
160
+ - expected verification strategy
161
+
162
+ This improves convergence for value-setting workflows.
163
+
164
+ ---
165
+
166
+ ## 10. Verification Integration
167
+
168
+ Verification MAY use semantic control metadata to improve:
169
+
170
+ - value-state verification
171
+ - discrete selection verification
172
+ - semantic-state checks
173
+
174
+ Formal verification still remains governed by RFC 005.
175
+
176
+ ---
177
+
178
+ ## 11. Output Contract (Progressive Extension)
179
+
180
+ Current runtime may expose partial semantic outputs.
181
+
182
+ Expected progressive shape (future extension model):
183
+
184
+ ```ts
185
+ interface SemanticResolutionMetadata {
186
+ semantic_role?: string;
187
+ supported_actions?: string[];
188
+ adjustable?: boolean;
189
+ state_shape?: string;
190
+ confidence?: "low" | "medium" | "high";
191
+ }
192
+ ```
193
+
194
+ These fields are progressive enrichment and MUST NOT be assumed universally present.
195
+
196
+ Implementations MAY expose only a subset of this model initially. Presence of a richer semantic role does not imply universal runtime support for all control classes.
197
+
198
+ ---
199
+
200
+ ## 12. Failure Modes
201
+
202
+ Semantic modeling MAY fail due to:
203
+
204
+ - insufficient raw signals
205
+ - ambiguous composite structures
206
+ - conflicting heuristics
207
+
208
+ When semantic inference confidence is insufficient:
209
+
210
+ - raw resolution flow MUST continue
211
+ - semantic fields MAY be omitted
212
+ - semantic guessing SHOULD NOT be forced
213
+
214
+ ---
215
+
216
+ ## 13. Success Metrics
217
+
218
+ - fewer coordinate fallbacks
219
+ - improved control discovery
220
+ - improved actionable-target precision
221
+ - improved slider/custom-control automation success
222
+ - reduced semantic mismatch failures (RFC 010)
223
+
224
+ ---
225
+
226
+ ## 14. Relationship to Other RFCs
227
+
228
+ - RFC 005 — verification correctness model
229
+ - RFC 006 — runtime action execution
230
+ - RFC 007 — target resolution
231
+ - RFC 008 — adjustable control support
232
+ - RFC 010 — recovery uses semantic mismatch failures defined here
233
+
234
+ ---
235
+
236
+ ## 15. Summary
237
+
238
+ This RFC adds deterministic semantic control enrichment for custom and composite controls, improving resolution, interaction reliability, and verification while remaining layered over existing runtime signals.
@@ -244,6 +244,7 @@ Raw layer contents include:
244
244
  - UI hierarchy or accessibility tree
245
245
  - normalized readable element state where exposed by the platform
246
246
  - platform-native identity hints such as stable identifiers, roles, and test tags
247
+ - semantic control metadata when derivable from the raw tree, including `semantic_role`, `supported_actions`, `adjustable`, and `state_shape`
247
248
  - snapshot metadata such as `snapshot_revision` and `captured_at_ms`
248
249
  - `loading_state` when a reliable loading signal is detectable
249
250
  - screenshot when available
@@ -292,6 +293,27 @@ Semantic output MUST NOT replace classification or verification.
292
293
 
293
294
  Classification remains a supplementary, post-action interpretation mechanism.
294
295
 
296
+ ### 9.4 Semantic Control Metadata
297
+
298
+ When present, semantic control metadata MAY include:
299
+
300
+ ```ts
301
+ {
302
+ semantic_role?: 'slider' | 'stepper' | 'dropdown' | 'segmented_control' | 'custom_adjustable' | 'composite_control' | null,
303
+ supported_actions?: string[] | null,
304
+ adjustable?: boolean | null,
305
+ state_shape?: 'continuous' | 'discrete' | 'semantic' | null
306
+ }
307
+ ```
308
+
309
+ Rules:
310
+
311
+ - semantic control metadata is derived and best-effort
312
+ - raw platform roles and state remain authoritative on conflict
313
+ - `adjustable` MAY be inferred from platform traits when no known role matches
314
+ - `state_shape` MUST respect known control roles before value-based heuristics
315
+ - `supported_actions` are hints only and MUST NOT be treated as guaranteed executable actions
316
+
295
317
  ## 10. Classification
296
318
 
297
319
  Tool: `classify_action_outcome`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.26.4",
3
+ "version": "0.26.5",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -16,6 +16,7 @@ import type {
16
16
  ExpectStateResponse,
17
17
  ExpectScreenResponse,
18
18
  WaitForUIChangeResponse,
19
+ UIElementSemanticMetadata,
19
20
  UIElementState,
20
21
  TapElementResponse
21
22
  } from '../types.js'
@@ -48,7 +49,7 @@ interface UiElement {
48
49
  role?: string | null
49
50
  test_tag?: string | null
50
51
  selector?: { value: string | null, confidence: { score: number, reason: string } | null } | null
51
- semantic?: { is_clickable: boolean, is_container: boolean } | null
52
+ semantic?: UIElementSemanticMetadata | null
52
53
  }
53
54
 
54
55
  interface ResolvedUiElementContext {
@@ -342,6 +343,12 @@ export class ToolsInteract {
342
343
  return !!el.state?.value_range || /slider|seekbar|stepper|adjustable|range/.test(type) || /slider|seekbar|stepper|adjustable|range/.test(role)
343
344
  }
344
345
 
346
+ private static _isSemanticActionable(el: UiElement | null): boolean {
347
+ if (!el?.semantic) return false
348
+ if (el.semantic.adjustable) return true
349
+ return Array.isArray(el.semantic.supported_actions) && el.semantic.supported_actions.length > 0
350
+ }
351
+
345
352
  private static _readNumericControlValue(el: UiElement | null, property: string): number | null {
346
353
  if (!el?.state) return null
347
354
  const stateValue = el.state[property as keyof UIElementState]
@@ -460,12 +467,12 @@ export class ToolsInteract {
460
467
 
461
468
  private static _resolveActionableAncestor(elements: UiElement[], chosen: { el: UiElement, idx: number } | null): { el: UiElement, idx: number } | null {
462
469
  if (!chosen) return null
463
- if (chosen.el.clickable || chosen.el.focusable) return chosen
470
+ if (chosen.el.clickable || chosen.el.focusable || ToolsInteract._isSemanticActionable(chosen.el)) return chosen
464
471
 
465
472
  let current = chosen
466
473
  let safety = 0
467
474
 
468
- while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable) && current.el.parentId !== undefined && current.el.parentId !== null) {
475
+ while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) && current.el.parentId !== undefined && current.el.parentId !== null) {
469
476
  const parentId = current.el.parentId
470
477
  let parentIndex: number | null = null
471
478
 
@@ -474,12 +481,12 @@ export class ToolsInteract {
474
481
 
475
482
  if (parentIndex !== null && elements[parentIndex]) {
476
483
  current = { el: elements[parentIndex], idx: parentIndex }
477
- if (current.el.clickable || current.el.focusable) return current
484
+ if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) return current
478
485
  } else if (typeof parentId === 'string') {
479
486
  const foundIndex = elements.findIndex((el) => el.resourceId === parentId || el.id === parentId)
480
487
  if (foundIndex === -1) break
481
488
  current = { el: elements[foundIndex], idx: foundIndex }
482
- if (current.el.clickable || current.el.focusable) return current
489
+ if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) return current
483
490
  } else {
484
491
  break
485
492
  }
@@ -496,7 +503,7 @@ export class ToolsInteract {
496
503
 
497
504
  for (let i = 0; i < elements.length; i++) {
498
505
  const el = elements[i]
499
- if (!el || !(el.clickable || el.focusable)) continue
506
+ if (!el || !(el.clickable || el.focusable || ToolsInteract._isSemanticActionable(el))) continue
500
507
  const bounds = ToolsInteract._normalizeBounds(el.bounds)
501
508
  if (!bounds) continue
502
509
  const [pl, pt, pr, pb] = bounds
@@ -1196,12 +1203,14 @@ export class ToolsInteract {
1196
1203
  const [l,t,r,b] = bounds
1197
1204
  if (r <= l || b <= t) return null
1198
1205
  // Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
1199
- const interactable = !!(el.clickable || el.enabled || el.focusable)
1206
+ const interactable = !!(el.clickable || el.enabled || el.focusable || ToolsInteract._isSemanticActionable(el))
1200
1207
 
1201
1208
  const text = normalize(el.text ?? el.label ?? el.value ?? '')
1202
1209
  const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '')
1203
1210
  const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '')
1204
1211
  const className = normalize(el.type ?? el.class ?? '')
1212
+ const semanticRole = normalize(el.semantic?.semantic_role ?? '')
1213
+ const semanticActions = Array.isArray(el.semantic?.supported_actions) ? el.semantic.supported_actions.map((action) => normalize(action)).filter(Boolean) : []
1205
1214
 
1206
1215
  let score = 0
1207
1216
  let reason = 'best_scoring_candidate'
@@ -1243,6 +1252,29 @@ export class ToolsInteract {
1243
1252
  reason = 'partial_class_match'
1244
1253
  }
1245
1254
  }
1255
+ if (!exact) {
1256
+ if (!score && semanticRole && semanticRole.includes(q)) {
1257
+ score = 0.5
1258
+ reason = 'semantic_role_match'
1259
+ }
1260
+ if (semanticActions.some((action) => action.includes(q))) {
1261
+ score = Math.max(score, score > 0 ? 0.65 : 0.6)
1262
+ reason = 'semantic_action_match'
1263
+ }
1264
+ if (score === 0 && el.semantic?.adjustable && /slider|stepper|dropdown|segment|control|adjust/.test(q)) {
1265
+ score = 0.45
1266
+ reason = 'semantic_control_match'
1267
+ }
1268
+ } else {
1269
+ if (!score && semanticRole && semanticRole === q) {
1270
+ score = 0.5
1271
+ reason = 'semantic_role_match'
1272
+ }
1273
+ if (semanticActions.some((action) => action === q)) {
1274
+ score = Math.max(score, score > 0 ? 0.65 : 0.6)
1275
+ reason = 'semantic_action_match'
1276
+ }
1277
+ }
1246
1278
  if (score > 0 && interactable) score += 0.05
1247
1279
  if (score <= 0) return null
1248
1280
  return { el, idx, score, reason, interactable }
@@ -1352,7 +1384,7 @@ export class ToolsInteract {
1352
1384
  }
1353
1385
  }
1354
1386
 
1355
- if (best && !(best.el.clickable || best.el.focusable)) {
1387
+ if (best && !(best.el.clickable || best.el.focusable || ToolsInteract._isSemanticActionable(best.el))) {
1356
1388
  const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen)
1357
1389
  if (nearbyActionable) {
1358
1390
  best = {
@@ -73,6 +73,9 @@ function normalizeIOSType(value: unknown): string {
73
73
 
74
74
  function inferIOSRole(type: string, traits: string[]): string | null {
75
75
  if (/slider|adjustable/.test(type) || traits.some((trait) => /adjustable|slider/.test(trait))) return 'slider'
76
+ if (/stepper/.test(type)) return 'stepper'
77
+ if (/picker|pop up button|dropdown/.test(type)) return 'dropdown'
78
+ if (/segmented control/.test(type)) return 'segmented_control'
76
79
  if (/button/.test(type) || traits.some((trait) => /button/.test(trait))) return 'button'
77
80
  if (/cell/.test(type)) return 'cell'
78
81
  if (/switch/.test(type)) return 'switch'
@@ -113,11 +116,47 @@ function buildIOSSelector(type: string, label: string | null, value: string | nu
113
116
  return null
114
117
  }
115
118
 
// buildIOSSemantic: builds the UIElementSemanticMetadata object for one iOS node.
// The new version takes two extra arguments (`role`, `value`) and, instead of returning
// the literal directly, fills in semantic_role / adjustable / supported_actions /
// state_shape on top of the existing is_clickable / is_container flags.
//   type   - element type string; the regexes below match lowercase words
//   traits - accessibility trait names of the node
//   role   - role string or null; the caller in this diff passes the inferIOSRole result
//   value  - raw value string or null; only read by the trailing state_shape block
// Returns the populated metadata object.
116
- function buildIOSSemantic(type: string, traits: string[]): UIElementSemanticMetadata {
117
- return {
119
+ function buildIOSSemantic(type: string, traits: string[], role: string | null, value: string | null): UIElementSemanticMetadata {
120
+ const semantic: UIElementSemanticMetadata = {
// NOTE(review): the caller in this diff passes the normalized type, and the regexes in
// this function expect lowercase text; if normalization lowercases the type, the
// capitalized `type === "Button"` / `type === "Cell"` comparisons can never match - confirm.
118
121
  is_clickable: traits.includes("UIAccessibilityTraitButton") || /adjustable|slider/.test(type) || type === "Button" || type === "Cell",
119
122
  is_container: /window|application|group|scroll view|collection view/.test(type)
120
123
  }
124
+
// Role-driven enrichment: the first matching branch wins. Slider and stepper are marked
// adjustable; dropdown and segmented_control only receive a role, actions and a state shape.
125
+ if (role === 'slider') {
126
+ semantic.semantic_role = 'slider'
127
+ semantic.adjustable = true
128
+ semantic.supported_actions = ['adjust']
129
+ semantic.state_shape = 'continuous'
130
+ } else if (role === 'stepper') {
131
+ semantic.semantic_role = 'stepper'
132
+ semantic.adjustable = true
133
+ semantic.supported_actions = ['increment', 'decrement']
134
+ semantic.state_shape = 'discrete'
135
+ } else if (role === 'dropdown') {
136
+ semantic.semantic_role = 'dropdown'
137
+ semantic.supported_actions = ['tap', 'expand']
138
+ semantic.state_shape = 'semantic'
139
+ } else if (role === 'segmented_control') {
140
+ semantic.semantic_role = 'segmented_control'
141
+ semantic.supported_actions = ['tap']
142
+ semantic.state_shape = 'discrete'
// Fallback for nodes whose role is none of the four above but whose traits or type still
// mention adjustable/slider. The trait test here is case-insensitive (`/i`), unlike the
// type test on the same line.
143
+ } else if (traits.some((trait) => /adjustable|slider/i.test(trait)) || /adjustable|slider/.test(type)) {
144
+ semantic.semantic_role = 'custom_adjustable'
145
+ semantic.adjustable = true
146
+ semantic.supported_actions = ['adjust']
147
+ semantic.state_shape = 'continuous'
// Plain clickable elements only advertise 'tap'; no semantic_role is assigned.
148
+ } else if (semantic.is_clickable) {
149
+ semantic.supported_actions = ['tap']
150
+ }
151
+
// NOTE(review): every branch above that sets `adjustable = true` also sets `state_shape`,
// so as written this condition cannot be true and the block (including the
// parseIOSNumber call) looks unreachable - confirm whether a branch was meant to leave
// state_shape unset.
152
+ if (semantic.state_shape === undefined && semantic.adjustable && value !== null) {
153
+ const numericValue = parseIOSNumber(value)
154
+ if (numericValue !== null && numericValue >= 0 && numericValue <= 1) {
155
+ semantic.state_shape = 'continuous'
156
+ }
157
+ }
158
+
159
+ return semantic
121
160
  }
122
161
 
123
162
  function isIOSAdjustable(node: IDBElement, type: string, traits: string[]): boolean {
@@ -184,8 +223,8 @@ export function traverseIDBNode(node: IDBElement, elements: UIElement[], parentI
184
223
  const normalizedType = normalizeIOSType(type)
185
224
  const stableId = getIOSStableId(node)
186
225
  const selector = buildIOSSelector(type, label, value, stableId)
187
- const semantic = buildIOSSemantic(normalizedType, traits)
188
226
  const role = inferIOSRole(normalizedType, traits)
227
+ const semantic = buildIOSSemantic(normalizedType, traits, role, value)
189
228
 
190
229
  const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
191
230
 
@@ -13,7 +13,7 @@ export { wrapResponse, toolDefinitions, handleToolCall }
13
13
 
// serverInfo: exported name/version pair for this package. The only change in this hunk
// is the version string, bumped from '0.26.4' to '0.26.5' to match the release.
14
14
  export const serverInfo = {
15
15
  name: 'mobile-debug-mcp',
16
- version: '0.26.4'
16
+ version: '0.26.5'
17
17
  }
18
18
 
19
19
  export function createServer() {
package/src/types.ts CHANGED
@@ -107,6 +107,10 @@ export interface UIResolutionSelector {
// UIElementSemanticMetadata: semantic description attached to a UI element.
// This release adds four optional, nullable fields; is_clickable and is_container are unchanged.
107
107
  export interface UIElementSemanticMetadata {
108
108
  is_clickable: boolean;
109
109
  is_container: boolean;
// Kind of control. The platform builders in this diff assign 'slider', 'stepper', 'dropdown',
// 'segmented_control' and (iOS only) 'custom_adjustable'; 'composite_control' is declared
// here but not assigned by any code visible in this diff.
110
+ semantic_role?: 'slider' | 'stepper' | 'dropdown' | 'segmented_control' | 'custom_adjustable' | 'composite_control' | null;
// Action names the element supports. Typed as free-form strings; the values assigned in
// this diff are 'adjust', 'increment', 'decrement', 'tap' and 'expand'.
111
+ supported_actions?: string[] | null;
// Set to true by the builders for slider, stepper and custom_adjustable elements.
112
+ adjustable?: boolean | null;
// Shape of the element's state: 'continuous' (slider, custom_adjustable), 'discrete'
// (stepper, segmented_control) or 'semantic' (dropdown) in the builders of this diff.
113
+ state_shape?: 'continuous' | 'discrete' | 'semantic' | null;
110
114
  }
111
115
 
112
116
  export interface LoadingState {
@@ -342,6 +342,9 @@ function normalizeClassName(value: unknown): string {
342
342
 
343
343
  function inferAndroidRole(className: string): string | null {
344
344
  if (/seekbar|slider/.test(className)) return 'slider'
345
+ if (/stepper|numberpicker/.test(className)) return 'stepper'
346
+ if (/spinner|dropdown/.test(className)) return 'dropdown'
347
+ if (/segment|tablayout/.test(className)) return 'segmented_control'
345
348
  if (/switch|toggle/.test(className)) return 'switch'
346
349
  if (/checkbox/.test(className)) return 'checkbox'
347
350
  if (/radiobutton|radio/.test(className)) return 'radio'
@@ -375,11 +378,35 @@ function buildAndroidSelector(text: string | null, contentDescription: string |
375
378
  return null
376
379
  }
377
380
 
// buildAndroidSemantic: builds the UIElementSemanticMetadata object for one Android node.
// The new version takes an extra `role` argument and, instead of returning the literal
// directly, adds semantic_role / adjustable / supported_actions / state_shape on top of
// the existing is_clickable / is_container flags.
//   clickable - copied verbatim into is_clickable
//   className - class name string; the container regex below matches lowercase words
//   role      - role string or null; the caller in this diff passes its local `role` value
// Returns the populated metadata object.
378
- function buildAndroidSemantic(clickable: boolean, className: string): UIElementSemanticMetadata {
379
- return {
381
+ function buildAndroidSemantic(clickable: boolean, className: string, role: string | null): UIElementSemanticMetadata {
382
+ const semantic: UIElementSemanticMetadata = {
380
383
  is_clickable: clickable,
// NOTE(review): the container pattern includes "layout", so a class name containing
// "tablayout" would be flagged as a container even when its role is 'segmented_control' -
// confirm this overlap is intended.
381
384
  is_container: /recyclerview|scroll|layout|viewgroup|frame/.test(className)
382
385
  }
386
+
// Role-driven enrichment: the first matching branch wins. For the four recognized roles the
// action list is set regardless of `clickable`, and 'tap' is added only for dropdown and
// segmented_control.
387
+ if (role === 'slider') {
388
+ semantic.semantic_role = 'slider'
389
+ semantic.adjustable = true
390
+ semantic.supported_actions = ['adjust']
391
+ semantic.state_shape = 'continuous'
392
+ } else if (role === 'stepper') {
393
+ semantic.semantic_role = 'stepper'
394
+ semantic.adjustable = true
395
+ semantic.supported_actions = ['increment', 'decrement']
396
+ semantic.state_shape = 'discrete'
397
+ } else if (role === 'dropdown') {
398
+ semantic.semantic_role = 'dropdown'
399
+ semantic.supported_actions = ['tap', 'expand']
400
+ semantic.state_shape = 'semantic'
401
+ } else if (role === 'segmented_control') {
402
+ semantic.semantic_role = 'segmented_control'
403
+ semantic.supported_actions = ['tap']
404
+ semantic.state_shape = 'discrete'
// Any other clickable element only advertises 'tap'; no semantic_role is assigned.
405
+ } else if (clickable) {
406
+ semantic.supported_actions = ['tap']
407
+ }
408
+
409
+ return semantic
383
410
  }
384
411
 
385
412
  function isSliderLikeAndroid(node: any): boolean {
@@ -459,7 +486,7 @@ export function traverseNode(node: any, elements: UIElement[], parentIndex: numb
459
486
  const stableId = resourceId ?? (typeof contentDescription === 'string' && contentDescription.trim().length > 0 ? contentDescription : null)
460
487
  const testTag = stableId
461
488
  const selector = buildAndroidSelector(text, contentDescription, resourceId, normalizeClassName(className))
462
- const semantic = buildAndroidSemantic(clickable, normalizeClassName(className))
489
+ const semantic = buildAndroidSemantic(clickable, normalizeClassName(className), role)
463
490
 
464
491
  const isUseful = clickable || (text && text.length > 0) || (contentDescription && contentDescription.length > 0);
465
492
 
@@ -78,6 +78,52 @@ async function run() {
78
78
  assert.ok((res4.resolution?.alternates || []).length >= 1, 'Parent promotion should preserve alternates')
79
79
  process.stdout.write('Test 4: ' + (pass4 ? 'PASS' : 'FAIL') + '\n');
80
80
 
81
+ // Test 4b: semantic-only stepper should be discoverable by supported action
82
+ ;(ToolsObserve as any).getUITreeHandler = async () => ({
83
+ device: { platform: 'android', id: 'mock' },
84
+ screen: '',
85
+ resolution: { width: 1080, height: 1920 },
86
+ elements: [
87
+ {
88
+ text: null,
89
+ contentDescription: 'Quantity stepper',
90
+ type: 'android.widget.NumberPicker',
91
+ clickable: false,
92
+ enabled: true,
93
+ visible: true,
94
+ bounds: [10,10,200,80],
95
+ resourceId: 'picker_quantity',
96
+ semantic: {
97
+ is_clickable: false,
98
+ is_container: true,
99
+ semantic_role: 'stepper',
100
+ supported_actions: ['increment', 'decrement'],
101
+ adjustable: true,
102
+ state_shape: 'discrete'
103
+ }
104
+ }
105
+ ]
106
+ })
107
+
108
+ const res4b: any = await ToolsInteract.findElementHandler({ query: 'increment', exact: false, platform: 'android', timeoutMs: 300 })
109
+ process.stdout.write('res4b ' + JSON.stringify(res4b, null, 2) + '\n');
110
+ const pass4b = res4b.found === true && res4b.element && res4b.element.resourceId === 'picker_quantity' && res4b.element.semantic?.semantic_role === 'stepper'
111
+ assert.ok(pass4b, 'Semantic-only steppers should be discoverable by supported actions')
112
+ assert.strictEqual(res4b.resolution?.reason, 'semantic_action_match')
113
+ process.stdout.write('Test 4b: ' + (pass4b ? 'PASS' : 'FAIL') + '\n');
114
+
115
+ const res4bb: any = await ToolsInteract.findElementHandler({ query: 'increment', exact: true, platform: 'android', timeoutMs: 300 })
116
+ process.stdout.write('res4bb ' + JSON.stringify(res4bb, null, 2) + '\n');
117
+ const pass4bb = res4bb.found === true && res4bb.element && res4bb.element.resourceId === 'picker_quantity' && res4bb.resolution?.reason === 'semantic_action_match'
118
+ assert.ok(pass4bb, 'Exact searches should still match exact semantic actions')
119
+ process.stdout.write('Test 4bb: ' + (pass4bb ? 'PASS' : 'FAIL') + '\n');
120
+
121
+ const res4c: any = await ToolsInteract.findElementHandler({ query: 'control', exact: true, platform: 'android', timeoutMs: 300 })
122
+ process.stdout.write('res4c ' + JSON.stringify(res4c, null, 2) + '\n');
123
+ const pass4c = res4c.found === false
124
+ assert.ok(pass4c, 'Exact searches should not fall back to broad semantic keywords')
125
+ process.stdout.write('Test 4c: ' + (pass4c ? 'PASS' : 'FAIL') + '\n');
126
+
81
127
  // Test 5: duration label should resolve to the nearby slider control
82
128
  ;(ToolsObserve as any).getUITreeHandler = async () => ({
83
129
  device: { platform: 'android', id: 'mock' },
@@ -29,7 +29,14 @@ async function run() {
29
29
  value: 'com.example:id/duration',
30
30
  confidence: { score: 1, reason: 'resource_id' }
31
31
  })
32
- assert.deepStrictEqual(androidElements[0].semantic, { is_clickable: true, is_container: false })
32
+ assert.deepStrictEqual(androidElements[0].semantic, {
33
+ is_clickable: true,
34
+ is_container: false,
35
+ semantic_role: 'slider',
36
+ supported_actions: ['adjust'],
37
+ adjustable: true,
38
+ state_shape: 'continuous'
39
+ })
33
40
 
34
41
  const androidProgressElements: any[] = []
35
42
  traverseNode({
@@ -44,6 +51,21 @@ async function run() {
44
51
 
45
52
  assert.notStrictEqual(androidProgressElements[0]?.role, 'slider')
46
53
  assert.notStrictEqual(androidProgressElements[0]?.state?.value, 40)
54
+ assert.notStrictEqual(androidProgressElements[0]?.semantic?.adjustable, true)
55
+
56
+ const androidStepperElements: any[] = []
57
+ traverseNode({
58
+ '@_class': 'android.widget.NumberPicker',
59
+ '@_text': 'Quantity',
60
+ '@_content-desc': 'Quantity stepper',
61
+ '@_clickable': 'false',
62
+ '@_enabled': 'true',
63
+ '@_bounds': '[0,0][200,80]'
64
+ }, androidStepperElements)
65
+ assert.strictEqual(androidStepperElements[0].role, 'stepper')
66
+ assert.deepStrictEqual(androidStepperElements[0].semantic?.semantic_role, 'stepper')
67
+ assert.deepStrictEqual(androidStepperElements[0].semantic?.supported_actions, ['increment', 'decrement'])
68
+ assert.strictEqual(androidStepperElements[0].semantic?.adjustable, true)
47
69
 
48
70
  const androidFallbackElements: any[] = []
49
71
  traverseNode({
@@ -82,7 +104,14 @@ async function run() {
82
104
  value: 'playback_speed_slider',
83
105
  confidence: { score: 1, reason: 'accessibility_identifier' }
84
106
  })
85
- assert.deepStrictEqual(iosElements[0].semantic, { is_clickable: true, is_container: false })
107
+ assert.deepStrictEqual(iosElements[0].semantic, {
108
+ is_clickable: true,
109
+ is_container: false,
110
+ semantic_role: 'slider',
111
+ supported_actions: ['adjust'],
112
+ adjustable: true,
113
+ state_shape: 'continuous'
114
+ })
86
115
 
87
116
  const iosProgressElements: any[] = []
88
117
  traverseIDBNode({
@@ -94,6 +123,40 @@ async function run() {
94
123
 
95
124
  assert.notStrictEqual(iosProgressElements[0]?.role, 'slider')
96
125
 
126
+ const iosStepperElements: any[] = []
127
+ traverseIDBNode({
128
+ AXElementType: 'Stepper',
129
+ AXLabel: 'Quantity',
130
+ AXValue: '1',
131
+ AXTraits: ['UIAccessibilityTraitAdjustable']
132
+ }, iosStepperElements)
133
+ assert.strictEqual(iosStepperElements[0].role, 'stepper')
134
+ assert.strictEqual(iosStepperElements[0].semantic?.semantic_role, 'stepper')
135
+ assert.deepStrictEqual(iosStepperElements[0].semantic?.supported_actions, ['increment', 'decrement'])
136
+ assert.strictEqual(iosStepperElements[0].semantic?.state_shape, 'discrete')
137
+
138
+ const iosSegmentedElements: any[] = []
139
+ traverseIDBNode({
140
+ AXElementType: 'Segmented Control',
141
+ AXLabel: 'Playback mode',
142
+ AXTraits: ['UIAccessibilityTraitButton']
143
+ }, iosSegmentedElements)
144
+ assert.strictEqual(iosSegmentedElements[0].role, 'segmented_control')
145
+ assert.strictEqual(iosSegmentedElements[0].semantic?.semantic_role, 'segmented_control')
146
+ assert.deepStrictEqual(iosSegmentedElements[0].semantic?.supported_actions, ['tap'])
147
+
148
+ const iosCustomAdjustableElements: any[] = []
149
+ traverseIDBNode({
150
+ AXElementType: 'CustomControl',
151
+ AXLabel: 'Intensity',
152
+ AXValue: '0.25',
153
+ AXTraits: ['UIAccessibilityTraitAdjustable']
154
+ }, iosCustomAdjustableElements)
155
+ assert.strictEqual(iosCustomAdjustableElements[0].semantic?.semantic_role, 'custom_adjustable')
156
+ assert.strictEqual(iosCustomAdjustableElements[0].semantic?.adjustable, true)
157
+ assert.deepStrictEqual(iosCustomAdjustableElements[0].semantic?.supported_actions, ['adjust'])
158
+ assert.strictEqual(iosCustomAdjustableElements[0].semantic?.state_shape, 'continuous')
159
+
97
160
  const iosFallbackElements: any[] = []
98
161
  traverseIDBNode({
99
162
  AXElementType: 'Button',