mobile-debug-mcp 0.26.2 → 0.26.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,7 +106,7 @@ function buildIOSSemantic(type, traits) {
106
106
  };
107
107
  }
108
108
  function isIOSAdjustable(node, type, traits) {
109
- return /slider|adjustable|stepper|progress/i.test(type) || traits.some((trait) => /adjustable|slider|progress/i.test(trait));
109
+ return /slider|adjustable|stepper/i.test(type) || traits.some((trait) => /adjustable|slider/i.test(trait));
110
110
  }
111
111
  function extractIOSState(node, type, label, value, traits) {
112
112
  const state = {};
@@ -555,6 +555,62 @@ Failure Handling:
555
555
  required: ['property', 'expected']
556
556
  }
557
557
  },
558
+ {
559
+ name: 'adjust_control',
560
+ description: `Purpose:
561
+ Adjust a numeric control value with verification.
562
+
563
+ This is the initial adjustable-control surface for slider-like controls and other controls that expose a numeric value or value_range.
564
+
565
+ Inputs:
566
+ - selector or element_id
567
+ - property (defaults to "value")
568
+ - targetValue
569
+ - tolerance (optional)
570
+ - maxAttempts (optional)
571
+ - platform/deviceId (optional)
572
+
573
+ Output Structure:
574
+ - action_id, timestamp (ISO 8601), action_type
575
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
576
+ - source_module: runtime source of the action envelope
577
+ - target_state / actual_state / within_tolerance / converged / attempts / adjustment_mode
578
+ - target.selector = original selector or element handle
579
+ - success = true when the control converges within tolerance
580
+
581
+ Verification Guidance:
582
+ - Prefer direct target placement when value_range is available; fall back to a drag only if the direct tap does not converge
583
+ - Use expect_state for the control value readback
584
+ - Treat coordinate fallback as degraded mode
585
+
586
+ Failure Handling:
587
+ - ELEMENT_NOT_FOUND → re-resolve the control
588
+ - ELEMENT_NOT_INTERACTABLE → the control cannot be adjusted through the current runtime
589
+ - TIMEOUT → the control did not converge within bounded retries
590
+ - UNKNOWN → capture a snapshot and stop`,
591
+ inputSchema: {
592
+ type: 'object',
593
+ properties: {
594
+ selector: {
595
+ type: 'object',
596
+ properties: {
597
+ text: { type: 'string' },
598
+ resource_id: { type: 'string' },
599
+ accessibility_id: { type: 'string' },
600
+ contains: { type: 'boolean', default: false }
601
+ }
602
+ },
603
+ element_id: { type: 'string', description: 'Optional previously resolved element identifier.' },
604
+ property: { type: 'string', description: 'Readable numeric state property to adjust.', default: 'value' },
605
+ targetValue: { type: 'number', description: 'Target numeric value.' },
606
+ tolerance: { type: 'number', description: 'Accepted numeric tolerance around the target value.', default: 0 },
607
+ maxAttempts: { type: 'number', description: 'Maximum adjustment attempts.', default: 3 },
608
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override' },
609
+ deviceId: { type: 'string', description: 'Optional device serial/udid' }
610
+ },
611
+ required: ['targetValue']
612
+ }
613
+ },
558
614
  {
559
615
  name: 'wait_for_ui',
560
616
  description: `Purpose:
@@ -596,7 +652,9 @@ Recommended Usage:
596
652
  },
597
653
  {
598
654
  name: 'find_element',
599
- description: 'Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.',
655
+ description: `Find a UI element by semantic query (text, content-desc, resource-id, class).
656
+
657
+ Returns the best match plus resolution metadata when available, including confidence, selection reason, and fallback alternates.`,
600
658
  inputSchema: {
601
659
  type: 'object',
602
660
  properties: {
@@ -226,6 +226,30 @@ async function handleExpectState(args) {
226
226
  const res = await ToolsInteract.expectStateHandler({ selector: selector ?? undefined, element_id: element_id ?? undefined, property, expected, platform, deviceId });
227
227
  return wrapResponse(res);
228
228
  }
229
+ async function handleAdjustControl(args) {
230
+ const selector = getObjectArg(args, 'selector');
231
+ const element_id = getStringArg(args, 'element_id');
232
+ const property = getStringArg(args, 'property') ?? 'value';
233
+ const targetValue = requireNumberArg(args, 'targetValue');
234
+ const tolerance = getNumberArg(args, 'tolerance') ?? 0;
235
+ const maxAttempts = getNumberArg(args, 'maxAttempts') ?? 3;
236
+ const platform = getStringArg(args, 'platform');
237
+ const deviceId = getStringArg(args, 'deviceId');
238
+ if (!selector && !element_id) {
239
+ throw new Error('Missing selector or element_id argument');
240
+ }
241
+ const res = await ToolsInteract.adjustControlHandler({
242
+ selector: selector ?? undefined,
243
+ element_id: element_id ?? undefined,
244
+ property,
245
+ targetValue,
246
+ tolerance,
247
+ maxAttempts,
248
+ platform,
249
+ deviceId
250
+ });
251
+ return wrapResponse(res);
252
+ }
229
253
  async function handleWaitForUI(args) {
230
254
  const selector = getObjectArg(args, 'selector');
231
255
  const condition = getStringArg(args, 'condition') ?? 'exists';
@@ -431,6 +455,7 @@ export const toolHandlers = {
431
455
  expect_screen: handleExpectScreen,
432
456
  expect_element_visible: handleExpectElementVisible,
433
457
  expect_state: handleExpectState,
458
+ adjust_control: handleAdjustControl,
434
459
  wait_for_ui: handleWaitForUI,
435
460
  find_element: handleFindElement,
436
461
  tap: handleTap,
@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
6
6
  export { wrapResponse, toolDefinitions, handleToolCall };
7
7
  export const serverInfo = {
8
8
  name: 'mobile-debug-mcp',
9
- version: '0.26.2'
9
+ version: '0.26.4'
10
10
  };
11
11
  export function createServer() {
12
12
  const server = new Server(serverInfo, {
@@ -360,7 +360,7 @@ function normalizeClassName(value) {
360
360
  return typeof value === 'string' ? value.trim().toLowerCase() : '';
361
361
  }
362
362
  function inferAndroidRole(className) {
363
- if (/seekbar|slider|progress/.test(className))
363
+ if (/seekbar|slider/.test(className))
364
364
  return 'slider';
365
365
  if (/switch|toggle/.test(className))
366
366
  return 'switch';
@@ -411,7 +411,7 @@ function buildAndroidSemantic(clickable, className) {
411
411
  }
412
412
  function isSliderLikeAndroid(node) {
413
413
  const className = String(node['@_class'] || '').toLowerCase();
414
- return /seekbar|slider|range|progress/i.test(className);
414
+ return /seekbar|slider|range/i.test(className);
415
415
  }
416
416
  function extractAndroidState(node) {
417
417
  const checked = parseBooleanAttr(node['@_checked']);
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.26.4]
6
+ - Improved slider accuracy
7
+
8
+ ## [0.26.3]
9
+ - updates the `find_element` tool to return detailed resolution metadata, including confidence scores, selection reasons, and fallback alternates
10
+
5
11
  ## [0.26.2]
6
12
  - unified action execution and verification model
7
13
 
package/docs/ROADMAP.md CHANGED
@@ -52,10 +52,62 @@ Higher task success with fewer retries.
52
52
 
53
53
  ## Upcoming Work
54
54
 
55
+ - Environment Auto-Configuration and Toolchain Discovery
55
56
  - Adjustable Control Support
57
+ - Better Compose / Custom Control Semantics
56
58
  - Signal-Oriented Diagnostic Filtering
57
59
  - Long Press Gesture
58
- - Better Compose / Custom Control Semantics
60
+ # Stronger State Verification
61
+ # Richer Element Identity
62
+ # Wait and Synchronization Reliability
63
+ # Environment Auto-Configuration and Toolchain Discovery
64
+
65
+ ## Rationale
66
+ Reduce onboarding friction and improve developer experience by minimizing manual setup dependencies.
67
+
68
+ **Status:** Planned
69
+
70
+ Addresses friction around:
71
+ - manual idb installation
72
+ - manual adb path configuration
73
+ - manual xcrun path configuration
74
+ - environment drift across machines
75
+ - setup failures blocking first use
76
+
77
+ ## Scope
78
+ - Automatic discovery of adb
79
+ - Automatic discovery of xcrun
80
+ - idb detection and guided bootstrap support
81
+ - Startup toolchain validation
82
+ - Environment health diagnostics / doctor-style checks
83
+ - Minimal-manual-configuration defaults
84
+
85
+ ## Expected Impact
86
+ High.
87
+
88
+ ## Exit Criteria
89
+ - adb and xcrun auto-discovery implemented
90
+ - Missing dependencies surfaced with guided remediation
91
+ - Startup environment validation available
92
+ - Manual path configuration eliminated or minimized for standard setups
93
+ - First-run setup validated on representative developer environments
94
+
95
+ ## Success Metrics
96
+ - Reduced setup friction during onboarding
97
+ - Lower environment configuration failures
98
+ - Faster time-to-first-successful-session
99
+ - Reduced support/debugging caused by local setup issues
100
+
101
+ ## Dependencies
102
+ Depends on:
103
+ - Stronger State Verification
104
+ - Richer Element Identity
105
+
106
+ Strengthens:
107
+ - Actionability Resolution
108
+ - Broader user adoption readiness
109
+
110
+ ---
59
111
 
60
112
  ## Later Horizon
61
113
 
@@ -160,6 +212,7 @@ Addresses failures where agents:
160
212
  - wait_for_ui_change (hierarchy diff based waiting)
161
213
  - Structured loading state detection
162
214
  - Snapshot revision / staleness metadata
215
+ - Focused snapshot views / incremental snapshot diffs
163
216
  - Compose-aware wait robustness improvements
164
217
 
165
218
  ## Expected Impact
@@ -169,6 +222,7 @@ Very high.
169
222
  - wait_for_ui_change implemented
170
223
  - Loading state detection available for representative controls
171
224
  - Snapshot revision or staleness metadata exposed
225
+ - Focused or diff-oriented snapshots validated in benchmark flows
172
226
  - UI-first sync guidance added to spec guardrails
173
227
  - In-place update waits validated on benchmark flows
174
228
 
@@ -379,9 +433,9 @@ Strengthens:
379
433
  # Better Compose / Custom Control Semantics
380
434
 
381
435
  ## Rationale
382
- Important, but strengthened by earlier capabilities first.
436
+ Higher priority after agent feedback exposed custom control semantics as a core reliability gap, not a later optimization.
383
437
 
384
- **Status:** Planned
438
+ **Status:** Spec Ready
385
439
 
386
440
  Semantics become more useful once:
387
441
  - identity is stronger
@@ -419,7 +473,6 @@ Depends on:
419
473
  - Wait and Synchronization Reliability
420
474
  - Actionability Resolution
421
475
  - Adjustable Control Support
422
- - Signal-Oriented Diagnostic Filtering
423
476
  - Long Press Gesture
424
477
 
425
478
  ---
@@ -515,6 +568,7 @@ Foundation
515
568
  - Richer Element Identity
516
569
 
517
570
  Synchronization & Actionability
571
+ - Environment Auto-Configuration and Toolchain Discovery
518
572
  - Wait and Synchronization Reliability
519
573
  - Actionability Resolution
520
574
 
@@ -535,27 +589,28 @@ Deep Observability
535
589
  - Richer Element Identity
536
590
  - Wait and Synchronization Reliability
537
591
  - Actionability Resolution
592
+ - Environment Auto-Configuration and Toolchain Discovery
538
593
 
539
594
  Focus:
540
- Make core loop more reliable.
595
+ Make core loop reliable and reduce onboarding friction.
541
596
 
542
597
  ---
543
598
 
544
599
  ## Wave 2 (Control Precision + Diagnostics)
545
600
  - Adjustable Control Support
601
+ - Better Compose / Custom Control Semantics
546
602
  - Signal-Oriented Diagnostic Filtering
547
603
 
548
604
  Focus:
549
- Improve control precision and signal observability.
605
+ Improve control precision, custom control semantics, and signal observability.
550
606
 
551
607
  ---
552
608
 
553
609
  ## Wave 3 (Interaction Expansion)
554
610
  - Long Press Gesture
555
- - Better Compose / Custom Control Semantics
556
611
 
557
612
  Focus:
558
- Expand interaction capability.
613
+ Expand interaction capability after core control reliability is improved.
559
614
 
560
615
  ---
561
616
 
@@ -574,16 +629,17 @@ Roadmap Ordering:
574
629
  1. Stronger State Verification
575
630
  2. Richer Element Identity
576
631
  3. Wait and Synchronization Reliability
577
- 4. Actionability Resolution
578
- 5. Adjustable Control Support
579
- 6. Signal-Oriented Diagnostic Filtering
580
- 7. Long Press Gesture
581
- 8. Better Compose / Custom Control Semantics
582
- 9. Pinch to Zoom
583
- 10. Action Trace Correlation
632
+ 4. Environment Auto-Configuration and Toolchain Discovery
633
+ 5. Actionability Resolution
634
+ 6. Adjustable Control Support
635
+ 7. Better Compose / Custom Control Semantics
636
+ 8. Signal-Oriented Diagnostic Filtering
637
+ 9. Long Press Gesture
638
+ 10. Pinch to Zoom
639
+ 11. Action Trace Correlation
584
640
 
585
641
  Rationale:
586
- - Early roadmap items harden state, targeting, synchronization, action execution.
642
+ - Early roadmap items harden state, targeting, synchronization, environment readiness, and action execution.
587
643
  - Mid roadmap items improve control precision and signal observability.
588
644
  - Later interaction-focused items expand interaction coverage.
589
645
  - Final observability work deepens debugging observability.
@@ -0,0 +1,277 @@
1
+ # RFC 007 — Actionability Resolution and Executable Target Selection
2
+
3
+ ## 1. Summary
4
+
5
+ This RFC defines how the system resolves which discovered UI element should receive an action before dispatch.
6
+
7
+ It addresses ambiguity between:
8
+ - visible elements vs actionable elements
9
+ - leaf nodes vs clickable containers
10
+ - semantic targets vs coordinate fallbacks
11
+ - multiple candidate targets with uncertain executability
12
+
13
+ Goal:
14
+ Improve first-attempt action correctness by resolving the best executable target prior to action dispatch.
15
+
16
+ This RFC defines how a target reaches the `Resolved` stage whose lifecycle meaning is defined in RFC 005 and operationalized by RFC 006.
17
+ It is grounded in the existing element-resolution flow and extends current resolution behavior rather than assuming a wholly new resolver architecture.
18
+
19
+ ---
20
+
21
+ ## 2. Problem Statement
22
+
23
+ Current interaction failures often arise before execution.
24
+
25
+ The agent may discover the intended UI concept, but not the correct executable target.
26
+
27
+ Examples:
28
+ - tapping label text instead of clickable container
29
+ - sliders not surfacing semantic handles
30
+ - generic Compose containers hiding true affordances
31
+ - multiple matching targets without ranking logic
32
+
33
+ Observed failure modes:
34
+ - false taps
35
+ - submit ambiguity
36
+ - coordinate guessing
37
+ - retry loops
38
+ - brittle fallback behavior
39
+
40
+ This is a target-resolution problem, not an execution problem.
41
+
42
+ ---
43
+
44
+ ## 3. Design Goals
45
+
46
+ Resolution MUST:
47
+ - Prefer executable targets over merely visible matches
48
+ - Reduce ambiguous target selection
49
+ - Support confidence-based ranking
50
+ - Build on existing runtime resolution surfaces before introducing new resolution metadata
51
+ - Use structural and semantic resolution signals
52
+ - Minimize coordinate fallback usage
53
+ - Integrate with verification expectations from RFC 005
54
+
55
+ ---
56
+
57
+ ## 4. Actionability Model
58
+
59
+ Candidate targets are evaluated using actionability signals.
60
+
61
+ ### Structural signals
62
+ - clickable
63
+ - enabled
64
+ - focusable
65
+ - bounds
66
+ - parent action ownership
67
+
68
+ ### Semantic signals
69
+ - control role
70
+ - label association
71
+ - affordance hints
72
+ - selectable or adjustable semantics
73
+
74
+ ### Interaction signals
75
+ - reliable target patterns
76
+ - control-specific heuristics
77
+ - gesture compatibility
78
+
79
+ ---
80
+
81
+ ## 4.1 Current Runtime Resolution Surfaces
82
+
83
+ This RFC builds on current runtime resolution paths, including:
84
+ - `findElementHandler` for candidate discovery
85
+ - `_resolveActionableAncestor` for executable ancestor promotion
86
+ - `tapElementHandler` for resolved element dispatch
87
+ - `scrollToElementHandler` for scroll-mediated target acquisition
88
+
89
+ These existing handlers are the current implementation substrate for the Resolved stage.
90
+ This RFC extends and systematizes those behaviors; it does not assume replacement of those paths.
91
+
92
+ ---
93
+
94
+ ## 5. Target Candidate Ranking
95
+
96
+ When multiple targets match, candidates are ranked.
97
+
98
+ Illustrative confidence model:
99
+
100
+ resolution_confidence =
101
+ interactability_score
102
+ + semantic_match_score
103
+ + structural_reliability_score
104
+
105
+ Highest-confidence executable target is preferred.
106
+
107
+ The confidence model is illustrative; it is normative only at the rule-precedence level. Implementations may use simpler heuristics as long as they preserve the resolution ordering guarantees. Any scoring mechanism is implementation-defined and need not be externally surfaced.
108
+
109
+ ---
110
+
111
+ ## 6. Resolution Rules
112
+
113
+ ### Rule A — Prefer actionable containers over passive leaf nodes
114
+
115
+ Prefer:
116
+ - clickable container
117
+
118
+ Over:
119
+ - passive child text nodes
120
+
121
+ Example:
122
+ Prefer button container over "Generate Session" label node.
123
+
124
+ ---
125
+
126
+ ### Rule B — Prefer semantic controls over coordinate fallbacks
127
+
128
+ Use semantic control targets whenever possible.
129
+
130
+ Use coordinate fallback only when:
131
+ - no semantic target exists
132
+ - adjustable control semantics are absent
133
+ - fallback confidence is acceptable
134
+
135
+ ---
136
+
137
+ ### Rule C — Prefer explicit affordance ownership
138
+
139
+ If child and parent differ:
140
+ prefer the node owning the action handler.
141
+
142
+ ---
143
+
144
+ ## 7. Ambiguity Handling
145
+
146
+ When multiple plausible targets remain:
147
+
148
+ System SHOULD:
149
+ - rank candidates
150
+ - expose confidence
151
+ - preserve alternates for fallback reasoning
152
+
153
+ Low-confidence targets may trigger:
154
+ - guarded execution
155
+ - alternate resolution attempt
156
+ - explicit recovery path
157
+
158
+ ---
159
+
160
+ ## 8. Adjustable Control Resolution
161
+
162
+ Special handling for:
163
+ - sliders
164
+ - steppers
165
+ - drag controls
166
+
167
+ Support:
168
+ - adjustable-role recognition
169
+ - control-bound discovery
170
+ - value-aware interaction targeting
171
+
172
+ This RFC defines target resolution.
173
+ Value-setting behavior remains governed by Adjustable Control Support.
174
+
175
+ ---
176
+
177
+ ## 9. Compose / Custom Control Resolution
178
+
179
+ Support derived actionability for:
180
+ - merged Compose semantics
181
+ - composite controls
182
+ - inferred interaction contracts
183
+
184
+ This RFC depends on and strengthens Better Compose / Custom Control Semantics.
185
+
186
+ ---
187
+
188
+ ## 10. Resolution Output Model (Current + Future Extension)
189
+
190
+ This model is non-normative and represents a progressive enrichment direction rather than a required runtime contract.
191
+
192
+ Resolution may evolve toward the following enriched output shape. Current runtime implementations may expose only resolved-target output plus limited supporting metadata.
193
+
194
+ At minimum, current implementations are expected to produce a resolved target. Confidence, alternates, fallback metadata, and reason codes may be introduced incrementally.
195
+
196
+ Illustrative future-complete shape:
197
+
198
+ {
199
+ "resolved_target": "...",
200
+ "confidence": 0.92,
201
+ "fallback_available": true,
202
+ "resolution_reason": "clickable_parent_preferred"
203
+ }
204
+
205
+ ---
206
+
207
+ ## 11. Verification Integration
208
+
209
+ Resolution is incomplete without verification expectations.
210
+
211
+ Resolved output should be derived directly from the existing element-resolution flow before adding richer metadata layers.
212
+
213
+ The resolved target should carry an expected post-action signal.
214
+
215
+ Examples:
216
+ - navigation transition expected
217
+ - menu expected
218
+ - control value change expected
219
+
220
+ This feeds RFC 005 verification.
221
+
222
+ ---
223
+
224
+ ## 12. Success Metrics
225
+
226
+ Track:
227
+ - reduced false-tap failures
228
+ - lower retarget retries
229
+ - higher first-attempt action success
230
+ - reduced coordinate fallback usage
231
+ - improved custom control interaction success
232
+
233
+ ---
234
+
235
+ ## 13. Dependencies
236
+
237
+ Depends on:
238
+ - Stronger State Verification
239
+ - Richer Element Identity
240
+ - Wait and Synchronization Reliability
241
+
242
+ Strengthens:
243
+ - Adjustable Control Support
244
+ - Better Compose / Custom Control Semantics
245
+
246
+ ---
247
+
248
+ ## 14. Relationship to Other RFCs
249
+
250
+ RFC 005
251
+ Defines what Resolved means in lifecycle semantics.
252
+
253
+ RFC 006
254
+ Defines how runtime interprets action execution.
255
+
256
+ RFC 007
257
+ Defines how a target becomes Resolved.
258
+ Specifically, it formalizes the current discovery → actionable ancestor resolution → dispatch preparation flow already present in runtime handlers.
259
+
260
+ Together:
261
+ - RFC 005 — action correctness
262
+ - RFC 006 — runtime execution binding
263
+ - RFC 007 — executable target resolution
264
+
265
+ ---
266
+
267
+ ## 15. Summary
268
+
269
+ This RFC reduces failures caused by acting on the wrong thing, even when the right thing was discovered.
270
+
271
+ It improves:
272
+ - action precision
273
+ - control reliability
274
+ - Compose interaction robustness
275
+ - agent success with fewer retries
276
+
277
+ It addresses one of the largest remaining sources of interaction brittleness.