mobile-debug-mcp 0.22.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,20 @@
1
1
  # Observe (logs, screenshots, UI trees)
2
2
 
3
- Tools that retrieve device state, logs, screenshots and UI hierarchies.
3
+ Tools that retrieve device state, logs, screenshots, fingerprints, and UI hierarchies.
4
+
5
+ These tools are primarily for:
6
+
7
+ - building context before an action
8
+ - supporting synchronization
9
+ - diagnostics when verification fails
10
+
11
+ They are **not** the primary success signal when an applicable `expect_*` tool exists.
4
12
 
5
13
  ## get_logs
6
14
 
7
- Fetch recent logs as structured entries optimized for AI agents. Use logs as a debugging aid only — prefer UI validation (wait_for_ui) first.
15
+ Fetch recent logs as structured entries optimized for AI agents.
16
+
17
+ Use logs as a debugging aid only. Prefer `expect_*` for verification, and use logs after verification fails or when an error is suspected.
8
18
 
9
19
  Input (example):
10
20
 
@@ -18,7 +28,7 @@ Defaults:
18
28
 
19
29
  When to use get_logs:
20
30
 
21
- - After a UI validation (wait_for_ui) fails to confirm the expected outcome.
31
+ - After deterministic verification fails.
22
32
  - When you suspect a crash, error, or silent failure that the UI doesn't expose.
23
33
  - To provide additional debugging context correlated with an action.
24
34
 
@@ -41,7 +51,7 @@ Notes:
41
51
  - Errors are returned as structured objects with `error.code` and `error.message`. Possible codes: LOGS_UNAVAILABLE, INVALID_FILTER, PLATFORM_NOT_SUPPORTED, INTERNAL_ERROR.
42
52
 
43
53
  ## capture_screenshot
44
- Capture screen. Returns JSON metadata then an image/png block with base64 PNG data.
54
+ Capture the current screen. Returns JSON metadata followed by one or more image blocks.
45
55
 
46
56
  Input:
47
57
 
@@ -52,13 +62,17 @@ Input:
52
62
  Response (metadata):
53
63
 
54
64
  ```json
55
- { "device": { "platform": "android", "id": "emulator-5554" }, "width": 1080, "height": 2400 }
65
+ { "device": { "platform": "android", "id": "emulator-5554" }, "result": { "resolution": { "width": 1080, "height": 2400 }, "mimeType": "image/webp" } }
56
66
  ```
57
67
 
68
+ Notes:
69
+ - The image block may use WebP, PNG, or a compatibility fallback such as JPEG.
70
+ - Best used for inspection and debugging, not as a primary verification mechanism.
71
+
58
72
  ---
59
73
 
60
74
  ## get_ui_tree
61
- Returns parsed UI hierarchy.
75
+ Return the parsed UI hierarchy for the current screen.
62
76
 
63
77
  Input:
64
78
 
@@ -69,9 +83,13 @@ Input:
69
83
  Response (example):
70
84
 
71
85
  ```json
72
- { "device": { "platform": "android", "id": "emulator-5554" }, "elements": [ { "text": "Sign in", "type": "android.widget.Button", "resourceId": "com.example:id/signin", "clickable": true, "bounds": [0,0,100,50] } ] }
86
+ { "device": { "platform": "android", "id": "emulator-5554" }, "screen": "", "resolution": { "width": 1080, "height": 2400 }, "elements": [ { "text": "Sign in", "type": "android.widget.Button", "resourceId": "com.example:id/signin", "clickable": true, "bounds": [0,0,100,50] } ] }
73
87
  ```
74
88
 
89
+ Notes:
90
+ - Useful for inspection, selector development, and fallback debugging.
91
+ - Prefer `wait_for_ui` for deterministic element resolution in interactive flows.
92
+
75
93
  ---
76
94
 
77
95
  ## get_current_screen
@@ -136,7 +154,7 @@ Notes:
136
154
  ---
137
155
 
138
156
  ## get_screen_fingerprint
139
- Generate a stable fingerprint representing the visible screen. Useful for detecting navigation changes, preventing loops, and synchronisation.
157
+ Generate a stable fingerprint representing the visible screen. Useful for detecting navigation changes, preventing loops, and synchronization.
140
158
 
141
159
  Input (optional):
142
160
 
@@ -157,6 +175,10 @@ Notes:
157
175
  - Sorts deterministically (top-to-bottom, left-to-right) and limits elements to 50.
158
176
  - Returns `fingerprint: null` and an error message if the UI tree or activity cannot be retrieved.
159
177
 
178
+ Guidance:
179
+ - Use as a baseline for `wait_for_screen_change`.
180
+ - Use fingerprints to define expected screens for `expect_screen`.
181
+
160
182
  ---
161
183
 
162
184
  ## start_log_stream / read_log_stream / stop_log_stream
@@ -44,5 +44,5 @@ Behavior notes:
44
44
 
45
45
  Usage guidance:
46
46
  - Call before build/install flows to avoid wasted build attempts on misconfigured systems.
47
+ - Call early in a session when device or toolchain availability is uncertain.
47
48
  - If `success: false`, attempt recovery steps or report issues to the user.
48
-
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.22.0",
3
+ "version": "0.24.0",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,64 @@
1
/** Classification labels that classifyActionOutcome can produce. */
export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown'

/** Status attached to a single observed network request. */
export type NetworkRequestStatus = 'success' | 'failure' | 'retryable'

/** One network request considered by the classifier. */
export interface NetworkRequest {
  endpoint: string
  status: NetworkRequestStatus
}

/** Signals fed into classifyActionOutcome. */
export interface ClassifyActionOutcomeInput {
  uiChanged: boolean
  expectedElementVisible?: boolean | null
  /** null = get_network_activity has not been called yet */
  networkRequests?: NetworkRequest[] | null
  hasLogErrors?: boolean | null
}

/** Classification plus a human-readable explanation of the rule that fired. */
export interface ClassifyActionOutcomeResult {
  outcome: ActionOutcome
  reasoning: string
  /** Present when the caller must call get_network_activity before a final classification is possible */
  nextAction?: 'call_get_network_activity'
}

/**
 * Deterministic, side-effect-free classifier for the result of an action.
 *
 * Rules are checked from top to bottom and the first one that matches wins:
 *   1. expected element visible, or UI changed          -> 'success'
 *   2. networkRequests is null or undefined             -> 'unknown' (with nextAction)
 *   3. any request is 'failure' or 'retryable'          -> 'backend_failure'
 *   4. request list is empty                            -> 'no_op'
 *   5. every request is 'success'                       -> 'ui_failure'
 *   6. none of the above                                -> 'unknown'
 *
 * @param input UI, network and log signals gathered around the action.
 * @returns The outcome and the reasoning string for the rule that matched.
 */
export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
  const {
    uiChanged: screenChanged,
    expectedElementVisible: elementVisible,
    networkRequests: requests,
    hasLogErrors: logErrorsSeen
  } = input

  // Rule 1: a positive UI signal settles the outcome; element visibility takes
  // precedence when choosing the reasoning text.
  if (elementVisible === true) {
    return { outcome: 'success', reasoning: 'expected element is visible' }
  }
  if (screenChanged) {
    return { outcome: 'success', reasoning: 'UI changed after action' }
  }

  // Rule 2: without network data the caller is told which tool to run next.
  if (requests == null) {
    return {
      outcome: 'unknown',
      reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
      nextAction: 'call_get_network_activity'
    }
  }

  // Rule 3: the first failed or retryable request (in list order) is reported.
  for (const request of requests) {
    if (request.status === 'failure' || request.status === 'retryable') {
      return {
        outcome: 'backend_failure',
        reasoning: `network request ${request.endpoint} returned ${request.status}`
      }
    }
  }

  // Rule 4: nothing happened on screen or on the wire; log errors only annotate the reasoning.
  if (requests.length === 0) {
    const suffix = logErrorsSeen ? ' (log errors present)' : ''
    return { outcome: 'no_op', reasoning: `no UI change and no network activity${suffix}` }
  }

  // Rule 5: the backend answered successfully but the UI never reflected it.
  const hasNonSuccess = requests.some((request) => request.status !== 'success')
  if (!hasNonSuccess) {
    return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
  }

  // Rule 6: reached only when a request carries a status other than the three known ones.
  return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
}
@@ -5,7 +5,14 @@ export { AndroidInteract, iOSInteract };
5
5
 
6
6
  import { resolveTargetDevice } from '../utils/resolve-device.js'
7
7
  import { ToolsObserve } from '../observe/index.js'
8
- import type { TapElementResponse } from '../types.js'
8
+ import { nextActionId } from '../server/common.js'
9
+ import type {
10
+ ActionFailureCode,
11
+ ActionTargetResolved,
12
+ ExpectElementVisibleResponse,
13
+ ExpectScreenResponse,
14
+ TapElementResponse
15
+ } from '../types.js'
9
16
 
10
17
  interface ScreenFingerprintResponse { fingerprint: string | null }
11
18
 
@@ -112,6 +119,55 @@ export class ToolsInteract {
112
119
  }
113
120
  }
114
121
 
122
/**
 * Best-effort read of the current screen fingerprint.
 *
 * Calls ToolsObserve.getScreenFingerprintHandler and resolves to the `fingerprint`
 * field of its response. Resolves to null (never rejects) when the handler throws,
 * or when the response or its fingerprint is null/undefined.
 */
private static async _captureFingerprint(platform: 'android' | 'ios', deviceId?: string): Promise<string | null> {
  try {
    const observed = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }) as ScreenFingerprintResponse | null
    if (observed == null) {
      return null
    }
    return observed.fingerprint ?? null
  } catch {
    // Any failure while capturing is reported to the caller as "no fingerprint".
    return null
  }
}
130
+
131
/**
 * Builds the ActionTargetResolved descriptor for an element found in the UI tree.
 *
 * Several alternative field spellings are coalesced; the first non-nullish one wins:
 *   - resource_id:      resourceId, resourceID, id
 *   - accessibility_id: contentDescription, contentDesc, accessibilityLabel, label
 *   - class:            type, class
 * Fields with no value become null. Bounds go through ToolsInteract._normalizeBounds.
 *
 * @param elementId Identifier copied into the descriptor unchanged.
 * @param element   UI tree element to describe.
 * @param index     Index copied into the descriptor unchanged.
 */
private static _resolvedTargetFromElement(
  elementId: string,
  element: UiElement,
  index: number
): ActionTargetResolved {
  const text = element.text ?? null
  const resourceId = element.resourceId ?? element.resourceID ?? element.id ?? null
  const accessibilityId = element.contentDescription ?? element.contentDesc ?? element.accessibilityLabel ?? element.label ?? null
  const className = element.type ?? element.class ?? null
  const bounds = ToolsInteract._normalizeBounds(element.bounds)

  return {
    elementId,
    text,
    resource_id: resourceId,
    accessibility_id: accessibilityId,
    class: className,
    bounds,
    index
  }
}
146
+
147
/**
 * Assembles a failed TapElementResponse (`success: false`).
 *
 * All arguments are copied into the response as-is; `selector` and `resolved`
 * are grouped under `target`. `uiFingerprintAfter` is optional, and when it is
 * omitted `ui_fingerprint_after` is set to undefined.
 *
 * @param actionId            Identifier of the action being reported.
 * @param timestamp           Timestamp recorded for the action.
 * @param actionType          Action type label, e.g. 'tap_element'.
 * @param selector            Selector the caller supplied, or null.
 * @param resolved            Resolved target descriptor, or null when resolution failed.
 * @param failureCode         Failure code to report.
 * @param retryable           Whether the caller may retry the action.
 * @param uiFingerprintBefore Fingerprint captured before the action, or null.
 * @param uiFingerprintAfter  Fingerprint captured after the action, when one was taken.
 */
private static _actionFailure(
  actionId: string,
  timestamp: number,
  actionType: string,
  selector: Record<string, unknown> | null,
  resolved: ActionTargetResolved | null,
  failureCode: ActionFailureCode,
  retryable: boolean,
  uiFingerprintBefore: string | null,
  uiFingerprintAfter?: string | null
): TapElementResponse {
  const target = { selector, resolved }

  const failure: TapElementResponse = {
    action_id: actionId,
    timestamp,
    action_type: actionType,
    target,
    success: false,
    failure_code: failureCode,
    retryable,
    ui_fingerprint_before: uiFingerprintBefore,
    ui_fingerprint_after: uiFingerprintAfter
  }
  return failure
}
170
+
115
171
  static _resetResolvedUiElementsForTests() {
116
172
  ToolsInteract._resolvedUiElements.clear()
117
173
  }
@@ -198,20 +254,17 @@ export class ToolsInteract {
198
254
  }
199
255
 
200
256
  static async tapElementHandler({ elementId }: { elementId: string }): Promise<TapElementResponse> {
201
- const action = 'tap' as const
257
+ const timestamp = Date.now()
258
+ const actionType = 'tap_element'
259
+ const actionId = nextActionId(actionType, timestamp)
260
+ const selector = { elementId }
202
261
  const resolved = ToolsInteract._resolvedUiElements.get(elementId)
203
262
  if (!resolved) {
204
- return {
205
- success: false,
206
- elementId,
207
- action,
208
- error: {
209
- code: 'element_not_found',
210
- message: 'Element ID was not found in the current UI context'
211
- }
212
- }
263
+ return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, null, 'STALE_REFERENCE', true, null)
213
264
  }
214
265
 
266
+ const fingerprintBefore = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
267
+
215
268
  const tree = await ToolsObserve.getUITreeHandler({ platform: resolved.platform, deviceId: resolved.deviceId }) as any
216
269
  const treePlatform = tree?.device?.platform === 'ios' ? 'ios' : resolved.platform
217
270
  const treeDeviceId = tree?.device?.id || resolved.deviceId
@@ -219,52 +272,22 @@ export class ToolsInteract {
219
272
  const currentMatch = ToolsInteract._findCurrentResolvedElement(elements, treePlatform, treeDeviceId, resolved)
220
273
 
221
274
  if (!currentMatch) {
222
- return {
223
- success: false,
224
- elementId,
225
- action,
226
- error: {
227
- code: 'element_not_found',
228
- message: 'Element ID is not present in the current UI context'
229
- }
230
- }
275
+ return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, null, 'STALE_REFERENCE', true, fingerprintBefore)
231
276
  }
232
277
 
278
+ const resolvedTarget = ToolsInteract._resolvedTargetFromElement(resolved.elementId, currentMatch.el, currentMatch.index)
279
+
233
280
  if (!ToolsInteract._isVisibleElement(currentMatch.el)) {
234
- return {
235
- success: false,
236
- elementId,
237
- action,
238
- error: {
239
- code: 'element_not_visible',
240
- message: 'Element is not visible'
241
- }
242
- }
281
+ return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
243
282
  }
244
283
 
245
284
  if (currentMatch.el.enabled === false) {
246
- return {
247
- success: false,
248
- elementId,
249
- action,
250
- error: {
251
- code: 'element_not_enabled',
252
- message: 'Element is not enabled'
253
- }
254
- }
285
+ return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
255
286
  }
256
287
 
257
288
  const bounds = ToolsInteract._normalizeBounds(currentMatch.el.bounds) ?? resolved.bounds
258
289
  if (!bounds || bounds[2] <= bounds[0] || bounds[3] <= bounds[1]) {
259
- return {
260
- success: false,
261
- elementId,
262
- action,
263
- error: {
264
- code: 'element_not_visible',
265
- message: 'Element does not have valid visible bounds'
266
- }
267
- }
290
+ return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
268
291
  }
269
292
 
270
293
  const x = Math.floor((bounds[0] + bounds[2]) / 2)
@@ -272,21 +295,22 @@ export class ToolsInteract {
272
295
  const tapResult = await ToolsInteract.tapHandler({ platform: resolved.platform, x, y, deviceId: resolved.deviceId })
273
296
 
274
297
  if (!tapResult.success) {
275
- return {
276
- success: false,
277
- elementId,
278
- action,
279
- error: {
280
- code: 'tap_failed',
281
- message: tapResult.error || 'Tap failed'
282
- }
283
- }
298
+ const fingerprintAfterFailure = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
299
+ return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'UNKNOWN', false, fingerprintBefore, fingerprintAfterFailure)
284
300
  }
285
301
 
302
+ const fingerprintAfter = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
286
303
  return {
304
+ action_id: actionId,
305
+ timestamp,
306
+ action_type: actionType,
307
+ target: {
308
+ selector,
309
+ resolved: resolvedTarget
310
+ },
287
311
  success: true,
288
- elementId,
289
- action
312
+ ui_fingerprint_before: fingerprintBefore,
313
+ ui_fingerprint_after: fingerprintAfter
290
314
  }
291
315
  }
292
316
 
@@ -692,6 +716,110 @@ export class ToolsInteract {
692
716
  return { success: false, reason: 'timeout', lastFingerprint, elapsedMs: Date.now() - start }
693
717
  }
694
718
 
719
/**
 * Checks whether the screen currently shown matches an expected fingerprint or screen name.
 *
 * The observed state comes from ToolsObserve.getScreenFingerprintHandler (`fingerprint`
 * and `activity`). Matching works as follows:
 *   - `fingerprint` given: success when it equals the observed fingerprint; `screen` is ignored.
 *   - only `screen` given: success when it equals the observed activity or the observed label.
 *     Unless platform is 'ios', the label is refreshed from ToolsObserve.getCurrentScreenHandler
 *     (`shortActivity`, then `activity`); errors from that lookup are ignored.
 *   - neither given: success is false.
 *
 * @returns success flag, observed and expected screens, and confidence (1 on success, otherwise 0).
 */
static async expectScreenHandler({
  platform,
  fingerprint,
  screen,
  deviceId
}: {
  platform?: 'android' | 'ios',
  fingerprint?: string,
  screen?: string,
  deviceId?: string
}): Promise<ExpectScreenResponse> {
  const observation = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }) as any
  const seenFingerprint = observation?.fingerprint ?? null
  const seenActivity = observation?.activity ?? null

  // The label starts as the fingerprint-derived activity and may be refined below.
  let seenLabel = seenActivity
  const matchByScreenName = !fingerprint && screen
  if (matchByScreenName && platform !== 'ios') {
    try {
      const current = await ToolsObserve.getCurrentScreenHandler({ deviceId }) as any
      seenLabel = current?.shortActivity || current?.activity || seenLabel
    } catch {
      // Keep fingerprint-derived activity when current-screen lookup is unavailable.
    }
  }

  let matched: boolean
  if (fingerprint) {
    matched = seenFingerprint === fingerprint
  } else if (screen) {
    // Either the raw activity or the refined label may satisfy the expectation.
    matched = screen === seenActivity || screen === seenLabel
  } else {
    matched = false
  }

  return {
    success: matched,
    observed_screen: {
      fingerprint: seenFingerprint,
      screen: seenLabel
    },
    expected_screen: {
      fingerprint: fingerprint ?? null,
      screen: screen ?? null
    },
    confidence: matched ? 1 : 0
  }
}
771
+
772
/**
 * Verifies that an element matching `selector` becomes visible.
 *
 * Delegates to ToolsInteract.waitForUIHandler with condition 'visible'. When that call
 * reports status 'success' together with an element, the element is echoed back with
 * missing fields normalised to null. Otherwise a failure is returned: failure_code is
 * 'UNKNOWN' when the wait reported error code 'INTERNAL_ERROR' and 'TIMEOUT' in every
 * other case; only 'TIMEOUT' is marked retryable.
 *
 * `element_id` is not used to locate the element. It is only echoed in the response
 * when the wait result carries no elementId of its own.
 *
 * @param timeout_ms       Passed through to waitForUIHandler (default 5000).
 * @param poll_interval_ms Passed through to waitForUIHandler (default 300).
 */
static async expectElementVisibleHandler({
  selector,
  element_id,
  timeout_ms = 5000,
  poll_interval_ms = 300,
  platform,
  deviceId
}: {
  selector: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean },
  element_id?: string,
  timeout_ms?: number,
  poll_interval_ms?: number,
  platform?: 'android' | 'ios',
  deviceId?: string
}): Promise<ExpectElementVisibleResponse> {
  const waitOutcome = await ToolsInteract.waitForUIHandler({
    selector,
    condition: 'visible',
    timeout_ms,
    poll_interval_ms,
    platform,
    deviceId
  }) as any

  const found = waitOutcome?.status === 'success' ? waitOutcome?.element : undefined

  // Failure first: anything other than "success with an element" is a miss.
  if (!found) {
    const failureCode = waitOutcome?.error?.code === 'INTERNAL_ERROR' ? 'UNKNOWN' : 'TIMEOUT'
    return {
      success: false,
      selector,
      element_id: element_id ?? null,
      failure_code: failureCode,
      retryable: failureCode === 'TIMEOUT'
    }
  }

  return {
    success: true,
    selector,
    element_id: found.elementId ?? element_id ?? null,
    element: {
      elementId: found.elementId ?? null,
      text: found.text ?? null,
      resource_id: found.resource_id ?? null,
      accessibility_id: found.accessibility_id ?? null,
      class: found.class ?? null,
      bounds: found.bounds ?? null,
      index: typeof found.index === 'number' ? found.index : null
    }
  }
}
822
+
695
823
  static async waitForUICore({ type = 'ui', query, timeoutMs = 30000, pollIntervalMs = 300, includeSnapshotOnFailure = true, match = 'present', stability_ms = 700, observationDelayMs = 0, platform, deviceId }: { type?: 'ui' | 'log' | 'screen' | 'idle', query?: string, timeoutMs?: number, pollIntervalMs?: number, includeSnapshotOnFailure?: boolean, match?: 'present'|'absent', stability_ms?: number, observationDelayMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
696
824
  const start = Date.now()
697
825
  const deadline = start + (timeoutMs || 0)