mobile-debug-mcp 0.22.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,921 @@
1
+ export const toolDefinitions = [
2
+ {
3
+ name: 'start_app',
4
+ description: `Purpose:
5
+ Launch a mobile app on Android or iOS.
6
+
7
+ Inputs:
8
+ - platform
9
+ - appId
10
+ - deviceId (optional)
11
+
12
+ Output Structure:
13
+ - action_id, timestamp, action_type
14
+ - target.selector = { appId }
15
+ - success = true when launch was dispatched successfully
16
+ - failure_code/retryable when launch dispatch fails
17
+ - ui_fingerprint_before/ui_fingerprint_after when available
18
+
19
+ Recommended Usage:
20
+ 1. Define the expected landing screen when it is known
21
+ 2. Call start_app
22
+ 3. If needed, wait for transition using wait_for_*
23
+ 4. Verify with expect_screen
24
+ 5. If verification fails, retry once or capture a snapshot
25
+
26
+ Verification Guidance:
27
+ - Follow RESOLVE → ACT → WAIT (if needed) → EXPECT
28
+ - expect_screen is the authoritative verification step when the landing screen is known
29
+ - Do not treat timing or screen change alone as final verification
30
+
31
+ Failure Handling:
32
+ - TIMEOUT → retry once
33
+ - UNKNOWN → inspect snapshot/logs before retrying`,
34
+ inputSchema: {
35
+ type: 'object',
36
+ properties: {
37
+ platform: {
38
+ type: 'string',
39
+ enum: ['android', 'ios']
40
+ },
41
+ appId: {
42
+ type: 'string',
43
+ description: 'Android package name or iOS bundle id'
44
+ },
45
+ deviceId: {
46
+ type: 'string',
47
+ description: 'Device UDID (iOS) or Serial (Android). Defaults to booted/connected.'
48
+ }
49
+ },
50
+ required: ['platform', 'appId']
51
+ }
52
+ },
53
+ {
54
+ name: 'terminate_app',
55
+ description: 'Terminate a mobile app on Android or iOS simulator',
56
+ inputSchema: {
57
+ type: 'object',
58
+ properties: {
59
+ platform: {
60
+ type: 'string',
61
+ enum: ['android', 'ios']
62
+ },
63
+ appId: {
64
+ type: 'string',
65
+ description: 'Android package name or iOS bundle id'
66
+ },
67
+ deviceId: {
68
+ type: 'string',
69
+ description: 'Device UDID (iOS) or Serial (Android). Defaults to booted/connected.'
70
+ }
71
+ },
72
+ required: ['platform', 'appId']
73
+ }
74
+ },
75
+ {
76
+ name: 'restart_app',
77
+ description: `Purpose:
78
+ Restart a mobile app on Android or iOS.
79
+
80
+ Inputs:
81
+ - platform
82
+ - appId
83
+ - deviceId (optional)
84
+
85
+ Output Structure:
86
+ - action_id, timestamp, action_type
87
+ - target.selector = { appId }
88
+ - success = true when the restart command completed
89
+ - failure_code/retryable when restart dispatch fails
90
+ - ui_fingerprint_before/ui_fingerprint_after when available
91
+
92
+ Recommended Usage:
93
+ 1. Define the expected landing screen when it is known
94
+ 2. Call restart_app
95
+ 3. If needed, wait for transition using wait_for_*
96
+ 4. Verify with expect_screen
97
+ 5. If verification fails, retry once or capture a snapshot
98
+
99
+ Verification Guidance:
100
+ - Follow RESOLVE → ACT → WAIT (if needed) → EXPECT
101
+ - expect_screen is the authoritative verification step when the reopened screen is known
102
+ - Do not treat timing or screen change alone as final verification
103
+
104
+ Failure Handling:
105
+ - TIMEOUT → retry once
106
+ - UNKNOWN → inspect snapshot/logs before retrying`,
107
+ inputSchema: {
108
+ type: 'object',
109
+ properties: {
110
+ platform: {
111
+ type: 'string',
112
+ enum: ['android', 'ios']
113
+ },
114
+ appId: {
115
+ type: 'string',
116
+ description: 'Android package name or iOS bundle id'
117
+ },
118
+ deviceId: {
119
+ type: 'string',
120
+ description: 'Device UDID (iOS) or Serial (Android). Defaults to booted/connected.'
121
+ }
122
+ },
123
+ required: ['platform', 'appId']
124
+ }
125
+ },
126
+ {
127
+ name: 'reset_app_data',
128
+ description: 'Reset app data (clear storage) for a mobile app on Android or iOS simulator',
129
+ inputSchema: {
130
+ type: 'object',
131
+ properties: {
132
+ platform: {
133
+ type: 'string',
134
+ enum: ['android', 'ios']
135
+ },
136
+ appId: {
137
+ type: 'string',
138
+ description: 'Android package name or iOS bundle id'
139
+ },
140
+ deviceId: {
141
+ type: 'string',
142
+ description: 'Device UDID (iOS) or Serial (Android). Defaults to booted/connected.'
143
+ }
144
+ },
145
+ required: ['platform', 'appId']
146
+ }
147
+ },
148
+ {
149
+ name: 'install_app',
150
+ description: 'Install an app on Android or iOS. Accepts a built binary (apk/.ipa/.app) or a project directory to build then install. platform and projectType are required.',
151
+ inputSchema: {
152
+ type: 'object',
153
+ properties: {
154
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Platform to install to (required).' },
155
+ projectType: { type: 'string', enum: ['native', 'kmp', 'react-native', 'flutter'], description: 'Project type to guide build/install tool selection (required).' },
156
+ appPath: { type: 'string', description: 'Path to APK, .app, .ipa, or project directory' },
157
+ deviceId: { type: 'string', description: 'Device UDID (iOS) or Serial (Android). Defaults to booted/connected.' }
158
+ },
159
+ required: ['platform', 'projectType', 'appPath']
160
+ }
161
+ },
162
+ {
163
+ name: 'build_app',
164
+ description: 'Build a project for Android or iOS and return the built artifact path. Does not install. platform and projectType are required.',
165
+ inputSchema: {
166
+ type: 'object',
167
+ properties: {
168
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Platform to build for (required).' },
169
+ projectType: { type: 'string', enum: ['native', 'kmp', 'react-native', 'flutter'], description: 'Project type to guide build tool selection (required).' },
170
+ projectPath: { type: 'string', description: 'Path to project directory (contains gradlew or xcodeproj/xcworkspace)' },
171
+ variant: { type: 'string', description: 'Optional build variant (e.g., Debug/Release)' }
172
+ },
173
+ required: ['platform', 'projectType', 'projectPath']
174
+ }
175
+ },
176
+ {
177
+ name: 'get_logs',
178
+ description: 'Get recent logs from Android or iOS simulator. Returns device metadata and structured logs suitable for AI consumption.',
179
+ inputSchema: {
180
+ type: 'object',
181
+ properties: {
182
+ platform: {
183
+ type: 'string',
184
+ enum: ['android', 'ios']
185
+ },
186
+ appId: {
187
+ type: 'string',
188
+ description: 'Filter by Android package name or iOS bundle id'
189
+ },
190
+ deviceId: {
191
+ type: 'string',
192
+ description: 'Device UDID (iOS) or Serial (Android). Defaults to booted/connected.'
193
+ },
194
+ pid: { type: 'number', description: 'Filter by process id' },
195
+ tag: { type: 'string', description: 'Filter by tag (Android) or subsystem/category (iOS)' },
196
+ level: { type: 'string', description: 'Log level filter (VERBOSE, DEBUG, INFO, WARN, ERROR)' },
197
+ contains: { type: 'string', description: 'Substring to match in log message' },
198
+ since_seconds: { type: 'number', description: 'Only return logs from the last N seconds' },
199
+ limit: { type: 'number', description: 'Override default number of returned lines' },
200
+ lines: {
201
+ type: 'number',
202
+ description: 'Legacy - number of log lines (android only)'
203
+ }
204
+ },
205
+ required: ['platform']
206
+ }
207
+ },
208
+ {
209
+ name: 'list_devices',
210
+ description: 'List connected devices and their metadata (android + ios).',
211
+ inputSchema: {
212
+ type: 'object',
213
+ properties: {
214
+ platform: { type: 'string', enum: ['android', 'ios'] }
215
+ }
216
+ }
217
+ },
218
+ {
219
+ name: 'get_system_status',
220
+ description: 'Quick healthcheck of local mobile debugging environment (adb, devices, logs, env, iOS).',
221
+ inputSchema: { type: 'object', properties: {} }
222
+ },
223
+ {
224
+ name: 'capture_screenshot',
225
+ description: 'Capture a screenshot from an Android device or iOS simulator. Returns device metadata and the screenshot image.',
226
+ inputSchema: {
227
+ type: 'object',
228
+ properties: {
229
+ platform: {
230
+ type: 'string',
231
+ enum: ['android', 'ios']
232
+ },
233
+ deviceId: {
234
+ type: 'string',
235
+ description: 'Device UDID (iOS) or Serial (Android). Defaults to booted/connected.'
236
+ }
237
+ },
238
+ required: ['platform']
239
+ }
240
+ },
241
+ {
242
+ name: 'capture_debug_snapshot',
243
+ description: 'Capture a complete debug snapshot (screenshot, ui tree, activity, fingerprint, logs). Returns structured JSON.',
244
+ inputSchema: {
245
+ type: 'object',
246
+ properties: {
247
+ reason: { type: 'string', description: 'Optional reason for snapshot' },
248
+ includeLogs: { type: 'boolean', description: 'Whether to include logs', default: true },
249
+ logLines: { type: 'number', description: 'Maximum number of log lines to include', default: 200 },
250
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override' },
251
+ appId: { type: 'string', description: 'Optional appId to scope logs (package/bundle id)' },
252
+ deviceId: { type: 'string', description: 'Optional device serial/udid' },
253
+ sessionId: { type: 'string', description: 'Optional log stream session id to prefer' }
254
+ }
255
+ }
256
+ },
257
+ {
258
+ name: 'start_log_stream',
259
+ description: 'Start streaming logs for a target application on Android or iOS. For Android this uses adb logcat --pid=<pid>; for iOS it streams `xcrun simctl spawn <device> log stream` with a predicate.',
260
+ inputSchema: {
261
+ type: 'object',
262
+ properties: {
263
+ platform: { type: 'string', enum: ['android', 'ios'], default: 'android' },
264
+ packageName: { type: 'string', description: 'Android package name or iOS bundle id' },
265
+ level: { type: 'string', enum: ['error', 'warn', 'info', 'debug'], default: 'error' },
266
+ deviceId: { type: 'string', description: 'Device Serial (Android) or UDID (iOS). Defaults to connected/booted device.' },
267
+ sessionId: { type: 'string', description: 'Session identifier for the log stream' }
268
+ },
269
+ required: ['packageName']
270
+ }
271
+ },
272
+ {
273
+ name: 'read_log_stream',
274
+ description: 'Read accumulated log stream entries for the active session.',
275
+ inputSchema: {
276
+ type: 'object',
277
+ properties: {
278
+ sessionId: { type: 'string' }
279
+ }
280
+ }
281
+ },
282
+ {
283
+ name: 'stop_log_stream',
284
+ description: 'Stop an active log stream for the session.',
285
+ inputSchema: {
286
+ type: 'object',
287
+ properties: {
288
+ sessionId: { type: 'string' }
289
+ }
290
+ }
291
+ },
292
+ {
293
+ name: 'get_ui_tree',
294
+ description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content.',
295
+ inputSchema: {
296
+ type: 'object',
297
+ properties: {
298
+ platform: {
299
+ type: 'string',
300
+ enum: ['android', 'ios'],
301
+ description: 'Platform to get UI tree for'
302
+ },
303
+ deviceId: {
304
+ type: 'string',
305
+ description: 'Device Serial (Android) or UDID (iOS). Defaults to connected/booted device.'
306
+ }
307
+ },
308
+ required: ['platform']
309
+ }
310
+ },
311
+ {
312
+ name: 'get_current_screen',
313
+ description: 'Get the currently visible activity on an Android device. Returns package and activity name.',
314
+ inputSchema: {
315
+ type: 'object',
316
+ properties: {
317
+ deviceId: {
318
+ type: 'string',
319
+ description: 'Device Serial (Android). Defaults to connected/booted device.'
320
+ }
321
+ }
322
+ }
323
+ },
324
+ {
325
+ name: 'get_screen_fingerprint',
326
+ description: 'Generate a stable fingerprint representing the current visible screen (activity + visible UI elements).',
327
+ inputSchema: {
328
+ type: 'object',
329
+ properties: {
330
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override (android|ios)' },
331
+ deviceId: { type: 'string', description: 'Optional device id/udid to target' }
332
+ }
333
+ }
334
+ },
335
+ {
336
+ name: 'wait_for_screen_change',
337
+ description: `Purpose:
338
+ Detect that a screen transition has occurred by waiting for the current fingerprint to differ from a previous fingerprint.
339
+
340
+ Capabilities:
341
+ - Synchronization for uncertain navigation timing
342
+ - Detection that something changed on screen
343
+
344
+ Constraints:
345
+ - Does not verify correctness of the resulting state
346
+ - Must not be used alone to confirm action success when an applicable expect_* tool exists
347
+
348
+ Recommended Usage:
349
+ 1. Capture or define the expected outcome
350
+ 2. Call an action tool
351
+ 3. Use wait_for_screen_change when transition timing is uncertain
352
+ 4. Follow with expect_screen when the expected destination is known`,
353
+ inputSchema: {
354
+ type: 'object',
355
+ properties: {
356
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override (android|ios)' },
357
+ previousFingerprint: { type: 'string', description: 'The fingerprint to compare against (required)' },
358
+ timeoutMs: { type: 'number', description: 'Timeout in ms to wait for change (default 5000)', default: 5000 },
359
+ pollIntervalMs: { type: 'number', description: 'Polling interval in ms (default 300)', default: 300 },
360
+ deviceId: { type: 'string', description: 'Optional device id/udid to target' }
361
+ },
362
+ required: ['previousFingerprint']
363
+ }
364
+ },
365
+ {
366
+ name: 'expect_screen',
367
+ description: `Purpose:
368
+ Deterministically verify that the intended navigation outcome of an action has occurred.
369
+
370
+ Inputs:
371
+ - fingerprint: preferred exact-match screen fingerprint
372
+ - screen: exact semantic screen identifier when a fingerprint is not available
373
+
374
+ Output Structure:
375
+ - success: true when the expected screen matches the observed screen
376
+ - observed_screen: current fingerprint and screen identifier
377
+ - expected_screen: the expected fingerprint and/or screen identifier
378
+ - confidence: 1 for an exact match, otherwise 0
379
+
380
+ Recommended Usage:
381
+ 1. Define the expected screen before executing the action
382
+ 2. Resolve the target element or screen state
383
+ 3. Call an action tool such as tap_element
384
+ 4. If needed, wait for transition using wait_for_*
385
+ 5. Call expect_screen as the final verification step
386
+ 6. If success=false, treat the outcome as unverified and follow the action tool retry guidance
387
+
388
+ Verification Guidance:
389
+ - Primary and authoritative verification tool for navigation outcomes
390
+ - Prefer fingerprint whenever you have one
391
+ - Use screen only as a fallback exact match against known identifiers
392
+ - Works best when the expected screen identifier is known ahead of time
393
+ - If the expected screen is not already known, capture or define it before executing the action
394
+
395
+ Constraints:
396
+ - Returns structured binary success/failure only
397
+ - Must not rely on natural-language interpretation or reasoning
398
+
399
+ Failure Handling:
400
+ - success=false means the expected screen was not reached; retry or recover using the action tool's failure strategy`,
401
+ inputSchema: {
402
+ type: 'object',
403
+ properties: {
404
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override (android|ios)' },
405
+ fingerprint: { type: 'string', description: 'Expected screen fingerprint. Preferred verification mechanism.' },
406
+ screen: { type: 'string', description: 'Expected exact screen identifier when no fingerprint is available.' },
407
+ deviceId: { type: 'string', description: 'Optional device id/udid to target' }
408
+ }
409
+ }
410
+ },
411
+ {
412
+ name: 'expect_element_visible',
413
+ description: `Purpose:
414
+ Deterministically verify that the intended UI outcome of an action has occurred by confirming a target element is visible.
415
+
416
+ Inputs:
417
+ - selector: required selector used to resolve the target element
418
+ - element_id: optional previously resolved element identifier used only as context
419
+
420
+ Output Structure:
421
+ - success: true when the element is visible
422
+ - selector: selector used for verification
423
+ - element_id: resolved element identifier when available
424
+ - element: minimal resolved element info when visible
425
+ - failure_code: TIMEOUT or UNKNOWN when verification fails
426
+ - retryable: true when failure_code=TIMEOUT
427
+
428
+ Recommended Usage:
429
+ 1. Define the expected element state before executing the action
430
+ 2. Resolve the target element or triggering control
431
+ 3. Call an action tool such as tap_element
432
+ 4. If needed, wait for UI availability using wait_for_*
433
+ 5. Call expect_element_visible as the final verification step
434
+ 6. If success=false, follow the action tool retry guidance
435
+
436
+ Verification Guidance:
437
+ - Primary and authoritative verification tool for expected element appearance or visibility
438
+ - Use this when the screen should stay the same but the UI should reveal or update a specific element
439
+ - selector is the primary input; element_id is an optional optimization only
440
+ - The tool resolves the selector internally when needed
441
+
442
+ Constraints:
443
+ - Returns structured binary success/failure only
444
+ - Must not rely on natural-language interpretation or reasoning
445
+
446
+ Failure Handling:
447
+ - TIMEOUT → retry verification once or retry the action after re-resolving
448
+ - UNKNOWN → capture a snapshot and stop`,
449
+ inputSchema: {
450
+ type: 'object',
451
+ properties: {
452
+ selector: {
453
+ type: 'object',
454
+ properties: {
455
+ text: { type: 'string' },
456
+ resource_id: { type: 'string' },
457
+ accessibility_id: { type: 'string' },
458
+ contains: { type: 'boolean', default: false }
459
+ }
460
+ },
461
+ element_id: { type: 'string', description: 'Optional previously resolved element identifier.' },
462
+ timeout_ms: { type: 'number', default: 5000 },
463
+ poll_interval_ms: { type: 'number', default: 300 },
464
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override' },
465
+ deviceId: { type: 'string', description: 'Optional device serial/udid' }
466
+ },
467
+ required: ['selector']
468
+ }
469
+ },
470
+ {
471
+ name: 'wait_for_ui',
472
+ description: `Purpose:
473
+ Resolve elements and/or detect that a UI transition or availability condition has occurred.
474
+
475
+ Capabilities:
476
+ - Deterministic element resolution
477
+ - Synchronization when element timing or availability is uncertain
478
+
479
+ Constraints:
480
+ - Does not verify correctness of the resulting state
481
+ - Must not be used alone to confirm action success when an applicable expect_* tool exists
482
+
483
+ Recommended Usage:
484
+ 1. Use wait_for_ui to resolve an element before acting or to wait for UI readiness
485
+ 2. Call the action tool
486
+ 3. If the expected outcome is known, follow with expect_* as final verification`,
487
+ inputSchema: {
488
+ type: 'object',
489
+ properties: {
490
+ selector: {
491
+ type: 'object',
492
+ properties: {
493
+ text: { type: 'string' },
494
+ resource_id: { type: 'string' },
495
+ accessibility_id: { type: 'string' },
496
+ contains: { type: 'boolean', description: 'When true, perform substring matching', default: false }
497
+ }
498
+ },
499
+ condition: { type: 'string', enum: ['exists', 'not_exists', 'visible', 'clickable'], default: 'exists' },
500
+ timeout_ms: { type: 'number', default: 60000 },
501
+ poll_interval_ms: { type: 'number', default: 300 },
502
+ match: { type: 'object', properties: { index: { type: 'number' } } },
503
+ retry: { type: 'object', properties: { max_attempts: { type: 'number', default: 1 }, backoff_ms: { type: 'number', default: 0 } } },
504
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override' },
505
+ deviceId: { type: 'string', description: 'Optional device serial/udid' }
506
+ }
507
+ }
508
+ },
509
+ {
510
+ name: 'find_element',
511
+ description: 'Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.',
512
+ inputSchema: {
513
+ type: 'object',
514
+ properties: {
515
+ query: { type: 'string', description: 'Search query (text or label)' },
516
+ exact: { type: 'boolean', description: 'Require exact match (true/false)', default: false },
517
+ timeoutMs: { type: 'number', description: 'Timeout in ms to keep searching', default: 3000 },
518
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override' },
519
+ deviceId: { type: 'string', description: 'Optional device serial/udid' }
520
+ },
521
+ required: ['query']
522
+ }
523
+ },
524
+ {
525
+ name: 'tap',
526
+ description: `Purpose:
527
+ Dispatch a tap at specific screen coordinates.
528
+
529
+ Inputs:
530
+ - x, y coordinates
531
+ - platform (optional)
532
+ - deviceId (optional)
533
+
534
+ Output Structure:
535
+ - action_id, timestamp, action_type
536
+ - target.selector = { x, y }
537
+ - success = true when the tap was dispatched
538
+ - failure_code/retryable when dispatch fails
539
+ - ui_fingerprint_before/ui_fingerprint_after when available
540
+
541
+ Recommended Usage:
542
+ 1. Resolve coordinates deterministically
543
+ 2. Call tap
544
+ 3. If needed, wait for transition using wait_for_*
545
+ 4. Verify with expect_screen or expect_element_visible depending on the intended outcome
546
+
547
+ Verification Guidance:
548
+ - Prefer tap_element over tap when an element can be resolved
549
+ - Follow RESOLVE → ACT → WAIT (if needed) → EXPECT
550
+ - Use expect_screen for navigation and expect_element_visible for local UI changes
551
+ - Do not use wait_for_* alone as final verification when an applicable expect_* tool exists
552
+
553
+ Failure Handling:
554
+ - TIMEOUT → retry once
555
+ - UNKNOWN → capture a snapshot and stop`,
556
+ inputSchema: {
557
+ type: 'object',
558
+ properties: {
559
+ platform: {
560
+ type: 'string',
561
+ enum: ['android', 'ios'],
562
+ description: 'Platform to tap on'
563
+ },
564
+ x: {
565
+ type: 'number',
566
+ description: 'X coordinate'
567
+ },
568
+ y: {
569
+ type: 'number',
570
+ description: 'Y coordinate'
571
+ },
572
+ deviceId: {
573
+ type: 'string',
574
+ description: 'Device Serial/UDID. Defaults to connected/booted device.'
575
+ }
576
+ },
577
+ required: ['x', 'y']
578
+ }
579
+ },
580
+ {
581
+ name: 'tap_element',
582
+ description: `Purpose:
583
+ Tap a previously resolved UI element using its elementId.
584
+
585
+ Inputs:
586
+ - elementId: a resolved UI element identifier returned by wait_for_ui
587
+
588
+ Output Structure:
589
+ - action_id: unique timestamp-based action identifier
590
+ - timestamp: epoch milliseconds for the action attempt
591
+ - action_type: "tap_element"
592
+ - target.selector: original target handle ({ elementId })
593
+ - target.resolved: minimal resolved element info used for the tap
594
+ - success: true when the tap was dispatched
595
+ - failure_code: present when success=false
596
+ - retryable: present when failure_code exists
597
+ - ui_fingerprint_before/ui_fingerprint_after: optional fingerprints captured around the action
598
+
599
+ Recommended Usage:
600
+ 1. Resolve the target with wait_for_ui or another deterministic resolver
601
+ 2. Call tap_element
602
+ 3. If needed, wait for transition using wait_for_*
603
+ 4. Verify outcome using expect_*
604
+ - use expect_screen when navigation is expected
605
+ - use expect_element_visible when the UI change is local
606
+ 5. If verification fails, inspect failure_code and follow the retry strategy below
607
+
608
+ Verification Guidance:
609
+ - Follow RESOLVE → ACT → WAIT (if needed) → EXPECT
610
+ - Prefer expect_screen for navigation or modal transitions
611
+ - Prefer expect_element_visible when the tap should reveal or update a specific element
612
+ - wait_for_* may be used for resolution and synchronization, but not as the final verification step when an applicable expect_* tool exists
613
+ - Do not treat tap_element.success as outcome success; it only means the tap was executed
614
+
615
+ Failure Handling:
616
+ - STALE_REFERENCE → re-resolve the element, then retry
617
+ - ELEMENT_NOT_INTERACTABLE → wait or refine the target, then retry
618
+ - UNKNOWN → capture a snapshot and stop
619
+
620
+ This tool reports execution success only. Verification must be done with a separate expect_* tool.`,
621
+ inputSchema: {
622
+ type: 'object',
623
+ properties: {
624
+ elementId: {
625
+ type: 'string',
626
+ description: 'A unique element identifier returned by wait_for_ui'
627
+ }
628
+ },
629
+ required: ['elementId']
630
+ }
631
+ },
632
+ {
633
+ name: 'swipe',
634
+ description: `Purpose:
635
+ Dispatch a swipe gesture on Android or iOS.
636
+
637
+ Inputs:
638
+ - start and end coordinates
639
+ - duration
640
+ - platform/deviceId (optional)
641
+
642
+ Output Structure:
643
+ - action_id, timestamp, action_type
644
+ - target.selector = { x1, y1, x2, y2, duration }
645
+ - success = true when the swipe was dispatched
646
+ - failure_code/retryable when dispatch fails
647
+ - ui_fingerprint_before/ui_fingerprint_after when available
648
+
649
+ Recommended Usage:
650
+ 1. Determine swipe coordinates
651
+ 2. Call swipe
652
+ 3. If needed, wait for transition using wait_for_*
653
+ 4. Verify with expect_screen or expect_element_visible when a deterministic outcome is expected
654
+
655
+ Verification Guidance:
656
+ - Swipe outcomes are less predictable; choose the most specific verifier available for the intended effect
657
+ - Follow RESOLVE → ACT → WAIT (if needed) → EXPECT
658
+ - Do not use wait_for_* alone as final verification when an applicable expect_* tool exists
659
+
660
+ Failure Handling:
661
+ - TIMEOUT → retry once
662
+ - UNKNOWN → capture a snapshot and stop`,
663
+ inputSchema: {
664
+ type: 'object',
665
+ properties: {
666
+ platform: {
667
+ type: 'string',
668
+ enum: ['android', 'ios'],
669
+ description: 'Platform to swipe on (android or ios)'
670
+ },
671
+ x1: { type: 'number', description: 'Start X coordinate' },
672
+ y1: { type: 'number', description: 'Start Y coordinate' },
673
+ x2: { type: 'number', description: 'End X coordinate' },
674
+ y2: { type: 'number', description: 'End Y coordinate' },
675
+ duration: { type: 'number', description: 'Duration in ms' },
676
+ deviceId: {
677
+ type: 'string',
678
+ description: 'Device Serial/UDID. Defaults to connected/booted device.'
679
+ }
680
+ },
681
+ required: ['x1', 'y1', 'x2', 'y2', 'duration']
682
+ }
683
+ },
684
+ {
685
+ name: 'scroll_to_element',
686
+ description: `Purpose:
687
+ Scroll until a target element becomes visible.
688
+
689
+ Inputs:
690
+ - platform
691
+ - selector
692
+ - direction, maxScrolls, scrollAmount, deviceId (optional)
693
+
694
+ Output Structure:
695
+ - action_id, timestamp, action_type
696
+ - target.selector = original selector
697
+ - target.resolved = minimal resolved element info when found
698
+ - success = true when scrolling produced a visible target element
699
+ - failure_code/retryable when the target was not reached
700
+ - ui_fingerprint_before/ui_fingerprint_after when available
701
+
702
+ Recommended Usage:
703
+ 1. Resolve the target selector
704
+ 2. Call scroll_to_element
705
+ 3. If needed, wait for UI stabilization using wait_for_*
706
+ 4. Verify with expect_element_visible when the expected element visibility is known
707
+ 5. If success=false, follow failure handling before retrying
708
+
709
+ Verification Guidance:
710
+ - Follow RESOLVE → ACT → WAIT (if needed) → EXPECT
711
+ - Use expect_element_visible when you need an explicit post-scroll confirmation
712
+ - Do not use wait_for_* alone as final verification when an applicable expect_* tool exists
713
+
714
+ Failure Handling:
715
+ - NAVIGATION_NO_CHANGE → adjust scroll direction or stop
716
+ - TIMEOUT → retry with refined selector or larger scroll budget
717
+ - UNKNOWN → capture a snapshot and stop`,
718
+ inputSchema: {
719
+ type: 'object',
720
+ properties: {
721
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Platform to operate on (required)' },
722
+ selector: {
723
+ type: 'object',
724
+ properties: {
725
+ text: { type: 'string' },
726
+ resourceId: { type: 'string' },
727
+ contentDesc: { type: 'string' },
728
+ className: { type: 'string' }
729
+ }
730
+ },
731
+ direction: { type: 'string', enum: ['down', 'up'], default: 'down' },
732
+ maxScrolls: { type: 'number', default: 10 },
733
+ scrollAmount: { type: 'number', default: 0.7 },
734
+ deviceId: { type: 'string', description: 'Device UDID (iOS) or Serial (Android). Defaults to booted/connected.' }
735
+ },
736
+ required: ['platform', 'selector']
737
+ }
738
+ },
739
+ {
740
+ name: 'type_text',
741
+ description: `Purpose:
742
+ Type text into the currently focused Android input field.
743
+
744
+ Inputs:
745
+ - text
746
+ - platform/deviceId (optional)
747
+
748
+ Output Structure:
749
+ - action_id, timestamp, action_type
750
+ - target.selector = { text }
751
+ - success = true when text input was dispatched
752
+ - failure_code/retryable when dispatch fails
753
+ - ui_fingerprint_before/ui_fingerprint_after when available
754
+
755
+ Recommended Usage:
756
+ 1. Resolve or focus the target input first
757
+ 2. Call type_text
758
+ 3. If needed, wait for UI stabilization using wait_for_*
759
+ 4. Verify with expect_element_visible or expect_screen, depending on the intended outcome
760
+
761
+ Verification Guidance:
762
+ - Prefer verifying the next expected element or screen state instead of inferring success from the text action alone
763
+ - Follow RESOLVE → ACT → WAIT (if needed) → EXPECT
764
+ - Do not use wait_for_* alone as final verification when an applicable expect_* tool exists
765
+
766
+ Failure Handling:
767
+ - TIMEOUT → retry once
768
+ - UNKNOWN → re-focus the input or capture a snapshot`,
769
+ inputSchema: {
770
+ type: 'object',
771
+ properties: {
772
+ platform: {
773
+ type: 'string',
774
+ enum: ['android'],
775
+ description: 'Platform to type on (currently only android supported)'
776
+ },
777
+ text: {
778
+ type: 'string',
779
+ description: 'The text to type'
780
+ },
781
+ deviceId: {
782
+ type: 'string',
783
+ description: 'Device Serial/UDID. Defaults to connected/booted device.'
784
+ }
785
+ },
786
+ required: ['text']
787
+ }
788
+ },
789
+ { // press_back tool entry: dispatches the Android Back action (contract spelled out in the description).
790
+ name: 'press_back',
791
+ description: `Purpose:
792
+ Dispatch the Android Back action.
793
+
794
+ Inputs:
795
+ - platform/deviceId (optional)
796
+
797
+ Output Structure:
798
+ - action_id, timestamp, action_type
799
+ - target.selector = { key: "back" }
800
+ - success = true when the back action was dispatched
801
+ - failure_code/retryable when dispatch fails
802
+ - ui_fingerprint_before/ui_fingerprint_after when available
803
+
804
+ Recommended Usage:
805
+ 1. Call press_back
806
+ 2. If needed, wait for transition using wait_for_*
807
+ 3. Verify with expect_screen when a known destination is expected
808
+ 4. If verification fails, retry once or recover explicitly
809
+
810
+ Verification Guidance:
811
+ - Back outcomes can vary by screen, so verify against the intended destination when possible
812
+ - Follow RESOLVE → ACT → WAIT (if needed) → EXPECT
813
+ - Do not use wait_for_* alone as final verification when an applicable expect_* tool exists
814
+
815
+ Failure Handling:
816
+ - TIMEOUT → retry once
817
+ - UNKNOWN → capture a snapshot and stop`,
818
+ inputSchema: { // No "required" list in this schema: both platform and deviceId may be omitted.
819
+ type: 'object',
820
+ properties: {
821
+ platform: {
822
+ type: 'string',
823
+ enum: ['android'], // The schema accepts only 'android' for this tool.
824
+ description: 'Platform (currently only android supported)'
825
+ },
826
+ deviceId: {
827
+ type: 'string',
828
+ description: 'Device Serial/UDID. Defaults to connected/booted device.'
829
+ }
830
+ }
831
+ }
832
+ },
833
+ { // classify_action_outcome tool entry: maps caller-supplied signals to a single outcome label.
834
+ name: 'classify_action_outcome',
835
+ description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
836
+
837
+ MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
838
+
839
+ HOW TO GATHER INPUTS before calling:
840
+ 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
841
+ 2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
842
+ 3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
843
+
844
+ RULES (applied in order — stop at first match):
845
+ 1. If uiChanged=true OR expectedElementVisible=true → outcome=success
846
+ 2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
847
+ 3. If any request has status=failure or retryable → outcome=backend_failure
848
+ 4. If no requests returned → outcome=no_op
849
+ 5. If all requests succeeded → outcome=ui_failure
850
+ 6. Otherwise → outcome=unknown
851
+
852
+ BEHAVIOUR after outcome:
853
+ - success → continue
854
+ - no_op → retry the action once or re-resolve the element
855
+ - backend_failure → stop and report the failing endpoint
856
+ - ui_failure → stop and report failure
857
+ - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
858
+ inputSchema: { // Only uiChanged is required (see the required list at the end of this schema).
859
+ type: 'object',
860
+ properties: {
861
+ uiChanged: {
862
+ type: 'boolean',
863
+ description: 'true if the screen fingerprint or activity changed after the action. Use wait_for_screen_change or compare get_screen_fingerprint before and after.'
864
+ },
865
+ expectedElementVisible: {
866
+ type: 'boolean',
867
+ description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
868
+ },
869
+ networkRequests: { // Optional; per the description it is supplied on the follow-up call only.
870
+ type: 'array',
871
+ description: 'Pass this only after calling get_network_activity as instructed by nextAction. Map each request to endpoint + status.',
872
+ items: {
873
+ type: 'object',
874
+ properties: {
875
+ endpoint: { type: 'string', description: 'Request endpoint or full URL' },
876
+ status: { type: 'string', enum: ['success', 'failure', 'retryable'], description: 'Outcome of the request' }
877
+ },
878
+ required: ['endpoint', 'status'] // Each request item must carry both fields.
879
+ }
880
+ },
881
+ hasLogErrors: { // NOTE(review): the RULES list in the description never mentions this flag — confirm how the handler uses it.
882
+ type: 'boolean',
883
+ description: 'true if structured log errors were observed (e.g. from read_log_stream). Optional — include if you have already read logs.'
884
+ }
885
+ },
886
+ required: ['uiChanged']
887
+ }
888
+ },
889
+ { // get_network_activity tool entry: per its description, called only as the follow-up step requested by classify_action_outcome.
890
+ name: 'get_network_activity',
891
+ description: `Returns structured network events captured from platform logs since the last action.
892
+
893
+ Call this only when classify_action_outcome returns nextAction="call_get_network_activity".
894
+ Do not call more than once per action.
895
+
896
+ Events are filtered to significant (non-background) requests only.
897
+ Each event includes endpoint, method, statusCode, networkError, status, and durationMs.
898
+
899
+ status values:
900
+ - success: HTTP 2xx or request detected with no error signal
901
+ - failure: HTTP 4xx
902
+ - retryable: HTTP 5xx, network error (timeout, dns_error, tls_error, etc.)
903
+
904
+ Returns { requests: [], count: 0 } when no credible network signals are found.`,
905
+ inputSchema: { // platform is mandatory for this tool; deviceId may be omitted.
906
+ type: 'object',
907
+ properties: {
908
+ platform: {
909
+ type: 'string',
910
+ enum: ['android', 'ios'], // Both platforms are accepted by this schema.
911
+ description: 'Platform to read network logs from'
912
+ },
913
+ deviceId: {
914
+ type: 'string',
915
+ description: 'Device Serial (Android) or UDID (iOS). Defaults to connected/booted device.'
916
+ }
917
+ },
918
+ required: ['platform']
919
+ }
920
+ }
921
+ ];