mobile-debug-mcp 0.24.2 → 0.24.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/interact/index.js +4 -2
- package/dist/server/common.js +28 -2
- package/dist/server/tool-definitions.js +13 -10
- package/dist/server/tool-handlers.js +3 -4
- package/docs/CHANGELOG.md +6 -0
- package/docs/specs/baseline-spec-v0.md +312 -0
- package/docs/specs/mcp-tooling-spec-v1.md +281 -0
- package/docs/tools/interact.md +27 -3
- package/docs/tools/manage.md +1 -1
- package/package.json +1 -1
- package/src/interact/index.ts +5 -3
- package/src/server/common.ts +28 -2
- package/src/server/tool-definitions.ts +13 -10
- package/src/server/tool-handlers.ts +4 -4
- package/src/types.ts +1 -1
- package/test/unit/server/contract.test.ts +13 -0
- package/test/unit/server/response_shapes.test.ts +28 -1
package/dist/interact/index.js
CHANGED
|
@@ -185,9 +185,10 @@ export class ToolsInteract {
|
|
|
185
185
|
return await interact.tap(x, y, resolved.id);
|
|
186
186
|
}
|
|
187
187
|
static async tapElementHandler({ elementId }) {
|
|
188
|
-
const
|
|
188
|
+
const timestampMs = Date.now();
|
|
189
|
+
const timestamp = new Date(timestampMs).toISOString();
|
|
189
190
|
const actionType = 'tap_element';
|
|
190
|
-
const actionId = nextActionId(actionType,
|
|
191
|
+
const actionId = nextActionId(actionType, timestampMs);
|
|
191
192
|
const selector = { elementId };
|
|
192
193
|
const resolved = ToolsInteract._resolvedUiElements.get(elementId);
|
|
193
194
|
if (!resolved) {
|
|
@@ -225,6 +226,7 @@ export class ToolsInteract {
|
|
|
225
226
|
action_id: actionId,
|
|
226
227
|
timestamp,
|
|
227
228
|
action_type: actionType,
|
|
229
|
+
...(tree?.device ? { device: tree.device } : {}),
|
|
228
230
|
target: {
|
|
229
231
|
selector,
|
|
230
232
|
resolved: resolvedTarget
|
package/dist/server/common.js
CHANGED
|
@@ -49,9 +49,10 @@ export function inferScrollFailure(message) {
|
|
|
49
49
|
return { failureCode: 'UNKNOWN', retryable: false };
|
|
50
50
|
}
|
|
51
51
|
export function buildActionExecutionResult({ actionType, device, selector, resolved, success, uiFingerprintBefore, uiFingerprintAfter, failure, details }) {
|
|
52
|
-
const
|
|
52
|
+
const timestampMs = Date.now();
|
|
53
|
+
const timestamp = new Date(timestampMs).toISOString();
|
|
53
54
|
return {
|
|
54
|
-
action_id: nextActionId(actionType,
|
|
55
|
+
action_id: nextActionId(actionType, timestampMs),
|
|
55
56
|
timestamp,
|
|
56
57
|
action_type: actionType,
|
|
57
58
|
...(device ? { device } : {}),
|
|
@@ -66,3 +67,28 @@ export function buildActionExecutionResult({ actionType, device, selector, resol
|
|
|
66
67
|
...(details ? { details } : {})
|
|
67
68
|
};
|
|
68
69
|
}
|
|
70
|
+
export function wrapToolError(name, error) {
|
|
71
|
+
const message = error instanceof Error
|
|
72
|
+
? error.message
|
|
73
|
+
: typeof error === 'object' && error !== null
|
|
74
|
+
? (() => {
|
|
75
|
+
try {
|
|
76
|
+
return JSON.stringify(error, null, 2);
|
|
77
|
+
}
|
|
78
|
+
catch {
|
|
79
|
+
return '[unserializable error object]';
|
|
80
|
+
}
|
|
81
|
+
})()
|
|
82
|
+
: String(error);
|
|
83
|
+
return {
|
|
84
|
+
content: [{
|
|
85
|
+
type: 'text',
|
|
86
|
+
text: JSON.stringify({
|
|
87
|
+
error: {
|
|
88
|
+
tool: name,
|
|
89
|
+
message
|
|
90
|
+
}
|
|
91
|
+
}, null, 2)
|
|
92
|
+
}]
|
|
93
|
+
};
|
|
94
|
+
}
|
|
@@ -10,7 +10,7 @@ Inputs:
|
|
|
10
10
|
- deviceId (optional)
|
|
11
11
|
|
|
12
12
|
Output Structure:
|
|
13
|
-
- action_id, timestamp, action_type
|
|
13
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
14
14
|
- target.selector = { appId }
|
|
15
15
|
- success = true when launch was dispatched successfully
|
|
16
16
|
- failure_code/retryable when launch dispatch fails
|
|
@@ -83,7 +83,7 @@ Inputs:
|
|
|
83
83
|
- deviceId (optional)
|
|
84
84
|
|
|
85
85
|
Output Structure:
|
|
86
|
-
- action_id, timestamp, action_type
|
|
86
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
87
87
|
- target.selector = { appId }
|
|
88
88
|
- success = true when the restart command completed
|
|
89
89
|
- failure_code/retryable when restart dispatch fails
|
|
@@ -344,6 +344,7 @@ Capabilities:
|
|
|
344
344
|
Constraints:
|
|
345
345
|
- Does not verify correctness of the resulting state
|
|
346
346
|
- Must not be used alone to confirm action success when an applicable expect_* tool exists
|
|
347
|
+
- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
|
|
347
348
|
|
|
348
349
|
Recommended Usage:
|
|
349
350
|
1. Capture or define the expected outcome
|
|
@@ -532,7 +533,7 @@ Inputs:
|
|
|
532
533
|
- deviceId (optional)
|
|
533
534
|
|
|
534
535
|
Output Structure:
|
|
535
|
-
- action_id, timestamp, action_type
|
|
536
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
536
537
|
- target.selector = { x, y }
|
|
537
538
|
- success = true when the tap was dispatched
|
|
538
539
|
- failure_code/retryable when dispatch fails
|
|
@@ -587,7 +588,7 @@ Inputs:
|
|
|
587
588
|
|
|
588
589
|
Output Structure:
|
|
589
590
|
- action_id: unique timestamp-based action identifier
|
|
590
|
-
- timestamp:
|
|
591
|
+
- timestamp: ISO 8601 timestamp for the action attempt
|
|
591
592
|
- action_type: "tap_element"
|
|
592
593
|
- target.selector: original target handle ({ elementId })
|
|
593
594
|
- target.resolved: minimal resolved element info used for the tap
|
|
@@ -640,7 +641,7 @@ Inputs:
|
|
|
640
641
|
- platform/deviceId (optional)
|
|
641
642
|
|
|
642
643
|
Output Structure:
|
|
643
|
-
- action_id, timestamp, action_type
|
|
644
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
644
645
|
- target.selector = { x1, y1, x2, y2, duration }
|
|
645
646
|
- success = true when the swipe was dispatched
|
|
646
647
|
- failure_code/retryable when dispatch fails
|
|
@@ -692,7 +693,7 @@ Inputs:
|
|
|
692
693
|
- direction, maxScrolls, scrollAmount, deviceId (optional)
|
|
693
694
|
|
|
694
695
|
Output Structure:
|
|
695
|
-
- action_id, timestamp, action_type
|
|
696
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
696
697
|
- target.selector = original selector
|
|
697
698
|
- target.resolved = minimal resolved element info when found
|
|
698
699
|
- success = true when scrolling produced a visible target element
|
|
@@ -746,7 +747,7 @@ Inputs:
|
|
|
746
747
|
- platform/deviceId (optional)
|
|
747
748
|
|
|
748
749
|
Output Structure:
|
|
749
|
-
- action_id, timestamp, action_type
|
|
750
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
750
751
|
- target.selector = { text }
|
|
751
752
|
- success = true when text input was dispatched
|
|
752
753
|
- failure_code/retryable when dispatch fails
|
|
@@ -795,7 +796,7 @@ Inputs:
|
|
|
795
796
|
- platform/deviceId (optional)
|
|
796
797
|
|
|
797
798
|
Output Structure:
|
|
798
|
-
- action_id, timestamp, action_type
|
|
799
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
799
800
|
- target.selector = { key: "back" }
|
|
800
801
|
- success = true when the back action was dispatched
|
|
801
802
|
- failure_code/retryable when dispatch fails
|
|
@@ -835,6 +836,8 @@ Failure Handling:
|
|
|
835
836
|
description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
|
|
836
837
|
|
|
837
838
|
MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
|
|
839
|
+
Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
|
|
840
|
+
For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
|
|
838
841
|
|
|
839
842
|
HOW TO GATHER INPUTS before calling:
|
|
840
843
|
1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
|
|
@@ -868,7 +871,7 @@ BEHAVIOUR after outcome:
|
|
|
868
871
|
},
|
|
869
872
|
networkRequests: {
|
|
870
873
|
type: 'array',
|
|
871
|
-
description: 'Pass this only after calling get_network_activity as instructed by nextAction.
|
|
874
|
+
description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
|
|
872
875
|
items: {
|
|
873
876
|
type: 'object',
|
|
874
877
|
properties: {
|
|
@@ -890,7 +893,7 @@ BEHAVIOUR after outcome:
|
|
|
890
893
|
name: 'get_network_activity',
|
|
891
894
|
description: `Returns structured network events captured from platform logs since the last action.
|
|
892
895
|
|
|
893
|
-
Call this
|
|
896
|
+
Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
|
|
894
897
|
Do not call more than once per action.
|
|
895
898
|
|
|
896
899
|
Events are filtered to significant (non-background) requests only.
|
|
@@ -4,7 +4,7 @@ import { ToolsObserve } from '../observe/index.js';
|
|
|
4
4
|
import { classifyActionOutcome } from '../interact/classify.js';
|
|
5
5
|
import { ToolsNetwork } from '../network/index.js';
|
|
6
6
|
import { getSystemStatus } from '../system/index.js';
|
|
7
|
-
import { buildActionExecutionResult, captureActionFingerprint, inferGenericFailure, inferScrollFailure, wrapResponse } from './common.js';
|
|
7
|
+
import { buildActionExecutionResult, captureActionFingerprint, inferGenericFailure, inferScrollFailure, wrapResponse, wrapToolError } from './common.js';
|
|
8
8
|
async function handleStartApp(args) {
|
|
9
9
|
const { platform, appId, deviceId } = args;
|
|
10
10
|
const uiFingerprintBefore = await captureActionFingerprint(platform, deviceId);
|
|
@@ -330,8 +330,7 @@ export async function handleToolCall(name, args = {}) {
|
|
|
330
330
|
return await handler(args);
|
|
331
331
|
}
|
|
332
332
|
catch (error) {
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
};
|
|
333
|
+
console.error(`Error executing tool ${name}:`, error);
|
|
334
|
+
return wrapToolError(name, error);
|
|
336
335
|
}
|
|
337
336
|
}
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the **Mobile Debug MCP** project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.24.4]
|
|
6
|
+
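- Steer agents away from `wait_for_screen_change` when the expected outcome is backend/API activity without a visible UI change; tool descriptions now recommend `classify_action_outcome` together with `get_network_activity` for that case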
|
|
7
|
+
|
|
8
|
+
## [0.24.3]
|
|
9
|
+
- Improved output consistency
|
|
10
|
+
|
|
5
11
|
## [0.24.2]
|
|
6
12
|
- Fixed Android install issue
|
|
7
13
|
- Updated tools to have more detailed responses
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
# Baseline Spec v0
|
|
2
|
+
|
|
3
|
+
## 1. System Overview
|
|
4
|
+
|
|
5
|
+
The MCP surface is defined in `src/server/tool-definitions.ts` and dispatched in `src/server/tool-handlers.ts`. Tools are grouped in code by module, not by an explicit runtime taxonomy: **manage**, **observe**, **interact**, **network/classification**, and **system**.
|
|
6
|
+
|
|
7
|
+
Agents interact with tools by name through `handleToolCall(name, args)`. Most handlers return a **single text content block containing JSON** via `wrapResponse(...)`. The exceptions, as observed in the code, are listed below:
|
|
8
|
+
|
|
9
|
+
| Tool | MCP content shape |
|
|
10
|
+
| --- | --- |
|
|
11
|
+
| most tools | one text block with JSON |
|
|
12
|
+
| `get_logs` | two text blocks: metadata JSON, then logs JSON |
|
|
13
|
+
| `capture_screenshot` | one text block with JSON metadata, then one or more image blocks |
|
|
14
|
+
| `build_and_install` | one NDJSON text block, then one JSON text block |
|
|
15
|
+
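| uncaught handler error | one plain text error string, not wrapped JSON (baseline behaviour; the `handleToolCall` catch path that calls `wrapToolError` instead returns one text block with JSON `{ error: { tool, message } }`) |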
|
|
16
|
+
|
|
17
|
+
Observable execution flow for state-mutating action tools at the MCP boundary:
|
|
18
|
+
|
|
19
|
+
1. resolve device/platform
|
|
20
|
+
2. call `ToolsNetwork.notifyActionStart()`
|
|
21
|
+
3. capture UI fingerprint before the action
|
|
22
|
+
4. execute the platform action
|
|
23
|
+
5. capture UI fingerprint after the action
|
|
24
|
+
6. wrap the result into an action envelope
|
|
25
|
+
|
|
26
|
+
That flow is applied to `start_app`, `restart_app`, `tap`, `swipe`, `scroll_to_element`, `type_text`, and `press_back`. `tap_element` builds a similar envelope inside `src/interact/index.ts` rather than through the shared wrapper.
|
|
27
|
+
|
|
28
|
+
## 2. Tool Inventory
|
|
29
|
+
|
|
30
|
+
### Manage / lifecycle
|
|
31
|
+
|
|
32
|
+
| Tool | Purpose | Inputs | Outputs | Side effects |
|
|
33
|
+
| --- | --- | --- | --- | --- |
|
|
34
|
+
| `start_app` | Launch app on Android or iOS. | `{ platform: 'android'\|'ios', appId: string, deviceId?: string }` | `ActionExecutionResult` JSON with `device` and `details` (`launch_time_ms`, `device_id`, `output?`, `observed_app?`, `error?`). | Launches app, captures fingerprints, resets network window. |
|
|
35
|
+
| `terminate_app` | Stop app process. | `{ platform: 'android'\|'ios', appId: string, deviceId?: string }` | `{ terminated: boolean, device: DeviceInfo }` | Terminates app. |
|
|
36
|
+
| `restart_app` | Terminate then relaunch app. | `{ platform: 'android'\|'ios', appId: string, deviceId?: string }` | `ActionExecutionResult` JSON with `device` and restart `details` (`terminated_before_restart`, `terminate_error?`, `output?`, `observed_app?`, `error?`). | Stops and launches app, captures fingerprints, resets network window. |
|
|
37
|
+
| `reset_app_data` | Clear app storage / simulator container data. | `{ platform: 'android'\|'ios', appId: string, deviceId?: string }` | `{ reset: boolean, device: DeviceInfo }` | Clears app state. |
|
|
38
|
+
| `install_app` | Install built artifact or project output. | `{ platform: 'android'\|'ios', projectType: 'native'\|'kmp'\|'react-native'\|'flutter', appPath: string, deviceId?: string }` | `{ device: DeviceInfo, installed: boolean, output?: string, error?: string }` | Installs app; Android may push APK/AAB and run `pm install`; iOS may use `simctl` or `idb`. |
|
|
39
|
+
| `build_app` | Build project and return artifact path. | `{ platform: 'android'\|'ios', projectType: ..., projectPath: string, variant?: string }` | Build result JSON from platform builder, including artifact path on success or `error`. | Runs Gradle or Xcode build. |
|
|
40
|
+
| `build_and_install` | Build then install, streaming progress. | `{ platform: 'android'\|'ios', projectType: ..., projectPath: string, deviceId?: string, variant?: string }` | MCP response has NDJSON event block plus result JSON `{ success: boolean, artifactPath?: string, device?: DeviceInfo, output?: string, error?: string }`. | Builds, installs, emits progress events. |
|
|
41
|
+
| `list_devices` | Enumerate available devices. | `{ platform?: 'android'\|'ios', appId?: string }` | `{ devices: DeviceInfo[] }` (runtime objects may also include `appInstalled`/`booted`). | Reads device lists. |
|
|
42
|
+
|
|
43
|
+
### Observe / inspect
|
|
44
|
+
|
|
45
|
+
| Tool | Purpose | Inputs | Outputs | Side effects |
|
|
46
|
+
| --- | --- | --- | --- | --- |
|
|
47
|
+
| `get_logs` | Fetch recent device logs. | `{ platform: 'android'\|'ios', appId?: string, deviceId?: string, pid?: number, tag?: string, level?: string, contains?: string, since_seconds?: number, limit?: number, lines?: number }` | Two text blocks: metadata `{ device, result: { count, filtered, crashLines, source, meta } }`, then `{ logs: [...] }`. | Reads platform logs. |
|
|
48
|
+
| `capture_screenshot` | Capture current screenshot. | `{ platform: 'android'\|'ios', deviceId?: string }` | Text metadata block plus image block(s). | Captures screenshot; uses temp files. |
|
|
49
|
+
| `capture_debug_snapshot` | Bundle screenshot, UI tree, screen, fingerprint, and logs. | `{ reason?: string, includeLogs?: boolean, logLines?: number, platform?: 'android'\|'ios', appId?: string, deviceId?: string, sessionId?: string }` | Wrapped JSON snapshot object with device metadata, screenshot metadata, UI tree, fingerprint, current screen, and logs/errors. | Captures multiple observations. |
|
|
50
|
+
| `start_log_stream` | Start background structured log stream. | `{ platform?: 'android'\|'ios', packageName: string, level?: 'error'\|'warn'\|'info'\|'debug', deviceId?: string, sessionId?: string }` | `{ success: boolean, stream_started?: boolean, device_id?: string, pid?: number, error?: string }` | Starts long-lived log process, writes NDJSON file. |
|
|
51
|
+
| `read_log_stream` | Read accumulated streamed logs. | `{ sessionId?: string }` | `{ entries: any[], crash_summary?: { crash_detected: boolean, exception?: string, sample?: string } }` | Reads stream file; no new device action. |
|
|
52
|
+
| `stop_log_stream` | Stop background log stream. | `{ sessionId?: string }` | `{ success: boolean }` | Stops stream process and clears session entry. |
|
|
53
|
+
| `get_ui_tree` | Return current UI hierarchy. | `{ platform: 'android'\|'ios', deviceId?: string }` | `GetUITreeResponse` with `device`, `elements`, `resolution`, optional `error`. | Dumps UI hierarchy; Android writes/pulls XML; iOS queries via `idb`. |
|
|
54
|
+
| `get_current_screen` | Return visible Android activity. | `{ deviceId?: string }` | `GetCurrentScreenResponse` with `device`, `activity`, `package`, `shortActivity?`, `error?`. | Reads `dumpsys`; Android only. |
|
|
55
|
+
| `get_screen_fingerprint` | Compute stable screen fingerprint from UI tree and current screen. | `{ platform?: 'android'\|'ios', deviceId?: string }` | `{ fingerprint: string\|null, activity?: string, error?: string }` | Reads UI tree and, on Android, current screen. |
|
|
56
|
+
|
|
57
|
+
### Interact / wait / verify
|
|
58
|
+
|
|
59
|
+
| Tool | Purpose | Inputs | Outputs | Side effects |
|
|
60
|
+
| --- | --- | --- | --- | --- |
|
|
61
|
+
| `wait_for_screen_change` | Wait until fingerprint differs from provided previous fingerprint. | `{ platform?: 'android'\|'ios', previousFingerprint: string, timeoutMs?: number, pollIntervalMs?: number, deviceId?: string }` | `{ success: boolean, previousFingerprint, newFingerprint?\|lastFingerprint?, elapsedMs, observed_screen: { fingerprint, activity }, reason?: 'timeout' }` | Polls fingerprints. |
|
|
62
|
+
| `expect_screen` | Exact check against expected fingerprint or screen name. | `{ platform?: 'android'\|'ios', fingerprint?: string, screen?: string, deviceId?: string }` | `{ success, observed_screen, expected_screen, confidence, comparison: { basis, matched, reason } }` | Reads fingerprint/current screen. |
|
|
63
|
+
| `expect_element_visible` | Binary visible check for selector. | `{ selector: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }, element_id?: string, timeout_ms?: number, poll_interval_ms?: number, platform?: 'android'\|'ios', deviceId?: string }` | `{ success, selector, element_id, expected_condition: 'visible', element?, observed, reason, failure_code?, retryable? }` | Polls UI tree through `wait_for_ui`. |
|
|
64
|
+
| `wait_for_ui` | Deterministic UI wait and element resolution. | `{ selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }, condition?: 'exists'\|'not_exists'\|'visible'\|'clickable', timeout_ms?: number, poll_interval_ms?: number, match?: { index?: number }, retry?: { max_attempts?: number, backoff_ms?: number }, platform?: 'android'\|'ios', deviceId?: string }` | Success: `{ status:'success', matched, element, metrics, requested, observed }`; failure: `{ status:'timeout', error:{code,message}, metrics, requested, observed }`. | Polls UI tree; resolves actionable ancestor for `clickable`. |
|
|
65
|
+
| `find_element` | Heuristic semantic element search. | `{ query: string, exact?: boolean, timeoutMs?: number, platform?: 'android'\|'ios', deviceId?: string }` | `{ found: true, element, score, confidence }` or `{ found: false, error }` | Polls UI tree; no mutation. |
|
|
66
|
+
|
|
67
|
+
### Action / mutation
|
|
68
|
+
|
|
69
|
+
| Tool | Purpose | Inputs | Outputs | Side effects |
|
|
70
|
+
| --- | --- | --- | --- | --- |
|
|
71
|
+
| `tap` | Tap coordinates. | `{ x: number, y: number, platform?: 'android'\|'ios', deviceId?: string }` | `ActionExecutionResult` | Taps screen; captures fingerprints; resets network window. |
|
|
72
|
+
| `tap_element` | Tap resolved UI element by `elementId`. | `{ elementId: string }` | Action-style JSON with `action_type: 'tap_element'`, target selector/resolved element, `success`, fingerprints, `failure_code?`, `retryable?`. | Reads cached element/UI context, validates element, taps it, resets network window. |
|
|
73
|
+
| `swipe` | Swipe coordinates. | `{ platform?: 'android'\|'ios', x1, y1, x2, y2, duration, deviceId?: string }` | `ActionExecutionResult` | Swipes screen; captures fingerprints; resets network window. |
|
|
74
|
+
| `scroll_to_element` | Repeatedly scroll until matching visible element is found. | `{ platform: 'android'\|'ios', selector: { text?: string, resourceId?: string, contentDesc?: string, className?: string }, direction?: 'down'\|'up', maxScrolls?: number, scrollAmount?: number, deviceId?: string }` | `ActionExecutionResult` | Repeated swipes plus UI tree checks; resets network window. |
|
|
75
|
+
| `type_text` | Type text into focused field. | `{ platform?: 'android', text: string, deviceId?: string }` | `ActionExecutionResult` | Android text input; captures fingerprints; resets network window. |
|
|
76
|
+
| `press_back` | Send Android Back key. | `{ platform?: 'android', deviceId?: string }` | `ActionExecutionResult` | Android back action; captures fingerprints; resets network window. |
|
|
77
|
+
|
|
78
|
+
### Classification / network / system
|
|
79
|
+
|
|
80
|
+
| Tool | Purpose | Inputs | Outputs | Side effects |
|
|
81
|
+
| --- | --- | --- | --- | --- |
|
|
82
|
+
| `classify_action_outcome` | Deterministic rule-based classifier over supplied signals. | `{ uiChanged: boolean, expectedElementVisible?: boolean, networkRequests?: { url?: string, status: 'success'\|'failure'\|'retryable' }[], hasLogErrors?: boolean }` | `{ outcome: 'success'\|'no_op'\|'backend_failure'\|'ui_failure'\|'unknown', reasoning: string, nextAction?: 'call_get_network_activity' }` | Pure computation. |
|
|
83
|
+
| `get_network_activity` | Return normalized request events since last action window. | `{}` | `{ requests: NetworkRequestSummary[], count: number }` | Reads logs, advances internal `lastConsumedTimestamp`. |
|
|
84
|
+
| `get_system_status` | Aggregate Android/iOS/Gradle readiness. | `{}` | `{ success, status: 'ready'\|'degraded'\|'blocked', adbAvailable, adbVersion, devices, deviceStates, logsAvailable, envValid, issues, appInstalled, iosAvailable, iosDevices, gradleJavaHome, gradleValid, gradleFilesChecked, gradleSuggestedFixes, summary }` | Reads toolchain/device state. |
|
|
85
|
+
|
|
86
|
+
## 3. Action Tools (Mutation Tools)
|
|
87
|
+
|
|
88
|
+
| Tool | Actual output shape | Success reporting | Failure structure | Retry logic |
|
|
89
|
+
| --- | --- | --- | --- | --- |
|
|
90
|
+
| `start_app` | `ActionExecutionResult` + `device` + `details` | `success` mirrors underlying launch success | `failure_code` inferred generically; raw launch `error` only appears in `details` | none |
|
|
91
|
+
| `terminate_app` | `{ terminated: boolean, device }` | `terminated === true` | no standardized failure code; boolean only at MCP layer | none |
|
|
92
|
+
| `restart_app` | `ActionExecutionResult` + `device` + restart `details` | `success` mirrors underlying restart success | `failure_code` inferred generically; terminate/start details kept in `details` | no retry; always does terminate then start |
|
|
93
|
+
| `reset_app_data` | `{ reset: boolean, device }` | `reset === true` | no standardized failure code | none |
|
|
94
|
+
| `install_app` | `{ device, installed, output?, error? }` | `installed === true` | unstructured `error` string; no action envelope | Android has internal fallback paths; iOS may fall back from `simctl` to `idb` |
|
|
95
|
+
| `build_and_install` | NDJSON event stream + `{ success, artifactPath?, device?, output?, error? }` | final `success === true` | unstructured `error`; build/install phases encoded in NDJSON | build and install internals may retry depending on platform helpers |
|
|
96
|
+
| `tap` | `ActionExecutionResult` | `success` means command executed | `failure_code`/`retryable` inferred from generic error text; raw error omitted | none |
|
|
97
|
+
| `tap_element` | action-style JSON built in `src/interact/index.ts` | `success` means element was resolved and tap dispatched | structured `failure_code` from `ActionFailureCode`; includes `retryable` | none |
|
|
98
|
+
| `swipe` | `ActionExecutionResult` | command executed | generic inferred `failure_code` | none |
|
|
99
|
+
| `scroll_to_element` | `ActionExecutionResult` | **different semantics**: success means target element became visible during scroll loop | `failure_code` inferred by scroll-specific string matching | internal loop up to `maxScrolls` |
|
|
100
|
+
| `type_text` | `ActionExecutionResult` | command executed | generic inferred `failure_code` | none |
|
|
101
|
+
| `press_back` | `ActionExecutionResult` | command executed | generic inferred `failure_code` | none |
|
|
102
|
+
|
|
103
|
+
**Observed inconsistency:** `start_app`/`restart_app` expose `device` and rich `details`; `tap`/`swipe`/`type_text`/`press_back` do not. `scroll_to_element` reports an outcome-oriented success, while the others mostly report execution success.
|
|
104
|
+
|
|
105
|
+
## 4. Observation and Wait Tools
|
|
106
|
+
|
|
107
|
+
### `wait_for_ui`
|
|
108
|
+
|
|
109
|
+
- **Role:** both waits and resolves.
|
|
110
|
+
- **Signals used:** only the current UI tree from `get_ui_tree`.
|
|
111
|
+
- **Behavior:** filters elements by selector, supports `match.index`, evaluates `exists` / `not_exists` / `visible` / `clickable`, and resolves an actionable ancestor for `clickable`.
|
|
112
|
+
- **Output:** descriptive, not binary. Returns `requested`, `observed`, `metrics`, and optionally `element`.
|
|
113
|
+
- **Success model:** `status: 'success'`; otherwise `status: 'timeout'` with structured `error`.
|
|
114
|
+
|
|
115
|
+
### `wait_for_screen_change`
|
|
116
|
+
|
|
117
|
+
- **Role:** wait only.
|
|
118
|
+
- **Signals used:** screen fingerprints from `get_screen_fingerprint`.
|
|
119
|
+
- **Behavior:** polls until fingerprint differs from `previousFingerprint`, then performs a confirmation read for stability.
|
|
120
|
+
- **Output:** binary `success` plus descriptive `observed_screen`, elapsed time, and either `newFingerprint` or `lastFingerprint`.
|
|
121
|
+
|
|
122
|
+
### `find_element`
|
|
123
|
+
|
|
124
|
+
- **Role:** resolve only.
|
|
125
|
+
- **Signals used:** UI tree.
|
|
126
|
+
- **Behavior:** heuristic scoring over text/content/resource/class; if best element is not interactable it tries to resolve a clickable ancestor.
|
|
127
|
+
- **Output:** descriptive, scored result (`score`, `confidence`) or `{ found:false, error }`.
|
|
128
|
+
|
|
129
|
+
### `get_ui_tree`
|
|
130
|
+
|
|
131
|
+
- **Role:** inspect only.
|
|
132
|
+
- **Signals used:** platform accessibility/UI dump.
|
|
133
|
+
- **Output:** raw tree data with `elements`, `resolution`, and `device`.
|
|
134
|
+
- **Notes:** the Android and iOS handlers each retry internally, up to three attempts in total.
|
|
135
|
+
|
|
136
|
+
### `get_current_screen`
|
|
137
|
+
|
|
138
|
+
- **Role:** inspect only.
|
|
139
|
+
- **Signals used:** Android activity manager / window dumps.
|
|
140
|
+
- **Output:** current package/activity object.
|
|
141
|
+
- **Notes:** Android-only.
|
|
142
|
+
|
|
143
|
+
### `get_screen_fingerprint`
|
|
144
|
+
|
|
145
|
+
- **Role:** inspect only.
|
|
146
|
+
- **Signals used:** UI tree plus current screen on Android.
|
|
147
|
+
- **Behavior:** normalizes a subset of visible, structurally significant elements and hashes them.
|
|
148
|
+
- **Output:** `{ fingerprint, activity?, error? }`.
|
|
149
|
+
- **Notes:** iOS fingerprint omits activity in the hash payload.
|
|
150
|
+
|
|
151
|
+
### Log/snapshot observation
|
|
152
|
+
|
|
153
|
+
- `get_logs` returns structured metadata plus raw/structured log entries.
|
|
154
|
+
- `start_log_stream` / `read_log_stream` / `stop_log_stream` manage background NDJSON log capture.
|
|
155
|
+
- `capture_screenshot` and `capture_debug_snapshot` provide point-in-time observation artifacts.
|
|
156
|
+
|
|
157
|
+
## 5. Existing Verification Mechanisms
|
|
158
|
+
|
|
159
|
+
| Mechanism | Success rule | Determinism | Ambiguity |
|
|
160
|
+
| --- | --- | --- | --- |
|
|
161
|
+
| `expect_screen` | exact fingerprint equality, else exact screen-name equality | binary and deterministic | if only `screen` is provided, Android may use either fingerprint-derived `activity` or `get_current_screen` label |
|
|
162
|
+
| `expect_element_visible` | delegated `wait_for_ui(condition:'visible')` reaches success | binary wrapper over deterministic wait | failure collapses to `TIMEOUT` or `UNKNOWN` |
|
|
163
|
+
| `wait_for_ui` used as verification | requested condition becomes true | deterministic per poll inputs | descriptive output, not a dedicated verification result |
|
|
164
|
+
| `wait_for_screen_change` | fingerprint changes and stays stable for one confirmation pass | deterministic | verifies change, not correctness of destination |
|
|
165
|
+
| `classify_action_outcome` | ordered rule evaluation over provided UI/network/log inputs | deterministic pure function | if `networkRequests` omitted, it returns `unknown` with `nextAction: 'call_get_network_activity'`; `hasLogErrors` does not change the enum outcome |
|
|
166
|
+
|
|
167
|
+
## 6. Action Result Semantics
|
|
168
|
+
|
|
169
|
+
Across action tools, **success is not uniform**:
|
|
170
|
+
|
|
171
|
+
1. **Execution success:** `tap`, `swipe`, `type_text`, `press_back`, `start_app`, `restart_app`, and `tap_element` mainly report that the command ran or the tap was dispatched.
|
|
172
|
+
2. **Outcome success:** `scroll_to_element` reports success only if the target element was actually found during scrolling.
|
|
173
|
+
3. **Boolean operation success:** `install_app`, `terminate_app`, and `reset_app_data` use tool-specific booleans (`installed`, `terminated`, `reset`) instead of the action envelope.
|
|
174
|
+
|
|
175
|
+
Failure handling is **partly standardized**:
|
|
176
|
+
|
|
177
|
+
- action-envelope tools use `failure_code` and `retryable`
|
|
178
|
+
- manage tools often use plain booleans plus `error` strings
|
|
179
|
+
- some handlers drop underlying diagnostics before the MCP response is built
|
|
180
|
+
|
|
181
|
+
## 7. Failure Handling
|
|
182
|
+
|
|
183
|
+
### Structured failure signals
|
|
184
|
+
|
|
185
|
+
| Source | Structured signals |
|
|
186
|
+
| --- | --- |
|
|
187
|
+
| action envelope | `ELEMENT_NOT_FOUND`, `ELEMENT_NOT_INTERACTABLE`, `TIMEOUT`, `NAVIGATION_NO_CHANGE`, `AMBIGUOUS_TARGET`, `STALE_REFERENCE`, `UNKNOWN` |
|
|
188
|
+
| `wait_for_ui` | `INVALID_SELECTOR`, `INVALID_CONDITION`, `PLATFORM_NOT_SUPPORTED`, `ELEMENT_NOT_FOUND`, `INTERNAL_ERROR` |
|
|
189
|
+
| `expect_element_visible` | `failure_code: 'TIMEOUT'\|'UNKNOWN'`, `retryable` |
|
|
190
|
+
| `classify_action_outcome` | `outcome: success\|no_op\|backend_failure\|ui_failure\|unknown` |
|
|
191
|
+
| `get_network_activity` | per-request `status: success\|failure\|retryable` |
|
|
192
|
+
|
|
193
|
+
### Unstructured failure signals
|
|
194
|
+
|
|
195
|
+
- plain `error` strings from `install_app`, `build_app`, `build_and_install`, `find_element`, `start_log_stream`, many platform helpers
|
|
196
|
+
- boolean-only failures from `terminate_app` and `reset_app_data`
|
|
197
|
+
- top-level handler fallback: `Error executing tool <name>: ...` as plain text, not JSON
|
|
198
|
+
|
|
199
|
+
### Retry / recovery logic present in implementation
|
|
200
|
+
|
|
201
|
+
| Area | Observed logic |
|
|
202
|
+
| --- | --- |
|
|
203
|
+
| `wait_for_ui` | `retry.max_attempts` and `retry.backoff_ms` |
|
|
204
|
+
| `scroll_to_element` | repeated swipes up to `maxScrolls` |
|
|
205
|
+
| Android `install_app` | retries `pm install` with `-t` on test-only failure; has push + shell fallback |
|
|
206
|
+
| iOS `install_app` | tries `simctl install`, may fall back to `idb` |
|
|
207
|
+
| `get_ui_tree` | platform handlers retry up to three times |
|
|
208
|
+
| `wait_for_screen_change` | one stability confirmation pass after a detected change |
|
|
209
|
+
|
|
210
|
+
## 8. Execution Patterns (Observed)
|
|
211
|
+
|
|
212
|
+
1. **Generic action wrapper**
|
|
213
|
+
`notifyActionStart()` → fingerprint before → platform action → fingerprint after → action envelope.
|
|
214
|
+
|
|
215
|
+
2. **Resolved tap flow**
|
|
216
|
+
`wait_for_ui` returns `element.elementId` → `tap_element` uses cached element and current UI tree to validate it → tap → fingerprints before/after.
|
|
217
|
+
|
|
218
|
+
3. **Visibility verification flow**
|
|
219
|
+
`expect_element_visible` is implemented as `wait_for_ui(... condition:'visible' ...)` plus a narrower binary result.
|
|
220
|
+
|
|
221
|
+
4. **Screen verification flow**
|
|
222
|
+
`wait_for_screen_change` and `expect_screen` both depend on `get_screen_fingerprint`; `expect_screen` may additionally call `get_current_screen` on Android when matching by screen name.
|
|
223
|
+
|
|
224
|
+
5. **Network correlation flow**
|
|
225
|
+
action tools that call `notifyActionStart()` create the time window used by `get_network_activity`; `classify_action_outcome` can then classify using supplied request summaries.
|
|
226
|
+
|
|
227
|
+
6. **Snapshot/debug flow**
|
|
228
|
+
`capture_debug_snapshot` aggregates screenshot, current screen, fingerprint, UI tree, and logs in one call.
|
|
229
|
+
|
|
230
|
+
## 9. Inconsistencies and Gaps
|
|
231
|
+
|
|
232
|
+
1. **Response envelope mismatch:** most tools return wrapped JSON, but `get_logs`, `capture_screenshot`, and `build_and_install` use multi-block responses.
|
|
233
|
+
2. **Unexpected-error shape mismatch:** uncaught handler failures become plain text strings, not structured JSON.
|
|
234
|
+
3. **Action result mismatch:** some mutation tools use `ActionExecutionResult`; `install_app`, `terminate_app`, `reset_app_data`, and `build_and_install` do not.
|
|
235
|
+
4. **Success semantics mismatch:** `scroll_to_element` success is outcome-based; most other action tools are execution-based.
|
|
236
|
+
5. **Detail richness mismatch:** `start_app` and `restart_app` include `device` and rich `details`; other action-envelope tools usually omit raw error/details.
|
|
237
|
+
6. **Failure-code derivation mismatch:** generic action wrappers infer `failure_code` by matching substrings in error text; `tap_element` assigns codes directly.
|
|
238
|
+
7. **Dropped diagnostics:** handler-level MCP responses omit some underlying `diagnostics`/`error` detail, especially for `terminate_app`, `reset_app_data`, and `get_logs`.
|
|
239
|
+
8. **`expect_element_visible` type/implementation mismatch:** the type allows `ELEMENT_NOT_FOUND`, but the implementation only emits `TIMEOUT` or `UNKNOWN`.
|
|
240
|
+
9. **Platform mismatch:** `get_current_screen`, `type_text`, and `press_back` are Android-only; other tools are dual-platform.
|
|
241
|
+
10. **Observation helper gap:** `waitForUICore` supports `ui`/`log`/`screen`/`idle` modes internally, but only the newer selector-based `wait_for_ui` is exposed as a tool.
|
|
242
|
+
11. **Network-window coverage gap:** only tools that call `notifyActionStart()` reset the network activity window; `install_app`, `terminate_app`, and `reset_app_data` do not.
|
|
243
|
+
12. **`classify_action_outcome` log input does not influence the outcome:** `hasLogErrors` affects the reasoning text for `no_op` but never changes the enum outcome.
|
|
244
|
+
13. **`build_and_install` has dead autodetect code:** handler requires `platform` and `projectType`, but later still contains unreachable fallback autodetection branches.
|
|
245
|
+
14. **Runtime object shape drift:** `list_devices` may return extra runtime fields like `appInstalled` and `booted` beyond the base `DeviceInfo` shape.
|
|
246
|
+
|
|
247
|
+
## 10. Minimal Canonical Model (Derived, Not Invented)
|
|
248
|
+
|
|
249
|
+
### Common action shape already present
|
|
250
|
+
|
|
251
|
+
```ts
|
|
252
|
+
{
|
|
253
|
+
action_id: string,
|
|
254
|
+
timestamp: string,
|
|
255
|
+
action_type: string,
|
|
256
|
+
target: {
|
|
257
|
+
selector: Record<string, unknown>,
|
|
258
|
+
resolved: Record<string, unknown> | null
|
|
259
|
+
},
|
|
260
|
+
success: boolean,
|
|
261
|
+
failure_code?: string,
|
|
262
|
+
retryable?: boolean,
|
|
263
|
+
ui_fingerprint_before: string | null,
|
|
264
|
+
ui_fingerprint_after: string | null,
|
|
265
|
+
device?: DeviceInfo,
|
|
266
|
+
details?: Record<string, unknown>
|
|
267
|
+
}
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
This shape is already used directly or closely approximated by:
|
|
271
|
+
|
|
272
|
+
- `start_app`
|
|
273
|
+
- `restart_app`
|
|
274
|
+
- `tap`
|
|
275
|
+
- `tap_element`
|
|
276
|
+
- `swipe`
|
|
277
|
+
- `scroll_to_element`
|
|
278
|
+
- `type_text`
|
|
279
|
+
- `press_back`
|
|
280
|
+
|
|
281
|
+
### Common observation/verification pattern already present
|
|
282
|
+
|
|
283
|
+
```ts
|
|
284
|
+
{
|
|
285
|
+
requested|expected: ...,
|
|
286
|
+
observed: ...,
|
|
287
|
+
success|status: boolean | 'success' | 'timeout',
|
|
288
|
+
metrics?|confidence?|comparison?|reason?
|
|
289
|
+
}
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Examples:
|
|
293
|
+
|
|
294
|
+
- `wait_for_ui` → `requested`, `observed`, `metrics`
|
|
295
|
+
- `expect_screen` → `expected_screen`, `observed_screen`, `comparison`
|
|
296
|
+
- `expect_element_visible` → `selector`, `observed`, `reason`
|
|
297
|
+
- `wait_for_screen_change` → previous vs observed/new fingerprint
|
|
298
|
+
|
|
299
|
+
### Common failure signals already present
|
|
300
|
+
|
|
301
|
+
- action failure codes from `ActionFailureCode`
|
|
302
|
+
- wait/expect codes (`INVALID_*`, `ELEMENT_NOT_FOUND`, `TIMEOUT`, `UNKNOWN`)
|
|
303
|
+
- network request statuses (`success`, `failure`, `retryable`)
|
|
304
|
+
- fallback unstructured `error` strings
|
|
305
|
+
|
|
306
|
+
### Common flow already present
|
|
307
|
+
|
|
308
|
+
- resolve device
|
|
309
|
+
- perform platform operation
|
|
310
|
+
- optionally capture fingerprints before/after
|
|
311
|
+
- return structured JSON, usually in one text block
|
|
312
|
+
- perform verification in separate tools rather than as part of most actions
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
# MCP Tooling Specification — Spec v1 (Refined)
|
|
2
|
+
|
|
3
|
+
## 1. Scope
|
|
4
|
+
|
|
5
|
+
This specification defines the runtime contract for MCP tools used to interact with mobile applications.
|
|
6
|
+
|
|
7
|
+
It standardizes:
|
|
8
|
+
|
|
9
|
+
- action execution semantics
|
|
10
|
+
- verification model
|
|
11
|
+
- failure handling
|
|
12
|
+
- response shape constraints
|
|
13
|
+
|
|
14
|
+
This spec is incremental and aligned with the current implementation. It does not introduce new tools or require architectural redesign.
|
|
15
|
+
|
|
16
|
+
## 2. Core Model
|
|
17
|
+
|
|
18
|
+
The system is based on a strict separation:
|
|
19
|
+
|
|
20
|
+
- Action tools perform execution
|
|
21
|
+
- Verification tools determine outcome
|
|
22
|
+
- `wait_for_*` tools resolve and synchronize
|
|
23
|
+
- Observation tools inspect state
|
|
24
|
+
|
|
25
|
+
## 3. Execution Model
|
|
26
|
+
|
|
27
|
+
Canonical flow for verifiable interactions:
|
|
28
|
+
|
|
29
|
+
`RESOLVE -> ACT -> WAIT (optional) -> EXPECT`
|
|
30
|
+
|
|
31
|
+
This flow applies when outcome verification is required.
|
|
32
|
+
|
|
33
|
+
It does not apply to:
|
|
34
|
+
|
|
35
|
+
- pure inspection tools
|
|
36
|
+
- observation-only flows
|
|
37
|
+
- non-verifiable or exploratory actions
|
|
38
|
+
|
|
39
|
+
Outcome-specific guidance:
|
|
40
|
+
|
|
41
|
+
- visible navigation expected -> `wait_for_screen_change` (optional) -> `expect_screen`
|
|
42
|
+
- local UI change expected -> `wait_for_ui` (optional) -> `expect_element_visible`
|
|
43
|
+
- backend/API activity expected without a visible UI change -> capture `get_screen_fingerprint` before the action, call `get_network_activity` immediately after the action, compare the before/after fingerprints, then call `classify_action_outcome` with the observed requests
|
|
44
|
+
|
|
45
|
+
For backend/API activity, `wait_for_screen_change` is not the right verification tool unless a visible transition is also expected.
|
|
46
|
+
|
|
47
|
+
## 4. Action Tools
|
|
48
|
+
|
|
49
|
+
### 4.1 Definition
|
|
50
|
+
|
|
51
|
+
Action tools mutate application state.
|
|
52
|
+
|
|
53
|
+
Includes:
|
|
54
|
+
`start_app`, `restart_app`, `tap`, `tap_element`, `swipe`, `scroll_to_element`, `type_text`, `press_back`
|
|
55
|
+
|
|
56
|
+
### 4.2 Required Semantics
|
|
57
|
+
|
|
58
|
+
- `success` MUST represent execution success only
|
|
59
|
+
- execution success means the platform command was dispatched without error
|
|
60
|
+
- `success` MUST NOT imply outcome success
|
|
61
|
+
|
|
62
|
+
### 4.3 Action Envelope
|
|
63
|
+
|
|
64
|
+
MUST be returned in this structure:
|
|
65
|
+
|
|
66
|
+
```ts
|
|
67
|
+
{
|
|
68
|
+
action_id: string,
|
|
69
|
+
timestamp: string,
|
|
70
|
+
action_type: string,
|
|
71
|
+
target: {
|
|
72
|
+
selector: object,
|
|
73
|
+
resolved: object | null
|
|
74
|
+
},
|
|
75
|
+
success: boolean,
|
|
76
|
+
ui_fingerprint_before: string | null,
|
|
77
|
+
ui_fingerprint_after: string | null,
|
|
78
|
+
failure_code?: string,
|
|
79
|
+
retryable?: boolean,
|
|
80
|
+
device?: DeviceInfo,
|
|
81
|
+
details?: object
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Rules:
|
|
86
|
+
|
|
87
|
+
- `success` is at the top level, not nested
|
|
88
|
+
- `target` contains only selection and resolution context
|
|
89
|
+
- fingerprints represent observed pre/post UI state on a best-effort basis
|
|
90
|
+
- `failure_code` is optional but MUST be used when a structured mapping exists
|
|
91
|
+
|
|
92
|
+
### 4.4 Allowed Deviations
|
|
93
|
+
|
|
94
|
+
Explicit temporary exceptions:
|
|
95
|
+
|
|
96
|
+
- `install_app`, `terminate_app`, `reset_app_data` do not use this envelope
|
|
97
|
+
- `scroll_to_element` may temporarily retain outcome-based success semantics
|
|
98
|
+
- partial `failure_code` coverage is allowed
|
|
99
|
+
- detail richness may vary across tools
|
|
100
|
+
|
|
101
|
+
## 5. Verification Tools
|
|
102
|
+
|
|
103
|
+
### 5.1 Definition
|
|
104
|
+
|
|
105
|
+
Verification tools determine whether the intended outcome occurred.
|
|
106
|
+
|
|
107
|
+
Primary:
|
|
108
|
+
|
|
109
|
+
- `expect_screen`
|
|
110
|
+
- `expect_element_visible`
|
|
111
|
+
|
|
112
|
+
### 5.2 Required Semantics
|
|
113
|
+
|
|
114
|
+
- MUST return `success` as a boolean
|
|
115
|
+
- `success` MUST represent outcome truth
|
|
116
|
+
- MUST be binary and deterministic
|
|
117
|
+
|
|
118
|
+
Optional fields do not affect `success`:
|
|
119
|
+
`observed`, `expected`, `comparison`, `reason`, `confidence`
|
|
120
|
+
|
|
121
|
+
### 5.3 Authoritative Role
|
|
122
|
+
|
|
123
|
+
Verification tools are the only authoritative source of outcome truth.
|
|
124
|
+
|
|
125
|
+
Action tools MUST NOT be used to infer outcome success.
|
|
126
|
+
|
|
127
|
+
### 5.4 Applicability Rules
|
|
128
|
+
|
|
129
|
+
An `expect_*` tool is applicable when:
|
|
130
|
+
|
|
131
|
+
- expected destination screen is known -> `expect_screen`
|
|
132
|
+
- expected UI element state is known -> `expect_element_visible`
|
|
133
|
+
- outcome is explicitly defined or testable
|
|
134
|
+
|
|
135
|
+
Rules:
|
|
136
|
+
|
|
137
|
+
- `wait_for_*` MAY be used before `expect_*` for synchronization
|
|
138
|
+
- `wait_for_*` MUST NOT replace `expect_*` when an applicable `expect_*` tool exists
|
|
139
|
+
- when no applicable `expect_*` tool exists, `expect_*` MAY be skipped
|
|
140
|
+
|
|
141
|
+
## 6. wait_for_* Tools
|
|
142
|
+
|
|
143
|
+
### 6.1 Definition
|
|
144
|
+
|
|
145
|
+
`wait_for_*` tools provide deterministic resolution and synchronization.
|
|
146
|
+
|
|
147
|
+
Examples:
|
|
148
|
+
|
|
149
|
+
- `wait_for_ui`
|
|
150
|
+
- `wait_for_screen_change`
|
|
151
|
+
|
|
152
|
+
### 6.2 Rules
|
|
153
|
+
|
|
154
|
+
- MAY resolve UI elements
|
|
155
|
+
- MAY synchronize UI/system state
|
|
156
|
+
- MUST NOT be treated as final verification when `expect_*` is applicable
|
|
157
|
+
|
|
158
|
+
### 6.3 Semantics
|
|
159
|
+
|
|
160
|
+
- `success` indicates condition met or resolution succeeded
|
|
161
|
+
- `success` does NOT indicate outcome correctness
|
|
162
|
+
|
|
163
|
+
## 7. Failure Semantics
|
|
164
|
+
|
|
165
|
+
### 7.1 Canonical Codes
|
|
166
|
+
|
|
167
|
+
- `ELEMENT_NOT_FOUND`
|
|
168
|
+
- `ELEMENT_NOT_INTERACTABLE`
|
|
169
|
+
- `TIMEOUT`
|
|
170
|
+
- `NAVIGATION_NO_CHANGE`
|
|
171
|
+
- `AMBIGUOUS_TARGET`
|
|
172
|
+
- `STALE_REFERENCE`
|
|
173
|
+
- `UNKNOWN`
|
|
174
|
+
|
|
175
|
+
### 7.2 Rules
|
|
176
|
+
|
|
177
|
+
- `failure_code` MUST be used when a structured mapping exists
|
|
178
|
+
- `failure_code` MUST NOT be replaced by string errors
|
|
179
|
+
- string errors MAY exist for diagnostics only
|
|
180
|
+
- not all tools must emit all codes
|
|
181
|
+
|
|
182
|
+
### 7.3 Scope
|
|
183
|
+
|
|
184
|
+
Applies to:
|
|
185
|
+
|
|
186
|
+
- action tools
|
|
187
|
+
- verification tools
|
|
188
|
+
- `wait_for_ui`-style tools
|
|
189
|
+
|
|
190
|
+
## 8. Response Shape
|
|
191
|
+
|
|
192
|
+
### 8.1 Default
|
|
193
|
+
|
|
194
|
+
Unless listed under 8.2 Allowed Exceptions, all responses MUST be a single JSON text block.
|
|
195
|
+
|
|
196
|
+
### 8.2 Allowed Exceptions
|
|
197
|
+
|
|
198
|
+
Multi-block responses are allowed only for:
|
|
199
|
+
|
|
200
|
+
- `get_logs`
|
|
201
|
+
- `capture_screenshot`
|
|
202
|
+
- `build_and_install`
|
|
203
|
+
|
|
204
|
+
### 8.3 Errors
|
|
205
|
+
|
|
206
|
+
All handler/runtime errors MUST be JSON-wrapped.
|
|
207
|
+
|
|
208
|
+
String-only errors are not allowed, including fallback handler errors.
|
|
209
|
+
|
|
210
|
+
Note: string diagnostics may still appear inside structured JSON payloads where explicitly defined by a tool.
|
|
211
|
+
|
|
212
|
+
## 9. Classification
|
|
213
|
+
|
|
214
|
+
Tool: `classify_action_outcome`
|
|
215
|
+
|
|
216
|
+
Rules:
|
|
217
|
+
|
|
218
|
+
- MAY use UI, network, and log signals
|
|
219
|
+
- MUST be deterministic
|
|
220
|
+
- MUST NOT replace `expect_*` tools
|
|
221
|
+
- MUST be treated as a supplementary signal only
|
|
222
|
+
- SHOULD be used with `get_network_activity` when the expected outcome is backend/API activity without a visible UI change
|
|
223
|
+
|
|
224
|
+
It is not a verification mechanism.
|
|
225
|
+
|
|
226
|
+
## 10. Execution Patterns
|
|
227
|
+
|
|
228
|
+
Canonical pattern:
|
|
229
|
+
|
|
230
|
+
`wait_for_ui -> tap_element -> wait_for_screen_change (optional) -> expect_screen`
|
|
231
|
+
|
|
232
|
+
Interpretation:
|
|
233
|
+
|
|
234
|
+
- `tap_element.success` = executed
|
|
235
|
+
- `wait_for_screen_change.success` = UI changed
|
|
236
|
+
- `expect_screen.success` = correct outcome verified
|
|
237
|
+
|
|
238
|
+
## 11. Known Deviations
|
|
239
|
+
|
|
240
|
+
Explicitly allowed:
|
|
241
|
+
|
|
242
|
+
- `install_app`, `terminate_app`, `reset_app_data` not using envelope
|
|
243
|
+
- `build_and_install` streaming NDJSON
|
|
244
|
+
- platform-specific tools
|
|
245
|
+
- partial failure coverage
|
|
246
|
+
- `scroll_to_element` outcome-based success (temporary exception)
|
|
247
|
+
- extended runtime fields in `list_devices`
|
|
248
|
+
|
|
249
|
+
## 12. Migration Rules
|
|
250
|
+
|
|
251
|
+
Must change now:
|
|
252
|
+
|
|
253
|
+
- uncaught errors must be JSON-wrapped
|
|
254
|
+
|
|
255
|
+
Should align when touched:
|
|
256
|
+
|
|
257
|
+
- `tap`, `swipe`, `type_text`, `press_back`
|
|
258
|
+
- `start_app`, `restart_app`
|
|
259
|
+
- `scroll_to_element`
|
|
260
|
+
- `wait_for_ui`
|
|
261
|
+
|
|
262
|
+
No change required:
|
|
263
|
+
|
|
264
|
+
- `tap_element`
|
|
265
|
+
- `expect_screen`
|
|
266
|
+
- `expect_element_visible`
|
|
267
|
+
- `wait_for_screen_change`
|
|
268
|
+
|
|
269
|
+
## 13. Guiding Principles
|
|
270
|
+
|
|
271
|
+
- Actions execute
|
|
272
|
+
- Verification proves
|
|
273
|
+
- Waiting synchronizes
|
|
274
|
+
- Classification assists
|
|
275
|
+
|
|
276
|
+
## Final Definition
|
|
277
|
+
|
|
278
|
+
Action success equals execution success.
|
|
279
|
+
Outcome success equals verification success.
|
|
280
|
+
|
|
281
|
+
Verification tools are authoritative when the expected outcome is defined.
|
package/docs/tools/interact.md
CHANGED
|
@@ -33,7 +33,7 @@ Example response:
|
|
|
33
33
|
```json
|
|
34
34
|
{
|
|
35
35
|
"action_id": "tap_1710000000000_1",
|
|
36
|
-
"timestamp":
|
|
36
|
+
"timestamp": "2026-04-23T08:00:00.000Z",
|
|
37
37
|
"action_type": "tap",
|
|
38
38
|
"target": { "selector": { "x": 100, "y": 200 }, "resolved": null },
|
|
39
39
|
"success": true,
|
|
@@ -53,6 +53,10 @@ Preferred verification:
|
|
|
53
53
|
|
|
54
54
|
- navigation outcome known -> `expect_screen`
|
|
55
55
|
- local UI change known -> `expect_element_visible`
|
|
56
|
+
- backend/API activity expected -> `classify_action_outcome` + `get_network_activity`
|
|
57
|
+
|
|
58
|
+
Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on network activity and classification instead.
|
|
59
|
+
For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and call `get_network_activity` immediately after the action; do not wait on `wait_for_screen_change` if no visible transition is expected.
|
|
56
60
|
|
|
57
61
|
---
|
|
58
62
|
|
|
@@ -139,6 +143,7 @@ Notes:
|
|
|
139
143
|
- Treats `null` fingerprints as transient and keeps polling.
|
|
140
144
|
- Adds a stability confirmation before returning success to avoid transient animation frames.
|
|
141
145
|
- Follow with `expect_screen` when the expected destination is known.
|
|
146
|
+
- Do not use this as the main success check for backend/API activity that does not change the visible UI.
|
|
142
147
|
|
|
143
148
|
---
|
|
144
149
|
|
|
@@ -303,7 +308,7 @@ Success response:
|
|
|
303
308
|
```json
|
|
304
309
|
{
|
|
305
310
|
"action_id": "tap_element_1710000000000_1",
|
|
306
|
-
"timestamp":
|
|
311
|
+
"timestamp": "2026-04-23T08:00:00.000Z",
|
|
307
312
|
"action_type": "tap_element",
|
|
308
313
|
"target": {
|
|
309
314
|
"selector": { "elementId": "el_123" },
|
|
@@ -328,7 +333,7 @@ Failure response:
|
|
|
328
333
|
```json
|
|
329
334
|
{
|
|
330
335
|
"action_id": "tap_element_1710000000001_2",
|
|
331
|
-
"timestamp":
|
|
336
|
+
"timestamp": "2026-04-23T08:00:00.001Z",
|
|
332
337
|
"action_type": "tap_element",
|
|
333
338
|
"target": { "selector": { "elementId": "el_123" }, "resolved": null },
|
|
334
339
|
"success": false,
|
|
@@ -451,3 +456,22 @@ Notes:
|
|
|
451
456
|
- The tool resolves the selector internally when needed.
|
|
452
457
|
- On failure, `reason` and `observed` tell you whether the selector was missing entirely or present but not yet visible.
|
|
453
458
|
- Use when the screen should remain on the same destination but a specific element should appear or become visible.
|
|
459
|
+
|
|
460
|
+
---
|
|
461
|
+
|
|
462
|
+
## classify_action_outcome + get_network_activity
|
|
463
|
+
|
|
464
|
+
Use this pair when the action is expected to trigger network/backend work and the screen may not visibly change.
|
|
465
|
+
|
|
466
|
+
Pattern:
|
|
467
|
+
|
|
468
|
+
1. perform the action
|
|
469
|
+
2. call `classify_action_outcome` with `uiChanged` taken from a `get_screen_fingerprint` before/after comparison (or from `wait_for_screen_change` when a visible transition is also expected)
|
|
470
|
+
3. if the classifier asks for it, call `get_network_activity`
|
|
471
|
+
4. call `classify_action_outcome` again with `networkRequests`
|
|
472
|
+
|
|
473
|
+
Guidance:
|
|
474
|
+
|
|
475
|
+
- `uiChanged=true` or `expectedElementVisible=true` means the action outcome is already verified
|
|
476
|
+
- `nextAction="call_get_network_activity"` means the UI signal was inconclusive and the agent should inspect network activity
|
|
477
|
+
- if network requests succeed but the UI stays unchanged, treat the outcome as a backend/API result rather than a screen transition
|
package/docs/tools/manage.md
CHANGED
|
@@ -121,7 +121,7 @@ start_app response example:
|
|
|
121
121
|
```json
|
|
122
122
|
{
|
|
123
123
|
"action_id": "start_app_1710000000000_1",
|
|
124
|
-
"timestamp":
|
|
124
|
+
"timestamp": "2026-04-23T08:00:00.000Z",
|
|
125
125
|
"action_type": "start_app",
|
|
126
126
|
"device": { "platform": "android", "id": "emulator-5554", "osVersion": "14", "model": "Pixel", "simulator": true },
|
|
127
127
|
"target": { "selector": { "appId": "com.example.app" }, "resolved": null },
|
package/package.json
CHANGED
package/src/interact/index.ts
CHANGED
|
@@ -146,7 +146,7 @@ export class ToolsInteract {
|
|
|
146
146
|
|
|
147
147
|
private static _actionFailure(
|
|
148
148
|
actionId: string,
|
|
149
|
-
timestamp:
|
|
149
|
+
timestamp: string,
|
|
150
150
|
actionType: string,
|
|
151
151
|
selector: Record<string, unknown> | null,
|
|
152
152
|
resolved: ActionTargetResolved | null,
|
|
@@ -254,9 +254,10 @@ export class ToolsInteract {
|
|
|
254
254
|
}
|
|
255
255
|
|
|
256
256
|
static async tapElementHandler({ elementId }: { elementId: string }): Promise<TapElementResponse> {
|
|
257
|
-
const
|
|
257
|
+
const timestampMs = Date.now()
|
|
258
|
+
const timestamp = new Date(timestampMs).toISOString()
|
|
258
259
|
const actionType = 'tap_element'
|
|
259
|
-
const actionId = nextActionId(actionType,
|
|
260
|
+
const actionId = nextActionId(actionType, timestampMs)
|
|
260
261
|
const selector = { elementId }
|
|
261
262
|
const resolved = ToolsInteract._resolvedUiElements.get(elementId)
|
|
262
263
|
if (!resolved) {
|
|
@@ -304,6 +305,7 @@ export class ToolsInteract {
|
|
|
304
305
|
action_id: actionId,
|
|
305
306
|
timestamp,
|
|
306
307
|
action_type: actionType,
|
|
308
|
+
...(tree?.device ? { device: tree.device } : {}),
|
|
307
309
|
target: {
|
|
308
310
|
selector,
|
|
309
311
|
resolved: resolvedTarget
|
package/src/server/common.ts
CHANGED
|
@@ -82,9 +82,10 @@ export function buildActionExecutionResult({
|
|
|
82
82
|
failure?: { failureCode: ActionFailureCode; retryable: boolean }
|
|
83
83
|
details?: Record<string, unknown>
|
|
84
84
|
}): ActionExecutionResult {
|
|
85
|
-
const
|
|
85
|
+
const timestampMs = Date.now()
|
|
86
|
+
const timestamp = new Date(timestampMs).toISOString()
|
|
86
87
|
return {
|
|
87
|
-
action_id: nextActionId(actionType,
|
|
88
|
+
action_id: nextActionId(actionType, timestampMs),
|
|
88
89
|
timestamp,
|
|
89
90
|
action_type: actionType,
|
|
90
91
|
...(device ? { device } : {}),
|
|
@@ -99,3 +100,28 @@ export function buildActionExecutionResult({
|
|
|
99
100
|
...(details ? { details } : {})
|
|
100
101
|
}
|
|
101
102
|
}
|
|
103
|
+
|
|
104
|
+
export function wrapToolError(name: string, error: unknown) {
|
|
105
|
+
const message = error instanceof Error
|
|
106
|
+
? error.message
|
|
107
|
+
: typeof error === 'object' && error !== null
|
|
108
|
+
? (() => {
|
|
109
|
+
try {
|
|
110
|
+
return JSON.stringify(error, null, 2)
|
|
111
|
+
} catch {
|
|
112
|
+
return '[unserializable error object]'
|
|
113
|
+
}
|
|
114
|
+
})()
|
|
115
|
+
: String(error)
|
|
116
|
+
return {
|
|
117
|
+
content: [{
|
|
118
|
+
type: 'text' as const,
|
|
119
|
+
text: JSON.stringify({
|
|
120
|
+
error: {
|
|
121
|
+
tool: name,
|
|
122
|
+
message
|
|
123
|
+
}
|
|
124
|
+
}, null, 2)
|
|
125
|
+
}]
|
|
126
|
+
}
|
|
127
|
+
}
|
|
@@ -10,7 +10,7 @@ Inputs:
|
|
|
10
10
|
- deviceId (optional)
|
|
11
11
|
|
|
12
12
|
Output Structure:
|
|
13
|
-
- action_id, timestamp, action_type
|
|
13
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
14
14
|
- target.selector = { appId }
|
|
15
15
|
- success = true when launch was dispatched successfully
|
|
16
16
|
- failure_code/retryable when launch dispatch fails
|
|
@@ -83,7 +83,7 @@ Inputs:
|
|
|
83
83
|
- deviceId (optional)
|
|
84
84
|
|
|
85
85
|
Output Structure:
|
|
86
|
-
- action_id, timestamp, action_type
|
|
86
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
87
87
|
- target.selector = { appId }
|
|
88
88
|
- success = true when the restart command completed
|
|
89
89
|
- failure_code/retryable when restart dispatch fails
|
|
@@ -344,6 +344,7 @@ Capabilities:
|
|
|
344
344
|
Constraints:
|
|
345
345
|
- Does not verify correctness of the resulting state
|
|
346
346
|
- Must not be used alone to confirm action success when an applicable expect_* tool exists
|
|
347
|
+
- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
|
|
347
348
|
|
|
348
349
|
Recommended Usage:
|
|
349
350
|
1. Capture or define the expected outcome
|
|
@@ -532,7 +533,7 @@ Inputs:
|
|
|
532
533
|
- deviceId (optional)
|
|
533
534
|
|
|
534
535
|
Output Structure:
|
|
535
|
-
- action_id, timestamp, action_type
|
|
536
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
536
537
|
- target.selector = { x, y }
|
|
537
538
|
- success = true when the tap was dispatched
|
|
538
539
|
- failure_code/retryable when dispatch fails
|
|
@@ -587,7 +588,7 @@ Inputs:
|
|
|
587
588
|
|
|
588
589
|
Output Structure:
|
|
589
590
|
- action_id: unique timestamp-based action identifier
|
|
590
|
-
- timestamp:
|
|
591
|
+
- timestamp: ISO 8601 timestamp for the action attempt
|
|
591
592
|
- action_type: "tap_element"
|
|
592
593
|
- target.selector: original target handle ({ elementId })
|
|
593
594
|
- target.resolved: minimal resolved element info used for the tap
|
|
@@ -640,7 +641,7 @@ Inputs:
|
|
|
640
641
|
- platform/deviceId (optional)
|
|
641
642
|
|
|
642
643
|
Output Structure:
|
|
643
|
-
- action_id, timestamp, action_type
|
|
644
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
644
645
|
- target.selector = { x1, y1, x2, y2, duration }
|
|
645
646
|
- success = true when the swipe was dispatched
|
|
646
647
|
- failure_code/retryable when dispatch fails
|
|
@@ -692,7 +693,7 @@ Inputs:
|
|
|
692
693
|
- direction, maxScrolls, scrollAmount, deviceId (optional)
|
|
693
694
|
|
|
694
695
|
Output Structure:
|
|
695
|
-
- action_id, timestamp, action_type
|
|
696
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
696
697
|
- target.selector = original selector
|
|
697
698
|
- target.resolved = minimal resolved element info when found
|
|
698
699
|
- success = true when scrolling produced a visible target element
|
|
@@ -746,7 +747,7 @@ Inputs:
|
|
|
746
747
|
- platform/deviceId (optional)
|
|
747
748
|
|
|
748
749
|
Output Structure:
|
|
749
|
-
- action_id, timestamp, action_type
|
|
750
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
750
751
|
- target.selector = { text }
|
|
751
752
|
- success = true when text input was dispatched
|
|
752
753
|
- failure_code/retryable when dispatch fails
|
|
@@ -795,7 +796,7 @@ Inputs:
|
|
|
795
796
|
- platform/deviceId (optional)
|
|
796
797
|
|
|
797
798
|
Output Structure:
|
|
798
|
-
- action_id, timestamp, action_type
|
|
799
|
+
- action_id, timestamp (ISO 8601), action_type
|
|
799
800
|
- target.selector = { key: "back" }
|
|
800
801
|
- success = true when the back action was dispatched
|
|
801
802
|
- failure_code/retryable when dispatch fails
|
|
@@ -835,6 +836,8 @@ Failure Handling:
|
|
|
835
836
|
description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
|
|
836
837
|
|
|
837
838
|
MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
|
|
839
|
+
Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
|
|
840
|
+
For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
|
|
838
841
|
|
|
839
842
|
HOW TO GATHER INPUTS before calling:
|
|
840
843
|
1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
|
|
@@ -868,7 +871,7 @@ BEHAVIOUR after outcome:
|
|
|
868
871
|
},
|
|
869
872
|
networkRequests: {
|
|
870
873
|
type: 'array',
|
|
871
|
-
description: 'Pass this only after calling get_network_activity as instructed by nextAction.
|
|
874
|
+
description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
|
|
872
875
|
items: {
|
|
873
876
|
type: 'object',
|
|
874
877
|
properties: {
|
|
@@ -890,7 +893,7 @@ BEHAVIOUR after outcome:
|
|
|
890
893
|
name: 'get_network_activity',
|
|
891
894
|
description: `Returns structured network events captured from platform logs since the last action.
|
|
892
895
|
|
|
893
|
-
Call this
|
|
896
|
+
Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
|
|
894
897
|
Do not call more than once per action.
|
|
895
898
|
|
|
896
899
|
Events are filtered to significant (non-background) requests only.
|
|
@@ -16,7 +16,8 @@ import {
|
|
|
16
16
|
inferScrollFailure,
|
|
17
17
|
ToolCallArgs,
|
|
18
18
|
ToolHandler,
|
|
19
|
-
wrapResponse
|
|
19
|
+
wrapResponse,
|
|
20
|
+
wrapToolError
|
|
20
21
|
} from './common.js'
|
|
21
22
|
|
|
22
23
|
async function handleStartApp(args: ToolCallArgs) {
|
|
@@ -375,8 +376,7 @@ export async function handleToolCall(name: string, args: ToolCallArgs = {}) {
|
|
|
375
376
|
try {
|
|
376
377
|
return await handler(args)
|
|
377
378
|
} catch (error) {
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
}
|
|
379
|
+
console.error(`Error executing tool ${name}:`, error)
|
|
380
|
+
return wrapToolError(name, error)
|
|
381
381
|
}
|
|
382
382
|
}
|
package/src/types.ts
CHANGED
|
@@ -26,6 +26,7 @@ async function run() {
|
|
|
26
26
|
assert(waitForScreenChange, 'wait_for_screen_change should be registered')
|
|
27
27
|
assert.match((waitForScreenChange as any).description, /does not verify correctness of the resulting state/i)
|
|
28
28
|
assert.match((waitForScreenChange as any).description, /follow with expect_screen/i)
|
|
29
|
+
assert.match((waitForScreenChange as any).description, /backend\/API activity without a visible UI change/i)
|
|
29
30
|
|
|
30
31
|
const captureDebugSnapshot = toolDefinitions.find((tool) => tool.name === 'capture_debug_snapshot')
|
|
31
32
|
assert(captureDebugSnapshot, 'capture_debug_snapshot should be registered')
|
|
@@ -60,6 +61,18 @@ async function run() {
|
|
|
60
61
|
assert.match((expectElementVisible as any).description, /selector is the primary input/i)
|
|
61
62
|
assert.match((expectElementVisible as any).description, /Returns structured binary success\/failure only/i)
|
|
62
63
|
|
|
64
|
+
const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
|
|
65
|
+
assert(classifyActionOutcome, 'classify_action_outcome should be registered')
|
|
66
|
+
assert.match((classifyActionOutcome as any).description, /backend\/API activity without a visible UI change/i)
|
|
67
|
+
assert.match((classifyActionOutcome as any).description, /get_network_activity/i)
|
|
68
|
+
assert.match((classifyActionOutcome as any).description, /immediately after the action/i)
|
|
69
|
+
|
|
70
|
+
const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
|
|
71
|
+
assert(getNetworkActivity, 'get_network_activity should be registered')
|
|
72
|
+
assert.match((getNetworkActivity as any).description, /backend\/API activity without a visible UI change/i)
|
|
73
|
+
assert.doesNotMatch((getNetworkActivity as any).description, /Call this only when/i)
|
|
74
|
+
assert.match((getNetworkActivity as any).description, /immediately after an action/i)
|
|
75
|
+
|
|
63
76
|
await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)
|
|
64
77
|
|
|
65
78
|
console.log('server contract tests passed')
|
|
@@ -47,7 +47,7 @@ async function run() {
|
|
|
47
47
|
|
|
48
48
|
;(ToolsInteract as any).tapElementHandler = async () => ({
|
|
49
49
|
action_id: 'tap_element_1',
|
|
50
|
-
timestamp:
|
|
50
|
+
timestamp: '2026-04-23T08:00:00.000Z',
|
|
51
51
|
action_type: 'tap_element',
|
|
52
52
|
target: {
|
|
53
53
|
selector: { elementId: 'el_ready' },
|
|
@@ -62,6 +62,7 @@ async function run() {
|
|
|
62
62
|
const tapElementPayload = JSON.parse((tapElementResponse as any).content[0].text)
|
|
63
63
|
assert.strictEqual(tapElementPayload.success, true)
|
|
64
64
|
assert.strictEqual(tapElementPayload.action_type, 'tap_element')
|
|
65
|
+
assert.match(tapElementPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
65
66
|
assert.strictEqual(tapElementPayload.target.resolved.elementId, 'el_ready')
|
|
66
67
|
assert.strictEqual(tapElementPayload.ui_fingerprint_before, 'fp_before')
|
|
67
68
|
|
|
@@ -71,6 +72,7 @@ async function run() {
|
|
|
71
72
|
const tapPayload = JSON.parse((tapResponse as any).content[0].text)
|
|
72
73
|
assert.strictEqual(tapPayload.success, true)
|
|
73
74
|
assert.strictEqual(tapPayload.action_type, 'tap')
|
|
75
|
+
assert.match(tapPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
74
76
|
assert.deepStrictEqual(tapPayload.target.selector, { x: 1, y: 2 })
|
|
75
77
|
assert.strictEqual(tapPayload.ui_fingerprint_before, 'fp_mock')
|
|
76
78
|
|
|
@@ -93,6 +95,7 @@ async function run() {
|
|
|
93
95
|
const startAppPayload = JSON.parse((startAppResponse as any).content[0].text)
|
|
94
96
|
assert.strictEqual(startAppPayload.success, true)
|
|
95
97
|
assert.strictEqual(startAppPayload.action_type, 'start_app')
|
|
98
|
+
assert.match(startAppPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
96
99
|
assert.strictEqual(startAppPayload.device.id, 'emulator-5554')
|
|
97
100
|
assert.deepStrictEqual(startAppPayload.target.selector, { appId: 'com.example.app' })
|
|
98
101
|
assert.strictEqual(startAppPayload.details.launch_time_ms, 123)
|
|
@@ -128,6 +131,30 @@ async function run() {
|
|
|
128
131
|
assert.strictEqual(expectElementPayload.element_id, 'el_ready')
|
|
129
132
|
assert.strictEqual(expectElementPayload.expected_condition, 'visible')
|
|
130
133
|
|
|
134
|
+
;(ToolsInteract as any).tapHandler = async () => {
|
|
135
|
+
throw new Error('boom')
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
const failingTapResponse = await handleToolCall('tap', { platform: 'android', x: 1, y: 2 })
|
|
139
|
+
assert.strictEqual((failingTapResponse as any).content.length, 1)
|
|
140
|
+
const failingTapPayload = JSON.parse((failingTapResponse as any).content[0].text)
|
|
141
|
+
assert.deepStrictEqual(failingTapPayload, {
|
|
142
|
+
error: {
|
|
143
|
+
tool: 'tap',
|
|
144
|
+
message: 'boom'
|
|
145
|
+
}
|
|
146
|
+
})
|
|
147
|
+
|
|
148
|
+
;(ToolsInteract as any).tapHandler = async () => {
|
|
149
|
+
throw { code: 'E_CUSTOM', detail: { field: 'value' } }
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
const objectTapResponse = await handleToolCall('tap', { platform: 'android', x: 1, y: 2 })
|
|
153
|
+
const objectTapPayload = JSON.parse((objectTapResponse as any).content[0].text)
|
|
154
|
+
assert.strictEqual(objectTapPayload.error.tool, 'tap')
|
|
155
|
+
assert.match(objectTapPayload.error.message, /"code": "E_CUSTOM"/)
|
|
156
|
+
assert.match(objectTapPayload.error.message, /"field": "value"/)
|
|
157
|
+
|
|
131
158
|
;(ToolsObserve as any).captureScreenshotHandler = async () => ({
|
|
132
159
|
device: { platform: 'ios', id: 'booted', osVersion: '18.0', model: 'Simulator', simulator: true },
|
|
133
160
|
screenshot: Buffer.from('png-data').toString('base64'),
|