mobile-debug-mcp 0.25.1 → 0.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/interact/classify.js +48 -11
- package/dist/interact/index.js +113 -0
- package/dist/observe/android.js +10 -1
- package/dist/observe/index.js +19 -1
- package/dist/observe/ios.js +15 -1
- package/dist/observe/snapshot-metadata.js +88 -0
- package/dist/server/tool-definitions.js +49 -14
- package/dist/server/tool-handlers.js +12 -0
- package/dist/server-core.js +1 -1
- package/docs/CHANGELOG.md +9 -0
- package/docs/ROADMAP.md +66 -38
- package/docs/rfcs/003-wait-and-synchronization-reliability.md +296 -0
- package/docs/rfcs/004-action-verification-routing.md +342 -0
- package/docs/specs/mcp-tooling-spec-v1.md +11 -3
- package/docs/tools/interact.md +31 -8
- package/docs/tools/observe.md +4 -2
- package/package.json +1 -1
- package/skills/rfc-review/SKILL.md +52 -0
- package/skills/rfc-review/references/rfc-review-checklist.md +12 -0
- package/skills/rfc-review/references/rfc-review-template.md +28 -0
- package/src/interact/classify.ts +53 -13
- package/src/interact/index.ts +151 -0
- package/src/observe/android.ts +11 -1
- package/src/observe/index.ts +26 -1
- package/src/observe/ios.ts +28 -13
- package/src/observe/snapshot-metadata.ts +107 -0
- package/src/server/tool-definitions.ts +49 -14
- package/src/server/tool-handlers.ts +13 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +23 -0
- package/test/unit/interact/classify_action_outcome.test.ts +44 -25
- package/test/unit/interact/wait_for_ui_change.test.ts +76 -0
- package/test/unit/server/contract.test.ts +8 -6
- package/test/unit/server/response_shapes.test.ts +37 -3
- package/docs/rfcs/003-wait-and-synchronization-reliability +0 -232
|
@@ -240,7 +240,7 @@ Failure Handling:
|
|
|
240
240
|
},
|
|
241
241
|
{
|
|
242
242
|
name: 'capture_debug_snapshot',
|
|
243
|
-
description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
|
|
243
|
+
description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON with snapshot_revision, captured_at_ms, and loading_state when detectable.',
|
|
244
244
|
inputSchema: {
|
|
245
245
|
type: 'object',
|
|
246
246
|
properties: {
|
|
@@ -291,7 +291,7 @@ Failure Handling:
|
|
|
291
291
|
},
|
|
292
292
|
{
|
|
293
293
|
name: 'get_ui_tree',
|
|
294
|
-
description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content.',
|
|
294
|
+
description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content with snapshot metadata when available.',
|
|
295
295
|
inputSchema: {
|
|
296
296
|
type: 'object',
|
|
297
297
|
properties: {
|
|
@@ -344,7 +344,7 @@ Capabilities:
|
|
|
344
344
|
Constraints:
|
|
345
345
|
- Does not verify correctness of the resulting state
|
|
346
346
|
- Must not be used alone to confirm action success when an applicable expect_* tool exists
|
|
347
|
-
-
|
|
347
|
+
- For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
|
|
348
348
|
|
|
349
349
|
Recommended Usage:
|
|
350
350
|
1. Capture or define the expected outcome
|
|
@@ -363,6 +363,34 @@ Recommended Usage:
|
|
|
363
363
|
required: ['previousFingerprint']
|
|
364
364
|
}
|
|
365
365
|
},
|
|
366
|
+
{
|
|
367
|
+
name: 'wait_for_ui_change',
|
|
368
|
+
description: `Purpose:
|
|
369
|
+
Wait for a non-navigation UI mutation or in-place update to become stable.
|
|
370
|
+
|
|
371
|
+
Inputs:
|
|
372
|
+
- expected_change (optional): hierarchy_diff, text_change, or state_change
|
|
373
|
+
- timeout_ms (optional)
|
|
374
|
+
- stability_window_ms (optional)
|
|
375
|
+
|
|
376
|
+
Guidance:
|
|
377
|
+
- Prefer wait_for_screen_change for navigation transitions.
|
|
378
|
+
- Prefer wait_for_ui_change for in-place mutations and non-navigation updates.
|
|
379
|
+
- Use the returned snapshot_revision as the observed synchronization point when available.
|
|
380
|
+
|
|
381
|
+
Failure Handling:
|
|
382
|
+
- TIMEOUT means the UI did not change in a stable way within the allotted time.`,
|
|
383
|
+
inputSchema: {
|
|
384
|
+
type: 'object',
|
|
385
|
+
properties: {
|
|
386
|
+
platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override (android|ios)' },
|
|
387
|
+
deviceId: { type: 'string', description: 'Optional device id/udid to target' },
|
|
388
|
+
expected_change: { type: 'string', enum: ['hierarchy_diff', 'text_change', 'state_change'], description: 'Optional type of UI change to wait for' },
|
|
389
|
+
timeout_ms: { type: 'number', description: 'Timeout in ms to wait for change (default 60000)', default: 60000 },
|
|
390
|
+
stability_window_ms: { type: 'number', description: 'How long the change must remain stable before success (default 250)', default: 250 }
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
},
|
|
366
394
|
{
|
|
367
395
|
name: 'expect_screen',
|
|
368
396
|
description: `Purpose:
|
|
@@ -890,26 +918,29 @@ Failure Handling:
|
|
|
890
918
|
name: 'classify_action_outcome',
|
|
891
919
|
description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
|
|
892
920
|
|
|
893
|
-
|
|
894
|
-
Use this
|
|
895
|
-
For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action
|
|
921
|
+
Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
|
|
922
|
+
Use this when the intended outcome is not already fully verified by the UI signal alone.
|
|
923
|
+
For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
|
|
896
924
|
|
|
897
925
|
HOW TO GATHER INPUTS before calling:
|
|
898
926
|
1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
|
|
899
927
|
2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
|
|
900
|
-
3.
|
|
928
|
+
3. Pass actionType from the action response when available.
|
|
929
|
+
4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
|
|
901
930
|
|
|
902
931
|
RULES (applied in order — stop at first match):
|
|
903
932
|
1. If uiChanged=true OR expectedElementVisible=true → outcome=success
|
|
904
|
-
2.
|
|
933
|
+
2. If actionType is missing → outcome=unknown
|
|
905
934
|
3. If any request has status=failure or retryable → outcome=backend_failure
|
|
906
|
-
4. If
|
|
907
|
-
5. If
|
|
908
|
-
6.
|
|
935
|
+
4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
|
|
936
|
+
5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
|
|
937
|
+
6. If no requests returned → outcome=no_op
|
|
938
|
+
7. If all requests succeeded → outcome=ui_failure
|
|
939
|
+
8. Otherwise → outcome=unknown
|
|
909
940
|
|
|
910
941
|
BEHAVIOUR after outcome:
|
|
911
942
|
- success → continue
|
|
912
|
-
- no_op → retry
|
|
943
|
+
- no_op → retry with richer state verification or re-resolve the element
|
|
913
944
|
- backend_failure → stop and report the failing endpoint
|
|
914
945
|
- ui_failure → stop and report failure
|
|
915
946
|
- unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
|
|
@@ -924,9 +955,13 @@ BEHAVIOUR after outcome:
|
|
|
924
955
|
type: 'boolean',
|
|
925
956
|
description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
|
|
926
957
|
},
|
|
958
|
+
actionType: {
|
|
959
|
+
type: 'string',
|
|
960
|
+
description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
|
|
961
|
+
},
|
|
927
962
|
networkRequests: {
|
|
928
963
|
type: 'array',
|
|
929
|
-
description: '
|
|
964
|
+
description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
|
|
930
965
|
items: {
|
|
931
966
|
type: 'object',
|
|
932
967
|
properties: {
|
|
@@ -948,7 +983,7 @@ BEHAVIOUR after outcome:
|
|
|
948
983
|
name: 'get_network_activity',
|
|
949
984
|
description: `Returns structured network events captured from platform logs since the last action.
|
|
950
985
|
|
|
951
|
-
Call this
|
|
986
|
+
Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
|
|
952
987
|
Do not call more than once per action.
|
|
953
988
|
|
|
954
989
|
Events are filtered to significant (non-background) requests only.
|
|
@@ -288,6 +288,16 @@ async function handleWaitForUI(args: ToolCallArgs) {
|
|
|
288
288
|
return wrapResponse(res)
|
|
289
289
|
}
|
|
290
290
|
|
|
291
|
+
async function handleWaitForUIChange(args: ToolCallArgs) {
|
|
292
|
+
const platform = getStringArg(args, 'platform') as PlatformArg | undefined
|
|
293
|
+
const deviceId = getStringArg(args, 'deviceId')
|
|
294
|
+
const timeout_ms = getNumberArg(args, 'timeout_ms') ?? 60000
|
|
295
|
+
const stability_window_ms = getNumberArg(args, 'stability_window_ms') ?? 250
|
|
296
|
+
const expected_change = getStringArg(args, 'expected_change') as 'hierarchy_diff' | 'text_change' | 'state_change' | undefined
|
|
297
|
+
const res = await ToolsInteract.waitForUIChangeHandler({ platform, deviceId, timeout_ms, stability_window_ms, expected_change })
|
|
298
|
+
return wrapResponse(res)
|
|
299
|
+
}
|
|
300
|
+
|
|
291
301
|
async function handleFindElement(args: ToolCallArgs) {
|
|
292
302
|
const query = requireStringArg(args, 'query')
|
|
293
303
|
const exact = getBooleanArg(args, 'exact') ?? false
|
|
@@ -438,11 +448,13 @@ async function handleStopLogStream(args: ToolCallArgs) {
|
|
|
438
448
|
function handleClassifyActionOutcome(args: ToolCallArgs) {
|
|
439
449
|
const uiChanged = requireBooleanArg(args, 'uiChanged')
|
|
440
450
|
const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible')
|
|
451
|
+
const actionType = getStringArg(args, 'actionType')
|
|
441
452
|
const networkRequests = getArrayArg<ClassifyNetworkRequestArg>(args, 'networkRequests')
|
|
442
453
|
const hasLogErrors = getBooleanArg(args, 'hasLogErrors')
|
|
443
454
|
const result = classifyActionOutcome({
|
|
444
455
|
uiChanged,
|
|
445
456
|
expectedElementVisible: expectedElementVisible ?? null,
|
|
457
|
+
actionType: actionType ?? null,
|
|
446
458
|
networkRequests: networkRequests ?? null,
|
|
447
459
|
hasLogErrors: hasLogErrors ?? null
|
|
448
460
|
})
|
|
@@ -473,6 +485,7 @@ export const toolHandlers: Record<string, ToolHandler> = {
|
|
|
473
485
|
get_current_screen: handleGetCurrentScreen,
|
|
474
486
|
get_screen_fingerprint: handleGetScreenFingerprint,
|
|
475
487
|
wait_for_screen_change: handleWaitForScreenChange,
|
|
488
|
+
wait_for_ui_change: handleWaitForUIChange,
|
|
476
489
|
expect_screen: handleExpectScreen,
|
|
477
490
|
expect_element_visible: handleExpectElementVisible,
|
|
478
491
|
expect_state: handleExpectState,
|
package/src/server-core.ts
CHANGED
package/src/types.ts
CHANGED
|
@@ -109,6 +109,12 @@ export interface UIElementSemanticMetadata {
|
|
|
109
109
|
is_container: boolean;
|
|
110
110
|
}
|
|
111
111
|
|
|
112
|
+
export interface LoadingState {
|
|
113
|
+
active: boolean;
|
|
114
|
+
signal: string;
|
|
115
|
+
source: string;
|
|
116
|
+
}
|
|
117
|
+
|
|
112
118
|
export interface CaptureAndroidScreenResponse {
|
|
113
119
|
device: DeviceInfo;
|
|
114
120
|
screenshot: string; // base64 encoded string
|
|
@@ -162,6 +168,9 @@ export interface GetUITreeResponse {
|
|
|
162
168
|
height: number;
|
|
163
169
|
};
|
|
164
170
|
elements: UIElement[];
|
|
171
|
+
snapshot_revision: number;
|
|
172
|
+
captured_at_ms: number;
|
|
173
|
+
loading_state?: LoadingState | null;
|
|
165
174
|
error?: string;
|
|
166
175
|
}
|
|
167
176
|
|
|
@@ -183,12 +192,15 @@ export interface SnapshotSemanticResponse {
|
|
|
183
192
|
|
|
184
193
|
export interface CaptureDebugSnapshotRawResponse {
|
|
185
194
|
timestamp: number;
|
|
195
|
+
snapshot_revision: number;
|
|
196
|
+
captured_at_ms: number;
|
|
186
197
|
reason: string;
|
|
187
198
|
activity: string | null;
|
|
188
199
|
fingerprint: string | null;
|
|
189
200
|
screenshot: string | null;
|
|
190
201
|
ui_tree: GetUITreeResponse | null;
|
|
191
202
|
logs: StructuredLogEntry[];
|
|
203
|
+
loading_state?: LoadingState | null;
|
|
192
204
|
device?: DeviceInfo;
|
|
193
205
|
screenshot_error?: string;
|
|
194
206
|
activity_error?: string;
|
|
@@ -326,6 +338,17 @@ export interface ExpectStateResponse {
|
|
|
326
338
|
retryable?: boolean;
|
|
327
339
|
}
|
|
328
340
|
|
|
341
|
+
export interface WaitForUIChangeResponse {
|
|
342
|
+
success: boolean;
|
|
343
|
+
observed_change: 'hierarchy_diff' | 'text_change' | 'state_change' | null;
|
|
344
|
+
snapshot_revision?: number;
|
|
345
|
+
timeout: boolean;
|
|
346
|
+
elapsed_ms: number;
|
|
347
|
+
expected_change?: 'hierarchy_diff' | 'text_change' | 'state_change';
|
|
348
|
+
reason?: string;
|
|
349
|
+
loading_state?: LoadingState | null;
|
|
350
|
+
}
|
|
351
|
+
|
|
329
352
|
export interface SwipeResponse {
|
|
330
353
|
device: DeviceInfo;
|
|
331
354
|
success: boolean;
|
|
@@ -7,7 +7,6 @@ function run() {
|
|
|
7
7
|
const result = classifyActionOutcome({ uiChanged: true })
|
|
8
8
|
assert.strictEqual(result.outcome, 'success')
|
|
9
9
|
assert.ok(result.reasoning.length > 0)
|
|
10
|
-
assert.strictEqual(result.nextAction, undefined)
|
|
11
10
|
}
|
|
12
11
|
|
|
13
12
|
// Step 1 — expectedElementVisible → success
|
|
@@ -15,7 +14,6 @@ function run() {
|
|
|
15
14
|
const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: true })
|
|
16
15
|
assert.strictEqual(result.outcome, 'success')
|
|
17
16
|
assert.strictEqual(result.reasoning, 'expected element is visible')
|
|
18
|
-
assert.strictEqual(result.nextAction, undefined)
|
|
19
17
|
}
|
|
20
18
|
|
|
21
19
|
// Step 1 — both uiChanged and expectedElementVisible → success
|
|
@@ -24,24 +22,50 @@ function run() {
|
|
|
24
22
|
assert.strictEqual(result.outcome, 'success')
|
|
25
23
|
}
|
|
26
24
|
|
|
27
|
-
//
|
|
25
|
+
// No actionType supplied → unknown
|
|
28
26
|
{
|
|
29
27
|
const result = classifyActionOutcome({ uiChanged: false })
|
|
30
28
|
assert.strictEqual(result.outcome, 'unknown')
|
|
31
|
-
assert.
|
|
29
|
+
assert.ok(result.reasoning.includes('actionType was not supplied'))
|
|
32
30
|
}
|
|
33
31
|
|
|
34
|
-
//
|
|
32
|
+
// Local-state action routes to state verification rather than forced network probing
|
|
35
33
|
{
|
|
36
|
-
const result = classifyActionOutcome({ uiChanged: false,
|
|
34
|
+
const result = classifyActionOutcome({ uiChanged: false, actionType: 'tap' })
|
|
35
|
+
assert.strictEqual(result.outcome, 'no_op')
|
|
36
|
+
assert.ok(result.reasoning.includes('local-state action'))
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Local-state action with network data still prefers local-state semantics
|
|
40
|
+
{
|
|
41
|
+
const result = classifyActionOutcome({
|
|
42
|
+
uiChanged: false,
|
|
43
|
+
actionType: 'type_text',
|
|
44
|
+
networkRequests: []
|
|
45
|
+
})
|
|
46
|
+
assert.strictEqual(result.outcome, 'no_op')
|
|
47
|
+
assert.ok(result.reasoning.includes('local-state action'))
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Explicit side-effect action without networkRequests supplied → unknown
|
|
51
|
+
{
|
|
52
|
+
const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app' })
|
|
37
53
|
assert.strictEqual(result.outcome, 'unknown')
|
|
38
|
-
assert.
|
|
54
|
+
assert.ok(result.reasoning.includes('side-effect action'))
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Side-effect action with empty networkRequests → no_op
|
|
58
|
+
{
|
|
59
|
+
const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [] })
|
|
60
|
+
assert.strictEqual(result.outcome, 'no_op')
|
|
61
|
+
assert.ok(result.reasoning.includes('side-effect action'))
|
|
39
62
|
}
|
|
40
63
|
|
|
41
|
-
//
|
|
64
|
+
// Network failure → backend_failure
|
|
42
65
|
{
|
|
43
66
|
const result = classifyActionOutcome({
|
|
44
67
|
uiChanged: false,
|
|
68
|
+
actionType: 'start_app',
|
|
45
69
|
networkRequests: [{ endpoint: '/login', status: 'failure' }]
|
|
46
70
|
})
|
|
47
71
|
assert.strictEqual(result.outcome, 'backend_failure')
|
|
@@ -49,10 +73,11 @@ function run() {
|
|
|
49
73
|
assert.ok(result.reasoning.includes('failure'))
|
|
50
74
|
}
|
|
51
75
|
|
|
52
|
-
//
|
|
76
|
+
// Retryable status → backend_failure
|
|
53
77
|
{
|
|
54
78
|
const result = classifyActionOutcome({
|
|
55
79
|
uiChanged: false,
|
|
80
|
+
actionType: 'start_app',
|
|
56
81
|
networkRequests: [
|
|
57
82
|
{ endpoint: '/api/submit', status: 'retryable' },
|
|
58
83
|
{ endpoint: '/api/other', status: 'success' }
|
|
@@ -62,25 +87,11 @@ function run() {
|
|
|
62
87
|
assert.ok(result.reasoning.includes('/api/submit'))
|
|
63
88
|
}
|
|
64
89
|
|
|
65
|
-
//
|
|
66
|
-
{
|
|
67
|
-
const result = classifyActionOutcome({ uiChanged: false, networkRequests: [] })
|
|
68
|
-
assert.strictEqual(result.outcome, 'no_op')
|
|
69
|
-
assert.ok(result.reasoning.includes('no UI change'))
|
|
70
|
-
assert.ok(result.reasoning.includes('no network activity'))
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
// Step 4 — empty network requests with log errors → no_op with note
|
|
74
|
-
{
|
|
75
|
-
const result = classifyActionOutcome({ uiChanged: false, networkRequests: [], hasLogErrors: true })
|
|
76
|
-
assert.strictEqual(result.outcome, 'no_op')
|
|
77
|
-
assert.ok(result.reasoning.includes('log errors'))
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
// Step 5 — all requests succeeded but UI unchanged → ui_failure
|
|
90
|
+
// All requests succeeded and UI stayed unchanged → ui_failure
|
|
81
91
|
{
|
|
82
92
|
const result = classifyActionOutcome({
|
|
83
93
|
uiChanged: false,
|
|
94
|
+
actionType: 'start_app',
|
|
84
95
|
networkRequests: [
|
|
85
96
|
{ endpoint: '/api/save', status: 'success' },
|
|
86
97
|
{ endpoint: '/api/refresh', status: 'success' }
|
|
@@ -90,10 +101,18 @@ function run() {
|
|
|
90
101
|
assert.ok(result.reasoning.includes('network requests succeeded'))
|
|
91
102
|
}
|
|
92
103
|
|
|
104
|
+
// Empty network requests with log errors → no_op with note
|
|
105
|
+
{
|
|
106
|
+
const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [], hasLogErrors: true })
|
|
107
|
+
assert.strictEqual(result.outcome, 'no_op')
|
|
108
|
+
assert.ok(result.reasoning.includes('log errors'))
|
|
109
|
+
}
|
|
110
|
+
|
|
93
111
|
// Step 1 takes priority over network signals — success even when failures present
|
|
94
112
|
{
|
|
95
113
|
const result = classifyActionOutcome({
|
|
96
114
|
uiChanged: true,
|
|
115
|
+
actionType: 'start_app',
|
|
97
116
|
networkRequests: [{ endpoint: '/api/log', status: 'failure' }]
|
|
98
117
|
})
|
|
99
118
|
assert.strictEqual(result.outcome, 'success')
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import assert from 'assert'
|
|
2
|
+
import { ToolsInteract } from '../../../src/interact/index.js'
|
|
3
|
+
import { ToolsObserve } from '../../../src/observe/index.js'
|
|
4
|
+
|
|
5
|
+
async function run() {
|
|
6
|
+
const originalGetUITreeHandler = (ToolsObserve as any).getUITreeHandler
|
|
7
|
+
|
|
8
|
+
try {
|
|
9
|
+
let calls = 0
|
|
10
|
+
;(ToolsObserve as any).getUITreeHandler = async () => {
|
|
11
|
+
calls++
|
|
12
|
+
if (calls === 1) {
|
|
13
|
+
return {
|
|
14
|
+
device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
|
|
15
|
+
screen: 'Loading',
|
|
16
|
+
resolution: { width: 1080, height: 2400 },
|
|
17
|
+
elements: [{ text: 'Loading', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
|
|
18
|
+
snapshot_revision: 1,
|
|
19
|
+
captured_at_ms: 1000
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
return {
|
|
24
|
+
device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
|
|
25
|
+
screen: 'Loaded',
|
|
26
|
+
resolution: { width: 1080, height: 2400 },
|
|
27
|
+
elements: [{ text: 'Loaded', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
|
|
28
|
+
snapshot_revision: 2,
|
|
29
|
+
captured_at_ms: 2000
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const success = await ToolsInteract.waitForUIChangeHandler({
|
|
34
|
+
platform: 'android',
|
|
35
|
+
deviceId: 'mock',
|
|
36
|
+
expected_change: 'text_change',
|
|
37
|
+
timeout_ms: 1500,
|
|
38
|
+
stability_window_ms: 1
|
|
39
|
+
})
|
|
40
|
+
|
|
41
|
+
assert.strictEqual(success.success, true)
|
|
42
|
+
assert.strictEqual(success.observed_change, 'text_change')
|
|
43
|
+
assert.strictEqual(success.snapshot_revision, 2)
|
|
44
|
+
assert.strictEqual(success.timeout, false)
|
|
45
|
+
|
|
46
|
+
;(ToolsObserve as any).getUITreeHandler = async () => ({
|
|
47
|
+
device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
|
|
48
|
+
screen: 'Static',
|
|
49
|
+
resolution: { width: 1080, height: 2400 },
|
|
50
|
+
elements: [{ text: 'Static', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
|
|
51
|
+
snapshot_revision: 9,
|
|
52
|
+
captured_at_ms: 3000
|
|
53
|
+
})
|
|
54
|
+
|
|
55
|
+
const timeout = await ToolsInteract.waitForUIChangeHandler({
|
|
56
|
+
platform: 'android',
|
|
57
|
+
deviceId: 'mock',
|
|
58
|
+
expected_change: 'state_change',
|
|
59
|
+
timeout_ms: 700,
|
|
60
|
+
stability_window_ms: 1
|
|
61
|
+
})
|
|
62
|
+
|
|
63
|
+
assert.strictEqual(timeout.success, false)
|
|
64
|
+
assert.strictEqual(timeout.observed_change, null)
|
|
65
|
+
assert.strictEqual(timeout.timeout, true)
|
|
66
|
+
|
|
67
|
+
console.log('wait_for_ui_change tests passed')
|
|
68
|
+
} finally {
|
|
69
|
+
;(ToolsObserve as any).getUITreeHandler = originalGetUITreeHandler
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
run().catch((error) => {
|
|
74
|
+
console.error(error)
|
|
75
|
+
process.exit(1)
|
|
76
|
+
})
|
|
@@ -68,15 +68,17 @@ async function run() {
|
|
|
68
68
|
|
|
69
69
|
const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
|
|
70
70
|
assert(classifyActionOutcome, 'classify_action_outcome should be registered')
|
|
71
|
-
assert.match((classifyActionOutcome as any).description, /
|
|
72
|
-
assert.match((classifyActionOutcome as any).description, /
|
|
73
|
-
assert.match((classifyActionOutcome as any).description, /
|
|
71
|
+
assert.match((classifyActionOutcome as any).description, /action_type/i)
|
|
72
|
+
assert.match((classifyActionOutcome as any).description, /local-state/i)
|
|
73
|
+
assert.match((classifyActionOutcome as any).description, /side-effect/i)
|
|
74
|
+
assert.strictEqual((classifyActionOutcome as any).inputSchema.properties.actionType.type, 'string')
|
|
75
|
+
assert.match((classifyActionOutcome as any).inputSchema.properties.networkRequests.description, /optional network evidence/i)
|
|
74
76
|
|
|
75
77
|
const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
|
|
76
78
|
assert(getNetworkActivity, 'get_network_activity should be registered')
|
|
77
|
-
assert.match((getNetworkActivity as any).description, /
|
|
78
|
-
assert.doesNotMatch((getNetworkActivity as any).description, /
|
|
79
|
-
assert.match((getNetworkActivity as any).description, /
|
|
79
|
+
assert.match((getNetworkActivity as any).description, /side-effect/i)
|
|
80
|
+
assert.doesNotMatch((getNetworkActivity as any).description, /nextAction/i)
|
|
81
|
+
assert.match((getNetworkActivity as any).description, /only if the result is still ambiguous/i)
|
|
80
82
|
|
|
81
83
|
await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)
|
|
82
84
|
|
|
@@ -8,6 +8,7 @@ import { ToolsObserve } from '../../../src/observe/index.js'
|
|
|
8
8
|
async function run() {
|
|
9
9
|
const originalInstallAppHandler = (ToolsManage as any).installAppHandler
|
|
10
10
|
const originalWaitForUIHandler = (ToolsInteract as any).waitForUIHandler
|
|
11
|
+
const originalWaitForUIChangeHandler = (ToolsInteract as any).waitForUIChangeHandler
|
|
11
12
|
const originalTapElementHandler = (ToolsInteract as any).tapElementHandler
|
|
12
13
|
const originalTapHandler = (ToolsInteract as any).tapHandler
|
|
13
14
|
const originalExpectScreenHandler = (ToolsInteract as any).expectScreenHandler
|
|
@@ -145,12 +146,16 @@ async function run() {
|
|
|
145
146
|
;(ToolsObserve as any).getUITreeHandler = async () => ({
|
|
146
147
|
device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
|
|
147
148
|
resolution: { width: 1080, height: 2400 },
|
|
149
|
+
screen: 'Notifications',
|
|
148
150
|
elements: [{
|
|
149
151
|
text: 'Notifications',
|
|
150
152
|
depth: 0,
|
|
151
153
|
center: { x: 50, y: 20 },
|
|
152
154
|
state: { checked: true, selected: 'Notifications' }
|
|
153
|
-
}]
|
|
155
|
+
}],
|
|
156
|
+
snapshot_revision: 12,
|
|
157
|
+
captured_at_ms: 1710000000123,
|
|
158
|
+
loading_state: { active: true, signal: 'progress_indicator', source: 'ui_tree' }
|
|
154
159
|
})
|
|
155
160
|
|
|
156
161
|
;(ToolsInteract as any).expectStateHandler = async () => ({
|
|
@@ -227,8 +232,12 @@ async function run() {
|
|
|
227
232
|
|
|
228
233
|
;(ToolsObserve as any).getUITreeHandler = async () => ({
|
|
229
234
|
device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
|
|
235
|
+
screen: 'Login',
|
|
230
236
|
resolution: { width: 1080, height: 2400 },
|
|
231
|
-
elements: [{ text: 'Login', depth: 0, center: { x: 50, y: 20 } }]
|
|
237
|
+
elements: [{ text: 'Login', depth: 0, center: { x: 50, y: 20 } }],
|
|
238
|
+
snapshot_revision: 12,
|
|
239
|
+
captured_at_ms: 1710000000123,
|
|
240
|
+
loading_state: { active: true, signal: 'progress_indicator', source: 'ui_tree' }
|
|
232
241
|
})
|
|
233
242
|
|
|
234
243
|
const uiTreeResponse = await handleToolCall('get_ui_tree', { platform: 'android' })
|
|
@@ -236,16 +245,21 @@ async function run() {
|
|
|
236
245
|
assert.strictEqual(uiTreePayload.elements.length, 1)
|
|
237
246
|
assert.strictEqual(uiTreePayload.resolution.height, 2400)
|
|
238
247
|
assert.strictEqual(uiTreePayload.elements[0].text, 'Login')
|
|
248
|
+
assert.strictEqual(uiTreePayload.snapshot_revision, 12)
|
|
249
|
+
assert.strictEqual(uiTreePayload.loading_state.signal, 'progress_indicator')
|
|
239
250
|
|
|
240
251
|
;(ToolsObserve as any).captureDebugSnapshotHandler = async () => ({
|
|
241
252
|
raw: {
|
|
242
253
|
timestamp: 1710000000000,
|
|
254
|
+
snapshot_revision: 12,
|
|
255
|
+
captured_at_ms: 1710000000123,
|
|
243
256
|
reason: 'manual',
|
|
244
257
|
activity: 'com.example.MainActivity',
|
|
245
258
|
fingerprint: 'fp_raw',
|
|
246
259
|
screenshot: 'base64',
|
|
247
|
-
ui_tree: { screen: 'Home', elements: [] },
|
|
260
|
+
ui_tree: { screen: 'Home', elements: [], snapshot_revision: 12, captured_at_ms: 1710000000123, loading_state: { active: true, signal: 'spinner', source: 'snapshot' } },
|
|
248
261
|
logs: [],
|
|
262
|
+
loading_state: { active: true, signal: 'spinner', source: 'snapshot' },
|
|
249
263
|
device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true }
|
|
250
264
|
},
|
|
251
265
|
semantic: {
|
|
@@ -260,13 +274,33 @@ async function run() {
|
|
|
260
274
|
const snapshotResponse = await handleToolCall('capture_debug_snapshot', { platform: 'android' })
|
|
261
275
|
const snapshotPayload = JSON.parse((snapshotResponse as any).content[0].text)
|
|
262
276
|
assert.strictEqual(snapshotPayload.raw.fingerprint, 'fp_raw')
|
|
277
|
+
assert.strictEqual(snapshotPayload.raw.snapshot_revision, 12)
|
|
278
|
+
assert.strictEqual(snapshotPayload.raw.loading_state.signal, 'spinner')
|
|
263
279
|
assert.strictEqual(snapshotPayload.semantic.screen, 'Home')
|
|
264
280
|
assert.strictEqual(snapshotPayload.semantic.confidence, 0.8)
|
|
265
281
|
|
|
282
|
+
;(ToolsInteract as any).waitForUIChangeHandler = async () => ({
|
|
283
|
+
success: true,
|
|
284
|
+
observed_change: 'text_change',
|
|
285
|
+
snapshot_revision: 13,
|
|
286
|
+
timeout: false,
|
|
287
|
+
elapsed_ms: 1550,
|
|
288
|
+
expected_change: 'text_change',
|
|
289
|
+
loading_state: { active: false, signal: 'spinner', source: 'ui_tree' },
|
|
290
|
+
reason: 'UI change observed'
|
|
291
|
+
})
|
|
292
|
+
|
|
293
|
+
const waitForUIChangeResponse = await handleToolCall('wait_for_ui_change', { expected_change: 'text_change' })
|
|
294
|
+
const waitForUIChangePayload = JSON.parse((waitForUIChangeResponse as any).content[0].text)
|
|
295
|
+
assert.strictEqual(waitForUIChangePayload.success, true)
|
|
296
|
+
assert.strictEqual(waitForUIChangePayload.observed_change, 'text_change')
|
|
297
|
+
assert.strictEqual(waitForUIChangePayload.snapshot_revision, 13)
|
|
298
|
+
|
|
266
299
|
console.log('server response-shape tests passed')
|
|
267
300
|
} finally {
|
|
268
301
|
;(ToolsManage as any).installAppHandler = originalInstallAppHandler
|
|
269
302
|
;(ToolsInteract as any).waitForUIHandler = originalWaitForUIHandler
|
|
303
|
+
;(ToolsInteract as any).waitForUIChangeHandler = originalWaitForUIChangeHandler
|
|
270
304
|
;(ToolsInteract as any).tapElementHandler = originalTapElementHandler
|
|
271
305
|
;(ToolsInteract as any).tapHandler = originalTapHandler
|
|
272
306
|
;(ToolsInteract as any).expectScreenHandler = originalExpectScreenHandler
|