mobile-debug-mcp 0.26.0 → 0.26.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/interact/classify.js +48 -11
- package/dist/interact/index.js +26 -33
- package/dist/server/common.js +14 -1
- package/dist/server/tool-definitions.js +38 -15
- package/dist/server/tool-handlers.js +9 -0
- package/dist/server-core.js +1 -1
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +281 -88
- package/docs/rfcs/004-action-verification-routing.md +342 -0
- package/docs/rfcs/005-unified-action-execution-and-verification-model.md +216 -0
- package/docs/rfcs/006-runtime-action-instrumentation-and-binding-layer.md +230 -0
- package/docs/specs/mcp-tooling-spec-v1.md +7 -3
- package/docs/tools/interact.md +14 -8
- package/package.json +1 -1
- package/src/interact/classify.ts +53 -13
- package/src/interact/index.ts +27 -35
- package/src/server/common.ts +22 -1
- package/src/server/tool-definitions.ts +38 -15
- package/src/server/tool-handlers.ts +9 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +2 -0
- package/test/unit/interact/classify_action_outcome.test.ts +44 -25
- package/test/unit/server/contract.test.ts +8 -6
- package/test/unit/server/response_shapes.test.ts +8 -0
|
@@ -11,7 +11,9 @@ Inputs:
|
|
|
11
11
|
|
|
12
12
|
Output Structure:
|
|
13
13
|
- action_id, timestamp (ISO 8601), action_type
|
|
14
|
-
-
|
|
14
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
15
|
+
- source_module: runtime source of the action envelope
|
|
16
|
+
- target.selector = { appId }
|
|
15
17
|
- success = true when launch was dispatched successfully
|
|
16
18
|
- failure_code/retryable when launch dispatch fails
|
|
17
19
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -84,7 +86,9 @@ Inputs:
|
|
|
84
86
|
|
|
85
87
|
Output Structure:
|
|
86
88
|
- action_id, timestamp (ISO 8601), action_type
|
|
87
|
-
-
|
|
89
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
90
|
+
- source_module: runtime source of the action envelope
|
|
91
|
+
- target.selector = { appId }
|
|
88
92
|
- success = true when the restart command completed
|
|
89
93
|
- failure_code/retryable when restart dispatch fails
|
|
90
94
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -344,7 +348,7 @@ Capabilities:
|
|
|
344
348
|
Constraints:
|
|
345
349
|
- Does not verify correctness of the resulting state
|
|
346
350
|
- Must not be used alone to confirm action success when an applicable expect_* tool exists
|
|
347
|
-
-
|
|
351
|
+
- For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
|
|
348
352
|
|
|
349
353
|
Recommended Usage:
|
|
350
354
|
1. Capture or define the expected outcome
|
|
@@ -617,7 +621,9 @@ Inputs:
|
|
|
617
621
|
|
|
618
622
|
Output Structure:
|
|
619
623
|
- action_id, timestamp (ISO 8601), action_type
|
|
620
|
-
-
|
|
624
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
625
|
+
- source_module: runtime source of the action envelope
|
|
626
|
+
- target.selector = { x, y }
|
|
621
627
|
- success = true when the tap was dispatched
|
|
622
628
|
- failure_code/retryable when dispatch fails
|
|
623
629
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -673,6 +679,8 @@ Output Structure:
|
|
|
673
679
|
- action_id: unique timestamp-based action identifier
|
|
674
680
|
- timestamp: ISO 8601 timestamp for the action attempt
|
|
675
681
|
- action_type: "tap_element"
|
|
682
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
683
|
+
- source_module: runtime source of the action envelope
|
|
676
684
|
- target.selector: original target handle ({ elementId })
|
|
677
685
|
- target.resolved: minimal resolved element info used for the tap
|
|
678
686
|
- success: true when the tap was dispatched
|
|
@@ -725,6 +733,8 @@ Inputs:
|
|
|
725
733
|
|
|
726
734
|
Output Structure:
|
|
727
735
|
- action_id, timestamp (ISO 8601), action_type
|
|
736
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
737
|
+
- source_module: runtime source of the action envelope
|
|
728
738
|
- target.selector = { x1, y1, x2, y2, duration }
|
|
729
739
|
- success = true when the swipe was dispatched
|
|
730
740
|
- failure_code/retryable when dispatch fails
|
|
@@ -777,6 +787,8 @@ Inputs:
|
|
|
777
787
|
|
|
778
788
|
Output Structure:
|
|
779
789
|
- action_id, timestamp (ISO 8601), action_type
|
|
790
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
791
|
+
- source_module: runtime source of the action envelope
|
|
780
792
|
- target.selector = original selector
|
|
781
793
|
- target.resolved = minimal resolved element info when found
|
|
782
794
|
- success = true when scrolling produced a visible target element
|
|
@@ -831,6 +843,8 @@ Inputs:
|
|
|
831
843
|
|
|
832
844
|
Output Structure:
|
|
833
845
|
- action_id, timestamp (ISO 8601), action_type
|
|
846
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
847
|
+
- source_module: runtime source of the action envelope
|
|
834
848
|
- target.selector = { text }
|
|
835
849
|
- success = true when text input was dispatched
|
|
836
850
|
- failure_code/retryable when dispatch fails
|
|
@@ -880,6 +894,8 @@ Inputs:
|
|
|
880
894
|
|
|
881
895
|
Output Structure:
|
|
882
896
|
- action_id, timestamp (ISO 8601), action_type
|
|
897
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
898
|
+
- source_module: runtime source of the action envelope
|
|
883
899
|
- target.selector = { key: "back" }
|
|
884
900
|
- success = true when the back action was dispatched
|
|
885
901
|
- failure_code/retryable when dispatch fails
|
|
@@ -918,26 +934,29 @@ Failure Handling:
|
|
|
918
934
|
name: 'classify_action_outcome',
|
|
919
935
|
description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
|
|
920
936
|
|
|
921
|
-
|
|
922
|
-
Use this
|
|
923
|
-
For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action
|
|
937
|
+
Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
|
|
938
|
+
Use this when the intended outcome is not already fully verified by the UI signal alone.
|
|
939
|
+
For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
|
|
924
940
|
|
|
925
941
|
HOW TO GATHER INPUTS before calling:
|
|
926
942
|
1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
|
|
927
943
|
2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
|
|
928
|
-
3.
|
|
944
|
+
3. Pass actionType from the action response when available.
|
|
945
|
+
4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
|
|
929
946
|
|
|
930
947
|
RULES (applied in order — stop at first match):
|
|
931
948
|
1. If uiChanged=true OR expectedElementVisible=true → outcome=success
|
|
932
|
-
2.
|
|
949
|
+
2. If actionType is missing → outcome=unknown
|
|
933
950
|
3. If any request has status=failure or retryable → outcome=backend_failure
|
|
934
|
-
4. If
|
|
935
|
-
5. If
|
|
936
|
-
6.
|
|
951
|
+
4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
|
|
952
|
+
5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
|
|
953
|
+
6. If no requests returned → outcome=no_op
|
|
954
|
+
7. If all requests succeeded → outcome=ui_failure
|
|
955
|
+
8. Otherwise → outcome=unknown
|
|
937
956
|
|
|
938
957
|
BEHAVIOUR after outcome:
|
|
939
958
|
- success → continue
|
|
940
|
-
- no_op → retry
|
|
959
|
+
- no_op → retry with richer state verification or re-resolve the element
|
|
941
960
|
- backend_failure → stop and report the failing endpoint
|
|
942
961
|
- ui_failure → stop and report failure
|
|
943
962
|
- unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
|
|
@@ -952,9 +971,13 @@ BEHAVIOUR after outcome:
|
|
|
952
971
|
type: 'boolean',
|
|
953
972
|
description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
|
|
954
973
|
},
|
|
974
|
+
actionType: {
|
|
975
|
+
type: 'string',
|
|
976
|
+
description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
|
|
977
|
+
},
|
|
955
978
|
networkRequests: {
|
|
956
979
|
type: 'array',
|
|
957
|
-
description: '
|
|
980
|
+
description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
|
|
958
981
|
items: {
|
|
959
982
|
type: 'object',
|
|
960
983
|
properties: {
|
|
@@ -976,7 +999,7 @@ BEHAVIOUR after outcome:
|
|
|
976
999
|
name: 'get_network_activity',
|
|
977
1000
|
description: `Returns structured network events captured from platform logs since the last action.
|
|
978
1001
|
|
|
979
|
-
Call this
|
|
1002
|
+
Call this immediately after an action when you want backend evidence for a side-effect flow, and only if the result is still ambiguous.
|
|
980
1003
|
Do not call more than once per action.
|
|
981
1004
|
|
|
982
1005
|
Events are filtered to significant (non-background) requests only.
|
|
@@ -47,6 +47,7 @@ async function handleStartApp(args: ToolCallArgs) {
|
|
|
47
47
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
48
48
|
return wrapResponse(buildActionExecutionResult({
|
|
49
49
|
actionType: 'start_app',
|
|
50
|
+
sourceModule: 'server',
|
|
50
51
|
device: res.device,
|
|
51
52
|
selector: { appId },
|
|
52
53
|
success: !!res.appStarted,
|
|
@@ -82,6 +83,7 @@ async function handleRestartApp(args: ToolCallArgs) {
|
|
|
82
83
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
83
84
|
return wrapResponse(buildActionExecutionResult({
|
|
84
85
|
actionType: 'restart_app',
|
|
86
|
+
sourceModule: 'server',
|
|
85
87
|
device: res.device,
|
|
86
88
|
selector: { appId },
|
|
87
89
|
success: !!res.appRestarted,
|
|
@@ -319,6 +321,7 @@ async function handleTap(args: ToolCallArgs) {
|
|
|
319
321
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
320
322
|
return wrapResponse(buildActionExecutionResult({
|
|
321
323
|
actionType: 'tap',
|
|
324
|
+
sourceModule: 'server',
|
|
322
325
|
selector: { x, y },
|
|
323
326
|
success: !!res.success,
|
|
324
327
|
uiFingerprintBefore,
|
|
@@ -348,6 +351,7 @@ async function handleSwipe(args: ToolCallArgs) {
|
|
|
348
351
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
349
352
|
return wrapResponse(buildActionExecutionResult({
|
|
350
353
|
actionType: 'swipe',
|
|
354
|
+
sourceModule: 'server',
|
|
351
355
|
selector: { x1, y1, x2, y2, duration },
|
|
352
356
|
success: !!res.success,
|
|
353
357
|
uiFingerprintBefore,
|
|
@@ -369,6 +373,7 @@ async function handleScrollToElement(args: ToolCallArgs) {
|
|
|
369
373
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
370
374
|
return wrapResponse(buildActionExecutionResult({
|
|
371
375
|
actionType: 'scroll_to_element',
|
|
376
|
+
sourceModule: 'server',
|
|
372
377
|
selector: selector ?? null,
|
|
373
378
|
resolved: res?.success && res?.element ? {
|
|
374
379
|
elementId: null,
|
|
@@ -395,6 +400,7 @@ async function handleTypeText(args: ToolCallArgs) {
|
|
|
395
400
|
const uiFingerprintAfter = await captureActionFingerprint('android', deviceId)
|
|
396
401
|
return wrapResponse(buildActionExecutionResult({
|
|
397
402
|
actionType: 'type_text',
|
|
403
|
+
sourceModule: 'server',
|
|
398
404
|
selector: { text },
|
|
399
405
|
success: !!res.success,
|
|
400
406
|
uiFingerprintBefore,
|
|
@@ -411,6 +417,7 @@ async function handlePressBack(args: ToolCallArgs) {
|
|
|
411
417
|
const uiFingerprintAfter = await captureActionFingerprint('android', deviceId)
|
|
412
418
|
return wrapResponse(buildActionExecutionResult({
|
|
413
419
|
actionType: 'press_back',
|
|
420
|
+
sourceModule: 'server',
|
|
414
421
|
selector: { key: 'back' },
|
|
415
422
|
success: !!res.success,
|
|
416
423
|
uiFingerprintBefore,
|
|
@@ -448,11 +455,13 @@ async function handleStopLogStream(args: ToolCallArgs) {
|
|
|
448
455
|
function handleClassifyActionOutcome(args: ToolCallArgs) {
|
|
449
456
|
const uiChanged = requireBooleanArg(args, 'uiChanged')
|
|
450
457
|
const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible')
|
|
458
|
+
const actionType = getStringArg(args, 'actionType')
|
|
451
459
|
const networkRequests = getArrayArg<ClassifyNetworkRequestArg>(args, 'networkRequests')
|
|
452
460
|
const hasLogErrors = getBooleanArg(args, 'hasLogErrors')
|
|
453
461
|
const result = classifyActionOutcome({
|
|
454
462
|
uiChanged,
|
|
455
463
|
expectedElementVisible: expectedElementVisible ?? null,
|
|
464
|
+
actionType: actionType ?? null,
|
|
456
465
|
networkRequests: networkRequests ?? null,
|
|
457
466
|
hasLogErrors: hasLogErrors ?? null
|
|
458
467
|
})
|
package/src/server-core.ts
CHANGED
package/src/types.ts
CHANGED
|
@@ -258,6 +258,8 @@ export interface ActionExecutionResult {
|
|
|
258
258
|
action_id: string;
|
|
259
259
|
timestamp: string;
|
|
260
260
|
action_type: string;
|
|
261
|
+
lifecycle_state?: 'pending_verification' | 'failed';
|
|
262
|
+
source_module?: 'server' | 'interact';
|
|
261
263
|
device?: DeviceInfo;
|
|
262
264
|
target: {
|
|
263
265
|
selector: Record<string, unknown> | null;
|
|
@@ -7,7 +7,6 @@ function run() {
|
|
|
7
7
|
const result = classifyActionOutcome({ uiChanged: true })
|
|
8
8
|
assert.strictEqual(result.outcome, 'success')
|
|
9
9
|
assert.ok(result.reasoning.length > 0)
|
|
10
|
-
assert.strictEqual(result.nextAction, undefined)
|
|
11
10
|
}
|
|
12
11
|
|
|
13
12
|
// Step 1 — expectedElementVisible → success
|
|
@@ -15,7 +14,6 @@ function run() {
|
|
|
15
14
|
const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: true })
|
|
16
15
|
assert.strictEqual(result.outcome, 'success')
|
|
17
16
|
assert.strictEqual(result.reasoning, 'expected element is visible')
|
|
18
|
-
assert.strictEqual(result.nextAction, undefined)
|
|
19
17
|
}
|
|
20
18
|
|
|
21
19
|
// Step 1 — both uiChanged and expectedElementVisible → success
|
|
@@ -24,24 +22,50 @@ function run() {
|
|
|
24
22
|
assert.strictEqual(result.outcome, 'success')
|
|
25
23
|
}
|
|
26
24
|
|
|
27
|
-
//
|
|
25
|
+
// No actionType supplied → unknown
|
|
28
26
|
{
|
|
29
27
|
const result = classifyActionOutcome({ uiChanged: false })
|
|
30
28
|
assert.strictEqual(result.outcome, 'unknown')
|
|
31
|
-
assert.
|
|
29
|
+
assert.ok(result.reasoning.includes('actionType was not supplied'))
|
|
32
30
|
}
|
|
33
31
|
|
|
34
|
-
//
|
|
32
|
+
// Local-state action routes to state verification rather than forced network probing
|
|
35
33
|
{
|
|
36
|
-
const result = classifyActionOutcome({ uiChanged: false,
|
|
34
|
+
const result = classifyActionOutcome({ uiChanged: false, actionType: 'tap' })
|
|
35
|
+
assert.strictEqual(result.outcome, 'no_op')
|
|
36
|
+
assert.ok(result.reasoning.includes('local-state action'))
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Local-state action with an empty networkRequests array still prefers local-state semantics
|
|
40
|
+
{
|
|
41
|
+
const result = classifyActionOutcome({
|
|
42
|
+
uiChanged: false,
|
|
43
|
+
actionType: 'type_text',
|
|
44
|
+
networkRequests: []
|
|
45
|
+
})
|
|
46
|
+
assert.strictEqual(result.outcome, 'no_op')
|
|
47
|
+
assert.ok(result.reasoning.includes('local-state action'))
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Explicit side-effect action without networkRequests supplied → unknown
|
|
51
|
+
{
|
|
52
|
+
const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app' })
|
|
37
53
|
assert.strictEqual(result.outcome, 'unknown')
|
|
38
|
-
assert.
|
|
54
|
+
assert.ok(result.reasoning.includes('side-effect action'))
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Side-effect action with empty networkRequests → no_op
|
|
58
|
+
{
|
|
59
|
+
const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [] })
|
|
60
|
+
assert.strictEqual(result.outcome, 'no_op')
|
|
61
|
+
assert.ok(result.reasoning.includes('side-effect action'))
|
|
39
62
|
}
|
|
40
63
|
|
|
41
|
-
//
|
|
64
|
+
// Network failure → backend_failure
|
|
42
65
|
{
|
|
43
66
|
const result = classifyActionOutcome({
|
|
44
67
|
uiChanged: false,
|
|
68
|
+
actionType: 'start_app',
|
|
45
69
|
networkRequests: [{ endpoint: '/login', status: 'failure' }]
|
|
46
70
|
})
|
|
47
71
|
assert.strictEqual(result.outcome, 'backend_failure')
|
|
@@ -49,10 +73,11 @@ function run() {
|
|
|
49
73
|
assert.ok(result.reasoning.includes('failure'))
|
|
50
74
|
}
|
|
51
75
|
|
|
52
|
-
//
|
|
76
|
+
// Retryable status → backend_failure
|
|
53
77
|
{
|
|
54
78
|
const result = classifyActionOutcome({
|
|
55
79
|
uiChanged: false,
|
|
80
|
+
actionType: 'start_app',
|
|
56
81
|
networkRequests: [
|
|
57
82
|
{ endpoint: '/api/submit', status: 'retryable' },
|
|
58
83
|
{ endpoint: '/api/other', status: 'success' }
|
|
@@ -62,25 +87,11 @@ function run() {
|
|
|
62
87
|
assert.ok(result.reasoning.includes('/api/submit'))
|
|
63
88
|
}
|
|
64
89
|
|
|
65
|
-
//
|
|
66
|
-
{
|
|
67
|
-
const result = classifyActionOutcome({ uiChanged: false, networkRequests: [] })
|
|
68
|
-
assert.strictEqual(result.outcome, 'no_op')
|
|
69
|
-
assert.ok(result.reasoning.includes('no UI change'))
|
|
70
|
-
assert.ok(result.reasoning.includes('no network activity'))
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
// Step 4 — empty network requests with log errors → no_op with note
|
|
74
|
-
{
|
|
75
|
-
const result = classifyActionOutcome({ uiChanged: false, networkRequests: [], hasLogErrors: true })
|
|
76
|
-
assert.strictEqual(result.outcome, 'no_op')
|
|
77
|
-
assert.ok(result.reasoning.includes('log errors'))
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
// Step 5 — all requests succeeded but UI unchanged → ui_failure
|
|
90
|
+
// All requests succeeded and UI stayed unchanged → ui_failure
|
|
81
91
|
{
|
|
82
92
|
const result = classifyActionOutcome({
|
|
83
93
|
uiChanged: false,
|
|
94
|
+
actionType: 'start_app',
|
|
84
95
|
networkRequests: [
|
|
85
96
|
{ endpoint: '/api/save', status: 'success' },
|
|
86
97
|
{ endpoint: '/api/refresh', status: 'success' }
|
|
@@ -90,10 +101,18 @@ function run() {
|
|
|
90
101
|
assert.ok(result.reasoning.includes('network requests succeeded'))
|
|
91
102
|
}
|
|
92
103
|
|
|
104
|
+
// Empty network requests with log errors → no_op with note
|
|
105
|
+
{
|
|
106
|
+
const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [], hasLogErrors: true })
|
|
107
|
+
assert.strictEqual(result.outcome, 'no_op')
|
|
108
|
+
assert.ok(result.reasoning.includes('log errors'))
|
|
109
|
+
}
|
|
110
|
+
|
|
93
111
|
// Step 1 takes priority over network signals — success even when failures present
|
|
94
112
|
{
|
|
95
113
|
const result = classifyActionOutcome({
|
|
96
114
|
uiChanged: true,
|
|
115
|
+
actionType: 'start_app',
|
|
97
116
|
networkRequests: [{ endpoint: '/api/log', status: 'failure' }]
|
|
98
117
|
})
|
|
99
118
|
assert.strictEqual(result.outcome, 'success')
|
|
@@ -68,15 +68,17 @@ async function run() {
|
|
|
68
68
|
|
|
69
69
|
const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
|
|
70
70
|
assert(classifyActionOutcome, 'classify_action_outcome should be registered')
|
|
71
|
-
assert.match((classifyActionOutcome as any).description, /
|
|
72
|
-
assert.match((classifyActionOutcome as any).description, /
|
|
73
|
-
assert.match((classifyActionOutcome as any).description, /
|
|
71
|
+
assert.match((classifyActionOutcome as any).description, /action_type/i)
|
|
72
|
+
assert.match((classifyActionOutcome as any).description, /local-state/i)
|
|
73
|
+
assert.match((classifyActionOutcome as any).description, /side-effect/i)
|
|
74
|
+
assert.strictEqual((classifyActionOutcome as any).inputSchema.properties.actionType.type, 'string')
|
|
75
|
+
assert.match((classifyActionOutcome as any).inputSchema.properties.networkRequests.description, /optional network evidence/i)
|
|
74
76
|
|
|
75
77
|
const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
|
|
76
78
|
assert(getNetworkActivity, 'get_network_activity should be registered')
|
|
77
|
-
assert.match((getNetworkActivity as any).description, /
|
|
78
|
-
assert.doesNotMatch((getNetworkActivity as any).description, /
|
|
79
|
-
assert.match((getNetworkActivity as any).description, /
|
|
79
|
+
assert.match((getNetworkActivity as any).description, /side-effect/i)
|
|
80
|
+
assert.doesNotMatch((getNetworkActivity as any).description, /nextAction/i)
|
|
81
|
+
assert.match((getNetworkActivity as any).description, /only if the result is still ambiguous/i)
|
|
80
82
|
|
|
81
83
|
await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)
|
|
82
84
|
|
|
@@ -61,6 +61,8 @@ async function run() {
|
|
|
61
61
|
action_id: 'tap_element_1',
|
|
62
62
|
timestamp: '2026-04-23T08:00:00.000Z',
|
|
63
63
|
action_type: 'tap_element',
|
|
64
|
+
lifecycle_state: 'pending_verification',
|
|
65
|
+
source_module: 'interact',
|
|
64
66
|
target: {
|
|
65
67
|
selector: { elementId: 'el_ready' },
|
|
66
68
|
resolved: { elementId: 'el_ready', text: 'Ready', resource_id: null, accessibility_id: null, class: 'Button', bounds: [0, 0, 10, 10], index: 0 }
|
|
@@ -74,6 +76,8 @@ async function run() {
|
|
|
74
76
|
const tapElementPayload = JSON.parse((tapElementResponse as any).content[0].text)
|
|
75
77
|
assert.strictEqual(tapElementPayload.success, true)
|
|
76
78
|
assert.strictEqual(tapElementPayload.action_type, 'tap_element')
|
|
79
|
+
assert.strictEqual(tapElementPayload.lifecycle_state, 'pending_verification')
|
|
80
|
+
assert.strictEqual(tapElementPayload.source_module, 'interact')
|
|
77
81
|
assert.match(tapElementPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
78
82
|
assert.strictEqual(tapElementPayload.target.resolved.elementId, 'el_ready')
|
|
79
83
|
assert.strictEqual(tapElementPayload.ui_fingerprint_before, 'fp_before')
|
|
@@ -84,6 +88,8 @@ async function run() {
|
|
|
84
88
|
const tapPayload = JSON.parse((tapResponse as any).content[0].text)
|
|
85
89
|
assert.strictEqual(tapPayload.success, true)
|
|
86
90
|
assert.strictEqual(tapPayload.action_type, 'tap')
|
|
91
|
+
assert.strictEqual(tapPayload.lifecycle_state, 'pending_verification')
|
|
92
|
+
assert.strictEqual(tapPayload.source_module, 'server')
|
|
87
93
|
assert.match(tapPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
88
94
|
assert.deepStrictEqual(tapPayload.target.selector, { x: 1, y: 2 })
|
|
89
95
|
assert.strictEqual(tapPayload.ui_fingerprint_before, 'fp_mock')
|
|
@@ -107,6 +113,8 @@ async function run() {
|
|
|
107
113
|
const startAppPayload = JSON.parse((startAppResponse as any).content[0].text)
|
|
108
114
|
assert.strictEqual(startAppPayload.success, true)
|
|
109
115
|
assert.strictEqual(startAppPayload.action_type, 'start_app')
|
|
116
|
+
assert.strictEqual(startAppPayload.lifecycle_state, 'pending_verification')
|
|
117
|
+
assert.strictEqual(startAppPayload.source_module, 'server')
|
|
110
118
|
assert.match(startAppPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
111
119
|
assert.strictEqual(startAppPayload.device.id, 'emulator-5554')
|
|
112
120
|
assert.deepStrictEqual(startAppPayload.target.selector, { appId: 'com.example.app' })
|