mobile-debug-mcp 0.25.1 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/dist/interact/classify.js +48 -11
  2. package/dist/interact/index.js +113 -0
  3. package/dist/observe/android.js +10 -1
  4. package/dist/observe/index.js +19 -1
  5. package/dist/observe/ios.js +15 -1
  6. package/dist/observe/snapshot-metadata.js +88 -0
  7. package/dist/server/tool-definitions.js +49 -14
  8. package/dist/server/tool-handlers.js +12 -0
  9. package/dist/server-core.js +1 -1
  10. package/docs/CHANGELOG.md +9 -0
  11. package/docs/ROADMAP.md +66 -38
  12. package/docs/rfcs/003-wait-and-synchronization-reliability.md +296 -0
  13. package/docs/rfcs/004-action-verification-routing.md +342 -0
  14. package/docs/specs/mcp-tooling-spec-v1.md +11 -3
  15. package/docs/tools/interact.md +31 -8
  16. package/docs/tools/observe.md +4 -2
  17. package/package.json +1 -1
  18. package/skills/rfc-review/SKILL.md +52 -0
  19. package/skills/rfc-review/references/rfc-review-checklist.md +12 -0
  20. package/skills/rfc-review/references/rfc-review-template.md +28 -0
  21. package/src/interact/classify.ts +53 -13
  22. package/src/interact/index.ts +151 -0
  23. package/src/observe/android.ts +11 -1
  24. package/src/observe/index.ts +26 -1
  25. package/src/observe/ios.ts +28 -13
  26. package/src/observe/snapshot-metadata.ts +107 -0
  27. package/src/server/tool-definitions.ts +49 -14
  28. package/src/server/tool-handlers.ts +13 -0
  29. package/src/server-core.ts +1 -1
  30. package/src/types.ts +23 -0
  31. package/test/unit/interact/classify_action_outcome.test.ts +44 -25
  32. package/test/unit/interact/wait_for_ui_change.test.ts +76 -0
  33. package/test/unit/server/contract.test.ts +8 -6
  34. package/test/unit/server/response_shapes.test.ts +37 -3
  35. package/docs/rfcs/003-wait-and-synchronization-reliability +0 -232
@@ -240,7 +240,7 @@ Failure Handling:
240
240
  },
241
241
  {
242
242
  name: 'capture_debug_snapshot',
243
- description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
243
+ description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON with snapshot_revision, captured_at_ms, and loading_state when detectable.',
244
244
  inputSchema: {
245
245
  type: 'object',
246
246
  properties: {
@@ -291,7 +291,7 @@ Failure Handling:
291
291
  },
292
292
  {
293
293
  name: 'get_ui_tree',
294
- description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content.',
294
+ description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content with snapshot metadata when available.',
295
295
  inputSchema: {
296
296
  type: 'object',
297
297
  properties: {
@@ -344,7 +344,7 @@ Capabilities:
344
344
  Constraints:
345
345
  - Does not verify correctness of the resulting state
346
346
  - Must not be used alone to confirm action success when an applicable expect_* tool exists
347
- - Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
347
+ - For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
348
348
 
349
349
  Recommended Usage:
350
350
  1. Capture or define the expected outcome
@@ -363,6 +363,34 @@ Recommended Usage:
363
363
  required: ['previousFingerprint']
364
364
  }
365
365
  },
366
+ {
367
+ name: 'wait_for_ui_change',
368
+ description: `Purpose:
369
+ Wait for a non-navigation UI mutation or in-place update to become stable.
370
+
371
+ Inputs:
372
+ - expected_change (optional): hierarchy_diff, text_change, or state_change
373
+ - timeout_ms (optional)
374
+ - stability_window_ms (optional)
375
+
376
+ Guidance:
377
+ - Prefer wait_for_screen_change for navigation transitions.
378
+ - Prefer wait_for_ui_change for in-place mutations and non-navigation updates.
379
+ - Use the returned snapshot_revision as the observed synchronization point when available.
380
+
381
+ Failure Handling:
382
+ - TIMEOUT means the UI did not change in a stable way within the allotted time.`,
383
+ inputSchema: {
384
+ type: 'object',
385
+ properties: {
386
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override (android|ios)' },
387
+ deviceId: { type: 'string', description: 'Optional device id/udid to target' },
388
+ expected_change: { type: 'string', enum: ['hierarchy_diff', 'text_change', 'state_change'], description: 'Optional type of UI change to wait for' },
389
+ timeout_ms: { type: 'number', description: 'Timeout in ms to wait for change (default 60000)', default: 60000 },
390
+ stability_window_ms: { type: 'number', description: 'How long the change must remain stable before success (default 250)', default: 250 }
391
+ }
392
+ }
393
+ },
366
394
  {
367
395
  name: 'expect_screen',
368
396
  description: `Purpose:
@@ -890,26 +918,29 @@ Failure Handling:
890
918
  name: 'classify_action_outcome',
891
919
  description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
892
920
 
893
- MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
894
- Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
895
- For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
921
+ Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
922
+ Use this when the intended outcome is not already fully verified by the UI signal alone.
923
+ For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
896
924
 
897
925
  HOW TO GATHER INPUTS before calling:
898
926
  1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
899
927
  2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
900
- 3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
928
+ 3. Pass actionType from the action response when available.
929
+ 4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
901
930
 
902
931
  RULES (applied in order — stop at first match):
903
932
  1. If uiChanged=true OR expectedElementVisible=true → outcome=success
904
- 2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
933
+ 2. If actionType is missing → outcome=unknown
905
934
  3. If any request has status=failure or retryable → outcome=backend_failure
906
- 4. If no requests returned → outcome=no_op
907
- 5. If all requests succeeded → outcome=ui_failure
908
- 6. Otherwise → outcome=unknown
935
+ 4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
936
+ 5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
937
+ 6. If no requests returned → outcome=no_op
938
+ 7. If all requests succeeded → outcome=ui_failure
939
+ 8. Otherwise → outcome=unknown
909
940
 
910
941
  BEHAVIOUR after outcome:
911
942
  - success → continue
912
- - no_op → retry the action once or re-resolve the element
943
+ - no_op → retry with richer state verification or re-resolve the element
913
944
  - backend_failure → stop and report the failing endpoint
914
945
  - ui_failure → stop and report failure
915
946
  - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
@@ -924,9 +955,13 @@ BEHAVIOUR after outcome:
924
955
  type: 'boolean',
925
956
  description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
926
957
  },
958
+ actionType: {
959
+ type: 'string',
960
+ description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
961
+ },
927
962
  networkRequests: {
928
963
  type: 'array',
929
- description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
964
+ description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
930
965
  items: {
931
966
  type: 'object',
932
967
  properties: {
@@ -948,7 +983,7 @@ BEHAVIOUR after outcome:
948
983
  name: 'get_network_activity',
949
984
  description: `Returns structured network events captured from platform logs since the last action.
950
985
 
951
- Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
986
+ Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
952
987
  Do not call more than once per action.
953
988
 
954
989
  Events are filtered to significant (non-background) requests only.
@@ -288,6 +288,16 @@ async function handleWaitForUI(args: ToolCallArgs) {
288
288
  return wrapResponse(res)
289
289
  }
290
290
 
291
+ async function handleWaitForUIChange(args: ToolCallArgs) {
292
+ const platform = getStringArg(args, 'platform') as PlatformArg | undefined
293
+ const deviceId = getStringArg(args, 'deviceId')
294
+ const timeout_ms = getNumberArg(args, 'timeout_ms') ?? 60000
295
+ const stability_window_ms = getNumberArg(args, 'stability_window_ms') ?? 250
296
+ const expected_change = getStringArg(args, 'expected_change') as 'hierarchy_diff' | 'text_change' | 'state_change' | undefined
297
+ const res = await ToolsInteract.waitForUIChangeHandler({ platform, deviceId, timeout_ms, stability_window_ms, expected_change })
298
+ return wrapResponse(res)
299
+ }
300
+
291
301
  async function handleFindElement(args: ToolCallArgs) {
292
302
  const query = requireStringArg(args, 'query')
293
303
  const exact = getBooleanArg(args, 'exact') ?? false
@@ -438,11 +448,13 @@ async function handleStopLogStream(args: ToolCallArgs) {
438
448
  function handleClassifyActionOutcome(args: ToolCallArgs) {
439
449
  const uiChanged = requireBooleanArg(args, 'uiChanged')
440
450
  const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible')
451
+ const actionType = getStringArg(args, 'actionType')
441
452
  const networkRequests = getArrayArg<ClassifyNetworkRequestArg>(args, 'networkRequests')
442
453
  const hasLogErrors = getBooleanArg(args, 'hasLogErrors')
443
454
  const result = classifyActionOutcome({
444
455
  uiChanged,
445
456
  expectedElementVisible: expectedElementVisible ?? null,
457
+ actionType: actionType ?? null,
446
458
  networkRequests: networkRequests ?? null,
447
459
  hasLogErrors: hasLogErrors ?? null
448
460
  })
@@ -473,6 +485,7 @@ export const toolHandlers: Record<string, ToolHandler> = {
473
485
  get_current_screen: handleGetCurrentScreen,
474
486
  get_screen_fingerprint: handleGetScreenFingerprint,
475
487
  wait_for_screen_change: handleWaitForScreenChange,
488
+ wait_for_ui_change: handleWaitForUIChange,
476
489
  expect_screen: handleExpectScreen,
477
490
  expect_element_visible: handleExpectElementVisible,
478
491
  expect_state: handleExpectState,
@@ -13,7 +13,7 @@ export { wrapResponse, toolDefinitions, handleToolCall }
13
13
 
14
14
  export const serverInfo = {
15
15
  name: 'mobile-debug-mcp',
16
- version: '0.25.1'
16
+ version: '0.26.1'
17
17
  }
18
18
 
19
19
  export function createServer() {
package/src/types.ts CHANGED
@@ -109,6 +109,12 @@ export interface UIElementSemanticMetadata {
109
109
  is_container: boolean;
110
110
  }
111
111
 
112
+ export interface LoadingState {
113
+ active: boolean;
114
+ signal: string;
115
+ source: string;
116
+ }
117
+
112
118
  export interface CaptureAndroidScreenResponse {
113
119
  device: DeviceInfo;
114
120
  screenshot: string; // base64 encoded string
@@ -162,6 +168,9 @@ export interface GetUITreeResponse {
162
168
  height: number;
163
169
  };
164
170
  elements: UIElement[];
171
+ snapshot_revision: number;
172
+ captured_at_ms: number;
173
+ loading_state?: LoadingState | null;
165
174
  error?: string;
166
175
  }
167
176
 
@@ -183,12 +192,15 @@ export interface SnapshotSemanticResponse {
183
192
 
184
193
  export interface CaptureDebugSnapshotRawResponse {
185
194
  timestamp: number;
195
+ snapshot_revision: number;
196
+ captured_at_ms: number;
186
197
  reason: string;
187
198
  activity: string | null;
188
199
  fingerprint: string | null;
189
200
  screenshot: string | null;
190
201
  ui_tree: GetUITreeResponse | null;
191
202
  logs: StructuredLogEntry[];
203
+ loading_state?: LoadingState | null;
192
204
  device?: DeviceInfo;
193
205
  screenshot_error?: string;
194
206
  activity_error?: string;
@@ -326,6 +338,17 @@ export interface ExpectStateResponse {
326
338
  retryable?: boolean;
327
339
  }
328
340
 
341
+ export interface WaitForUIChangeResponse {
342
+ success: boolean;
343
+ observed_change: 'hierarchy_diff' | 'text_change' | 'state_change' | null;
344
+ snapshot_revision?: number;
345
+ timeout: boolean;
346
+ elapsed_ms: number;
347
+ expected_change?: 'hierarchy_diff' | 'text_change' | 'state_change';
348
+ reason?: string;
349
+ loading_state?: LoadingState | null;
350
+ }
351
+
329
352
  export interface SwipeResponse {
330
353
  device: DeviceInfo;
331
354
  success: boolean;
@@ -7,7 +7,6 @@ function run() {
7
7
  const result = classifyActionOutcome({ uiChanged: true })
8
8
  assert.strictEqual(result.outcome, 'success')
9
9
  assert.ok(result.reasoning.length > 0)
10
- assert.strictEqual(result.nextAction, undefined)
11
10
  }
12
11
 
13
12
  // Step 1 — expectedElementVisible → success
@@ -15,7 +14,6 @@ function run() {
15
14
  const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: true })
16
15
  assert.strictEqual(result.outcome, 'success')
17
16
  assert.strictEqual(result.reasoning, 'expected element is visible')
18
- assert.strictEqual(result.nextAction, undefined)
19
17
  }
20
18
 
21
19
  // Step 1 — both uiChanged and expectedElementVisible → success
@@ -24,24 +22,50 @@ function run() {
24
22
  assert.strictEqual(result.outcome, 'success')
25
23
  }
26
24
 
27
- // Step 2 — UI did not change, networkRequests not yet provided → nextAction required
25
+ // No actionType supplied → unknown
28
26
  {
29
27
  const result = classifyActionOutcome({ uiChanged: false })
30
28
  assert.strictEqual(result.outcome, 'unknown')
31
- assert.strictEqual(result.nextAction, 'call_get_network_activity')
29
+ assert.ok(result.reasoning.includes('actionType was not supplied'))
32
30
  }
33
31
 
34
- // Step 2 — explicit null networkRequests → nextAction required
32
+ // Local-state action routes to state verification rather than forced network probing
35
33
  {
36
- const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: null, networkRequests: null })
34
+ const result = classifyActionOutcome({ uiChanged: false, actionType: 'tap' })
35
+ assert.strictEqual(result.outcome, 'no_op')
36
+ assert.ok(result.reasoning.includes('local-state action'))
37
+ }
38
+
39
+ // Local-state action with network data still prefers local-state semantics
40
+ {
41
+ const result = classifyActionOutcome({
42
+ uiChanged: false,
43
+ actionType: 'type_text',
44
+ networkRequests: []
45
+ })
46
+ assert.strictEqual(result.outcome, 'no_op')
47
+ assert.ok(result.reasoning.includes('local-state action'))
48
+ }
49
+
50
+ // Explicit side-effect action without networkRequests supplied → unknown
51
+ {
52
+ const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app' })
37
53
  assert.strictEqual(result.outcome, 'unknown')
38
- assert.strictEqual(result.nextAction, 'call_get_network_activity')
54
+ assert.ok(result.reasoning.includes('side-effect action'))
55
+ }
56
+
57
+ // Side-effect action with empty networkRequests → no_op
58
+ {
59
+ const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [] })
60
+ assert.strictEqual(result.outcome, 'no_op')
61
+ assert.ok(result.reasoning.includes('side-effect action'))
39
62
  }
40
63
 
41
- // Step 3 — failure status → backend_failure
64
+ // Network failure → backend_failure
42
65
  {
43
66
  const result = classifyActionOutcome({
44
67
  uiChanged: false,
68
+ actionType: 'start_app',
45
69
  networkRequests: [{ endpoint: '/login', status: 'failure' }]
46
70
  })
47
71
  assert.strictEqual(result.outcome, 'backend_failure')
@@ -49,10 +73,11 @@ function run() {
49
73
  assert.ok(result.reasoning.includes('failure'))
50
74
  }
51
75
 
52
- // Step 3 — retryable status → backend_failure
76
+ // Retryable status → backend_failure
53
77
  {
54
78
  const result = classifyActionOutcome({
55
79
  uiChanged: false,
80
+ actionType: 'start_app',
56
81
  networkRequests: [
57
82
  { endpoint: '/api/submit', status: 'retryable' },
58
83
  { endpoint: '/api/other', status: 'success' }
@@ -62,25 +87,11 @@ function run() {
62
87
  assert.ok(result.reasoning.includes('/api/submit'))
63
88
  }
64
89
 
65
- // Step 4 — empty network requests → no_op
66
- {
67
- const result = classifyActionOutcome({ uiChanged: false, networkRequests: [] })
68
- assert.strictEqual(result.outcome, 'no_op')
69
- assert.ok(result.reasoning.includes('no UI change'))
70
- assert.ok(result.reasoning.includes('no network activity'))
71
- }
72
-
73
- // Step 4 — empty network requests with log errors → no_op with note
74
- {
75
- const result = classifyActionOutcome({ uiChanged: false, networkRequests: [], hasLogErrors: true })
76
- assert.strictEqual(result.outcome, 'no_op')
77
- assert.ok(result.reasoning.includes('log errors'))
78
- }
79
-
80
- // Step 5 — all requests succeeded but UI unchanged → ui_failure
90
+ // All requests succeeded and UI stayed unchanged → ui_failure
81
91
  {
82
92
  const result = classifyActionOutcome({
83
93
  uiChanged: false,
94
+ actionType: 'start_app',
84
95
  networkRequests: [
85
96
  { endpoint: '/api/save', status: 'success' },
86
97
  { endpoint: '/api/refresh', status: 'success' }
@@ -90,10 +101,18 @@ function run() {
90
101
  assert.ok(result.reasoning.includes('network requests succeeded'))
91
102
  }
92
103
 
104
+ // Empty network requests with log errors → no_op with note
105
+ {
106
+ const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [], hasLogErrors: true })
107
+ assert.strictEqual(result.outcome, 'no_op')
108
+ assert.ok(result.reasoning.includes('log errors'))
109
+ }
110
+
93
111
  // Step 1 takes priority over network signals — success even when failures present
94
112
  {
95
113
  const result = classifyActionOutcome({
96
114
  uiChanged: true,
115
+ actionType: 'start_app',
97
116
  networkRequests: [{ endpoint: '/api/log', status: 'failure' }]
98
117
  })
99
118
  assert.strictEqual(result.outcome, 'success')
@@ -0,0 +1,76 @@
1
+ import assert from 'assert'
2
+ import { ToolsInteract } from '../../../src/interact/index.js'
3
+ import { ToolsObserve } from '../../../src/observe/index.js'
4
+
5
+ async function run() {
6
+ const originalGetUITreeHandler = (ToolsObserve as any).getUITreeHandler
7
+
8
+ try {
9
+ let calls = 0
10
+ ;(ToolsObserve as any).getUITreeHandler = async () => {
11
+ calls++
12
+ if (calls === 1) {
13
+ return {
14
+ device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
15
+ screen: 'Loading',
16
+ resolution: { width: 1080, height: 2400 },
17
+ elements: [{ text: 'Loading', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
18
+ snapshot_revision: 1,
19
+ captured_at_ms: 1000
20
+ }
21
+ }
22
+
23
+ return {
24
+ device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
25
+ screen: 'Loaded',
26
+ resolution: { width: 1080, height: 2400 },
27
+ elements: [{ text: 'Loaded', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
28
+ snapshot_revision: 2,
29
+ captured_at_ms: 2000
30
+ }
31
+ }
32
+
33
+ const success = await ToolsInteract.waitForUIChangeHandler({
34
+ platform: 'android',
35
+ deviceId: 'mock',
36
+ expected_change: 'text_change',
37
+ timeout_ms: 1500,
38
+ stability_window_ms: 1
39
+ })
40
+
41
+ assert.strictEqual(success.success, true)
42
+ assert.strictEqual(success.observed_change, 'text_change')
43
+ assert.strictEqual(success.snapshot_revision, 2)
44
+ assert.strictEqual(success.timeout, false)
45
+
46
+ ;(ToolsObserve as any).getUITreeHandler = async () => ({
47
+ device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
48
+ screen: 'Static',
49
+ resolution: { width: 1080, height: 2400 },
50
+ elements: [{ text: 'Static', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
51
+ snapshot_revision: 9,
52
+ captured_at_ms: 3000
53
+ })
54
+
55
+ const timeout = await ToolsInteract.waitForUIChangeHandler({
56
+ platform: 'android',
57
+ deviceId: 'mock',
58
+ expected_change: 'state_change',
59
+ timeout_ms: 700,
60
+ stability_window_ms: 1
61
+ })
62
+
63
+ assert.strictEqual(timeout.success, false)
64
+ assert.strictEqual(timeout.observed_change, null)
65
+ assert.strictEqual(timeout.timeout, true)
66
+
67
+ console.log('wait_for_ui_change tests passed')
68
+ } finally {
69
+ ;(ToolsObserve as any).getUITreeHandler = originalGetUITreeHandler
70
+ }
71
+ }
72
+
73
+ run().catch((error) => {
74
+ console.error(error)
75
+ process.exit(1)
76
+ })
@@ -68,15 +68,17 @@ async function run() {
68
68
 
69
69
  const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
70
70
  assert(classifyActionOutcome, 'classify_action_outcome should be registered')
71
- assert.match((classifyActionOutcome as any).description, /backend\/API activity without a visible UI change/i)
72
- assert.match((classifyActionOutcome as any).description, /get_network_activity/i)
73
- assert.match((classifyActionOutcome as any).description, /immediately after the action/i)
71
+ assert.match((classifyActionOutcome as any).description, /action_type/i)
72
+ assert.match((classifyActionOutcome as any).description, /local-state/i)
73
+ assert.match((classifyActionOutcome as any).description, /side-effect/i)
74
+ assert.strictEqual((classifyActionOutcome as any).inputSchema.properties.actionType.type, 'string')
75
+ assert.match((classifyActionOutcome as any).inputSchema.properties.networkRequests.description, /optional network evidence/i)
74
76
 
75
77
  const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
76
78
  assert(getNetworkActivity, 'get_network_activity should be registered')
77
- assert.match((getNetworkActivity as any).description, /backend\/API activity without a visible UI change/i)
78
- assert.doesNotMatch((getNetworkActivity as any).description, /Call this only when/i)
79
- assert.match((getNetworkActivity as any).description, /immediately after an action/i)
79
+ assert.match((getNetworkActivity as any).description, /side-effect/i)
80
+ assert.doesNotMatch((getNetworkActivity as any).description, /nextAction/i)
81
+ assert.match((getNetworkActivity as any).description, /only if the result is still ambiguous/i)
80
82
 
81
83
  await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)
82
84
 
@@ -8,6 +8,7 @@ import { ToolsObserve } from '../../../src/observe/index.js'
8
8
  async function run() {
9
9
  const originalInstallAppHandler = (ToolsManage as any).installAppHandler
10
10
  const originalWaitForUIHandler = (ToolsInteract as any).waitForUIHandler
11
+ const originalWaitForUIChangeHandler = (ToolsInteract as any).waitForUIChangeHandler
11
12
  const originalTapElementHandler = (ToolsInteract as any).tapElementHandler
12
13
  const originalTapHandler = (ToolsInteract as any).tapHandler
13
14
  const originalExpectScreenHandler = (ToolsInteract as any).expectScreenHandler
@@ -145,12 +146,16 @@ async function run() {
145
146
  ;(ToolsObserve as any).getUITreeHandler = async () => ({
146
147
  device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
147
148
  resolution: { width: 1080, height: 2400 },
149
+ screen: 'Notifications',
148
150
  elements: [{
149
151
  text: 'Notifications',
150
152
  depth: 0,
151
153
  center: { x: 50, y: 20 },
152
154
  state: { checked: true, selected: 'Notifications' }
153
- }]
155
+ }],
156
+ snapshot_revision: 12,
157
+ captured_at_ms: 1710000000123,
158
+ loading_state: { active: true, signal: 'progress_indicator', source: 'ui_tree' }
154
159
  })
155
160
 
156
161
  ;(ToolsInteract as any).expectStateHandler = async () => ({
@@ -227,8 +232,12 @@ async function run() {
227
232
 
228
233
  ;(ToolsObserve as any).getUITreeHandler = async () => ({
229
234
  device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
235
+ screen: 'Login',
230
236
  resolution: { width: 1080, height: 2400 },
231
- elements: [{ text: 'Login', depth: 0, center: { x: 50, y: 20 } }]
237
+ elements: [{ text: 'Login', depth: 0, center: { x: 50, y: 20 } }],
238
+ snapshot_revision: 12,
239
+ captured_at_ms: 1710000000123,
240
+ loading_state: { active: true, signal: 'progress_indicator', source: 'ui_tree' }
232
241
  })
233
242
 
234
243
  const uiTreeResponse = await handleToolCall('get_ui_tree', { platform: 'android' })
@@ -236,16 +245,21 @@ async function run() {
236
245
  assert.strictEqual(uiTreePayload.elements.length, 1)
237
246
  assert.strictEqual(uiTreePayload.resolution.height, 2400)
238
247
  assert.strictEqual(uiTreePayload.elements[0].text, 'Login')
248
+ assert.strictEqual(uiTreePayload.snapshot_revision, 12)
249
+ assert.strictEqual(uiTreePayload.loading_state.signal, 'progress_indicator')
239
250
 
240
251
  ;(ToolsObserve as any).captureDebugSnapshotHandler = async () => ({
241
252
  raw: {
242
253
  timestamp: 1710000000000,
254
+ snapshot_revision: 12,
255
+ captured_at_ms: 1710000000123,
243
256
  reason: 'manual',
244
257
  activity: 'com.example.MainActivity',
245
258
  fingerprint: 'fp_raw',
246
259
  screenshot: 'base64',
247
- ui_tree: { screen: 'Home', elements: [] },
260
+ ui_tree: { screen: 'Home', elements: [], snapshot_revision: 12, captured_at_ms: 1710000000123, loading_state: { active: true, signal: 'spinner', source: 'snapshot' } },
248
261
  logs: [],
262
+ loading_state: { active: true, signal: 'spinner', source: 'snapshot' },
249
263
  device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true }
250
264
  },
251
265
  semantic: {
@@ -260,13 +274,33 @@ async function run() {
260
274
  const snapshotResponse = await handleToolCall('capture_debug_snapshot', { platform: 'android' })
261
275
  const snapshotPayload = JSON.parse((snapshotResponse as any).content[0].text)
262
276
  assert.strictEqual(snapshotPayload.raw.fingerprint, 'fp_raw')
277
+ assert.strictEqual(snapshotPayload.raw.snapshot_revision, 12)
278
+ assert.strictEqual(snapshotPayload.raw.loading_state.signal, 'spinner')
263
279
  assert.strictEqual(snapshotPayload.semantic.screen, 'Home')
264
280
  assert.strictEqual(snapshotPayload.semantic.confidence, 0.8)
265
281
 
282
+ ;(ToolsInteract as any).waitForUIChangeHandler = async () => ({
283
+ success: true,
284
+ observed_change: 'text_change',
285
+ snapshot_revision: 13,
286
+ timeout: false,
287
+ elapsed_ms: 1550,
288
+ expected_change: 'text_change',
289
+ loading_state: { active: false, signal: 'spinner', source: 'ui_tree' },
290
+ reason: 'UI change observed'
291
+ })
292
+
293
+ const waitForUIChangeResponse = await handleToolCall('wait_for_ui_change', { expected_change: 'text_change' })
294
+ const waitForUIChangePayload = JSON.parse((waitForUIChangeResponse as any).content[0].text)
295
+ assert.strictEqual(waitForUIChangePayload.success, true)
296
+ assert.strictEqual(waitForUIChangePayload.observed_change, 'text_change')
297
+ assert.strictEqual(waitForUIChangePayload.snapshot_revision, 13)
298
+
266
299
  console.log('server response-shape tests passed')
267
300
  } finally {
268
301
  ;(ToolsManage as any).installAppHandler = originalInstallAppHandler
269
302
  ;(ToolsInteract as any).waitForUIHandler = originalWaitForUIHandler
303
+ ;(ToolsInteract as any).waitForUIChangeHandler = originalWaitForUIChangeHandler
270
304
  ;(ToolsInteract as any).tapElementHandler = originalTapElementHandler
271
305
  ;(ToolsInteract as any).tapHandler = originalTapHandler
272
306
  ;(ToolsInteract as any).expectScreenHandler = originalExpectScreenHandler