mobile-debug-mcp 0.25.1 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/dist/interact/classify.js +48 -11
  2. package/dist/interact/index.js +113 -0
  3. package/dist/observe/android.js +10 -1
  4. package/dist/observe/index.js +19 -1
  5. package/dist/observe/ios.js +15 -1
  6. package/dist/observe/snapshot-metadata.js +88 -0
  7. package/dist/server/tool-definitions.js +49 -14
  8. package/dist/server/tool-handlers.js +12 -0
  9. package/dist/server-core.js +1 -1
  10. package/docs/CHANGELOG.md +9 -0
  11. package/docs/ROADMAP.md +66 -38
  12. package/docs/rfcs/003-wait-and-synchronization-reliability.md +296 -0
  13. package/docs/rfcs/004-action-verification-routing.md +342 -0
  14. package/docs/specs/mcp-tooling-spec-v1.md +11 -3
  15. package/docs/tools/interact.md +31 -8
  16. package/docs/tools/observe.md +4 -2
  17. package/package.json +1 -1
  18. package/skills/rfc-review/SKILL.md +52 -0
  19. package/skills/rfc-review/references/rfc-review-checklist.md +12 -0
  20. package/skills/rfc-review/references/rfc-review-template.md +28 -0
  21. package/src/interact/classify.ts +53 -13
  22. package/src/interact/index.ts +151 -0
  23. package/src/observe/android.ts +11 -1
  24. package/src/observe/index.ts +26 -1
  25. package/src/observe/ios.ts +28 -13
  26. package/src/observe/snapshot-metadata.ts +107 -0
  27. package/src/server/tool-definitions.ts +49 -14
  28. package/src/server/tool-handlers.ts +13 -0
  29. package/src/server-core.ts +1 -1
  30. package/src/types.ts +23 -0
  31. package/test/unit/interact/classify_action_outcome.test.ts +44 -25
  32. package/test/unit/interact/wait_for_ui_change.test.ts +76 -0
  33. package/test/unit/server/contract.test.ts +8 -6
  34. package/test/unit/server/response_shapes.test.ts +37 -3
  35. package/docs/rfcs/003-wait-and-synchronization-reliability +0 -232
@@ -1,35 +1,72 @@
1
/**
 * Routing category for every runtime action type the classifier knows about.
 * Types missing from this table are treated as 'side_effect' by inferActionCategory.
 */
const ACTION_CATEGORY_BY_TYPE = {
    tap: 'local_state',
    tap_element: 'local_state',
    swipe: 'local_state',
    scroll_to_element: 'local_state',
    type_text: 'local_state',
    press_back: 'local_state',
    start_app: 'side_effect',
    restart_app: 'side_effect',
    terminate_app: 'side_effect',
    reset_app_data: 'side_effect',
    install_app: 'side_effect',
    build_app: 'side_effect',
    build_and_install: 'side_effect'
};
/**
 * Map a runtime action type to its routing category.
 *
 * @param {unknown} actionType - Action type reported by the runtime; matched case-insensitively after trimming.
 * @returns {'local_state' | 'side_effect' | null} null when actionType is not a non-blank string,
 *   the table entry for known types, and 'side_effect' for any other string.
 */
function inferActionCategory(actionType) {
    if (typeof actionType !== 'string')
        return null;
    const normalized = actionType.trim().toLowerCase();
    if (!normalized)
        return null;
    // Own-property check: a bare lookup would resolve inherited keys such as
    // "constructor" or "__proto__" to a function/object instead of a category string.
    return Object.prototype.hasOwnProperty.call(ACTION_CATEGORY_BY_TYPE, normalized)
        ? ACTION_CATEGORY_BY_TYPE[normalized]
        : 'side_effect';
}
1
24
/**
 * Deterministic outcome classifier: rules run in a fixed order and the first
 * match wins, so identical inputs always yield the identical result.
 *
 * Rule order:
 *  1. UI changed or the expected element is visible -> success
 *  2. no usable action type                         -> unknown
 *  3. any failed/retryable network request          -> backend_failure
 *  4. local-state action                            -> no_op (verify with state signals)
 *  5. side-effect action without network data       -> unknown
 *  6. side-effect action with zero requests         -> no_op
 *  7. every request succeeded                       -> ui_failure
 *  8. anything else                                 -> unknown
 *
 * @param {object} input - { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors }
 * @returns {{ outcome: string, reasoning: string }}
 */
export function classifyActionOutcome(input) {
    const { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors } = input;
    const category = inferActionCategory(actionType);
    const elementSeen = expectedElementVisible === true;
    // Rule 1: a positive UI signal settles the outcome on its own.
    if (uiChanged || elementSeen) {
        const reasoning = elementSeen ? 'expected element is visible' : 'UI changed after action';
        return { outcome: 'success', reasoning };
    }
    // Rule 2: without an action type there is no safe routing path.
    if (category === null) {
        return {
            outcome: 'unknown',
            reasoning: 'actionType was not supplied; pass the runtime action_type so the classifier can distinguish local-state and side-effect routing'
        };
    }
    // Rule 3: a failing request wins regardless of the action category.
    const isFailing = (request) => request.status === 'failure' || request.status === 'retryable';
    const failing = networkRequests?.find(isFailing);
    if (failing) {
        return { outcome: 'backend_failure', reasoning: `network request ${failing.endpoint} returned ${failing.status}` };
    }
    const logSuffix = hasLogErrors ? ' (log errors present)' : '';
    // Rule 4: local-state actions are verified through state signals, not the network.
    if (category === 'local_state') {
        return {
            outcome: 'no_op',
            reasoning: `local-state action${logSuffix}; use expect_state, refreshed snapshot comparison, or expect_element_visible instead of defaulting to network inspection`
        };
    }
    // Rule 5: side-effect action, but no network evidence was supplied.
    if (networkRequests == null) {
        return {
            outcome: 'unknown',
            reasoning: 'side-effect action without network data; inspect network or log signals only if the outcome is still ambiguous'
        };
    }
    // Rule 6: network evidence was supplied and it is empty.
    if (networkRequests.length === 0) {
        return { outcome: 'no_op', reasoning: `side-effect action and no network activity${logSuffix}` };
    }
    // Rules 7 and 8: all-success means the UI failed to react; otherwise inconclusive.
    const allSucceeded = networkRequests.every((request) => request.status === 'success');
    return allSucceeded
        ? { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
        : { outcome: 'unknown', reasoning: 'signals are inconclusive' };
}
@@ -4,9 +4,11 @@ import { iOSInteract } from './ios.js';
4
4
  export { AndroidInteract, iOSInteract };
5
5
  import { resolveTargetDevice } from '../utils/resolve-device.js';
6
6
  import { ToolsObserve } from '../observe/index.js';
7
+ import { computeSnapshotSignature } from '../observe/snapshot-metadata.js';
7
8
  import { nextActionId } from '../server/common.js';
8
9
  export class ToolsInteract {
9
10
  static _maxResolvedUiElements = 256;
11
+ static _uiChangeKinds = ['hierarchy_diff', 'text_change', 'state_change'];
10
12
  static _sliderSearchLookahead = 8;
11
13
  static _sliderNegativeGapTolerancePx = 32;
12
14
  static _sliderPositiveGapLimitPx = 640;
@@ -34,6 +36,9 @@ export class ToolsInteract {
34
36
  return null;
35
37
  return normalized;
36
38
  }
39
+ static _hash(value) {
40
+ return createHash('sha256').update(JSON.stringify(value)).digest('hex');
41
+ }
37
42
  static _matchesSelector(el, selector) {
38
43
  if (!selector)
39
44
  return false;
@@ -135,6 +140,52 @@ export class ToolsInteract {
135
140
  return null;
136
141
  }
137
142
  }
143
+ static _buildUiChangeSignatures(tree) {
144
+ const elements = Array.isArray(tree?.elements) ? tree.elements : [];
145
+ const textPayload = [];
146
+ const statePayload = [];
147
+ for (const el of elements) {
148
+ textPayload.push({
149
+ text: ToolsInteract._normalize(el?.text ?? el?.label ?? el?.value ?? ''),
150
+ contentDescription: ToolsInteract._normalize(el?.contentDescription ?? el?.contentDesc ?? el?.accessibilityLabel ?? ''),
151
+ resourceId: ToolsInteract._normalize(el?.resourceId ?? el?.resourceID ?? el?.id ?? '')
152
+ });
153
+ statePayload.push({
154
+ checked: el?.state?.checked ?? null,
155
+ selected: el?.state?.selected ?? null,
156
+ focused: el?.state?.focused ?? null,
157
+ expanded: el?.state?.expanded ?? null,
158
+ enabled: el?.state?.enabled ?? null,
159
+ text_value: el?.state?.text_value ?? null,
160
+ value: el?.state?.value ?? null,
161
+ raw_value: el?.state?.raw_value ?? null,
162
+ value_range: el?.state?.value_range ?? null
163
+ });
164
+ }
165
+ return {
166
+ hierarchy: computeSnapshotSignature(tree),
167
+ text: ToolsInteract._hash({
168
+ screen: ToolsInteract._normalize(tree?.screen),
169
+ elements: textPayload
170
+ }),
171
+ state: ToolsInteract._hash({
172
+ screen: ToolsInteract._normalize(tree?.screen),
173
+ elements: statePayload
174
+ })
175
+ };
176
+ }
177
+ static _matchesUiChange(expected, initial, current) {
178
+ const candidates = expected ? [expected] : ToolsInteract._uiChangeKinds;
179
+ for (const changeKind of candidates) {
180
+ if (changeKind === 'hierarchy_diff' && initial.hierarchy !== current.hierarchy)
181
+ return changeKind;
182
+ if (changeKind === 'text_change' && initial.text !== current.text)
183
+ return changeKind;
184
+ if (changeKind === 'state_change' && initial.state !== current.state)
185
+ return changeKind;
186
+ }
187
+ return null;
188
+ }
138
189
  static _resolvedTargetFromElement(elementId, element, index) {
139
190
  return {
140
191
  elementId,
@@ -876,6 +927,68 @@ export class ToolsInteract {
876
927
  }
877
928
  };
878
929
  }
930
+ static async waitForUIChangeHandler({ platform, deviceId, timeout_ms = 60000, stability_window_ms = 250, expected_change }) {
931
+ const start = Date.now();
932
+ const pollIntervalMs = 300;
933
+ const stabilityWindow = Math.max(0, typeof stability_window_ms === 'number' ? stability_window_ms : 250);
934
+ let baseline = null;
935
+ let lastObservedRevision = null;
936
+ let lastLoadingState = null;
937
+ while (Date.now() - start < timeout_ms) {
938
+ try {
939
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
940
+ const signatures = ToolsInteract._buildUiChangeSignatures(tree);
941
+ lastObservedRevision = typeof tree?.snapshot_revision === 'number' ? tree.snapshot_revision : lastObservedRevision;
942
+ lastLoadingState = tree?.loading_state ?? lastLoadingState;
943
+ if (!baseline) {
944
+ baseline = signatures;
945
+ }
946
+ else {
947
+ const observedChange = ToolsInteract._matchesUiChange(expected_change, baseline, signatures);
948
+ if (observedChange) {
949
+ if (stabilityWindow > 0) {
950
+ await new Promise(resolve => setTimeout(resolve, stabilityWindow));
951
+ const confirmTree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
952
+ const confirmSignatures = ToolsInteract._buildUiChangeSignatures(confirmTree);
953
+ const confirmChange = ToolsInteract._matchesUiChange(expected_change, baseline, confirmSignatures);
954
+ if (!confirmChange || confirmSignatures.hierarchy !== signatures.hierarchy || confirmSignatures.text !== signatures.text || confirmSignatures.state !== signatures.state) {
955
+ lastObservedRevision = typeof confirmTree?.snapshot_revision === 'number' ? confirmTree.snapshot_revision : lastObservedRevision;
956
+ lastLoadingState = confirmTree?.loading_state ?? lastLoadingState;
957
+ await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
958
+ continue;
959
+ }
960
+ lastObservedRevision = typeof confirmTree?.snapshot_revision === 'number' ? confirmTree.snapshot_revision : lastObservedRevision;
961
+ lastLoadingState = confirmTree?.loading_state ?? lastLoadingState;
962
+ }
963
+ return {
964
+ success: true,
965
+ observed_change: observedChange,
966
+ snapshot_revision: lastObservedRevision ?? undefined,
967
+ timeout: false,
968
+ elapsed_ms: Date.now() - start,
969
+ expected_change,
970
+ loading_state: lastLoadingState ?? null,
971
+ reason: 'UI change observed'
972
+ };
973
+ }
974
+ }
975
+ }
976
+ catch {
977
+ // Keep polling until timeout; the observable surface should be best-effort.
978
+ }
979
+ await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
980
+ }
981
+ return {
982
+ success: false,
983
+ observed_change: null,
984
+ snapshot_revision: lastObservedRevision ?? undefined,
985
+ timeout: true,
986
+ elapsed_ms: Date.now() - start,
987
+ expected_change,
988
+ loading_state: lastLoadingState ?? null,
989
+ reason: 'timeout'
990
+ };
991
+ }
879
992
  static async expectScreenHandler({ platform, fingerprint, screen, deviceId }) {
880
993
  const observedFingerprint = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId });
881
994
  const observedScreen = {
@@ -6,6 +6,7 @@ import { promises as fsPromises } from "fs";
6
6
  import path from "path";
7
7
  import { computeScreenFingerprint } from "../utils/ui/index.js";
8
8
  import { parsePngSize } from "../utils/image.js";
9
+ import { deriveSnapshotMetadata } from "./snapshot-metadata.js";
9
10
  const activeLogStreams = new Map();
10
11
  export class AndroidObserve {
11
12
  async getDeviceMetadata(appId, deviceId) {
@@ -61,21 +62,29 @@ export class AndroidObserve {
61
62
  traverseNode(result.hierarchy.node, elements);
62
63
  }
63
64
  }
65
+ const snapshotMetadata = deriveSnapshotMetadata(`android:${deviceInfo.id}`, {
66
+ screen: "",
67
+ resolution,
68
+ elements
69
+ }, 'ui_tree');
64
70
  return {
65
71
  device: deviceInfo,
66
72
  screen: "",
67
73
  resolution,
68
- elements
74
+ elements,
75
+ ...snapshotMetadata
69
76
  };
70
77
  }
71
78
  catch (e) {
72
79
  const errorMessage = `Failed to get UI tree. ADB Path: '${getAdbCmd()}'. Error: ${e instanceof Error ? e.message : String(e)}`;
73
80
  console.error(errorMessage);
81
+ const snapshotMetadata = deriveSnapshotMetadata(`android:${deviceInfo.id}`, null, 'ui_tree');
74
82
  return {
75
83
  device: deviceInfo,
76
84
  screen: "",
77
85
  resolution: { width: 0, height: 0 },
78
86
  elements: [],
87
+ ...snapshotMetadata,
79
88
  error: errorMessage
80
89
  };
81
90
  }
@@ -1,6 +1,7 @@
1
1
  import { resolveTargetDevice } from '../utils/resolve-device.js';
2
2
  import { AndroidObserve } from './android.js';
3
3
  import { iOSObserve } from './ios.js';
4
+ import { deriveSnapshotMetadata } from './snapshot-metadata.js';
4
5
  export { AndroidObserve } from './android.js';
5
6
  export { iOSObserve } from './ios.js';
6
7
  function normalizeHint(value) {
@@ -200,7 +201,17 @@ export class ToolsObserve {
200
201
  }
201
202
  static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId } = {}) {
202
203
  const timestamp = Date.now();
203
- const raw = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] };
204
+ const raw = {
205
+ timestamp,
206
+ snapshot_revision: 0,
207
+ captured_at_ms: timestamp,
208
+ reason: reason || '',
209
+ activity: null,
210
+ fingerprint: null,
211
+ screenshot: null,
212
+ ui_tree: null,
213
+ logs: []
214
+ };
204
215
  // Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
205
216
  const sid = sessionId || 'default';
206
217
  const tasks = {
@@ -308,6 +319,13 @@ export class ToolsObserve {
308
319
  raw.logs_error = e instanceof Error ? e.message : String(e);
309
320
  }
310
321
  }
322
+ const snapshotDeviceKey = raw.ui_tree?.device
323
+ ? `${raw.ui_tree.device.platform}:${raw.ui_tree.device.id}`
324
+ : `${platform || 'unknown'}:${deviceId || 'default'}`;
325
+ const snapshotMetadata = deriveSnapshotMetadata(snapshotDeviceKey, raw.ui_tree, 'snapshot', raw.ui_tree?.snapshot_revision ? null : (raw.fingerprint || raw.activity || null));
326
+ raw.snapshot_revision = raw.ui_tree?.snapshot_revision ?? snapshotMetadata.snapshot_revision;
327
+ raw.captured_at_ms = raw.ui_tree?.captured_at_ms ?? snapshotMetadata.captured_at_ms;
328
+ raw.loading_state = raw.ui_tree?.loading_state ?? snapshotMetadata.loading_state;
311
329
  const semantic = deriveSnapshotSemantic(raw);
312
330
  return semantic ? { raw, semantic } : { raw };
313
331
  }
@@ -6,6 +6,7 @@ import path from 'path';
6
6
  import { parseLogLine } from '../utils/android/utils.js';
7
7
  import { computeScreenFingerprint } from '../utils/ui/index.js';
8
8
  import { parsePngSize } from '../utils/image.js';
9
+ import { deriveSnapshotMetadata } from './snapshot-metadata.js';
9
10
  const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
10
11
  let iosExecCommand = execCommand;
11
12
  export function _setIOSExecCommandForTests(fn) {
@@ -438,13 +439,16 @@ export class iOSObserve {
438
439
  }
439
440
  async getUITree(deviceId = "booted") {
440
441
  const device = await getIOSDeviceMetadata(deviceId);
442
+ const deviceKey = `ios:${device.id}`;
441
443
  const idbExists = await isIDBInstalled();
442
444
  if (!idbExists) {
445
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree');
443
446
  return {
444
447
  device,
445
448
  screen: "",
446
449
  resolution: { width: 0, height: 0 },
447
450
  elements: [],
451
+ ...snapshotMetadata,
448
452
  error: "iOS UI tree retrieval requires 'idb' (iOS Device Bridge). Please install it via Homebrew: `brew tap facebook/fb && brew install idb-companion` and `pip3 install fb-idb`."
449
453
  };
450
454
  }
@@ -485,11 +489,13 @@ export class iOSObserve {
485
489
  console.error(`Attempt ${attempts} failed: ${e}`);
486
490
  }
487
491
  if (attempts === maxAttempts) {
492
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree');
488
493
  return {
489
494
  device,
490
495
  screen: "",
491
496
  resolution: { width: 0, height: 0 },
492
497
  elements: [],
498
+ ...snapshotMetadata,
493
499
  error: `Failed to retrieve valid UI dump after ${maxAttempts} attempts.`
494
500
  };
495
501
  }
@@ -511,19 +517,27 @@ export class iOSObserve {
511
517
  width = rootBounds[2] - rootBounds[0];
512
518
  height = rootBounds[3] - rootBounds[1];
513
519
  }
520
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, {
521
+ screen: "",
522
+ resolution: { width, height },
523
+ elements
524
+ }, 'ui_tree');
514
525
  return {
515
526
  device,
516
527
  screen: "",
517
528
  resolution: { width, height },
518
- elements
529
+ elements,
530
+ ...snapshotMetadata
519
531
  };
520
532
  }
521
533
  catch (e) {
534
+ const snapshotMetadata = deriveSnapshotMetadata(deviceKey, null, 'ui_tree');
522
535
  return {
523
536
  device,
524
537
  screen: "",
525
538
  resolution: { width: 0, height: 0 },
526
539
  elements: [],
540
+ ...snapshotMetadata,
527
541
  error: `Failed to parse idb output: ${e instanceof Error ? e.message : String(e)}`
528
542
  };
529
543
  }
@@ -0,0 +1,88 @@
1
+ import crypto from 'crypto';
2
+ const snapshotStateByDevice = new Map();
3
/**
 * Canonical text form used in signatures: null/undefined become '',
 * everything else is stringified, trimmed and lower-cased.
 */
function normalize(value) {
    return value == null ? '' : String(value).trim().toLowerCase();
}
8
/**
 * Reduce a bounds value to its first four entries as numbers.
 * Returns null when `bounds` is not an array of at least four entries
 * or when any of the first four does not convert to a number.
 */
function normalizeBounds(bounds) {
    const hasFourEntries = Array.isArray(bounds) && bounds.length >= 4;
    if (!hasFourEntries) {
        return null;
    }
    const corners = bounds.slice(0, 4).map((entry) => Number(entry));
    const hasInvalidEntry = corners.some((corner) => Number.isNaN(corner));
    return hasInvalidEntry ? null : corners;
}
16
/**
 * Project a UI element onto the fields that take part in the snapshot signature.
 * Text-like fields are normalized, flags are coerced to booleans, a missing
 * state becomes null and bounds are reduced by normalizeBounds.
 * Keys are inserted in a fixed order so the serialized form is stable.
 */
function stableElementSignature(element) {
    const textFields = ['text', 'contentDescription', 'resourceId', 'type', 'stable_id', 'role', 'test_tag'];
    const flagFields = ['clickable', 'enabled', 'visible'];
    const signature = {};
    for (const field of textFields) {
        signature[field] = normalize(element[field]);
    }
    signature.selector = normalize(element.selector?.value);
    for (const flag of flagFields) {
        signature[flag] = Boolean(element[flag]);
    }
    signature.state = element.state ?? null;
    signature.bounds = normalizeBounds(element.bounds);
    return signature;
}
33
/**
 * SHA-256 (hex) over the normalized screen name, the resolution and the
 * per-element signatures of a UI tree.
 * Returns null for a missing tree or a tree that carries an `error`.
 */
export function computeSnapshotSignature(tree) {
    if (!tree || tree.error) {
        return null;
    }
    const screen = normalize(tree.screen);
    const resolution = tree.resolution || { width: 0, height: 0 };
    const elements = Array.isArray(tree.elements)
        ? tree.elements.map((entry) => stableElementSignature(entry))
        : [];
    const serialized = JSON.stringify({ screen, resolution, elements });
    const hasher = crypto.createHash('sha256');
    hasher.update(serialized);
    return hasher.digest('hex');
}
43
/**
 * Heuristically detect a visible loading indicator in a UI tree.
 *
 * Each visible element's type and label are scanned for loading-related keywords.
 * The label is the element's text, or its content description when the text is
 * missing or blank.
 *
 * @param {object|null} tree - UI tree; a missing tree, a tree with `error`, or one without an `elements` array yields null.
 * @param {string} source - Echoed back in the result to identify where the tree came from.
 * @returns {{ active: true, signal: string, source: string } | null} The first match, or null when none is found.
 */
export function detectLoadingState(tree, source) {
    if (!tree || tree.error || !Array.isArray(tree.elements))
        return null;
    // Checked in priority order; anything matching the general pattern but none
    // of these (loading, please wait, pending) is reported as 'loading_indicator'.
    const specificSignals = [
        [/progress/, 'progress_indicator'],
        [/spinner/, 'spinner'],
        [/busy/, 'busy_indicator'],
        [/skeleton/, 'skeleton']
    ];
    for (const element of tree.elements) {
        if (!element?.visible)
            continue;
        // `??` alone would never reach contentDescription when text is an empty
        // string, so fall back on blank text as well as on null/undefined.
        const label = normalize(element.text) || normalize(element.contentDescription);
        const type = normalize(element.type ?? '');
        const combined = `${type} ${label}`;
        if (!/progress|spinner|loading|please wait|busy|skeleton|pending/.test(combined))
            continue;
        const matched = specificSignals.find(([pattern]) => pattern.test(combined));
        return { active: true, signal: matched ? matched[1] : 'loading_indicator', source };
    }
    return null;
}
67
/**
 * Derive snapshot metadata for a capture and update the per-device revision state.
 *
 * The revision starts at 1 for a device key and is incremented whenever the
 * capture's signature differs from the one stored for that key. A capture with
 * no signature (null) keeps the current revision.
 *
 * @param {string} deviceKey - Key under which revision state is tracked.
 * @param {object|null} tree - UI tree used for the signature and for loading-state detection.
 * @param {string} source - Passed through to detectLoadingState.
 * @param {string|null} [signatureOverride] - Used instead of the tree signature when not null/undefined.
 * @returns {{ snapshot_revision: number, captured_at_ms: number, loading_state: object|null }}
 */
export function deriveSnapshotMetadata(deviceKey, tree, source, signatureOverride) {
    const signature = signatureOverride ?? computeSnapshotSignature(tree);
    const previous = snapshotStateByDevice.get(deviceKey);
    let revision = 1;
    let storedSignature = signature;
    if (previous) {
        if (signature === null) {
            revision = previous.revision;
            // Keep the last known signature: storing null here would make the next
            // identical successful capture look like a change and bump the revision.
            storedSignature = previous.signature;
        }
        else {
            revision = previous.signature === signature ? previous.revision : previous.revision + 1;
        }
    }
    snapshotStateByDevice.set(deviceKey, { revision, signature: storedSignature });
    return {
        snapshot_revision: revision,
        captured_at_ms: Date.now(),
        loading_state: detectLoadingState(tree, source)
    };
}
86
/**
 * Test helper: drop all per-device revision state so the next capture
 * for any device key starts again at revision 1.
 */
export function resetSnapshotMetadataForTests() {
    snapshotStateByDevice.clear();
}
@@ -240,7 +240,7 @@ Failure Handling:
240
240
  },
241
241
  {
242
242
  name: 'capture_debug_snapshot',
243
- description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
243
+ description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON with snapshot_revision, captured_at_ms, and loading_state when detectable.',
244
244
  inputSchema: {
245
245
  type: 'object',
246
246
  properties: {
@@ -291,7 +291,7 @@ Failure Handling:
291
291
  },
292
292
  {
293
293
  name: 'get_ui_tree',
294
- description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content.',
294
+ description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content with snapshot metadata when available.',
295
295
  inputSchema: {
296
296
  type: 'object',
297
297
  properties: {
@@ -344,7 +344,7 @@ Capabilities:
344
344
  Constraints:
345
345
  - Does not verify correctness of the resulting state
346
346
  - Must not be used alone to confirm action success when an applicable expect_* tool exists
347
- - Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
347
+ - For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
348
348
 
349
349
  Recommended Usage:
350
350
  1. Capture or define the expected outcome
@@ -363,6 +363,34 @@ Recommended Usage:
363
363
  required: ['previousFingerprint']
364
364
  }
365
365
  },
366
+ {
367
+ name: 'wait_for_ui_change',
368
+ description: `Purpose:
369
+ Wait for a non-navigation UI mutation or in-place update to become stable.
370
+
371
+ Inputs:
372
+ - expected_change (optional): hierarchy_diff, text_change, or state_change
373
+ - timeout_ms (optional)
374
+ - stability_window_ms (optional)
375
+
376
+ Guidance:
377
+ - Prefer wait_for_screen_change for navigation transitions.
378
+ - Prefer wait_for_ui_change for in-place mutations and non-navigation updates.
379
+ - Use the returned snapshot_revision as the observed synchronization point when available.
380
+
381
+ Failure Handling:
382
+ - TIMEOUT means the UI did not change in a stable way within the allotted time.`,
383
+ inputSchema: {
384
+ type: 'object',
385
+ properties: {
386
+ platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override (android|ios)' },
387
+ deviceId: { type: 'string', description: 'Optional device id/udid to target' },
388
+ expected_change: { type: 'string', enum: ['hierarchy_diff', 'text_change', 'state_change'], description: 'Optional type of UI change to wait for' },
389
+ timeout_ms: { type: 'number', description: 'Timeout in ms to wait for change (default 60000)', default: 60000 },
390
+ stability_window_ms: { type: 'number', description: 'How long the change must remain stable before success (default 250)', default: 250 }
391
+ }
392
+ }
393
+ },
366
394
  {
367
395
  name: 'expect_screen',
368
396
  description: `Purpose:
@@ -890,26 +918,29 @@ Failure Handling:
890
918
  name: 'classify_action_outcome',
891
919
  description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
892
920
 
893
- MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
894
- Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
895
- For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
921
+ Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
922
+ Use this when the intended outcome is not already fully verified by the UI signal alone.
923
+ For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
896
924
 
897
925
  HOW TO GATHER INPUTS before calling:
898
926
  1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
899
927
  2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
900
- 3. Do NOT call get_network_activity yet omit networkRequests on the first call.
928
+ 3. Pass actionType from the action response when available.
929
+ 4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
901
930
 
902
931
  RULES (applied in order — stop at first match):
903
932
  1. If uiChanged=true OR expectedElementVisible=true → outcome=success
904
- 2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
933
+ 2. If actionType is missing → outcome=unknown
905
934
  3. If any request has status=failure or retryable → outcome=backend_failure
906
- 4. If no requests returned → outcome=no_op
907
- 5. If all requests succeeded → outcome=ui_failure
908
- 6. Otherwise → outcome=unknown
935
+ 4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
936
+ 5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
937
+ 6. If no requests returned → outcome=no_op
938
+ 7. If all requests succeeded → outcome=ui_failure
939
+ 8. Otherwise → outcome=unknown
909
940
 
910
941
  BEHAVIOUR after outcome:
911
942
  - success → continue
912
- - no_op → retry the action once or re-resolve the element
943
+ - no_op → retry with richer state verification or re-resolve the element
913
944
  - backend_failure → stop and report the failing endpoint
914
945
  - ui_failure → stop and report failure
915
946
  - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
@@ -924,9 +955,13 @@ BEHAVIOUR after outcome:
924
955
  type: 'boolean',
925
956
  description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
926
957
  },
958
+ actionType: {
959
+ type: 'string',
960
+ description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
961
+ },
927
962
  networkRequests: {
928
963
  type: 'array',
929
- description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
964
+ description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
930
965
  items: {
931
966
  type: 'object',
932
967
  properties: {
@@ -948,7 +983,7 @@ BEHAVIOUR after outcome:
948
983
  name: 'get_network_activity',
949
984
  description: `Returns structured network events captured from platform logs since the last action.
950
985
 
951
- Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
986
+ Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
952
987
  Do not call more than once per action.
953
988
 
954
989
  Events are filtered to significant (non-background) requests only.
@@ -236,6 +236,15 @@ async function handleWaitForUI(args) {
236
236
  const res = await ToolsInteract.waitForUIHandler({ selector, condition, timeout_ms, poll_interval_ms, match, retry, platform, deviceId });
237
237
  return wrapResponse(res);
238
238
  }
239
/**
 * Tool handler for wait_for_ui_change: reads the optional arguments, applies
 * the defaults (timeout 60000 ms, stability window 250 ms) and returns the
 * wrapped result of ToolsInteract.waitForUIChangeHandler.
 */
async function handleWaitForUIChange(args) {
    const request = {
        platform: getStringArg(args, 'platform'),
        deviceId: getStringArg(args, 'deviceId'),
        timeout_ms: getNumberArg(args, 'timeout_ms') ?? 60000,
        stability_window_ms: getNumberArg(args, 'stability_window_ms') ?? 250,
        expected_change: getStringArg(args, 'expected_change')
    };
    const outcome = await ToolsInteract.waitForUIChangeHandler(request);
    return wrapResponse(outcome);
}
239
248
  async function handleFindElement(args) {
240
249
  const query = requireStringArg(args, 'query');
241
250
  const exact = getBooleanArg(args, 'exact') ?? false;
@@ -376,11 +385,13 @@ async function handleStopLogStream(args) {
376
385
  function handleClassifyActionOutcome(args) {
377
386
  const uiChanged = requireBooleanArg(args, 'uiChanged');
378
387
  const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible');
388
+ const actionType = getStringArg(args, 'actionType');
379
389
  const networkRequests = getArrayArg(args, 'networkRequests');
380
390
  const hasLogErrors = getBooleanArg(args, 'hasLogErrors');
381
391
  const result = classifyActionOutcome({
382
392
  uiChanged,
383
393
  expectedElementVisible: expectedElementVisible ?? null,
394
+ actionType: actionType ?? null,
384
395
  networkRequests: networkRequests ?? null,
385
396
  hasLogErrors: hasLogErrors ?? null
386
397
  });
@@ -409,6 +420,7 @@ export const toolHandlers = {
409
420
  get_current_screen: handleGetCurrentScreen,
410
421
  get_screen_fingerprint: handleGetScreenFingerprint,
411
422
  wait_for_screen_change: handleWaitForScreenChange,
423
+ wait_for_ui_change: handleWaitForUIChange,
412
424
  expect_screen: handleExpectScreen,
413
425
  expect_element_visible: handleExpectElementVisible,
414
426
  expect_state: handleExpectState,
@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
6
6
  export { wrapResponse, toolDefinitions, handleToolCall };
7
7
  export const serverInfo = {
8
8
  name: 'mobile-debug-mcp',
9
- version: '0.25.1'
9
+ version: '0.26.1'
10
10
  };
11
11
  export function createServer() {
12
12
  const server = new Server(serverInfo, {
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.26.1]
6
+ - Fixed overuse of `get_network_activity`
7
+
8
+ ## [0.26.0]
9
+ - RFC-003 wait/synchronization contract with `snapshot_revision`, `captured_at_ms`, and `loading_state`
10
+ - Added `wait_for_ui_change` for stable in-place UI mutations
11
+ - Updated `get_ui_tree` and `capture_debug_snapshot` to surface snapshot metadata
12
+ - Emulator-validated the new UI-change flow against the Modul8 app
13
+
5
14
  ## [0.25.1]
6
15
  - Platform-native element identity metadata for UI targeting
7
16
  - Hierarchy-independent element references