mobile-debug-mcp 0.24.4 → 0.24.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,111 @@ import { AndroidObserve } from './android.js';
3
3
  import { iOSObserve } from './ios.js';
4
4
  export { AndroidObserve } from './android.js';
5
5
  export { iOSObserve } from './ios.js';
6
+ function normalizeHint(value) {
7
+ if (value === null || value === undefined)
8
+ return '';
9
+ return String(value).trim().replace(/\s+/g, ' ').toLowerCase();
10
+ }
11
+ function titleCase(value) {
12
+ return value
13
+ .replace(/[_-]+/g, ' ')
14
+ .replace(/\s+/g, ' ')
15
+ .trim()
16
+ .replace(/\b\w/g, (match) => match.toUpperCase());
17
+ }
18
+ function shortActivityName(activity) {
19
+ if (!activity)
20
+ return null;
21
+ const trimmed = String(activity).trim();
22
+ if (!trimmed)
23
+ return null;
24
+ const lastSegment = trimmed.split('.').pop() || trimmed;
25
+ const withoutSuffix = lastSegment.replace(/Activity$/, '');
26
+ return withoutSuffix ? titleCase(withoutSuffix) : titleCase(lastSegment);
27
+ }
28
+ function collectSnapshotTexts(tree) {
29
+ const elements = Array.isArray(tree?.elements) ? tree.elements : [];
30
+ const texts = [];
31
+ const actionables = [];
32
+ for (const element of elements) {
33
+ const rawText = element?.text ?? element?.contentDescription ?? element?.contentDesc ?? element?.accessibilityLabel ?? element?.resourceId ?? element?.id ?? '';
34
+ const text = normalizeHint(rawText);
35
+ if (text)
36
+ texts.push(text);
37
+ if (element?.clickable && element?.enabled !== false && text) {
38
+ actionables.push(text);
39
+ }
40
+ }
41
+ return {
42
+ texts: Array.from(new Set(texts)),
43
+ actionables: Array.from(new Set(actionables))
44
+ };
45
+ }
46
+ function inferSnapshotScreen(raw) {
47
+ const tree = raw.ui_tree;
48
+ const treeScreen = normalizeHint(tree?.screen);
49
+ if (treeScreen)
50
+ return titleCase(treeScreen);
51
+ const activity = shortActivityName(raw.activity);
52
+ if (activity)
53
+ return activity;
54
+ const { texts } = collectSnapshotTexts(tree);
55
+ if (texts.length > 0)
56
+ return titleCase(texts[0]);
57
+ return null;
58
+ }
59
+ function deriveSnapshotSemantic(raw) {
60
+ const tree = raw.ui_tree;
61
+ const { texts, actionables } = collectSnapshotTexts(tree);
62
+ const screenFromTree = normalizeHint(tree?.screen);
63
+ const activityHint = normalizeHint(raw.activity);
64
+ const screen = inferSnapshotScreen(raw);
65
+ if (!screen && !activityHint && texts.length === 0 && !raw.logs.length)
66
+ return null;
67
+ const hasErrorLogs = raw.logs.some((entry) => /error|fatal exception|exception|failed/i.test(entry.message));
68
+ const hasLoadingSignals = texts.some((text) => /loading|please wait|spinner|progress/i.test(text));
69
+ const hasPrimaryText = texts.some((text) => /sign in|log in|log in|login|home|checkout|settings|menu|profile|search/i.test(text));
70
+ const hasScreenshot = typeof raw.screenshot === 'string' && raw.screenshot.length > 0;
71
+ const hasUiTree = !!tree && Array.isArray(tree.elements);
72
+ const signals = {
73
+ has_activity: !!activityHint,
74
+ has_ui_tree: hasUiTree,
75
+ has_screenshot: hasScreenshot,
76
+ has_visible_text: texts.length > 0,
77
+ has_clickable_elements: actionables.length > 0,
78
+ has_error_logs: hasErrorLogs,
79
+ has_loading_signals: hasLoadingSignals,
80
+ has_primary_text: hasPrimaryText
81
+ };
82
+ const warnings = [];
83
+ if (screenFromTree && activityHint && screenFromTree !== activityHint) {
84
+ warnings.push('ui_tree.screen and activity hints differ');
85
+ }
86
+ if (!hasUiTree)
87
+ warnings.push('ui tree unavailable');
88
+ if (!activityHint)
89
+ warnings.push('activity unavailable');
90
+ if (hasErrorLogs)
91
+ warnings.push('error signals present in logs');
92
+ const evidenceScore = (hasUiTree ? 0.35 : 0) +
93
+ (screen ? 0.2 : 0) +
94
+ (activityHint ? 0.15 : 0) +
95
+ (actionables.length > 0 ? 0.15 : 0) +
96
+ (texts.length > 0 ? 0.1 : 0) +
97
+ (hasScreenshot ? 0.05 : 0) +
98
+ (hasErrorLogs ? -0.15 : 0) +
99
+ (hasLoadingSignals ? -0.05 : 0);
100
+ const confidence = Math.max(0, Math.min(1, Number(evidenceScore.toFixed(2))));
101
+ if (!screen && confidence < 0.3)
102
+ return null;
103
+ return {
104
+ screen,
105
+ signals,
106
+ actions_available: actionables.length > 0 ? actionables.slice(0, 10) : null,
107
+ confidence,
108
+ warnings: confidence >= 0.7 && warnings.length === 0 ? [] : warnings
109
+ };
110
+ }
6
111
  export class ToolsObserve {
7
112
  // Resolve a target device and return the appropriate observe instance and resolved info.
8
113
  static async resolveObserve(platform, deviceId, appId) {
@@ -95,7 +200,7 @@ export class ToolsObserve {
95
200
  }
96
201
  static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId } = {}) {
97
202
  const timestamp = Date.now();
98
- const out = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] };
203
+ const raw = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] };
99
204
  // Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
100
205
  const sid = sessionId || 'default';
101
206
  const tasks = {
@@ -114,59 +219,59 @@ export class ToolsObserve {
114
219
  if (res.status === 'fulfilled') {
115
220
  const val = res.value;
116
221
  if (key === 'screenshot') {
117
- out.screenshot = val && val.screenshot ? val.screenshot : null;
222
+ raw.screenshot = val && val.screenshot ? val.screenshot : null;
118
223
  }
119
224
  else if (key === 'currentScreen') {
120
- out.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : out.activity || '';
225
+ raw.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : raw.activity || '';
121
226
  }
122
227
  else if (key === 'fingerprint') {
123
228
  if (val && val.fingerprint)
124
- out.fingerprint = val.fingerprint;
229
+ raw.fingerprint = val.fingerprint;
125
230
  if (val && val.activity)
126
- out.activity = out.activity || val.activity;
231
+ raw.activity = raw.activity || val.activity;
127
232
  if (val && val.error)
128
- out.fingerprint_error = val.error;
233
+ raw.fingerprint_error = val.error;
129
234
  }
130
235
  else if (key === 'uiTree') {
131
- out.ui_tree = val;
236
+ raw.ui_tree = val;
132
237
  if (val && val.error)
133
- out.ui_tree_error = val.error;
238
+ raw.ui_tree_error = val.error;
134
239
  }
135
240
  else if (key === 'readLogStream') {
136
241
  // handle below after evaluating fallback
137
242
  // stream entries are written to raw.logs; the fallback below replaces them when the stream is empty
138
- out._streamEntries = val && val.entries ? val.entries : [];
243
+ raw.logs = Array.isArray(val?.entries) ? val.entries : [];
139
244
  }
140
245
  }
141
246
  else {
142
247
  const errMsg = res.reason instanceof Error ? res.reason.message : String(res.reason);
143
248
  if (key === 'screenshot')
144
- out.screenshot_error = errMsg;
249
+ raw.screenshot_error = errMsg;
145
250
  if (key === 'currentScreen')
146
- out.activity_error = errMsg;
251
+ raw.activity_error = errMsg;
147
252
  if (key === 'fingerprint') {
148
- out.fingerprint = null;
149
- out.fingerprint_error = errMsg;
253
+ raw.fingerprint = null;
254
+ raw.fingerprint_error = errMsg;
150
255
  }
151
256
  if (key === 'uiTree') {
152
- out.ui_tree = null;
153
- out.ui_tree_error = errMsg;
257
+ raw.ui_tree = null;
258
+ raw.ui_tree_error = errMsg;
154
259
  }
155
260
  if (key === 'readLogStream') {
156
- out._streamEntries = [];
157
- out.logs_error = errMsg;
261
+ raw.logs = [];
262
+ raw.logs_error = errMsg;
158
263
  }
159
264
  }
160
265
  }
161
266
  // Logs: prefer stream entries, fallback to snapshot logs when empty
162
267
  if (includeLogs) {
163
268
  try {
164
- let entries = Array.isArray(out._streamEntries) ? out._streamEntries : [];
269
+ let entries = Array.isArray(raw.logs) ? raw.logs : [];
165
270
  if (!entries || entries.length === 0) {
166
271
  const gl = await ToolsObserve.getLogsHandler({ platform, appId, deviceId, lines: logLines });
167
- const raw = (gl && gl.logs) ? gl.logs : [];
272
+ const snapshotLogs = (gl && gl.logs) ? gl.logs : [];
168
273
  // snapshotLogs may be structured entries or strings
169
- entries = raw.slice(-Math.max(0, logLines)).map(item => {
274
+ entries = snapshotLogs.slice(-Math.max(0, logLines)).map(item => {
170
275
  if (!item)
171
276
  return { timestamp: null, level: 'INFO', message: '' };
172
277
  if (typeof item === 'string') {
@@ -196,15 +301,14 @@ export class ToolsObserve {
196
301
  return { timestamp: tsNum, level, message: msg };
197
302
  });
198
303
  }
199
- out.logs = entries;
304
+ raw.logs = entries;
200
305
  }
201
306
  catch (e) {
202
- out.logs = [];
203
- out.logs_error = e instanceof Error ? e.message : String(e);
307
+ raw.logs = [];
308
+ raw.logs_error = e instanceof Error ? e.message : String(e);
204
309
  }
205
310
  }
206
- // Clean up internal temporary field
207
- delete out._streamEntries;
208
- return out;
311
+ const semantic = deriveSnapshotSemantic(raw);
312
+ return semantic ? { raw, semantic } : { raw };
209
313
  }
210
314
  }
@@ -240,7 +240,7 @@ Failure Handling:
240
240
  },
241
241
  {
242
242
  name: 'capture_debug_snapshot',
243
- description: 'Capture a complete debug snapshot (screenshot, ui tree, activity, fingerprint, logs). Returns structured JSON.',
243
+ description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
244
244
  inputSchema: {
245
245
  type: 'object',
246
246
  properties: {
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.24.5]
6
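+ - `capture_debug_snapshot` now returns a dual-layer response: the authoritative `raw` observation data plus an optional derived `semantic` layer (screen, signals, available actions, confidence, warnings)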
7
+
5
8
  ## [0.24.4]
6
9
  - Moving agents away from `wait_for_screen_change`
7
10
 
@@ -209,7 +209,78 @@ String-only errors are not allowed, including fallback handler errors.
209
209
 
210
210
  Note: string diagnostics may still appear inside structured JSON payloads where explicitly defined by a tool.
211
211
 
212
- ## 9. Classification
212
+ ## 9. Observation Tools (Extended Semantics)
213
+
214
+ Observation tools inspect application state without mutating it.
215
+
216
+ Examples:
217
+
218
+ - `capture_debug_snapshot`
219
+ - `get_screen_fingerprint`
220
+ - `get_network_activity`
221
+ - `get_logs`
222
+
223
+ ### 9.1 Snapshot Response Model
224
+
225
+ `capture_debug_snapshot` MUST return a dual-layer response:
226
+
227
+ - `raw`: required object
228
+ - `semantic`: optional object
229
+
230
+ The raw layer is authoritative and MUST remain unchanged from the underlying observation data. It is the source of truth and MUST NOT be interpreted or rewritten.
231
+
232
+ The semantic layer is derived, best-effort, and MUST be generated exclusively from the raw layer.
233
+
234
+ Raw layer contents include:
235
+
236
+ - UI hierarchy or accessibility tree
237
+ - screenshot when available
238
+ - element-level attributes
239
+ - logs and fingerprint/activity observations
240
+ - raw error fields when partial collection fails
241
+
242
+ Semantic layer shape when present:
243
+
244
+ ```ts
245
+ {
246
+ screen: string | null,
247
+ signals: Record<string, string | number | boolean> | null,
248
+ actions_available: string[] | null,
249
+ confidence: number,
250
+ warnings: string[]
251
+ }
252
+ ```
253
+
254
+ Rules:
255
+
256
+ - `confidence` MUST be between 0 and 1
257
+ - `warnings` MUST be present when `semantic` is present
258
+ - `semantic` MAY be omitted entirely when derivation is not reliable
259
+ - `semantic` MUST be treated as unreliable if it conflicts with raw data
260
+ - `actions_available` are hints only and MUST NOT be treated as guaranteed executable actions
261
+
262
+ ### 9.2 Agent Usage Contract
263
+
264
+ Agents SHOULD use `semantic` for primary decision-making when present.
265
+
266
+ Agents MUST fall back to `raw` when:
267
+
268
+ - `semantic` is missing
269
+ - `confidence < 0.7`
270
+ - `warnings` is non-empty
271
+ - semantic output conflicts with expected state or raw data
272
+
273
+ `semantic` is for planning only and MUST NOT be used for verification.
274
+
275
+ ### 9.3 Relationship to Classification
276
+
277
+ Semantic signals MAY be used as input to `classify_action_outcome`.
278
+
279
+ Semantic output MUST NOT replace classification or verification.
280
+
281
+ Classification remains a supplementary, post-action interpretation mechanism.
282
+
283
+ ## 10. Classification
213
284
 
214
285
  Tool: `classify_action_outcome`
215
286
 
@@ -223,7 +294,7 @@ Rules:
223
294
 
224
295
  It is not a verification mechanism.
225
296
 
226
- ## 10. Execution Patterns
297
+ ## 11. Execution Patterns
227
298
 
228
299
  Canonical pattern:
229
300
 
@@ -235,7 +306,7 @@ Interpretation:
235
306
  - `wait_for_screen_change.success` = UI changed
236
307
  - `expect_screen.success` = correct outcome verified
237
308
 
238
- ## 11. Known Deviations
309
+ ## 12. Known Deviations
239
310
 
240
311
  Explicitly allowed:
241
312
 
@@ -246,7 +317,7 @@ Explicitly allowed:
246
317
  - `scroll_to_element` outcome-based success (temporary exception)
247
318
  - extended runtime fields in `list_devices`
248
319
 
249
- ## 12. Migration Rules
320
+ ## 13. Migration Rules
250
321
 
251
322
  Must change now:
252
323
 
@@ -258,6 +329,7 @@ Should align when touched:
258
329
  - `start_app`, `restart_app`
259
330
  - `scroll_to_element`
260
331
  - `wait_for_ui`
332
+ - `capture_debug_snapshot`
261
333
 
262
334
  No change required:
263
335
 
@@ -266,7 +338,7 @@ No change required:
266
338
  - `expect_element_visible`
267
339
  - `wait_for_screen_change`
268
340
 
269
- ## 13. Guiding Principles
341
+ ## 14. Guiding Principles
270
342
 
271
343
  - Actions execute
272
344
  - Verification proves
@@ -132,24 +132,40 @@ Behavior:
132
132
  - Returns partial data when components fail and includes per-part error fields (e.g. `screenshot_error`, `ui_tree_error`).
133
133
  - Caps logs to `logLines` entries and prefers recent entries.
134
134
  - Fast by default: does not wait for new logs and avoids long blocking operations.
135
+ - Returns a dual-layer payload:
136
+ - `raw` is authoritative and contains the underlying observation data unchanged.
137
+ - `semantic` is optional, derived from `raw`, and intended for planning only.
135
138
 
136
139
  Response (example):
137
140
 
138
141
  ```json
139
142
  {
140
- "timestamp": 1710000000,
141
- "reason": "Crash after tapping checkout",
142
- "activity": "CheckoutActivity",
143
- "fingerprint": "abc123",
144
- "screenshot": "<base64 PNG string>",
145
- "ui_tree": { ... },
146
- "logs": [ { "timestamp": 1710000000, "level": "ERROR", "message": "NullPointerException at CheckoutViewModel" } ]
143
+ "raw": {
144
+ "timestamp": 1710000000,
145
+ "reason": "Crash after tapping checkout",
146
+ "activity": "CheckoutActivity",
147
+ "fingerprint": "abc123",
148
+ "screenshot": "<base64 PNG string>",
149
+ "ui_tree": { ... },
150
+ "logs": [ { "timestamp": 1710000000, "level": "ERROR", "message": "NullPointerException at CheckoutViewModel" } ]
151
+ },
152
+ "semantic": {
153
+ "screen": "Checkout",
154
+ "signals": {
155
+ "has_error_logs": true,
156
+ "has_clickable_elements": false
157
+ },
158
+ "actions_available": null,
159
+ "confidence": 0.7,
160
+ "warnings": ["error signals present in logs"]
161
+ }
147
162
  }
148
163
  ```
149
164
 
150
165
  Notes:
151
166
  - Useful immediately after detecting crashes or unexpected UI behaviour.
152
167
  - Do not expect perfect data during a crash; tool is designed to return best-effort context and include errors for failed parts.
168
+ - Treat `semantic` as planning guidance only; `raw` remains the source of truth.
153
169
 
154
170
  ---
155
171
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.24.4",
3
+ "version": "0.24.5",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,10 +1,146 @@
1
1
  import { resolveTargetDevice } from '../utils/resolve-device.js'
2
2
  import { AndroidObserve } from './android.js'
3
3
  import { iOSObserve } from './ios.js'
4
+ import type {
5
+ CaptureDebugSnapshotRawResponse,
6
+ SnapshotSemanticResponse
7
+ } from '../types.js'
4
8
 
5
9
  export { AndroidObserve } from './android.js'
6
10
  export { iOSObserve } from './ios.js'
7
11
 
12
+ interface SnapshotTreeElementLike {
13
+ text?: string | null
14
+ contentDescription?: string | null
15
+ contentDesc?: string | null
16
+ accessibilityLabel?: string | null
17
+ resourceId?: string | null
18
+ id?: string | null
19
+ type?: string | null
20
+ class?: string | null
21
+ clickable?: boolean
22
+ enabled?: boolean
23
+ visible?: boolean
24
+ }
25
+
26
+ interface SnapshotTreeLike {
27
+ screen?: string | null
28
+ elements?: SnapshotTreeElementLike[]
29
+ }
30
+
31
+ function normalizeHint(value: unknown): string {
32
+ if (value === null || value === undefined) return ''
33
+ return String(value).trim().replace(/\s+/g, ' ').toLowerCase()
34
+ }
35
+
36
+ function titleCase(value: string): string {
37
+ return value
38
+ .replace(/[_-]+/g, ' ')
39
+ .replace(/\s+/g, ' ')
40
+ .trim()
41
+ .replace(/\b\w/g, (match) => match.toUpperCase())
42
+ }
43
+
44
+ function shortActivityName(activity: string | null | undefined): string | null {
45
+ if (!activity) return null
46
+ const trimmed = String(activity).trim()
47
+ if (!trimmed) return null
48
+ const lastSegment = trimmed.split('.').pop() || trimmed
49
+ const withoutSuffix = lastSegment.replace(/Activity$/, '')
50
+ return withoutSuffix ? titleCase(withoutSuffix) : titleCase(lastSegment)
51
+ }
52
+
53
+ function collectSnapshotTexts(tree: SnapshotTreeLike | null | undefined) {
54
+ const elements = Array.isArray(tree?.elements) ? tree!.elements! : []
55
+ const texts: string[] = []
56
+ const actionables: string[] = []
57
+
58
+ for (const element of elements) {
59
+ const rawText = element?.text ?? element?.contentDescription ?? element?.contentDesc ?? element?.accessibilityLabel ?? element?.resourceId ?? element?.id ?? ''
60
+ const text = normalizeHint(rawText)
61
+ if (text) texts.push(text)
62
+ if (element?.clickable && element?.enabled !== false && text) {
63
+ actionables.push(text)
64
+ }
65
+ }
66
+
67
+ return {
68
+ texts: Array.from(new Set(texts)),
69
+ actionables: Array.from(new Set(actionables))
70
+ }
71
+ }
72
+
73
+ function inferSnapshotScreen(raw: CaptureDebugSnapshotRawResponse): string | null {
74
+ const tree = raw.ui_tree as SnapshotTreeLike | null | undefined
75
+ const treeScreen = normalizeHint(tree?.screen)
76
+ if (treeScreen) return titleCase(treeScreen)
77
+
78
+ const activity = shortActivityName(raw.activity)
79
+ if (activity) return activity
80
+
81
+ const { texts } = collectSnapshotTexts(tree)
82
+ if (texts.length > 0) return titleCase(texts[0])
83
+
84
+ return null
85
+ }
86
+
87
+ function deriveSnapshotSemantic(raw: CaptureDebugSnapshotRawResponse): SnapshotSemanticResponse | null {
88
+ const tree = raw.ui_tree as SnapshotTreeLike | null | undefined
89
+ const { texts, actionables } = collectSnapshotTexts(tree)
90
+ const screenFromTree = normalizeHint(tree?.screen)
91
+ const activityHint = normalizeHint(raw.activity)
92
+ const screen = inferSnapshotScreen(raw)
93
+
94
+ if (!screen && !activityHint && texts.length === 0 && !raw.logs.length) return null
95
+
96
+ const hasErrorLogs = raw.logs.some((entry) => /error|fatal exception|exception|failed/i.test(entry.message))
97
+ const hasLoadingSignals = texts.some((text) => /loading|please wait|spinner|progress/i.test(text))
98
+ const hasPrimaryText = texts.some((text) => /sign in|log in|log in|login|home|checkout|settings|menu|profile|search/i.test(text))
99
+ const hasScreenshot = typeof raw.screenshot === 'string' && raw.screenshot.length > 0
100
+ const hasUiTree = !!tree && Array.isArray(tree.elements)
101
+
102
+ const signals: Record<string, string | number | boolean> = {
103
+ has_activity: !!activityHint,
104
+ has_ui_tree: hasUiTree,
105
+ has_screenshot: hasScreenshot,
106
+ has_visible_text: texts.length > 0,
107
+ has_clickable_elements: actionables.length > 0,
108
+ has_error_logs: hasErrorLogs,
109
+ has_loading_signals: hasLoadingSignals,
110
+ has_primary_text: hasPrimaryText
111
+ }
112
+
113
+ const warnings: string[] = []
114
+ if (screenFromTree && activityHint && screenFromTree !== activityHint) {
115
+ warnings.push('ui_tree.screen and activity hints differ')
116
+ }
117
+ if (!hasUiTree) warnings.push('ui tree unavailable')
118
+ if (!activityHint) warnings.push('activity unavailable')
119
+ if (hasErrorLogs) warnings.push('error signals present in logs')
120
+
121
+ const evidenceScore =
122
+ (hasUiTree ? 0.35 : 0) +
123
+ (screen ? 0.2 : 0) +
124
+ (activityHint ? 0.15 : 0) +
125
+ (actionables.length > 0 ? 0.15 : 0) +
126
+ (texts.length > 0 ? 0.1 : 0) +
127
+ (hasScreenshot ? 0.05 : 0) +
128
+ (hasErrorLogs ? -0.15 : 0) +
129
+ (hasLoadingSignals ? -0.05 : 0)
130
+
131
+ const confidence = Math.max(0, Math.min(1, Number(evidenceScore.toFixed(2))))
132
+
133
+ if (!screen && confidence < 0.3) return null
134
+
135
+ return {
136
+ screen,
137
+ signals,
138
+ actions_available: actionables.length > 0 ? actionables.slice(0, 10) : null,
139
+ confidence,
140
+ warnings: confidence >= 0.7 && warnings.length === 0 ? [] : warnings
141
+ }
142
+ }
143
+
8
144
  export class ToolsObserve {
9
145
  // Resolve a target device and return the appropriate observe instance and resolved info.
10
146
  private static async resolveObserve(platform?: 'android' | 'ios', deviceId?: string, appId?: string) {
@@ -103,7 +239,7 @@ export class ToolsObserve {
103
239
 
104
240
  static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId }: { reason?: string; includeLogs?: boolean; logLines?: number; platform?: 'android' | 'ios'; appId?: string; deviceId?: string; sessionId?: string } = {}) {
105
241
  const timestamp = Date.now()
106
- const out: any = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] }
242
+ const raw: CaptureDebugSnapshotRawResponse = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] }
107
243
 
108
244
  // Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
109
245
  const sid = sessionId || 'default'
@@ -125,40 +261,40 @@ export class ToolsObserve {
125
261
  if (res.status === 'fulfilled') {
126
262
  const val = res.value
127
263
  if (key === 'screenshot') {
128
- out.screenshot = val && val.screenshot ? val.screenshot : null
264
+ raw.screenshot = val && val.screenshot ? val.screenshot : null
129
265
  } else if (key === 'currentScreen') {
130
- out.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : out.activity || ''
266
+ raw.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : raw.activity || ''
131
267
  } else if (key === 'fingerprint') {
132
- if (val && val.fingerprint) out.fingerprint = val.fingerprint
133
- if (val && val.activity) out.activity = out.activity || val.activity
134
- if (val && val.error) out.fingerprint_error = val.error
268
+ if (val && val.fingerprint) raw.fingerprint = val.fingerprint
269
+ if (val && val.activity) raw.activity = raw.activity || val.activity
270
+ if (val && val.error) raw.fingerprint_error = val.error
135
271
  } else if (key === 'uiTree') {
136
- out.ui_tree = val
137
- if (val && val.error) out.ui_tree_error = val.error
272
+ raw.ui_tree = val
273
+ if (val && val.error) raw.ui_tree_error = val.error
138
274
  } else if (key === 'readLogStream') {
139
275
  // handle below after evaluating fallback
140
276
  // stream entries are written to raw.logs; the fallback below replaces them when the stream is empty
141
- out._streamEntries = val && val.entries ? val.entries : []
277
+ raw.logs = Array.isArray(val?.entries) ? val.entries : []
142
278
  }
143
279
  } else {
144
280
  const errMsg = res.reason instanceof Error ? res.reason.message : String(res.reason)
145
- if (key === 'screenshot') out.screenshot_error = errMsg
146
- if (key === 'currentScreen') out.activity_error = errMsg
147
- if (key === 'fingerprint') { out.fingerprint = null; out.fingerprint_error = errMsg }
148
- if (key === 'uiTree') { out.ui_tree = null; out.ui_tree_error = errMsg }
149
- if (key === 'readLogStream') { out._streamEntries = [] ; out.logs_error = errMsg }
281
+ if (key === 'screenshot') raw.screenshot_error = errMsg
282
+ if (key === 'currentScreen') raw.activity_error = errMsg
283
+ if (key === 'fingerprint') { raw.fingerprint = null; raw.fingerprint_error = errMsg }
284
+ if (key === 'uiTree') { raw.ui_tree = null; raw.ui_tree_error = errMsg }
285
+ if (key === 'readLogStream') { raw.logs = []; raw.logs_error = errMsg }
150
286
  }
151
287
  }
152
288
 
153
289
  // Logs: prefer stream entries, fallback to snapshot logs when empty
154
290
  if (includeLogs) {
155
291
  try {
156
- let entries: any[] = Array.isArray(out._streamEntries) ? out._streamEntries : []
292
+ let entries: any[] = Array.isArray(raw.logs) ? raw.logs : []
157
293
  if (!entries || entries.length === 0) {
158
294
  const gl = await ToolsObserve.getLogsHandler({ platform, appId, deviceId, lines: logLines })
159
- const raw: any[] = (gl && (gl as any).logs) ? (gl as any).logs : []
295
+ const snapshotLogs: any[] = (gl && (gl as any).logs) ? (gl as any).logs : []
160
296
  // snapshotLogs may be structured entries or strings
161
- entries = raw.slice(-Math.max(0, logLines)).map(item => {
297
+ entries = snapshotLogs.slice(-Math.max(0, logLines)).map(item => {
162
298
  if (!item) return { timestamp: null, level: 'INFO', message: '' }
163
299
  if (typeof item === 'string') {
164
300
  const level = /\b(FATAL EXCEPTION|ERROR| E )\b/i.test(item) ? 'ERROR' : /\b(WARN| W )\b/i.test(item) ? 'WARN' : 'INFO'
@@ -186,16 +322,14 @@ export class ToolsObserve {
186
322
  })
187
323
  }
188
324
 
189
- out.logs = entries
325
+ raw.logs = entries
190
326
  } catch (e) {
191
- out.logs = []
192
- out.logs_error = e instanceof Error ? e.message : String(e)
327
+ raw.logs = []
328
+ raw.logs_error = e instanceof Error ? e.message : String(e)
193
329
  }
194
330
  }
195
331
 
196
- // Clean up internal temporary field
197
- delete out._streamEntries
198
-
199
- return out
332
+ const semantic = deriveSnapshotSemantic(raw)
333
+ return semantic ? { raw, semantic } : { raw }
200
334
  }
201
335
  }
@@ -240,7 +240,7 @@ Failure Handling:
240
240
  },
241
241
  {
242
242
  name: 'capture_debug_snapshot',
243
- description: 'Capture a complete debug snapshot (screenshot, ui tree, activity, fingerprint, logs). Returns structured JSON.',
243
+ description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
244
244
  inputSchema: {
245
245
  type: 'object',
246
246
  properties: {
package/src/types.ts CHANGED
@@ -137,6 +137,35 @@ export interface GetCurrentScreenResponse {
137
137
  error?: string;
138
138
  }
139
139
 
140
+ export interface SnapshotSemanticResponse {
141
+ screen: string | null;
142
+ signals: Record<string, string | number | boolean> | null;
143
+ actions_available: string[] | null;
144
+ confidence: number;
145
+ warnings: string[];
146
+ }
147
+
148
+ export interface CaptureDebugSnapshotRawResponse {
149
+ timestamp: number;
150
+ reason: string;
151
+ activity: string | null;
152
+ fingerprint: string | null;
153
+ screenshot: string | null;
154
+ ui_tree: unknown | null;
155
+ logs: StructuredLogEntry[];
156
+ device?: DeviceInfo;
157
+ screenshot_error?: string;
158
+ activity_error?: string;
159
+ fingerprint_error?: string;
160
+ ui_tree_error?: string;
161
+ logs_error?: string;
162
+ }
163
+
164
+ export interface CaptureDebugSnapshotResponse {
165
+ raw: CaptureDebugSnapshotRawResponse;
166
+ semantic?: SnapshotSemanticResponse | null;
167
+ }
168
+
140
169
  export interface WaitForElementResponse {
141
170
  device: DeviceInfo;
142
171
  found: boolean;
@@ -35,8 +35,11 @@ async function run() {
35
35
 
36
36
  const res1: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: true, logLines: 50, sessionId: 's1' })
37
37
  console.log('res1:', JSON.stringify(res1, null, 2))
38
- const pass1 = res1 && res1.screenshot === 'BASE64PNG' && res1.activity && res1.fingerprint === 'abc123' && Array.isArray(res1.logs) && res1.logs.length === 1
38
+ const pass1 = res1 && res1.raw && res1.raw.screenshot === 'BASE64PNG' && res1.raw.activity && res1.raw.fingerprint === 'abc123' && Array.isArray(res1.raw.logs) && res1.raw.logs.length === 1
39
39
  assert.ok(pass1, 'captureDebugSnapshot should aggregate successful handler results')
40
+ assert.strictEqual(res1.semantic.screen, 'Main')
41
+ assert.strictEqual(res1.semantic.confidence >= 0.7, true)
42
+ assert.deepStrictEqual(res1.semantic.actions_available, null)
40
43
  console.log('Test 1:', pass1 ? 'PASS' : 'FAIL')
41
44
 
42
45
  // Restore handlers before next test
@@ -55,7 +58,7 @@ async function run() {
55
58
 
56
59
  const res2: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: true, logLines: 10, appId: 'com.example' })
57
60
  console.log('res2:', JSON.stringify(res2, null, 2))
58
- const pass2 = res2 && res2.screenshot_error && res2.ui_tree_error && Array.isArray(res2.logs) && res2.logs.length === 2
61
+ const pass2 = res2 && res2.raw && res2.raw.screenshot_error && res2.raw.ui_tree_error && Array.isArray(res2.raw.logs) && res2.raw.logs.length === 2
59
62
  assert.ok(pass2, 'captureDebugSnapshot should surface partial failures and fallback logs')
60
63
  console.log('Test 2:', pass2 ? 'PASS' : 'FAIL')
61
64
 
@@ -76,7 +79,7 @@ async function run() {
76
79
 
77
80
  const res3: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: false })
78
81
  console.log('res3:', JSON.stringify(res3, null, 2))
79
- const pass3 = res3 && typeof res3.logs !== 'undefined' && res3.logs.length === 0
82
+ const pass3 = res3 && res3.raw && typeof res3.raw.logs !== 'undefined' && res3.raw.logs.length === 0
80
83
  assert.ok(pass3, 'captureDebugSnapshot should return an empty logs array when includeLogs is false')
81
84
  console.log('Test 3:', pass3 ? 'PASS' : 'FAIL')
82
85
 
@@ -32,6 +32,8 @@ async function run() {
32
32
  assert(captureDebugSnapshot, 'capture_debug_snapshot should be registered')
33
33
  assert.strictEqual((captureDebugSnapshot as any).inputSchema.properties.includeLogs.default, true)
34
34
  assert.strictEqual((captureDebugSnapshot as any).inputSchema.properties.logLines.default, 200)
35
+ assert.match((captureDebugSnapshot as any).description, /raw observation layer/i)
36
+ assert.match((captureDebugSnapshot as any).description, /optional derived semantic layer/i)
35
37
 
36
38
  const startLogStream = toolDefinitions.find((tool) => tool.name === 'start_log_stream')
37
39
  assert(startLogStream, 'start_log_stream should be registered')
@@ -16,6 +16,7 @@ async function run() {
16
16
  const originalCaptureScreenshotHandler = (ToolsObserve as any).captureScreenshotHandler
17
17
  const originalGetUITreeHandler = (ToolsObserve as any).getUITreeHandler
18
18
  const originalGetScreenFingerprintHandler = (ToolsObserve as any).getScreenFingerprintHandler
19
+ const originalCaptureDebugSnapshotHandler = (ToolsObserve as any).captureDebugSnapshotHandler
19
20
 
20
21
  try {
21
22
  ;(ToolsManage as any).installAppHandler = async () => ({
@@ -181,6 +182,32 @@ async function run() {
181
182
  assert.strictEqual(uiTreePayload.resolution.height, 2400)
182
183
  assert.strictEqual(uiTreePayload.elements[0].text, 'Login')
183
184
 
185
+ ;(ToolsObserve as any).captureDebugSnapshotHandler = async () => ({
186
+ raw: {
187
+ timestamp: 1710000000000,
188
+ reason: 'manual',
189
+ activity: 'com.example.MainActivity',
190
+ fingerprint: 'fp_raw',
191
+ screenshot: 'base64',
192
+ ui_tree: { screen: 'Home', elements: [] },
193
+ logs: [],
194
+ device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true }
195
+ },
196
+ semantic: {
197
+ screen: 'Home',
198
+ signals: { has_activity: true },
199
+ actions_available: ['open settings'],
200
+ confidence: 0.8,
201
+ warnings: []
202
+ }
203
+ })
204
+
205
+ const snapshotResponse = await handleToolCall('capture_debug_snapshot', { platform: 'android' })
206
+ const snapshotPayload = JSON.parse((snapshotResponse as any).content[0].text)
207
+ assert.strictEqual(snapshotPayload.raw.fingerprint, 'fp_raw')
208
+ assert.strictEqual(snapshotPayload.semantic.screen, 'Home')
209
+ assert.strictEqual(snapshotPayload.semantic.confidence, 0.8)
210
+
184
211
  console.log('server response-shape tests passed')
185
212
  } finally {
186
213
  ;(ToolsManage as any).installAppHandler = originalInstallAppHandler
@@ -193,6 +220,7 @@ async function run() {
193
220
  ;(ToolsObserve as any).captureScreenshotHandler = originalCaptureScreenshotHandler
194
221
  ;(ToolsObserve as any).getUITreeHandler = originalGetUITreeHandler
195
222
  ;(ToolsObserve as any).getScreenFingerprintHandler = originalGetScreenFingerprintHandler
223
+ ;(ToolsObserve as any).captureDebugSnapshotHandler = originalCaptureDebugSnapshotHandler
196
224
  }
197
225
  }
198
226