mobile-debug-mcp 0.24.4 → 0.24.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/observe/index.js +130 -26
- package/dist/server/tool-definitions.js +1 -1
- package/docs/CHANGELOG.md +6 -0
- package/docs/specs/mcp-tooling-spec-v1.md +77 -5
- package/docs/tools/observe.md +24 -8
- package/package.json +1 -1
- package/src/observe/index.ts +158 -24
- package/src/server/tool-definitions.ts +1 -1
- package/src/types.ts +29 -0
- package/test/unit/observe/capture_debug_snapshot.test.ts +6 -3
- package/test/unit/server/contract.test.ts +2 -0
- package/test/unit/server/response_shapes.test.ts +28 -0
package/dist/observe/index.js
CHANGED
|
@@ -3,6 +3,111 @@ import { AndroidObserve } from './android.js';
|
|
|
3
3
|
import { iOSObserve } from './ios.js';
|
|
4
4
|
export { AndroidObserve } from './android.js';
|
|
5
5
|
export { iOSObserve } from './ios.js';
|
|
6
|
+
/**
 * Canonicalises an arbitrary value into a comparison-friendly hint string.
 *
 * The value is stringified, trimmed, has every run of whitespace collapsed
 * to a single space, and is lower-cased.
 *
 * @param {unknown} value - Any value; `null` and `undefined` are treated as "no hint".
 * @returns {string} The normalised hint, or `''` when there is nothing to normalise.
 */
function normalizeHint(value) {
    if (value == null) {
        return '';
    }
    // After trim() there is no leading/trailing whitespace, so splitting on
    // whitespace runs and re-joining is the same as collapsing them in place.
    const words = String(value).trim().split(/\s+/);
    return words.join(' ').toLowerCase();
}
|
|
11
|
+
/**
 * Converts an identifier-like or free-text string into a Title Cased label.
 *
 * Underscores and hyphens become spaces, whitespace runs are collapsed, and the
 * first letter or digit of every word is upper-cased.
 *
 * Word starts are detected with Unicode-aware classes rather than `\b\w`, which
 * is ASCII-only and treats an apostrophe as a word boundary. That produced
 * labels such as "What'S New" and "éCran". Here a character is a word start
 * only when it is not preceded by a letter, mark or digit, and is not the
 * letter that follows an in-word apostrophe.
 *
 * @param {string} value - Text to convert.
 * @returns {string} The title-cased text (may be `''`).
 */
function titleCase(value) {
    return value
        .replace(/[_-]+/g, ' ')
        .replace(/\s+/g, ' ')
        .trim()
        .replace(
            /(?<![\p{L}\p{M}\p{N}])(?<![\p{L}\p{M}\p{N}]['’])[\p{L}\p{N}]/gu,
            (match) => match.toUpperCase()
        );
}
|
|
18
|
+
/**
 * Derives a short, human-readable screen label from an activity identifier.
 *
 * Takes the segment after the last `.`, strips a trailing `Activity` suffix
 * (kept when stripping would leave nothing), and title-cases the result.
 *
 * @param {unknown} activity - Activity identifier, e.g. a dotted class name.
 * @returns {string | null} The label, or `null` when the input is falsy or blank.
 */
function shortActivityName(activity) {
    const trimmed = activity ? String(activity).trim() : '';
    if (trimmed === '') {
        return null;
    }
    const segments = trimmed.split('.');
    // A trailing dot yields an empty last segment; fall back to the whole string.
    const lastSegment = segments[segments.length - 1] || trimmed;
    const stripped = lastSegment.replace(/Activity$/, '');
    return titleCase(stripped || lastSegment);
}
|
|
28
|
+
/**
 * Collects the distinct normalised labels found in a UI tree snapshot.
 *
 * For each element the label is the first non-blank value among `text`,
 * `contentDescription`, `contentDesc`, `accessibilityLabel`, `resourceId`
 * and `id`. The previous `??` chain only skipped `null`/`undefined`, so an
 * element with `text: ''` and a real `contentDescription` was dropped
 * entirely; blank candidates are now skipped as well.
 *
 * @param {{ elements?: unknown[] } | null | undefined} tree - UI tree; anything
 *   without an `elements` array is treated as empty.
 * @returns {{ texts: string[], actionables: string[] }} De-duplicated labels in
 *   first-seen order. `actionables` holds the labels of elements that are
 *   clickable and not explicitly disabled (`enabled !== false`).
 */
function collectSnapshotTexts(tree) {
    const elements = Array.isArray(tree?.elements) ? tree.elements : [];
    // Sets keep insertion order, so the output order matches first occurrence.
    const texts = new Set();
    const actionables = new Set();
    for (const element of elements) {
        if (!element) {
            continue;
        }
        const candidates = [
            element.text,
            element.contentDescription,
            element.contentDesc,
            element.accessibilityLabel,
            element.resourceId,
            element.id
        ];
        let label = '';
        for (const candidate of candidates) {
            label = normalizeHint(candidate);
            if (label) {
                break;
            }
        }
        if (!label) {
            continue;
        }
        texts.add(label);
        if (element.clickable && element.enabled !== false) {
            actionables.add(label);
        }
    }
    return {
        texts: Array.from(texts),
        actionables: Array.from(actionables)
    };
}
|
|
46
|
+
/**
 * Picks the best available screen label for a raw snapshot.
 *
 * Sources are tried in order: `ui_tree.screen`, then the shortened activity
 * name, then the first label collected from the UI tree.
 *
 * @param {{ ui_tree?: any, activity?: unknown }} raw - Raw snapshot layer.
 * @returns {string | null} The label, or `null` when no source yields one.
 */
function inferSnapshotScreen(raw) {
    const uiTree = raw.ui_tree;

    const declaredScreen = normalizeHint(uiTree?.screen);
    if (declaredScreen) {
        return titleCase(declaredScreen);
    }

    const fromActivity = shortActivityName(raw.activity);
    if (fromActivity) {
        return fromActivity;
    }

    const [firstText] = collectSnapshotTexts(uiTree).texts;
    return firstText === undefined ? null : titleCase(firstText);
}
|
|
59
|
+
/**
 * Derives the optional semantic layer from a raw debug snapshot.
 *
 * Returns `null` when there is nothing to reason about (no screen, activity,
 * UI text or logs), or when no screen could be inferred and the confidence is
 * below 0.3. Otherwise returns the inferred screen, boolean signals, up to ten
 * action hints, a confidence clamped to [0, 1], and a list of warnings.
 *
 * Changes from the previous revision:
 *  - The screen/activity mismatch warning used to compare `ui_tree.screen`
 *    with the full activity string (for example "main" against
 *    "com.example.mainactivity"), so it fired on almost every snapshot. The
 *    warning is now suppressed when the shortened forms agree.
 *  - Log scanning tolerates a missing `logs` array and null entries instead of
 *    throwing.
 *
 * @param {object} raw - Raw snapshot layer (`ui_tree`, `activity`, `logs`, `screenshot`).
 * @returns {{ screen: string | null, signals: object, actions_available: string[] | null,
 *   confidence: number, warnings: string[] } | null}
 */
function deriveSnapshotSemantic(raw) {
    const tree = raw.ui_tree;
    const logs = Array.isArray(raw.logs) ? raw.logs : [];
    const { texts, actionables } = collectSnapshotTexts(tree);
    const screenFromTree = normalizeHint(tree?.screen);
    const activityHint = normalizeHint(raw.activity);
    const screen = inferSnapshotScreen(raw);
    if (!screen && !activityHint && texts.length === 0 && logs.length === 0) {
        return null;
    }
    const hasErrorLogs = logs.some((entry) => /error|fatal exception|exception|failed/i.test(String(entry?.message ?? '')));
    const hasLoadingSignals = texts.some((text) => /loading|please wait|spinner|progress/i.test(text));
    const hasPrimaryText = texts.some((text) => /sign in|log in|login|home|checkout|settings|menu|profile|search/i.test(text));
    const hasScreenshot = typeof raw.screenshot === 'string' && raw.screenshot.length > 0;
    const hasUiTree = !!tree && Array.isArray(tree.elements);
    const signals = {
        has_activity: !!activityHint,
        has_ui_tree: hasUiTree,
        has_screenshot: hasScreenshot,
        has_visible_text: texts.length > 0,
        has_clickable_elements: actionables.length > 0,
        has_error_logs: hasErrorLogs,
        has_loading_signals: hasLoadingSignals,
        has_primary_text: hasPrimaryText
    };
    const warnings = [];
    // Compare like with like: both hints are reduced to their short screen name
    // before a disagreement is reported.
    const treeScreenShort = normalizeHint(shortActivityName(tree?.screen));
    const activityShort = normalizeHint(shortActivityName(raw.activity));
    const hintsDisagree = screenFromTree !== activityHint && treeScreenShort !== activityShort;
    if (screenFromTree && activityHint && hintsDisagree) {
        warnings.push('ui_tree.screen and activity hints differ');
    }
    if (!hasUiTree) {
        warnings.push('ui tree unavailable');
    }
    if (!activityHint) {
        warnings.push('activity unavailable');
    }
    if (hasErrorLogs) {
        warnings.push('error signals present in logs');
    }
    // Additive evidence score: positive weights sum to 1.0, error and loading
    // signals subtract from it.
    const evidenceScore = (hasUiTree ? 0.35 : 0) +
        (screen ? 0.2 : 0) +
        (activityHint ? 0.15 : 0) +
        (actionables.length > 0 ? 0.15 : 0) +
        (texts.length > 0 ? 0.1 : 0) +
        (hasScreenshot ? 0.05 : 0) +
        (hasErrorLogs ? -0.15 : 0) +
        (hasLoadingSignals ? -0.05 : 0);
    // Round to two decimals to hide floating-point noise, then clamp.
    const confidence = Math.max(0, Math.min(1, Number(evidenceScore.toFixed(2))));
    if (!screen && confidence < 0.3) {
        return null;
    }
    return {
        screen,
        signals,
        actions_available: actionables.length > 0 ? actionables.slice(0, 10) : null,
        confidence,
        warnings
    };
}
|
|
6
111
|
export class ToolsObserve {
|
|
7
112
|
// Resolve a target device and return the appropriate observe instance and resolved info.
|
|
8
113
|
static async resolveObserve(platform, deviceId, appId) {
|
|
@@ -95,7 +200,7 @@ export class ToolsObserve {
|
|
|
95
200
|
}
|
|
96
201
|
static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId } = {}) {
|
|
97
202
|
const timestamp = Date.now();
|
|
98
|
-
const
|
|
203
|
+
const raw = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] };
|
|
99
204
|
// Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
|
|
100
205
|
const sid = sessionId || 'default';
|
|
101
206
|
const tasks = {
|
|
@@ -114,59 +219,59 @@ export class ToolsObserve {
|
|
|
114
219
|
if (res.status === 'fulfilled') {
|
|
115
220
|
const val = res.value;
|
|
116
221
|
if (key === 'screenshot') {
|
|
117
|
-
|
|
222
|
+
raw.screenshot = val && val.screenshot ? val.screenshot : null;
|
|
118
223
|
}
|
|
119
224
|
else if (key === 'currentScreen') {
|
|
120
|
-
|
|
225
|
+
raw.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : raw.activity || '';
|
|
121
226
|
}
|
|
122
227
|
else if (key === 'fingerprint') {
|
|
123
228
|
if (val && val.fingerprint)
|
|
124
|
-
|
|
229
|
+
raw.fingerprint = val.fingerprint;
|
|
125
230
|
if (val && val.activity)
|
|
126
|
-
|
|
231
|
+
raw.activity = raw.activity || val.activity;
|
|
127
232
|
if (val && val.error)
|
|
128
|
-
|
|
233
|
+
raw.fingerprint_error = val.error;
|
|
129
234
|
}
|
|
130
235
|
else if (key === 'uiTree') {
|
|
131
|
-
|
|
236
|
+
raw.ui_tree = val;
|
|
132
237
|
if (val && val.error)
|
|
133
|
-
|
|
238
|
+
raw.ui_tree_error = val.error;
|
|
134
239
|
}
|
|
135
240
|
else if (key === 'readLogStream') {
|
|
136
241
|
// handle below after evaluating fallback
|
|
137
242
|
// keep the stream entries on raw.logs; the fallback to snapshot logs is evaluated below
|
|
138
|
-
|
|
243
|
+
raw.logs = Array.isArray(val?.entries) ? val.entries : [];
|
|
139
244
|
}
|
|
140
245
|
}
|
|
141
246
|
else {
|
|
142
247
|
const errMsg = res.reason instanceof Error ? res.reason.message : String(res.reason);
|
|
143
248
|
if (key === 'screenshot')
|
|
144
|
-
|
|
249
|
+
raw.screenshot_error = errMsg;
|
|
145
250
|
if (key === 'currentScreen')
|
|
146
|
-
|
|
251
|
+
raw.activity_error = errMsg;
|
|
147
252
|
if (key === 'fingerprint') {
|
|
148
|
-
|
|
149
|
-
|
|
253
|
+
raw.fingerprint = null;
|
|
254
|
+
raw.fingerprint_error = errMsg;
|
|
150
255
|
}
|
|
151
256
|
if (key === 'uiTree') {
|
|
152
|
-
|
|
153
|
-
|
|
257
|
+
raw.ui_tree = null;
|
|
258
|
+
raw.ui_tree_error = errMsg;
|
|
154
259
|
}
|
|
155
260
|
if (key === 'readLogStream') {
|
|
156
|
-
|
|
157
|
-
|
|
261
|
+
raw.logs = [];
|
|
262
|
+
raw.logs_error = errMsg;
|
|
158
263
|
}
|
|
159
264
|
}
|
|
160
265
|
}
|
|
161
266
|
// Logs: prefer stream entries, fallback to snapshot logs when empty
|
|
162
267
|
if (includeLogs) {
|
|
163
268
|
try {
|
|
164
|
-
let entries = Array.isArray(
|
|
269
|
+
let entries = Array.isArray(raw.logs) ? raw.logs : [];
|
|
165
270
|
if (!entries || entries.length === 0) {
|
|
166
271
|
const gl = await ToolsObserve.getLogsHandler({ platform, appId, deviceId, lines: logLines });
|
|
167
|
-
const
|
|
272
|
+
const snapshotLogs = (gl && gl.logs) ? gl.logs : [];
|
|
168
273
|
// raw may be structured entries or strings
|
|
169
|
-
entries =
|
|
274
|
+
entries = snapshotLogs.slice(-Math.max(0, logLines)).map(item => {
|
|
170
275
|
if (!item)
|
|
171
276
|
return { timestamp: null, level: 'INFO', message: '' };
|
|
172
277
|
if (typeof item === 'string') {
|
|
@@ -196,15 +301,14 @@ export class ToolsObserve {
|
|
|
196
301
|
return { timestamp: tsNum, level, message: msg };
|
|
197
302
|
});
|
|
198
303
|
}
|
|
199
|
-
|
|
304
|
+
raw.logs = entries;
|
|
200
305
|
}
|
|
201
306
|
catch (e) {
|
|
202
|
-
|
|
203
|
-
|
|
307
|
+
raw.logs = [];
|
|
308
|
+
raw.logs_error = e instanceof Error ? e.message : String(e);
|
|
204
309
|
}
|
|
205
310
|
}
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
return out;
|
|
311
|
+
const semantic = deriveSnapshotSemantic(raw);
|
|
312
|
+
return semantic ? { raw, semantic } : { raw };
|
|
209
313
|
}
|
|
210
314
|
}
|
|
@@ -240,7 +240,7 @@ Failure Handling:
|
|
|
240
240
|
},
|
|
241
241
|
{
|
|
242
242
|
name: 'capture_debug_snapshot',
|
|
243
|
-
description: 'Capture a complete debug snapshot (
|
|
243
|
+
description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
|
|
244
244
|
inputSchema: {
|
|
245
245
|
type: 'object',
|
|
246
246
|
properties: {
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -209,7 +209,78 @@ String-only errors are not allowed, including fallback handler errors.
|
|
|
209
209
|
|
|
210
210
|
Note: string diagnostics may still appear inside structured JSON payloads where explicitly defined by a tool.
|
|
211
211
|
|
|
212
|
-
## 9.
|
|
212
|
+
## 9. Observation Tools (Extended Semantics)
|
|
213
|
+
|
|
214
|
+
Observation tools inspect application state without mutating it.
|
|
215
|
+
|
|
216
|
+
Examples:
|
|
217
|
+
|
|
218
|
+
- `capture_debug_snapshot`
|
|
219
|
+
- `get_screen_fingerprint`
|
|
220
|
+
- `get_network_activity`
|
|
221
|
+
- `get_logs`
|
|
222
|
+
|
|
223
|
+
### 9.1 Snapshot Response Model
|
|
224
|
+
|
|
225
|
+
`capture_debug_snapshot` MUST return a dual-layer response:
|
|
226
|
+
|
|
227
|
+
- `raw`: required object
|
|
228
|
+
- `semantic`: optional object
|
|
229
|
+
|
|
230
|
+
The raw layer is authoritative and MUST remain unchanged from the underlying observation data. It is the source of truth and MUST NOT be interpreted or rewritten.
|
|
231
|
+
|
|
232
|
+
The semantic layer is derived, best-effort, and MUST be generated exclusively from the raw layer.
|
|
233
|
+
|
|
234
|
+
Raw layer contents include:
|
|
235
|
+
|
|
236
|
+
- UI hierarchy or accessibility tree
|
|
237
|
+
- screenshot when available
|
|
238
|
+
- element-level attributes
|
|
239
|
+
- logs and fingerprint/activity observations
|
|
240
|
+
- raw error fields when partial collection fails
|
|
241
|
+
|
|
242
|
+
Semantic layer shape when present:
|
|
243
|
+
|
|
244
|
+
```ts
|
|
245
|
+
{
|
|
246
|
+
screen: string | null,
|
|
247
|
+
signals: Record<string, string | number | boolean> | null,
|
|
248
|
+
actions_available: string[] | null,
|
|
249
|
+
confidence: number,
|
|
250
|
+
warnings: string[]
|
|
251
|
+
}
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Rules:
|
|
255
|
+
|
|
256
|
+
- `confidence` MUST be between 0 and 1
|
|
257
|
+
- `warnings` MUST be present when `semantic` is present
|
|
258
|
+
- `semantic` MAY be omitted entirely when derivation is not reliable
|
|
259
|
+
- `semantic` MUST be treated as unreliable if it conflicts with raw data
|
|
260
|
+
- `actions_available` are hints only and MUST NOT be treated as guaranteed executable actions
|
|
261
|
+
|
|
262
|
+
### 9.2 Agent Usage Contract
|
|
263
|
+
|
|
264
|
+
Agents SHOULD use `semantic` for primary decision-making when present.
|
|
265
|
+
|
|
266
|
+
Agents MUST fall back to `raw` when:
|
|
267
|
+
|
|
268
|
+
- `semantic` is missing
|
|
269
|
+
- `confidence < 0.7`
|
|
270
|
+
- `warnings` is non-empty
|
|
271
|
+
- semantic output conflicts with expected state or raw data
|
|
272
|
+
|
|
273
|
+
`semantic` is for planning only and MUST NOT be used for verification.
|
|
274
|
+
|
|
275
|
+
### 9.3 Relationship to Classification
|
|
276
|
+
|
|
277
|
+
Semantic signals MAY be used as input to `classify_action_outcome`.
|
|
278
|
+
|
|
279
|
+
Semantic output MUST NOT replace classification or verification.
|
|
280
|
+
|
|
281
|
+
Classification remains a supplementary, post-action interpretation mechanism.
|
|
282
|
+
|
|
283
|
+
## 10. Classification
|
|
213
284
|
|
|
214
285
|
Tool: `classify_action_outcome`
|
|
215
286
|
|
|
@@ -223,7 +294,7 @@ Rules:
|
|
|
223
294
|
|
|
224
295
|
It is not a verification mechanism.
|
|
225
296
|
|
|
226
|
-
##
|
|
297
|
+
## 11. Execution Patterns
|
|
227
298
|
|
|
228
299
|
Canonical pattern:
|
|
229
300
|
|
|
@@ -235,7 +306,7 @@ Interpretation:
|
|
|
235
306
|
- `wait_for_screen_change.success` = UI changed
|
|
236
307
|
- `expect_screen.success` = correct outcome verified
|
|
237
308
|
|
|
238
|
-
##
|
|
309
|
+
## 12. Known Deviations
|
|
239
310
|
|
|
240
311
|
Explicitly allowed:
|
|
241
312
|
|
|
@@ -246,7 +317,7 @@ Explicitly allowed:
|
|
|
246
317
|
- `scroll_to_element` outcome-based success (temporary exception)
|
|
247
318
|
- extended runtime fields in `list_devices`
|
|
248
319
|
|
|
249
|
-
##
|
|
320
|
+
## 13. Migration Rules
|
|
250
321
|
|
|
251
322
|
Must change now:
|
|
252
323
|
|
|
@@ -258,6 +329,7 @@ Should align when touched:
|
|
|
258
329
|
- `start_app`, `restart_app`
|
|
259
330
|
- `scroll_to_element`
|
|
260
331
|
- `wait_for_ui`
|
|
332
|
+
- `capture_debug_snapshot`
|
|
261
333
|
|
|
262
334
|
No change required:
|
|
263
335
|
|
|
@@ -266,7 +338,7 @@ No change required:
|
|
|
266
338
|
- `expect_element_visible`
|
|
267
339
|
- `wait_for_screen_change`
|
|
268
340
|
|
|
269
|
-
##
|
|
341
|
+
## 14. Guiding Principles
|
|
270
342
|
|
|
271
343
|
- Actions execute
|
|
272
344
|
- Verification proves
|
package/docs/tools/observe.md
CHANGED
|
@@ -132,24 +132,40 @@ Behavior:
|
|
|
132
132
|
- Returns partial data when components fail and includes per-part error fields (e.g. `screenshot_error`, `ui_tree_error`).
|
|
133
133
|
- Caps logs to `logLines` entries and prefers recent entries.
|
|
134
134
|
- Fast by default: does not wait for new logs and avoids long blocking operations.
|
|
135
|
+
- Returns a dual-layer payload:
|
|
136
|
+
- `raw` is authoritative and contains the underlying observation data unchanged.
|
|
137
|
+
- `semantic` is optional, derived from `raw`, and intended for planning only.
|
|
135
138
|
|
|
136
139
|
Response (example):
|
|
137
140
|
|
|
138
141
|
```json
|
|
139
142
|
{
|
|
140
|
-
"
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
143
|
+
"raw": {
|
|
144
|
+
"timestamp": 1710000000000,
|
|
145
|
+
"reason": "Crash after tapping checkout",
|
|
146
|
+
"activity": "CheckoutActivity",
|
|
147
|
+
"fingerprint": "abc123",
|
|
148
|
+
"screenshot": "<base64 PNG string>",
|
|
149
|
+
"ui_tree": { ... },
|
|
150
|
+
"logs": [ { "timestamp": "2024-03-09T12:00:00.000Z", "level": "ERROR", "tag": "CheckoutViewModel", "pid": 1234, "message": "NullPointerException at CheckoutViewModel" } ]
|
|
151
|
+
},
|
|
152
|
+
"semantic": {
|
|
153
|
+
"screen": "Checkout",
|
|
154
|
+
"signals": {
|
|
155
|
+
"has_error_logs": true,
|
|
156
|
+
"has_clickable_elements": false
|
|
157
|
+
},
|
|
158
|
+
"actions_available": null,
|
|
159
|
+
"confidence": 0.7,
|
|
160
|
+
"warnings": ["error signals present in logs"]
|
|
161
|
+
}
|
|
147
162
|
}
|
|
148
163
|
```
|
|
149
164
|
|
|
150
165
|
Notes:
|
|
151
166
|
- Useful immediately after detecting crashes or unexpected UI behaviour.
|
|
152
167
|
- Do not expect perfect data during a crash; tool is designed to return best-effort context and include errors for failed parts.
|
|
168
|
+
- Treat `semantic` as planning guidance only; `raw` remains the source of truth.
|
|
153
169
|
|
|
154
170
|
---
|
|
155
171
|
|
|
@@ -187,5 +203,5 @@ Start a background adb logcat stream and retrieve parsed NDJSON entries.
|
|
|
187
203
|
read_log_stream response example:
|
|
188
204
|
|
|
189
205
|
```json
|
|
190
|
-
{ "entries": [ { "timestamp": "2026-03-20T...Z", "level": "
|
|
206
|
+
{ "entries": [ { "timestamp": "2026-03-20T...Z", "level": "ERROR", "tag": "AppTag", "pid": 1234, "message": "FATAL EXCEPTION" } ], "crash_summary": { "crash_detected": true } }
|
|
191
207
|
```
|
package/package.json
CHANGED
package/src/observe/index.ts
CHANGED
|
@@ -1,10 +1,146 @@
|
|
|
1
1
|
import { resolveTargetDevice } from '../utils/resolve-device.js'
|
|
2
2
|
import { AndroidObserve } from './android.js'
|
|
3
3
|
import { iOSObserve } from './ios.js'
|
|
4
|
+
import type {
|
|
5
|
+
CaptureDebugSnapshotRawResponse,
|
|
6
|
+
SnapshotSemanticResponse
|
|
7
|
+
} from '../types.js'
|
|
4
8
|
|
|
5
9
|
export { AndroidObserve } from './android.js'
|
|
6
10
|
export { iOSObserve } from './ios.js'
|
|
7
11
|
|
|
12
|
+
/**
 * Loose structural view of a single UI tree element as consumed by the
 * snapshot semantic helpers. Every field is optional because the tree arrives
 * untyped (`ui_tree` is `unknown` on the raw response).
 */
interface SnapshotTreeElementLike {
  // Label candidates, listed in the order collectSnapshotTexts consults them.
  text?: string | null
  contentDescription?: string | null
  contentDesc?: string | null
  accessibilityLabel?: string | null
  resourceId?: string | null
  id?: string | null

  // Element kind descriptors.
  type?: string | null
  class?: string | null

  // Interaction state: an element counts as actionable when it is clickable
  // and `enabled` is not explicitly `false`.
  clickable?: boolean
  enabled?: boolean
  visible?: boolean
}
|
|
25
|
+
|
|
26
|
+
/**
 * Loose structural view of the UI tree stored on the raw snapshot.
 */
interface SnapshotTreeLike {
  /** Screen name reported by the tree itself; preferred over the activity name when present. */
  screen?: string | null
  /** Elements scanned for visible text and clickable labels. */
  elements?: SnapshotTreeElementLike[]
}
|
|
30
|
+
|
|
31
|
+
/**
 * Canonicalises an arbitrary value into a comparison-friendly hint string:
 * stringified, trimmed, whitespace runs collapsed to one space, lower-cased.
 *
 * @param value - Any value; `null` and `undefined` are treated as "no hint".
 * @returns The normalised hint, or `''` when there is nothing to normalise.
 */
function normalizeHint(value: unknown): string {
  if (value == null) {
    return ''
  }
  // After trim() there is no leading/trailing whitespace, so splitting on
  // whitespace runs and re-joining is the same as collapsing them in place.
  const words = String(value).trim().split(/\s+/)
  return words.join(' ').toLowerCase()
}
|
|
35
|
+
|
|
36
|
+
/**
 * Converts an identifier-like or free-text string into a Title Cased label.
 *
 * Underscores and hyphens become spaces, whitespace runs are collapsed, and the
 * first letter or digit of every word is upper-cased.
 *
 * Word starts are detected with Unicode-aware classes rather than `\b\w`, which
 * is ASCII-only and treats an apostrophe as a word boundary. That produced
 * labels such as "What'S New" and "éCran". Here a character is a word start
 * only when it is not preceded by a letter, mark or digit, and is not the
 * letter that follows an in-word apostrophe.
 *
 * @param value - Text to convert.
 * @returns The title-cased text (may be `''`).
 */
function titleCase(value: string): string {
  return value
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    .trim()
    .replace(
      /(?<![\p{L}\p{M}\p{N}])(?<![\p{L}\p{M}\p{N}]['’])[\p{L}\p{N}]/gu,
      (match) => match.toUpperCase()
    )
}
|
|
43
|
+
|
|
44
|
+
/**
 * Derives a short, human-readable screen label from an activity identifier.
 *
 * Takes the segment after the last `.`, strips a trailing `Activity` suffix
 * (kept when stripping would leave nothing), and title-cases the result.
 *
 * @param activity - Activity identifier, e.g. a dotted class name.
 * @returns The label, or `null` when the input is falsy or blank.
 */
function shortActivityName(activity: string | null | undefined): string | null {
  const trimmed = activity ? String(activity).trim() : ''
  if (trimmed === '') {
    return null
  }
  const segments = trimmed.split('.')
  // A trailing dot yields an empty last segment; fall back to the whole string.
  const lastSegment = segments[segments.length - 1] || trimmed
  const stripped = lastSegment.replace(/Activity$/, '')
  return titleCase(stripped || lastSegment)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Collects the distinct normalised labels found in a UI tree snapshot.
 *
 * For each element the label is the first non-blank value among `text`,
 * `contentDescription`, `contentDesc`, `accessibilityLabel`, `resourceId`
 * and `id`. The previous `??` chain only skipped `null`/`undefined`, so an
 * element with `text: ''` and a real `contentDescription` was dropped
 * entirely; blank candidates are now skipped as well.
 *
 * @param tree - UI tree; anything without an `elements` array is treated as empty.
 * @returns De-duplicated labels in first-seen order. `actionables` holds the
 *   labels of elements that are clickable and not explicitly disabled
 *   (`enabled !== false`).
 */
function collectSnapshotTexts(tree: SnapshotTreeLike | null | undefined) {
  const candidateElements = tree?.elements
  const elements = Array.isArray(candidateElements) ? candidateElements : []
  // Sets keep insertion order, so the output order matches first occurrence.
  const texts = new Set<string>()
  const actionables = new Set<string>()

  for (const element of elements) {
    // The tree is cast from `unknown`, so guard against holes at runtime.
    if (!element) continue

    const candidates: unknown[] = [
      element.text,
      element.contentDescription,
      element.contentDesc,
      element.accessibilityLabel,
      element.resourceId,
      element.id
    ]
    let label = ''
    for (const candidate of candidates) {
      label = normalizeHint(candidate)
      if (label) break
    }
    if (!label) continue

    texts.add(label)
    if (element.clickable && element.enabled !== false) {
      actionables.add(label)
    }
  }

  return {
    texts: Array.from(texts),
    actionables: Array.from(actionables)
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Picks the best available screen label for a raw snapshot.
 *
 * Sources are tried in order: `ui_tree.screen`, then the shortened activity
 * name, then the first label collected from the UI tree.
 *
 * @param raw - Raw snapshot layer.
 * @returns The label, or `null` when no source yields one.
 */
function inferSnapshotScreen(raw: CaptureDebugSnapshotRawResponse): string | null {
  const uiTree = raw.ui_tree as SnapshotTreeLike | null | undefined

  const declaredScreen = normalizeHint(uiTree?.screen)
  if (declaredScreen) {
    return titleCase(declaredScreen)
  }

  const fromActivity = shortActivityName(raw.activity)
  if (fromActivity) {
    return fromActivity
  }

  const [firstText] = collectSnapshotTexts(uiTree).texts
  return firstText === undefined ? null : titleCase(firstText)
}
|
|
86
|
+
|
|
87
|
+
/**
 * Derives the optional semantic layer from a raw debug snapshot.
 *
 * Returns `null` when there is nothing to reason about (no screen, activity,
 * UI text or logs), or when no screen could be inferred and the confidence is
 * below 0.3. Otherwise returns the inferred screen, boolean signals, up to ten
 * action hints, a confidence clamped to [0, 1], and a list of warnings.
 *
 * Changes from the previous revision:
 *  - The screen/activity mismatch warning used to compare `ui_tree.screen`
 *    with the full activity string (for example "main" against
 *    "com.example.mainactivity"), so it fired on almost every snapshot. The
 *    warning is now suppressed when the shortened forms agree.
 *  - Log scanning tolerates a missing `logs` array and null entries instead of
 *    throwing.
 *
 * @param raw - Raw snapshot layer (`ui_tree`, `activity`, `logs`, `screenshot`).
 * @returns The semantic layer, or `null` when derivation is not reliable.
 */
function deriveSnapshotSemantic(raw: CaptureDebugSnapshotRawResponse): SnapshotSemanticResponse | null {
  const tree = raw.ui_tree as SnapshotTreeLike | null | undefined
  const logs = Array.isArray(raw.logs) ? raw.logs : []
  const { texts, actionables } = collectSnapshotTexts(tree)
  const screenFromTree = normalizeHint(tree?.screen)
  const activityHint = normalizeHint(raw.activity)
  const screen = inferSnapshotScreen(raw)

  if (!screen && !activityHint && texts.length === 0 && logs.length === 0) return null

  const hasErrorLogs = logs.some((entry) => /error|fatal exception|exception|failed/i.test(String(entry?.message ?? '')))
  const hasLoadingSignals = texts.some((text) => /loading|please wait|spinner|progress/i.test(text))
  const hasPrimaryText = texts.some((text) => /sign in|log in|login|home|checkout|settings|menu|profile|search/i.test(text))
  const hasScreenshot = typeof raw.screenshot === 'string' && raw.screenshot.length > 0
  const hasUiTree = !!tree && Array.isArray(tree.elements)

  const signals: Record<string, string | number | boolean> = {
    has_activity: !!activityHint,
    has_ui_tree: hasUiTree,
    has_screenshot: hasScreenshot,
    has_visible_text: texts.length > 0,
    has_clickable_elements: actionables.length > 0,
    has_error_logs: hasErrorLogs,
    has_loading_signals: hasLoadingSignals,
    has_primary_text: hasPrimaryText
  }

  const warnings: string[] = []
  // Compare like with like: both hints are reduced to their short screen name
  // before a disagreement is reported.
  const treeScreenShort = normalizeHint(shortActivityName(tree?.screen))
  const activityShort = normalizeHint(shortActivityName(raw.activity))
  const hintsDisagree = screenFromTree !== activityHint && treeScreenShort !== activityShort
  if (screenFromTree && activityHint && hintsDisagree) {
    warnings.push('ui_tree.screen and activity hints differ')
  }
  if (!hasUiTree) warnings.push('ui tree unavailable')
  if (!activityHint) warnings.push('activity unavailable')
  if (hasErrorLogs) warnings.push('error signals present in logs')

  // Additive evidence score: positive weights sum to 1.0, error and loading
  // signals subtract from it.
  const evidenceScore =
    (hasUiTree ? 0.35 : 0) +
    (screen ? 0.2 : 0) +
    (activityHint ? 0.15 : 0) +
    (actionables.length > 0 ? 0.15 : 0) +
    (texts.length > 0 ? 0.1 : 0) +
    (hasScreenshot ? 0.05 : 0) +
    (hasErrorLogs ? -0.15 : 0) +
    (hasLoadingSignals ? -0.05 : 0)

  // Round to two decimals to hide floating-point noise, then clamp.
  const confidence = Math.max(0, Math.min(1, Number(evidenceScore.toFixed(2))))

  if (!screen && confidence < 0.3) return null

  return {
    screen,
    signals,
    actions_available: actionables.length > 0 ? actionables.slice(0, 10) : null,
    confidence,
    warnings
  }
}
|
|
143
|
+
|
|
8
144
|
export class ToolsObserve {
|
|
9
145
|
// Resolve a target device and return the appropriate observe instance and resolved info.
|
|
10
146
|
private static async resolveObserve(platform?: 'android' | 'ios', deviceId?: string, appId?: string) {
|
|
@@ -103,7 +239,7 @@ export class ToolsObserve {
|
|
|
103
239
|
|
|
104
240
|
static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId }: { reason?: string; includeLogs?: boolean; logLines?: number; platform?: 'android' | 'ios'; appId?: string; deviceId?: string; sessionId?: string } = {}) {
|
|
105
241
|
const timestamp = Date.now()
|
|
106
|
-
const
|
|
242
|
+
const raw: CaptureDebugSnapshotRawResponse = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] }
|
|
107
243
|
|
|
108
244
|
// Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
|
|
109
245
|
const sid = sessionId || 'default'
|
|
@@ -125,40 +261,40 @@ export class ToolsObserve {
|
|
|
125
261
|
if (res.status === 'fulfilled') {
|
|
126
262
|
const val = res.value
|
|
127
263
|
if (key === 'screenshot') {
|
|
128
|
-
|
|
264
|
+
raw.screenshot = val && val.screenshot ? val.screenshot : null
|
|
129
265
|
} else if (key === 'currentScreen') {
|
|
130
|
-
|
|
266
|
+
raw.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : raw.activity || ''
|
|
131
267
|
} else if (key === 'fingerprint') {
|
|
132
|
-
if (val && val.fingerprint)
|
|
133
|
-
if (val && val.activity)
|
|
134
|
-
if (val && val.error)
|
|
268
|
+
if (val && val.fingerprint) raw.fingerprint = val.fingerprint
|
|
269
|
+
if (val && val.activity) raw.activity = raw.activity || val.activity
|
|
270
|
+
if (val && val.error) raw.fingerprint_error = val.error
|
|
135
271
|
} else if (key === 'uiTree') {
|
|
136
|
-
|
|
137
|
-
if (val && val.error)
|
|
272
|
+
raw.ui_tree = val
|
|
273
|
+
if (val && val.error) raw.ui_tree_error = val.error
|
|
138
274
|
} else if (key === 'readLogStream') {
|
|
139
275
|
// handle below after evaluating fallback
|
|
140
276
|
// keep the stream entries on raw.logs; the fallback to snapshot logs is evaluated below
|
|
141
|
-
|
|
277
|
+
raw.logs = Array.isArray(val?.entries) ? val.entries : []
|
|
142
278
|
}
|
|
143
279
|
} else {
|
|
144
280
|
const errMsg = res.reason instanceof Error ? res.reason.message : String(res.reason)
|
|
145
|
-
if (key === 'screenshot')
|
|
146
|
-
if (key === 'currentScreen')
|
|
147
|
-
if (key === 'fingerprint') {
|
|
148
|
-
if (key === 'uiTree') {
|
|
149
|
-
if (key === 'readLogStream') {
|
|
281
|
+
if (key === 'screenshot') raw.screenshot_error = errMsg
|
|
282
|
+
if (key === 'currentScreen') raw.activity_error = errMsg
|
|
283
|
+
if (key === 'fingerprint') { raw.fingerprint = null; raw.fingerprint_error = errMsg }
|
|
284
|
+
if (key === 'uiTree') { raw.ui_tree = null; raw.ui_tree_error = errMsg }
|
|
285
|
+
if (key === 'readLogStream') { raw.logs = []; raw.logs_error = errMsg }
|
|
150
286
|
}
|
|
151
287
|
}
|
|
152
288
|
|
|
153
289
|
// Logs: prefer stream entries, fallback to snapshot logs when empty
|
|
154
290
|
if (includeLogs) {
|
|
155
291
|
try {
|
|
156
|
-
let entries: any[] = Array.isArray(
|
|
292
|
+
let entries: any[] = Array.isArray(raw.logs) ? raw.logs : []
|
|
157
293
|
if (!entries || entries.length === 0) {
|
|
158
294
|
const gl = await ToolsObserve.getLogsHandler({ platform, appId, deviceId, lines: logLines })
|
|
159
|
-
const
|
|
295
|
+
const snapshotLogs: any[] = (gl && (gl as any).logs) ? (gl as any).logs : []
|
|
160
296
|
// raw may be structured entries or strings
|
|
161
|
-
entries =
|
|
297
|
+
entries = snapshotLogs.slice(-Math.max(0, logLines)).map(item => {
|
|
162
298
|
if (!item) return { timestamp: null, level: 'INFO', message: '' }
|
|
163
299
|
if (typeof item === 'string') {
|
|
164
300
|
const level = /\b(FATAL EXCEPTION|ERROR| E )\b/i.test(item) ? 'ERROR' : /\b(WARN| W )\b/i.test(item) ? 'WARN' : 'INFO'
|
|
@@ -186,16 +322,14 @@ export class ToolsObserve {
|
|
|
186
322
|
})
|
|
187
323
|
}
|
|
188
324
|
|
|
189
|
-
|
|
325
|
+
raw.logs = entries
|
|
190
326
|
} catch (e) {
|
|
191
|
-
|
|
192
|
-
|
|
327
|
+
raw.logs = []
|
|
328
|
+
raw.logs_error = e instanceof Error ? e.message : String(e)
|
|
193
329
|
}
|
|
194
330
|
}
|
|
195
331
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
return out
|
|
332
|
+
const semantic = deriveSnapshotSemantic(raw)
|
|
333
|
+
return semantic ? { raw, semantic } : { raw }
|
|
200
334
|
}
|
|
201
335
|
}
|
|
@@ -240,7 +240,7 @@ Failure Handling:
|
|
|
240
240
|
},
|
|
241
241
|
{
|
|
242
242
|
name: 'capture_debug_snapshot',
|
|
243
|
-
description: 'Capture a complete debug snapshot (
|
|
243
|
+
description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
|
|
244
244
|
inputSchema: {
|
|
245
245
|
type: 'object',
|
|
246
246
|
properties: {
|
package/src/types.ts
CHANGED
|
@@ -137,6 +137,35 @@ export interface GetCurrentScreenResponse {
|
|
|
137
137
|
error?: string;
|
|
138
138
|
}
|
|
139
139
|
|
|
140
|
+
/**
 * Derived, best-effort interpretation of a debug snapshot.
 * It is computed from the raw layer only.
 */
export interface SnapshotSemanticResponse {
  /** Inferred screen label, or `null` when none could be determined. */
  screen: string | null;
  /** Named evidence flags (e.g. `has_ui_tree`, `has_error_logs`). */
  signals: Record<string, string | number | boolean> | null;
  /** Labels of clickable elements, offered as hints; `null` when there are none. */
  actions_available: string[] | null;
  /** Evidence score in the range 0 to 1. */
  confidence: number;
  /** Caveats about the derivation; empty when there are none. */
  warnings: string[];
}
|
|
147
|
+
|
|
148
|
+
/**
 * Raw observation layer of `capture_debug_snapshot`.
 *
 * Each observation is collected independently; when one fails, its value keeps
 * an empty default and the matching `*_error` field carries the message.
 */
export interface CaptureDebugSnapshotRawResponse {
  /** Capture start time as returned by `Date.now()`. */
  timestamp: number;
  /** Caller-supplied reason for the capture; `''` when omitted. */
  reason: string;
  /** Current activity/screen identifier, when it could be resolved. */
  activity: string | null;
  /** Screen fingerprint, when available. */
  fingerprint: string | null;
  /** Screenshot payload, when available. */
  screenshot: string | null;
  /** UI hierarchy exactly as returned by the UI tree handler. */
  ui_tree: unknown | null;
  /** Log entries: stream entries when present, otherwise the snapshot-log fallback. */
  logs: StructuredLogEntry[];
  // NOTE(review): `device` is not populated by the capture handler hunks visible here — confirm the producer.
  device?: DeviceInfo;

  // Per-observation failure messages; present only when that part failed.
  screenshot_error?: string;
  activity_error?: string;
  fingerprint_error?: string;
  ui_tree_error?: string;
  logs_error?: string;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Dual-layer payload returned by `capture_debug_snapshot`.
 */
export interface CaptureDebugSnapshotResponse {
  /** Authoritative observation data; always present. */
  raw: CaptureDebugSnapshotRawResponse;
  /** Derived planning hints; left out when no reliable derivation was possible. */
  semantic?: SnapshotSemanticResponse | null;
}
|
|
168
|
+
|
|
140
169
|
export interface WaitForElementResponse {
|
|
141
170
|
device: DeviceInfo;
|
|
142
171
|
found: boolean;
|
|
@@ -35,8 +35,11 @@ async function run() {
|
|
|
35
35
|
|
|
36
36
|
const res1: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: true, logLines: 50, sessionId: 's1' })
|
|
37
37
|
console.log('res1:', JSON.stringify(res1, null, 2))
|
|
38
|
-
const pass1 = res1 && res1.screenshot === 'BASE64PNG' && res1.activity && res1.fingerprint === 'abc123' && Array.isArray(res1.logs) && res1.logs.length === 1
|
|
38
|
+
const pass1 = res1 && res1.raw && res1.raw.screenshot === 'BASE64PNG' && res1.raw.activity && res1.raw.fingerprint === 'abc123' && Array.isArray(res1.raw.logs) && res1.raw.logs.length === 1
|
|
39
39
|
assert.ok(pass1, 'captureDebugSnapshot should aggregate successful handler results')
|
|
40
|
+
assert.strictEqual(res1.semantic.screen, 'Main')
|
|
41
|
+
assert.strictEqual(res1.semantic.confidence >= 0.7, true)
|
|
42
|
+
assert.deepStrictEqual(res1.semantic.actions_available, null)
|
|
40
43
|
console.log('Test 1:', pass1 ? 'PASS' : 'FAIL')
|
|
41
44
|
|
|
42
45
|
// Restore handlers before next test
|
|
@@ -55,7 +58,7 @@ async function run() {
|
|
|
55
58
|
|
|
56
59
|
const res2: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: true, logLines: 10, appId: 'com.example' })
|
|
57
60
|
console.log('res2:', JSON.stringify(res2, null, 2))
|
|
58
|
-
const pass2 = res2 && res2.screenshot_error && res2.ui_tree_error && Array.isArray(res2.logs) && res2.logs.length === 2
|
|
61
|
+
const pass2 = res2 && res2.raw && res2.raw.screenshot_error && res2.raw.ui_tree_error && Array.isArray(res2.raw.logs) && res2.raw.logs.length === 2
|
|
59
62
|
assert.ok(pass2, 'captureDebugSnapshot should surface partial failures and fallback logs')
|
|
60
63
|
console.log('Test 2:', pass2 ? 'PASS' : 'FAIL')
|
|
61
64
|
|
|
@@ -76,7 +79,7 @@ async function run() {
|
|
|
76
79
|
|
|
77
80
|
const res3: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: false })
|
|
78
81
|
console.log('res3:', JSON.stringify(res3, null, 2))
|
|
79
|
-
const pass3 = res3 && typeof res3.logs !== 'undefined' && res3.logs.length === 0
|
|
82
|
+
const pass3 = res3 && res3.raw && typeof res3.raw.logs !== 'undefined' && res3.raw.logs.length === 0
|
|
80
83
|
assert.ok(pass3, 'captureDebugSnapshot should return an empty logs array when includeLogs is false')
|
|
81
84
|
console.log('Test 3:', pass3 ? 'PASS' : 'FAIL')
|
|
82
85
|
|
|
@@ -32,6 +32,8 @@ async function run() {
|
|
|
32
32
|
assert(captureDebugSnapshot, 'capture_debug_snapshot should be registered')
|
|
33
33
|
assert.strictEqual((captureDebugSnapshot as any).inputSchema.properties.includeLogs.default, true)
|
|
34
34
|
assert.strictEqual((captureDebugSnapshot as any).inputSchema.properties.logLines.default, 200)
|
|
35
|
+
assert.match((captureDebugSnapshot as any).description, /raw observation layer/i)
|
|
36
|
+
assert.match((captureDebugSnapshot as any).description, /optional derived semantic layer/i)
|
|
35
37
|
|
|
36
38
|
const startLogStream = toolDefinitions.find((tool) => tool.name === 'start_log_stream')
|
|
37
39
|
assert(startLogStream, 'start_log_stream should be registered')
|
|
@@ -16,6 +16,7 @@ async function run() {
|
|
|
16
16
|
const originalCaptureScreenshotHandler = (ToolsObserve as any).captureScreenshotHandler
|
|
17
17
|
const originalGetUITreeHandler = (ToolsObserve as any).getUITreeHandler
|
|
18
18
|
const originalGetScreenFingerprintHandler = (ToolsObserve as any).getScreenFingerprintHandler
|
|
19
|
+
const originalCaptureDebugSnapshotHandler = (ToolsObserve as any).captureDebugSnapshotHandler
|
|
19
20
|
|
|
20
21
|
try {
|
|
21
22
|
;(ToolsManage as any).installAppHandler = async () => ({
|
|
@@ -181,6 +182,32 @@ async function run() {
|
|
|
181
182
|
assert.strictEqual(uiTreePayload.resolution.height, 2400)
|
|
182
183
|
assert.strictEqual(uiTreePayload.elements[0].text, 'Login')
|
|
183
184
|
|
|
185
|
+
;(ToolsObserve as any).captureDebugSnapshotHandler = async () => ({
|
|
186
|
+
raw: {
|
|
187
|
+
timestamp: 1710000000000,
|
|
188
|
+
reason: 'manual',
|
|
189
|
+
activity: 'com.example.MainActivity',
|
|
190
|
+
fingerprint: 'fp_raw',
|
|
191
|
+
screenshot: 'base64',
|
|
192
|
+
ui_tree: { screen: 'Home', elements: [] },
|
|
193
|
+
logs: [],
|
|
194
|
+
device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true }
|
|
195
|
+
},
|
|
196
|
+
semantic: {
|
|
197
|
+
screen: 'Home',
|
|
198
|
+
signals: { has_activity: true },
|
|
199
|
+
actions_available: ['open settings'],
|
|
200
|
+
confidence: 0.8,
|
|
201
|
+
warnings: []
|
|
202
|
+
}
|
|
203
|
+
})
|
|
204
|
+
|
|
205
|
+
const snapshotResponse = await handleToolCall('capture_debug_snapshot', { platform: 'android' })
|
|
206
|
+
const snapshotPayload = JSON.parse((snapshotResponse as any).content[0].text)
|
|
207
|
+
assert.strictEqual(snapshotPayload.raw.fingerprint, 'fp_raw')
|
|
208
|
+
assert.strictEqual(snapshotPayload.semantic.screen, 'Home')
|
|
209
|
+
assert.strictEqual(snapshotPayload.semantic.confidence, 0.8)
|
|
210
|
+
|
|
184
211
|
console.log('server response-shape tests passed')
|
|
185
212
|
} finally {
|
|
186
213
|
;(ToolsManage as any).installAppHandler = originalInstallAppHandler
|
|
@@ -193,6 +220,7 @@ async function run() {
|
|
|
193
220
|
;(ToolsObserve as any).captureScreenshotHandler = originalCaptureScreenshotHandler
|
|
194
221
|
;(ToolsObserve as any).getUITreeHandler = originalGetUITreeHandler
|
|
195
222
|
;(ToolsObserve as any).getScreenFingerprintHandler = originalGetScreenFingerprintHandler
|
|
223
|
+
;(ToolsObserve as any).captureDebugSnapshotHandler = originalCaptureDebugSnapshotHandler
|
|
196
224
|
}
|
|
197
225
|
}
|
|
198
226
|
|