mobile-debug-mcp 0.24.3 → 0.24.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/observe/index.js +130 -26
- package/dist/server/tool-definitions.js +6 -3
- package/docs/CHANGELOG.md +6 -0
- package/docs/specs/mcp-tooling-spec-v1.md +86 -5
- package/docs/tools/interact.md +24 -0
- package/docs/tools/observe.md +23 -7
- package/package.json +1 -1
- package/src/observe/index.ts +158 -24
- package/src/server/tool-definitions.ts +6 -3
- package/src/types.ts +29 -0
- package/test/unit/observe/capture_debug_snapshot.test.ts +6 -3
- package/test/unit/server/contract.test.ts +15 -0
- package/test/unit/server/response_shapes.test.ts +28 -0
package/dist/observe/index.js
CHANGED
|
@@ -3,6 +3,111 @@ import { AndroidObserve } from './android.js';
|
|
|
3
3
|
import { iOSObserve } from './ios.js';
|
|
4
4
|
export { AndroidObserve } from './android.js';
|
|
5
5
|
export { iOSObserve } from './ios.js';
|
|
6
|
+
/**
 * Canonicalizes a hint value so that hints can be compared as plain strings.
 *
 * @param {*} value - Any value; null and undefined are treated as "no hint".
 * @returns {string} The value coerced with String(), trimmed, with every run of
 *   whitespace collapsed to a single space, and lowercased; '' for null/undefined.
 */
function normalizeHint(value) {
    // Loose equality against null matches exactly null and undefined.
    if (value == null) {
        return '';
    }
    const collapsed = String(value).trim().replace(/\s+/g, ' ');
    return collapsed.toLowerCase();
}
|
|
11
|
+
/**
 * Produces a display label from an identifier-like string.
 *
 * Underscores and hyphens become spaces, whitespace runs collapse to one
 * space, the result is trimmed, and the first character after every regex
 * word boundary is uppercased. Existing capital letters are left untouched.
 *
 * @param {string} value - Text to convert.
 * @returns {string} The title-cased label.
 */
function titleCase(value) {
    const spaced = value.replace(/[_-]+/g, ' ');
    const tidy = spaced.replace(/\s+/g, ' ').trim();
    return tidy.replace(/\b\w/g, (firstChar) => firstChar.toUpperCase());
}
|
|
18
|
+
/**
 * Derives a short, human-readable screen name from an activity identifier.
 *
 * Takes the text after the last '.', strips a trailing "Activity" suffix, and
 * title-cases the remainder. If stripping the suffix leaves nothing, the
 * unstripped segment is used instead.
 *
 * @param {*} activity - Activity identifier, possibly dot-qualified.
 * @returns {string|null} The short name, or null when the input is falsy or blank.
 */
function shortActivityName(activity) {
    const trimmed = activity ? String(activity).trim() : '';
    if (trimmed === '') {
        return null;
    }
    const segments = trimmed.split('.');
    // An input ending in '.' yields an empty last segment; fall back to the whole string.
    const lastSegment = segments[segments.length - 1] || trimmed;
    const stem = lastSegment.replace(/Activity$/, '');
    return titleCase(stem || lastSegment);
}
|
|
28
|
+
/**
 * Collects the distinct normalized labels found in a UI tree.
 *
 * For each element the first label candidate that is non-empty after
 * normalization is used, in this priority order: text, contentDescription,
 * contentDesc, accessibilityLabel, resourceId, id. Elements with no usable
 * label are skipped.
 *
 * @param {{elements?: Array<object>}|null|undefined} tree - UI tree; anything
 *   without an `elements` array is treated as empty.
 * @returns {{texts: string[], actionables: string[]}} `texts` holds every
 *   distinct label in first-seen order; `actionables` holds the labels of
 *   elements that are clickable and not explicitly disabled.
 */
function collectSnapshotTexts(tree) {
    const elements = Array.isArray(tree?.elements) ? tree.elements : [];
    const texts = new Set();
    const actionables = new Set();
    for (const element of elements) {
        const candidates = [
            element?.text,
            element?.contentDescription,
            element?.contentDesc,
            element?.accessibilityLabel,
            element?.resourceId,
            element?.id
        ];
        // A `??` chain would stop at an empty-string `text` and never reach the
        // description/label fields, so pick the first candidate that is
        // non-empty after normalization instead.
        let text = '';
        for (const candidate of candidates) {
            text = normalizeHint(candidate);
            if (text)
                break;
        }
        if (!text)
            continue;
        texts.add(text);
        // Only `enabled === false` disables an element; a missing flag counts as enabled.
        if (element?.clickable && element?.enabled !== false) {
            actionables.add(text);
        }
    }
    return {
        texts: Array.from(texts),
        actionables: Array.from(actionables)
    };
}
|
|
46
|
+
/**
 * Picks the best available screen name for a raw snapshot.
 *
 * Sources are tried in order and the first one that yields a value wins:
 *   1. `raw.ui_tree.screen`, normalized and title-cased;
 *   2. the short name derived from `raw.activity`;
 *   3. the first label collected from the UI tree, title-cased.
 *
 * @param {object} raw - Raw snapshot; `ui_tree` and `activity` are read.
 * @returns {string|null} The inferred screen name, or null if no source has one.
 */
function inferSnapshotScreen(raw) {
    const tree = raw.ui_tree;
    const explicitScreen = normalizeHint(tree?.screen);
    if (explicitScreen) {
        return titleCase(explicitScreen);
    }
    const fromActivity = shortActivityName(raw.activity);
    if (fromActivity) {
        return fromActivity;
    }
    const labels = collectSnapshotTexts(tree).texts;
    return labels.length > 0 ? titleCase(labels[0]) : null;
}
|
|
59
|
+
/**
 * Builds the optional, best-effort semantic layer from a raw debug snapshot.
 * The raw object is only read, never modified.
 *
 * @param {object} raw - Raw snapshot; `ui_tree`, `activity`, `screenshot` and
 *   `logs` are read.
 * @returns {{screen: (string|null), signals: object, actions_available: (string[]|null),
 *   confidence: number, warnings: string[]}|null} The semantic summary, or null
 *   when there is nothing to derive from or when no screen could be inferred
 *   and confidence is below 0.3.
 */
function deriveSnapshotSemantic(raw) {
    const tree = raw.ui_tree;
    // Tolerate a missing or non-array `logs` instead of throwing on `.length` / `.some`.
    const logs = Array.isArray(raw.logs) ? raw.logs : [];
    const { texts, actionables } = collectSnapshotTexts(tree);
    const screenFromTree = normalizeHint(tree?.screen);
    const activityHint = normalizeHint(raw.activity);
    const activityScreen = normalizeHint(shortActivityName(raw.activity));
    const screen = inferSnapshotScreen(raw);
    if (!screen && !activityHint && texts.length === 0 && logs.length === 0)
        return null;
    const errorPattern = /error|fatal exception|exception|failed/i;
    const loadingPattern = /loading|please wait|spinner|progress/i;
    const primaryPattern = /sign in|log in|login|home|checkout|settings|menu|profile|search/i;
    const hasErrorLogs = logs.some((entry) => {
        // Entries may be plain strings or objects carrying `message`; null entries are skipped.
        const message = typeof entry === 'string' ? entry : entry?.message;
        return message != null && errorPattern.test(String(message));
    });
    const hasLoadingSignals = texts.some((text) => loadingPattern.test(text));
    const hasPrimaryText = texts.some((text) => primaryPattern.test(text));
    const hasScreenshot = typeof raw.screenshot === 'string' && raw.screenshot.length > 0;
    const hasUiTree = !!tree && Array.isArray(tree.elements);
    const signals = {
        has_activity: !!activityHint,
        has_ui_tree: hasUiTree,
        has_screenshot: hasScreenshot,
        has_visible_text: texts.length > 0,
        has_clickable_elements: actionables.length > 0,
        has_error_logs: hasErrorLogs,
        has_loading_signals: hasLoadingSignals,
        has_primary_text: hasPrimaryText
    };
    const warnings = [];
    // The tree's screen name is compared against both the full activity string
    // and its short form. Comparing only against the full string would warn
    // even when the two agree (e.g. "checkout" vs "com.app.checkoutactivity").
    if (screenFromTree && activityHint && screenFromTree !== activityHint && screenFromTree !== activityScreen) {
        warnings.push('ui_tree.screen and activity hints differ');
    }
    if (!hasUiTree)
        warnings.push('ui tree unavailable');
    if (!activityHint)
        warnings.push('activity unavailable');
    if (hasErrorLogs)
        warnings.push('error signals present in logs');
    // Weighted evidence: the positive weights sum to 1.0; error logs and
    // loading indicators subtract from the total.
    const evidenceScore = (hasUiTree ? 0.35 : 0) +
        (screen ? 0.2 : 0) +
        (activityHint ? 0.15 : 0) +
        (actionables.length > 0 ? 0.15 : 0) +
        (texts.length > 0 ? 0.1 : 0) +
        (hasScreenshot ? 0.05 : 0) +
        (hasErrorLogs ? -0.15 : 0) +
        (hasLoadingSignals ? -0.05 : 0);
    // Round to two decimals to hide floating-point noise, then clamp to [0, 1].
    const confidence = Math.max(0, Math.min(1, Number(evidenceScore.toFixed(2))));
    if (!screen && confidence < 0.3)
        return null;
    return {
        screen,
        signals,
        // At most ten action hints are reported; null when nothing is clickable.
        actions_available: actionables.length > 0 ? actionables.slice(0, 10) : null,
        confidence,
        warnings
    };
}
|
|
6
111
|
export class ToolsObserve {
|
|
7
112
|
// Resolve a target device and return the appropriate observe instance and resolved info.
|
|
8
113
|
static async resolveObserve(platform, deviceId, appId) {
|
|
@@ -95,7 +200,7 @@ export class ToolsObserve {
|
|
|
95
200
|
}
|
|
96
201
|
static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId } = {}) {
|
|
97
202
|
const timestamp = Date.now();
|
|
98
|
-
const
|
|
203
|
+
const raw = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] };
|
|
99
204
|
// Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
|
|
100
205
|
const sid = sessionId || 'default';
|
|
101
206
|
const tasks = {
|
|
@@ -114,59 +219,59 @@ export class ToolsObserve {
|
|
|
114
219
|
if (res.status === 'fulfilled') {
|
|
115
220
|
const val = res.value;
|
|
116
221
|
if (key === 'screenshot') {
|
|
117
|
-
|
|
222
|
+
raw.screenshot = val && val.screenshot ? val.screenshot : null;
|
|
118
223
|
}
|
|
119
224
|
else if (key === 'currentScreen') {
|
|
120
|
-
|
|
225
|
+
raw.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : raw.activity || '';
|
|
121
226
|
}
|
|
122
227
|
else if (key === 'fingerprint') {
|
|
123
228
|
if (val && val.fingerprint)
|
|
124
|
-
|
|
229
|
+
raw.fingerprint = val.fingerprint;
|
|
125
230
|
if (val && val.activity)
|
|
126
|
-
|
|
231
|
+
raw.activity = raw.activity || val.activity;
|
|
127
232
|
if (val && val.error)
|
|
128
|
-
|
|
233
|
+
raw.fingerprint_error = val.error;
|
|
129
234
|
}
|
|
130
235
|
else if (key === 'uiTree') {
|
|
131
|
-
|
|
236
|
+
raw.ui_tree = val;
|
|
132
237
|
if (val && val.error)
|
|
133
|
-
|
|
238
|
+
raw.ui_tree_error = val.error;
|
|
134
239
|
}
|
|
135
240
|
else if (key === 'readLogStream') {
|
|
136
241
|
// handle below after evaluating fallback
|
|
137
242
|
// store stream entries on raw.logs; the getLogs fallback below only runs when this is empty
|
|
138
|
-
|
|
243
|
+
raw.logs = Array.isArray(val?.entries) ? val.entries : [];
|
|
139
244
|
}
|
|
140
245
|
}
|
|
141
246
|
else {
|
|
142
247
|
const errMsg = res.reason instanceof Error ? res.reason.message : String(res.reason);
|
|
143
248
|
if (key === 'screenshot')
|
|
144
|
-
|
|
249
|
+
raw.screenshot_error = errMsg;
|
|
145
250
|
if (key === 'currentScreen')
|
|
146
|
-
|
|
251
|
+
raw.activity_error = errMsg;
|
|
147
252
|
if (key === 'fingerprint') {
|
|
148
|
-
|
|
149
|
-
|
|
253
|
+
raw.fingerprint = null;
|
|
254
|
+
raw.fingerprint_error = errMsg;
|
|
150
255
|
}
|
|
151
256
|
if (key === 'uiTree') {
|
|
152
|
-
|
|
153
|
-
|
|
257
|
+
raw.ui_tree = null;
|
|
258
|
+
raw.ui_tree_error = errMsg;
|
|
154
259
|
}
|
|
155
260
|
if (key === 'readLogStream') {
|
|
156
|
-
|
|
157
|
-
|
|
261
|
+
raw.logs = [];
|
|
262
|
+
raw.logs_error = errMsg;
|
|
158
263
|
}
|
|
159
264
|
}
|
|
160
265
|
}
|
|
161
266
|
// Logs: prefer stream entries, fallback to snapshot logs when empty
|
|
162
267
|
if (includeLogs) {
|
|
163
268
|
try {
|
|
164
|
-
let entries = Array.isArray(
|
|
269
|
+
let entries = Array.isArray(raw.logs) ? raw.logs : [];
|
|
165
270
|
if (!entries || entries.length === 0) {
|
|
166
271
|
const gl = await ToolsObserve.getLogsHandler({ platform, appId, deviceId, lines: logLines });
|
|
167
|
-
const
|
|
272
|
+
const snapshotLogs = (gl && gl.logs) ? gl.logs : [];
|
|
168
273
|
// snapshotLogs items may be structured entries or strings
|
|
169
|
-
entries =
|
|
274
|
+
entries = snapshotLogs.slice(-Math.max(0, logLines)).map(item => {
|
|
170
275
|
if (!item)
|
|
171
276
|
return { timestamp: null, level: 'INFO', message: '' };
|
|
172
277
|
if (typeof item === 'string') {
|
|
@@ -196,15 +301,14 @@ export class ToolsObserve {
|
|
|
196
301
|
return { timestamp: tsNum, level, message: msg };
|
|
197
302
|
});
|
|
198
303
|
}
|
|
199
|
-
|
|
304
|
+
raw.logs = entries;
|
|
200
305
|
}
|
|
201
306
|
catch (e) {
|
|
202
|
-
|
|
203
|
-
|
|
307
|
+
raw.logs = [];
|
|
308
|
+
raw.logs_error = e instanceof Error ? e.message : String(e);
|
|
204
309
|
}
|
|
205
310
|
}
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
return out;
|
|
311
|
+
const semantic = deriveSnapshotSemantic(raw);
|
|
312
|
+
return semantic ? { raw, semantic } : { raw };
|
|
209
313
|
}
|
|
210
314
|
}
|
|
@@ -240,7 +240,7 @@ Failure Handling:
|
|
|
240
240
|
},
|
|
241
241
|
{
|
|
242
242
|
name: 'capture_debug_snapshot',
|
|
243
|
-
description: 'Capture a complete debug snapshot (
|
|
243
|
+
description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
|
|
244
244
|
inputSchema: {
|
|
245
245
|
type: 'object',
|
|
246
246
|
properties: {
|
|
@@ -344,6 +344,7 @@ Capabilities:
|
|
|
344
344
|
Constraints:
|
|
345
345
|
- Does not verify correctness of the resulting state
|
|
346
346
|
- Must not be used alone to confirm action success when an applicable expect_* tool exists
|
|
347
|
+
- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
|
|
347
348
|
|
|
348
349
|
Recommended Usage:
|
|
349
350
|
1. Capture or define the expected outcome
|
|
@@ -835,6 +836,8 @@ Failure Handling:
|
|
|
835
836
|
description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
|
|
836
837
|
|
|
837
838
|
MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
|
|
839
|
+
Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
|
|
840
|
+
For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
|
|
838
841
|
|
|
839
842
|
HOW TO GATHER INPUTS before calling:
|
|
840
843
|
1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
|
|
@@ -868,7 +871,7 @@ BEHAVIOUR after outcome:
|
|
|
868
871
|
},
|
|
869
872
|
networkRequests: {
|
|
870
873
|
type: 'array',
|
|
871
|
-
description: 'Pass this only after calling get_network_activity as instructed by nextAction.
|
|
874
|
+
description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
|
|
872
875
|
items: {
|
|
873
876
|
type: 'object',
|
|
874
877
|
properties: {
|
|
@@ -890,7 +893,7 @@ BEHAVIOUR after outcome:
|
|
|
890
893
|
name: 'get_network_activity',
|
|
891
894
|
description: `Returns structured network events captured from platform logs since the last action.
|
|
892
895
|
|
|
893
|
-
Call this
|
|
896
|
+
Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
|
|
894
897
|
Do not call more than once per action.
|
|
895
898
|
|
|
896
899
|
Events are filtered to significant (non-background) requests only.
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -36,6 +36,14 @@ It does not apply to:
|
|
|
36
36
|
- observation-only flows
|
|
37
37
|
- non-verifiable or exploratory actions
|
|
38
38
|
|
|
39
|
+
Outcome-specific guidance:
|
|
40
|
+
|
|
41
|
+
- visible navigation expected -> `wait_for_screen_change` (optional) -> `expect_screen`
|
|
42
|
+
- local UI change expected -> `wait_for_ui` (optional) -> `expect_element_visible`
|
|
43
|
+
- backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `get_network_activity` immediately after the action and `classify_action_outcome` with the observed requests
|
|
44
|
+
|
|
45
|
+
For backend/API activity, `wait_for_screen_change` is not the right verification tool unless a visible transition is also expected.
|
|
46
|
+
|
|
39
47
|
## 4. Action Tools
|
|
40
48
|
|
|
41
49
|
### 4.1 Definition
|
|
@@ -201,7 +209,78 @@ String-only errors are not allowed, including fallback handler errors.
|
|
|
201
209
|
|
|
202
210
|
Note: string diagnostics may still appear inside structured JSON payloads where explicitly defined by a tool.
|
|
203
211
|
|
|
204
|
-
## 9.
|
|
212
|
+
## 9. Observation Tools (Extended Semantics)
|
|
213
|
+
|
|
214
|
+
Observation tools inspect application state without mutating it.
|
|
215
|
+
|
|
216
|
+
Examples:
|
|
217
|
+
|
|
218
|
+
- `capture_debug_snapshot`
|
|
219
|
+
- `get_screen_fingerprint`
|
|
220
|
+
- `get_network_activity`
|
|
221
|
+
- `get_logs`
|
|
222
|
+
|
|
223
|
+
### 9.1 Snapshot Response Model
|
|
224
|
+
|
|
225
|
+
`capture_debug_snapshot` MUST return a dual-layer response:
|
|
226
|
+
|
|
227
|
+
- `raw`: required object
|
|
228
|
+
- `semantic`: optional object
|
|
229
|
+
|
|
230
|
+
The raw layer is authoritative and MUST remain unchanged from the underlying observation data. It is the source of truth and MUST NOT be interpreted or rewritten.
|
|
231
|
+
|
|
232
|
+
The semantic layer is derived, best-effort, and MUST be generated exclusively from the raw layer.
|
|
233
|
+
|
|
234
|
+
Raw layer contents include:
|
|
235
|
+
|
|
236
|
+
- UI hierarchy or accessibility tree
|
|
237
|
+
- screenshot when available
|
|
238
|
+
- element-level attributes
|
|
239
|
+
- logs and fingerprint/activity observations
|
|
240
|
+
- raw error fields when partial collection fails
|
|
241
|
+
|
|
242
|
+
Semantic layer shape when present:
|
|
243
|
+
|
|
244
|
+
```ts
|
|
245
|
+
{
|
|
246
|
+
screen: string | null,
|
|
247
|
+
signals: Record<string, string | number | boolean> | null,
|
|
248
|
+
actions_available: string[] | null,
|
|
249
|
+
confidence: number,
|
|
250
|
+
warnings: string[]
|
|
251
|
+
}
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Rules:
|
|
255
|
+
|
|
256
|
+
- `confidence` MUST be between 0 and 1
|
|
257
|
+
- `warnings` MUST be present when `semantic` is present
|
|
258
|
+
- `semantic` MAY be omitted entirely when derivation is not reliable
|
|
259
|
+
- `semantic` MUST be treated as unreliable if it conflicts with raw data
|
|
260
|
+
- `actions_available` are hints only and MUST NOT be treated as guaranteed executable actions
|
|
261
|
+
|
|
262
|
+
### 9.2 Agent Usage Contract
|
|
263
|
+
|
|
264
|
+
Agents SHOULD use `semantic` for primary decision-making when present.
|
|
265
|
+
|
|
266
|
+
Agents MUST fall back to `raw` when:
|
|
267
|
+
|
|
268
|
+
- `semantic` is missing
|
|
269
|
+
- `confidence < 0.7`
|
|
270
|
+
- `warnings` is non-empty
|
|
271
|
+
- semantic output conflicts with expected state or raw data
|
|
272
|
+
|
|
273
|
+
`semantic` is for planning only and MUST NOT be used for verification.
|
|
274
|
+
|
|
275
|
+
### 9.3 Relationship to Classification
|
|
276
|
+
|
|
277
|
+
Semantic signals MAY be used as input to `classify_action_outcome`.
|
|
278
|
+
|
|
279
|
+
Semantic output MUST NOT replace classification or verification.
|
|
280
|
+
|
|
281
|
+
Classification remains a supplementary, post-action interpretation mechanism.
|
|
282
|
+
|
|
283
|
+
## 10. Classification
|
|
205
284
|
|
|
206
285
|
Tool: `classify_action_outcome`
|
|
207
286
|
|
|
@@ -211,10 +290,11 @@ Rules:
|
|
|
211
290
|
- MUST be deterministic
|
|
212
291
|
- MUST NOT replace `expect_*` tools
|
|
213
292
|
- MUST be treated as a supplementary signal only
|
|
293
|
+
- SHOULD be used with `get_network_activity` when the expected outcome is backend/API activity without a visible UI change
|
|
214
294
|
|
|
215
295
|
It is not a verification mechanism.
|
|
216
296
|
|
|
217
|
-
##
|
|
297
|
+
## 11. Execution Patterns
|
|
218
298
|
|
|
219
299
|
Canonical pattern:
|
|
220
300
|
|
|
@@ -226,7 +306,7 @@ Interpretation:
|
|
|
226
306
|
- `wait_for_screen_change.success` = UI changed
|
|
227
307
|
- `expect_screen.success` = correct outcome verified
|
|
228
308
|
|
|
229
|
-
##
|
|
309
|
+
## 12. Known Deviations
|
|
230
310
|
|
|
231
311
|
Explicitly allowed:
|
|
232
312
|
|
|
@@ -237,7 +317,7 @@ Explicitly allowed:
|
|
|
237
317
|
- `scroll_to_element` outcome-based success (temporary exception)
|
|
238
318
|
- extended runtime fields in `list_devices`
|
|
239
319
|
|
|
240
|
-
##
|
|
320
|
+
## 13. Migration Rules
|
|
241
321
|
|
|
242
322
|
Must change now:
|
|
243
323
|
|
|
@@ -249,6 +329,7 @@ Should align when touched:
|
|
|
249
329
|
- `start_app`, `restart_app`
|
|
250
330
|
- `scroll_to_element`
|
|
251
331
|
- `wait_for_ui`
|
|
332
|
+
- `capture_debug_snapshot`
|
|
252
333
|
|
|
253
334
|
No change required:
|
|
254
335
|
|
|
@@ -257,7 +338,7 @@ No change required:
|
|
|
257
338
|
- `expect_element_visible`
|
|
258
339
|
- `wait_for_screen_change`
|
|
259
340
|
|
|
260
|
-
##
|
|
341
|
+
## 14. Guiding Principles
|
|
261
342
|
|
|
262
343
|
- Actions execute
|
|
263
344
|
- Verification proves
|
package/docs/tools/interact.md
CHANGED
|
@@ -53,6 +53,10 @@ Preferred verification:
|
|
|
53
53
|
|
|
54
54
|
- navigation outcome known -> `expect_screen`
|
|
55
55
|
- local UI change known -> `expect_element_visible`
|
|
56
|
+
- backend/API activity expected -> `classify_action_outcome` + `get_network_activity`
|
|
57
|
+
|
|
58
|
+
Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on network activity and classification instead.
|
|
59
|
+
For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and call `get_network_activity` immediately after the action; do not wait on `wait_for_screen_change` if no visible transition is expected.
|
|
56
60
|
|
|
57
61
|
---
|
|
58
62
|
|
|
@@ -139,6 +143,7 @@ Notes:
|
|
|
139
143
|
- Treats `null` fingerprints as transient and keeps polling.
|
|
140
144
|
- Adds a stability confirmation before returning success to avoid transient animation frames.
|
|
141
145
|
- Follow with `expect_screen` when the expected destination is known.
|
|
146
|
+
- Do not use this as the main success check for backend/API activity that does not change the visible UI.
|
|
142
147
|
|
|
143
148
|
---
|
|
144
149
|
|
|
@@ -451,3 +456,22 @@ Notes:
|
|
|
451
456
|
- The tool resolves the selector internally when needed.
|
|
452
457
|
- On failure, `reason` and `observed` tell you whether the selector was missing entirely or present but not yet visible.
|
|
453
458
|
- Use when the screen should remain on the same destination but a specific element should appear or become visible.
|
|
459
|
+
|
|
460
|
+
---
|
|
461
|
+
|
|
462
|
+
## classify_action_outcome + get_network_activity
|
|
463
|
+
|
|
464
|
+
Use this pair when the action is expected to trigger network/backend work and the screen may not visibly change.
|
|
465
|
+
|
|
466
|
+
Pattern:
|
|
467
|
+
|
|
468
|
+
1. perform the action
|
|
469
|
+
2. call `classify_action_outcome` with `uiChanged` from `wait_for_screen_change` or a screen fingerprint comparison
|
|
470
|
+
3. if the classifier asks for it, call `get_network_activity`
|
|
471
|
+
4. call `classify_action_outcome` again with `networkRequests`
|
|
472
|
+
|
|
473
|
+
Guidance:
|
|
474
|
+
|
|
475
|
+
- `uiChanged=true` or `expectedElementVisible=true` means the action outcome is already verified
|
|
476
|
+
- `nextAction="call_get_network_activity"` means the UI signal was inconclusive and the agent should inspect network activity
|
|
477
|
+
- if network requests succeed but the UI stays unchanged, treat the outcome as a backend/API result rather than a screen transition
|
package/docs/tools/observe.md
CHANGED
|
@@ -132,24 +132,40 @@ Behavior:
|
|
|
132
132
|
- Returns partial data when components fail and includes per-part error fields (e.g. `screenshot_error`, `ui_tree_error`).
|
|
133
133
|
- Caps logs to `logLines` entries and prefers recent entries.
|
|
134
134
|
- Fast by default: does not wait for new logs and avoids long blocking operations.
|
|
135
|
+
- Returns a dual-layer payload:
|
|
136
|
+
- `raw` is authoritative and contains the underlying observation data unchanged.
|
|
137
|
+
- `semantic` is optional, derived from `raw`, and intended for planning only.
|
|
135
138
|
|
|
136
139
|
Response (example):
|
|
137
140
|
|
|
138
141
|
```json
|
|
139
142
|
{
|
|
140
|
-
"
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
143
|
+
"raw": {
|
|
144
|
+
"timestamp": 1710000000,
|
|
145
|
+
"reason": "Crash after tapping checkout",
|
|
146
|
+
"activity": "CheckoutActivity",
|
|
147
|
+
"fingerprint": "abc123",
|
|
148
|
+
"screenshot": "<base64 PNG string>",
|
|
149
|
+
"ui_tree": { ... },
|
|
150
|
+
"logs": [ { "timestamp": 1710000000, "level": "ERROR", "message": "NullPointerException at CheckoutViewModel" } ]
|
|
151
|
+
},
|
|
152
|
+
"semantic": {
|
|
153
|
+
"screen": "Checkout",
|
|
154
|
+
"signals": {
|
|
155
|
+
"has_error_logs": true,
|
|
156
|
+
"has_clickable_elements": true
|
|
157
|
+
},
|
|
158
|
+
"actions_available": ["review checkout", "inspect error"],
|
|
159
|
+
"confidence": 0.82,
|
|
160
|
+
"warnings": ["error signals present in logs"]
|
|
161
|
+
}
|
|
147
162
|
}
|
|
148
163
|
```
|
|
149
164
|
|
|
150
165
|
Notes:
|
|
151
166
|
- Useful immediately after detecting crashes or unexpected UI behaviour.
|
|
152
167
|
- Do not expect perfect data during a crash; tool is designed to return best-effort context and include errors for failed parts.
|
|
168
|
+
- Treat `semantic` as planning guidance only; `raw` remains the source of truth.
|
|
153
169
|
|
|
154
170
|
---
|
|
155
171
|
|
package/package.json
CHANGED
package/src/observe/index.ts
CHANGED
|
@@ -1,10 +1,146 @@
|
|
|
1
1
|
import { resolveTargetDevice } from '../utils/resolve-device.js'
|
|
2
2
|
import { AndroidObserve } from './android.js'
|
|
3
3
|
import { iOSObserve } from './ios.js'
|
|
4
|
+
import type {
|
|
5
|
+
CaptureDebugSnapshotRawResponse,
|
|
6
|
+
SnapshotSemanticResponse
|
|
7
|
+
} from '../types.js'
|
|
4
8
|
|
|
5
9
|
export { AndroidObserve } from './android.js'
|
|
6
10
|
export { iOSObserve } from './ios.js'
|
|
7
11
|
|
|
12
|
+
/**
 * Loosely-typed view of one UI tree element; every field is optional.
 *
 * collectSnapshotTexts reads the label candidates in this order: text,
 * contentDescription, contentDesc, accessibilityLabel, resourceId, id, and
 * uses `clickable` / `enabled` to decide whether the element is actionable.
 */
interface SnapshotTreeElementLike {
|
|
13
|
+
text?: string | null
|
|
14
|
+
contentDescription?: string | null
|
|
15
|
+
contentDesc?: string | null
|
|
16
|
+
accessibilityLabel?: string | null
|
|
17
|
+
resourceId?: string | null
|
|
18
|
+
id?: string | null
|
|
19
|
+
type?: string | null
|
|
20
|
+
class?: string | null
|
|
21
|
+
clickable?: boolean
|
|
22
|
+
enabled?: boolean
|
|
23
|
+
visible?: boolean
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
 * Loosely-typed view of a captured UI tree: an optional screen name plus an
 * optional list of elements. `raw.ui_tree` is cast to this shape before the
 * semantic helpers read it.
 */
interface SnapshotTreeLike {
|
|
27
|
+
screen?: string | null
|
|
28
|
+
elements?: SnapshotTreeElementLike[]
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
 * Canonicalizes a hint value so that hints can be compared as plain strings.
 *
 * @param value Any value; null and undefined are treated as "no hint".
 * @returns The value coerced with String(), trimmed, with every run of
 *   whitespace collapsed to a single space, and lowercased; '' for null/undefined.
 */
function normalizeHint(value: unknown): string {
  // Loose equality against null matches exactly null and undefined.
  if (value == null) {
    return ''
  }
  const collapsed = String(value).trim().replace(/\s+/g, ' ')
  return collapsed.toLowerCase()
}
|
|
35
|
+
|
|
36
|
+
/**
 * Produces a display label from an identifier-like string.
 *
 * Underscores and hyphens become spaces, whitespace runs collapse to one
 * space, the result is trimmed, and the first character after every regex
 * word boundary is uppercased. Existing capital letters are left untouched.
 *
 * @param value Text to convert.
 * @returns The title-cased label.
 */
function titleCase(value: string): string {
  const spaced = value.replace(/[_-]+/g, ' ')
  const tidy = spaced.replace(/\s+/g, ' ').trim()
  return tidy.replace(/\b\w/g, (firstChar) => firstChar.toUpperCase())
}
|
|
43
|
+
|
|
44
|
+
/**
 * Derives a short, human-readable screen name from an activity identifier.
 *
 * Takes the text after the last '.', strips a trailing "Activity" suffix, and
 * title-cases the remainder. If stripping the suffix leaves nothing, the
 * unstripped segment is used instead.
 *
 * @param activity Activity identifier, possibly dot-qualified.
 * @returns The short name, or null when the input is falsy or blank.
 */
function shortActivityName(activity: string | null | undefined): string | null {
  const trimmed = activity ? String(activity).trim() : ''
  if (trimmed === '') {
    return null
  }
  const segments = trimmed.split('.')
  // An input ending in '.' yields an empty last segment; fall back to the whole string.
  const lastSegment = segments[segments.length - 1] || trimmed
  const stem = lastSegment.replace(/Activity$/, '')
  return titleCase(stem || lastSegment)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Collects the distinct normalized labels found in a UI tree.
 *
 * For each element the first label candidate that is non-empty after
 * normalization is used, in this priority order: text, contentDescription,
 * contentDesc, accessibilityLabel, resourceId, id. Elements with no usable
 * label are skipped.
 *
 * @param tree UI tree; anything without an `elements` array is treated as empty.
 * @returns `texts` holds every distinct label in first-seen order;
 *   `actionables` holds the labels of elements that are clickable and not
 *   explicitly disabled.
 */
function collectSnapshotTexts(tree: SnapshotTreeLike | null | undefined) {
  const elements = Array.isArray(tree?.elements) ? tree!.elements! : []
  const texts = new Set<string>()
  const actionables = new Set<string>()

  for (const element of elements) {
    const candidates = [
      element?.text,
      element?.contentDescription,
      element?.contentDesc,
      element?.accessibilityLabel,
      element?.resourceId,
      element?.id
    ]
    // A `??` chain would stop at an empty-string `text` and never reach the
    // description/label fields, so pick the first candidate that is non-empty
    // after normalization instead.
    let text = ''
    for (const candidate of candidates) {
      text = normalizeHint(candidate)
      if (text) break
    }
    if (!text) continue

    texts.add(text)
    // Only `enabled === false` disables an element; a missing flag counts as enabled.
    if (element?.clickable && element?.enabled !== false) {
      actionables.add(text)
    }
  }

  return {
    texts: Array.from(texts),
    actionables: Array.from(actionables)
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Picks the best available screen name for a raw snapshot.
 *
 * Sources are tried in order and the first one that yields a value wins:
 *   1. `raw.ui_tree.screen`, normalized and title-cased;
 *   2. the short name derived from `raw.activity`;
 *   3. the first label collected from the UI tree, title-cased.
 *
 * @param raw Raw snapshot; `ui_tree` and `activity` are read.
 * @returns The inferred screen name, or null if no source has one.
 */
function inferSnapshotScreen(raw: CaptureDebugSnapshotRawResponse): string | null {
  const tree = raw.ui_tree as SnapshotTreeLike | null | undefined

  const explicitScreen = normalizeHint(tree?.screen)
  if (explicitScreen) {
    return titleCase(explicitScreen)
  }

  const fromActivity = shortActivityName(raw.activity)
  if (fromActivity) {
    return fromActivity
  }

  const labels = collectSnapshotTexts(tree).texts
  return labels.length > 0 ? titleCase(labels[0]) : null
}
|
|
86
|
+
|
|
87
|
+
/**
 * Builds the optional, best-effort semantic layer from a raw debug snapshot.
 * The raw object is only read, never modified.
 *
 * @param raw Raw snapshot; `ui_tree`, `activity`, `screenshot` and `logs` are read.
 * @returns The semantic summary, or null when there is nothing to derive from
 *   or when no screen could be inferred and confidence is below 0.3.
 */
function deriveSnapshotSemantic(raw: CaptureDebugSnapshotRawResponse): SnapshotSemanticResponse | null {
  const tree = raw.ui_tree as SnapshotTreeLike | null | undefined
  // Tolerate a missing or non-array `logs` instead of throwing on `.length` / `.some`.
  const logs: unknown[] = Array.isArray(raw.logs) ? raw.logs : []
  const { texts, actionables } = collectSnapshotTexts(tree)
  const screenFromTree = normalizeHint(tree?.screen)
  const activityHint = normalizeHint(raw.activity)
  const activityScreen = normalizeHint(shortActivityName(raw.activity))
  const screen = inferSnapshotScreen(raw)

  if (!screen && !activityHint && texts.length === 0 && logs.length === 0) return null

  const errorPattern = /error|fatal exception|exception|failed/i
  const loadingPattern = /loading|please wait|spinner|progress/i
  const primaryPattern = /sign in|log in|login|home|checkout|settings|menu|profile|search/i

  const hasErrorLogs = logs.some((entry: unknown) => {
    // Entries may be plain strings or objects carrying `message`; null entries are skipped.
    const message = typeof entry === 'string'
      ? entry
      : (entry as { message?: unknown } | null | undefined)?.message
    return message != null && errorPattern.test(String(message))
  })
  const hasLoadingSignals = texts.some((text) => loadingPattern.test(text))
  const hasPrimaryText = texts.some((text) => primaryPattern.test(text))
  const hasScreenshot = typeof raw.screenshot === 'string' && raw.screenshot.length > 0
  const hasUiTree = !!tree && Array.isArray(tree.elements)

  const signals: Record<string, string | number | boolean> = {
    has_activity: !!activityHint,
    has_ui_tree: hasUiTree,
    has_screenshot: hasScreenshot,
    has_visible_text: texts.length > 0,
    has_clickable_elements: actionables.length > 0,
    has_error_logs: hasErrorLogs,
    has_loading_signals: hasLoadingSignals,
    has_primary_text: hasPrimaryText
  }

  const warnings: string[] = []
  // The tree's screen name is compared against both the full activity string
  // and its short form. Comparing only against the full string would warn even
  // when the two agree (e.g. "checkout" vs "com.app.checkoutactivity").
  if (screenFromTree && activityHint && screenFromTree !== activityHint && screenFromTree !== activityScreen) {
    warnings.push('ui_tree.screen and activity hints differ')
  }
  if (!hasUiTree) warnings.push('ui tree unavailable')
  if (!activityHint) warnings.push('activity unavailable')
  if (hasErrorLogs) warnings.push('error signals present in logs')

  // Weighted evidence: the positive weights sum to 1.0; error logs and loading
  // indicators subtract from the total.
  const evidenceScore =
    (hasUiTree ? 0.35 : 0) +
    (screen ? 0.2 : 0) +
    (activityHint ? 0.15 : 0) +
    (actionables.length > 0 ? 0.15 : 0) +
    (texts.length > 0 ? 0.1 : 0) +
    (hasScreenshot ? 0.05 : 0) +
    (hasErrorLogs ? -0.15 : 0) +
    (hasLoadingSignals ? -0.05 : 0)

  // Round to two decimals to hide floating-point noise, then clamp to [0, 1].
  const confidence = Math.max(0, Math.min(1, Number(evidenceScore.toFixed(2))))

  if (!screen && confidence < 0.3) return null

  return {
    screen,
    signals,
    // At most ten action hints are reported; null when nothing is clickable.
    actions_available: actionables.length > 0 ? actionables.slice(0, 10) : null,
    confidence,
    warnings
  }
}
|
|
143
|
+
|
|
8
144
|
export class ToolsObserve {
|
|
9
145
|
// Resolve a target device and return the appropriate observe instance and resolved info.
|
|
10
146
|
private static async resolveObserve(platform?: 'android' | 'ios', deviceId?: string, appId?: string) {
|
|
@@ -103,7 +239,7 @@ export class ToolsObserve {
|
|
|
103
239
|
|
|
104
240
|
static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId }: { reason?: string; includeLogs?: boolean; logLines?: number; platform?: 'android' | 'ios'; appId?: string; deviceId?: string; sessionId?: string } = {}) {
|
|
105
241
|
const timestamp = Date.now()
|
|
106
|
-
const
|
|
242
|
+
const raw: CaptureDebugSnapshotRawResponse = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] }
|
|
107
243
|
|
|
108
244
|
// Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
|
|
109
245
|
const sid = sessionId || 'default'
|
|
@@ -125,40 +261,40 @@ export class ToolsObserve {
|
|
|
125
261
|
if (res.status === 'fulfilled') {
|
|
126
262
|
const val = res.value
|
|
127
263
|
if (key === 'screenshot') {
|
|
128
|
-
|
|
264
|
+
raw.screenshot = val && val.screenshot ? val.screenshot : null
|
|
129
265
|
} else if (key === 'currentScreen') {
|
|
130
|
-
|
|
266
|
+
raw.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : raw.activity || ''
|
|
131
267
|
} else if (key === 'fingerprint') {
|
|
132
|
-
if (val && val.fingerprint)
|
|
133
|
-
if (val && val.activity)
|
|
134
|
-
if (val && val.error)
|
|
268
|
+
if (val && val.fingerprint) raw.fingerprint = val.fingerprint
|
|
269
|
+
if (val && val.activity) raw.activity = raw.activity || val.activity
|
|
270
|
+
if (val && val.error) raw.fingerprint_error = val.error
|
|
135
271
|
} else if (key === 'uiTree') {
|
|
136
|
-
|
|
137
|
-
if (val && val.error)
|
|
272
|
+
raw.ui_tree = val
|
|
273
|
+
if (val && val.error) raw.ui_tree_error = val.error
|
|
138
274
|
} else if (key === 'readLogStream') {
|
|
139
275
|
// handle below after evaluating fallback
|
|
140
276
|
// temporarily store stream entries in raw.logs
|
|
141
|
-
|
|
277
|
+
raw.logs = Array.isArray(val?.entries) ? val.entries : []
|
|
142
278
|
}
|
|
143
279
|
} else {
|
|
144
280
|
const errMsg = res.reason instanceof Error ? res.reason.message : String(res.reason)
|
|
145
|
-
if (key === 'screenshot')
|
|
146
|
-
if (key === 'currentScreen')
|
|
147
|
-
if (key === 'fingerprint') {
|
|
148
|
-
if (key === 'uiTree') {
|
|
149
|
-
if (key === 'readLogStream') {
|
|
281
|
+
if (key === 'screenshot') raw.screenshot_error = errMsg
|
|
282
|
+
if (key === 'currentScreen') raw.activity_error = errMsg
|
|
283
|
+
if (key === 'fingerprint') { raw.fingerprint = null; raw.fingerprint_error = errMsg }
|
|
284
|
+
if (key === 'uiTree') { raw.ui_tree = null; raw.ui_tree_error = errMsg }
|
|
285
|
+
if (key === 'readLogStream') { raw.logs = []; raw.logs_error = errMsg }
|
|
150
286
|
}
|
|
151
287
|
}
|
|
152
288
|
|
|
153
289
|
// Logs: prefer stream entries, fallback to snapshot logs when empty
|
|
154
290
|
if (includeLogs) {
|
|
155
291
|
try {
|
|
156
|
-
let entries: any[] = Array.isArray(
|
|
292
|
+
let entries: any[] = Array.isArray(raw.logs) ? raw.logs : []
|
|
157
293
|
if (!entries || entries.length === 0) {
|
|
158
294
|
const gl = await ToolsObserve.getLogsHandler({ platform, appId, deviceId, lines: logLines })
|
|
159
|
-
const
|
|
295
|
+
const snapshotLogs: any[] = (gl && (gl as any).logs) ? (gl as any).logs : []
|
|
160
296
|
// snapshotLogs may be structured entries or strings
|
|
161
|
-
entries =
|
|
297
|
+
entries = snapshotLogs.slice(-Math.max(0, logLines)).map(item => {
|
|
162
298
|
if (!item) return { timestamp: null, level: 'INFO', message: '' }
|
|
163
299
|
if (typeof item === 'string') {
|
|
164
300
|
const level = /\b(FATAL EXCEPTION|ERROR| E )\b/i.test(item) ? 'ERROR' : /\b(WARN| W )\b/i.test(item) ? 'WARN' : 'INFO'
|
|
@@ -186,16 +322,14 @@ export class ToolsObserve {
|
|
|
186
322
|
})
|
|
187
323
|
}
|
|
188
324
|
|
|
189
|
-
|
|
325
|
+
raw.logs = entries
|
|
190
326
|
} catch (e) {
|
|
191
|
-
|
|
192
|
-
|
|
327
|
+
raw.logs = []
|
|
328
|
+
raw.logs_error = e instanceof Error ? e.message : String(e)
|
|
193
329
|
}
|
|
194
330
|
}
|
|
195
331
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
return out
|
|
332
|
+
const semantic = deriveSnapshotSemantic(raw)
|
|
333
|
+
return semantic ? { raw, semantic } : { raw }
|
|
200
334
|
}
|
|
201
335
|
}
|
|
@@ -240,7 +240,7 @@ Failure Handling:
|
|
|
240
240
|
},
|
|
241
241
|
{
|
|
242
242
|
name: 'capture_debug_snapshot',
|
|
243
|
-
description: 'Capture a complete debug snapshot (
|
|
243
|
+
description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
|
|
244
244
|
inputSchema: {
|
|
245
245
|
type: 'object',
|
|
246
246
|
properties: {
|
|
@@ -344,6 +344,7 @@ Capabilities:
|
|
|
344
344
|
Constraints:
|
|
345
345
|
- Does not verify correctness of the resulting state
|
|
346
346
|
- Must not be used alone to confirm action success when an applicable expect_* tool exists
|
|
347
|
+
- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
|
|
347
348
|
|
|
348
349
|
Recommended Usage:
|
|
349
350
|
1. Capture or define the expected outcome
|
|
@@ -835,6 +836,8 @@ Failure Handling:
|
|
|
835
836
|
description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
|
|
836
837
|
|
|
837
838
|
MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
|
|
839
|
+
Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
|
|
840
|
+
For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
|
|
838
841
|
|
|
839
842
|
HOW TO GATHER INPUTS before calling:
|
|
840
843
|
1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
|
|
@@ -868,7 +871,7 @@ BEHAVIOUR after outcome:
|
|
|
868
871
|
},
|
|
869
872
|
networkRequests: {
|
|
870
873
|
type: 'array',
|
|
871
|
-
description: 'Pass this only after calling get_network_activity as instructed by nextAction.
|
|
874
|
+
description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
|
|
872
875
|
items: {
|
|
873
876
|
type: 'object',
|
|
874
877
|
properties: {
|
|
@@ -890,7 +893,7 @@ BEHAVIOUR after outcome:
|
|
|
890
893
|
name: 'get_network_activity',
|
|
891
894
|
description: `Returns structured network events captured from platform logs since the last action.
|
|
892
895
|
|
|
893
|
-
Call this
|
|
896
|
+
Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
|
|
894
897
|
Do not call more than once per action.
|
|
895
898
|
|
|
896
899
|
Events are filtered to significant (non-background) requests only.
|
package/src/types.ts
CHANGED
|
@@ -137,6 +137,35 @@ export interface GetCurrentScreenResponse {
|
|
|
137
137
|
error?: string;
|
|
138
138
|
}
|
|
139
139
|
|
|
140
|
+
export interface SnapshotSemanticResponse {
|
|
141
|
+
screen: string | null;
|
|
142
|
+
signals: Record<string, string | number | boolean> | null;
|
|
143
|
+
actions_available: string[] | null;
|
|
144
|
+
confidence: number;
|
|
145
|
+
warnings: string[];
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
export interface CaptureDebugSnapshotRawResponse {
|
|
149
|
+
timestamp: number;
|
|
150
|
+
reason: string;
|
|
151
|
+
activity: string | null;
|
|
152
|
+
fingerprint: string | null;
|
|
153
|
+
screenshot: string | null;
|
|
154
|
+
ui_tree: unknown | null;
|
|
155
|
+
logs: StructuredLogEntry[];
|
|
156
|
+
device?: DeviceInfo;
|
|
157
|
+
screenshot_error?: string;
|
|
158
|
+
activity_error?: string;
|
|
159
|
+
fingerprint_error?: string;
|
|
160
|
+
ui_tree_error?: string;
|
|
161
|
+
logs_error?: string;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
export interface CaptureDebugSnapshotResponse {
|
|
165
|
+
raw: CaptureDebugSnapshotRawResponse;
|
|
166
|
+
semantic?: SnapshotSemanticResponse | null;
|
|
167
|
+
}
|
|
168
|
+
|
|
140
169
|
export interface WaitForElementResponse {
|
|
141
170
|
device: DeviceInfo;
|
|
142
171
|
found: boolean;
|
|
@@ -35,8 +35,11 @@ async function run() {
|
|
|
35
35
|
|
|
36
36
|
const res1: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: true, logLines: 50, sessionId: 's1' })
|
|
37
37
|
console.log('res1:', JSON.stringify(res1, null, 2))
|
|
38
|
-
const pass1 = res1 && res1.screenshot === 'BASE64PNG' && res1.activity && res1.fingerprint === 'abc123' && Array.isArray(res1.logs) && res1.logs.length === 1
|
|
38
|
+
const pass1 = res1 && res1.raw && res1.raw.screenshot === 'BASE64PNG' && res1.raw.activity && res1.raw.fingerprint === 'abc123' && Array.isArray(res1.raw.logs) && res1.raw.logs.length === 1
|
|
39
39
|
assert.ok(pass1, 'captureDebugSnapshot should aggregate successful handler results')
|
|
40
|
+
assert.strictEqual(res1.semantic.screen, 'Main')
|
|
41
|
+
assert.strictEqual(res1.semantic.confidence >= 0.7, true)
|
|
42
|
+
assert.deepStrictEqual(res1.semantic.actions_available, null)
|
|
40
43
|
console.log('Test 1:', pass1 ? 'PASS' : 'FAIL')
|
|
41
44
|
|
|
42
45
|
// Restore handlers before next test
|
|
@@ -55,7 +58,7 @@ async function run() {
|
|
|
55
58
|
|
|
56
59
|
const res2: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: true, logLines: 10, appId: 'com.example' })
|
|
57
60
|
console.log('res2:', JSON.stringify(res2, null, 2))
|
|
58
|
-
const pass2 = res2 && res2.screenshot_error && res2.ui_tree_error && Array.isArray(res2.logs) && res2.logs.length === 2
|
|
61
|
+
const pass2 = res2 && res2.raw && res2.raw.screenshot_error && res2.raw.ui_tree_error && Array.isArray(res2.raw.logs) && res2.raw.logs.length === 2
|
|
59
62
|
assert.ok(pass2, 'captureDebugSnapshot should surface partial failures and fallback logs')
|
|
60
63
|
console.log('Test 2:', pass2 ? 'PASS' : 'FAIL')
|
|
61
64
|
|
|
@@ -76,7 +79,7 @@ async function run() {
|
|
|
76
79
|
|
|
77
80
|
const res3: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: false })
|
|
78
81
|
console.log('res3:', JSON.stringify(res3, null, 2))
|
|
79
|
-
const pass3 = res3 && typeof res3.logs !== 'undefined' && res3.logs.length === 0
|
|
82
|
+
const pass3 = res3 && res3.raw && typeof res3.raw.logs !== 'undefined' && res3.raw.logs.length === 0
|
|
80
83
|
assert.ok(pass3, 'captureDebugSnapshot should return an empty logs array when includeLogs is false')
|
|
81
84
|
console.log('Test 3:', pass3 ? 'PASS' : 'FAIL')
|
|
82
85
|
|
|
@@ -26,11 +26,14 @@ async function run() {
|
|
|
26
26
|
assert(waitForScreenChange, 'wait_for_screen_change should be registered')
|
|
27
27
|
assert.match((waitForScreenChange as any).description, /does not verify correctness of the resulting state/i)
|
|
28
28
|
assert.match((waitForScreenChange as any).description, /follow with expect_screen/i)
|
|
29
|
+
assert.match((waitForScreenChange as any).description, /backend\/API activity without a visible UI change/i)
|
|
29
30
|
|
|
30
31
|
const captureDebugSnapshot = toolDefinitions.find((tool) => tool.name === 'capture_debug_snapshot')
|
|
31
32
|
assert(captureDebugSnapshot, 'capture_debug_snapshot should be registered')
|
|
32
33
|
assert.strictEqual((captureDebugSnapshot as any).inputSchema.properties.includeLogs.default, true)
|
|
33
34
|
assert.strictEqual((captureDebugSnapshot as any).inputSchema.properties.logLines.default, 200)
|
|
35
|
+
assert.match((captureDebugSnapshot as any).description, /raw observation layer/i)
|
|
36
|
+
assert.match((captureDebugSnapshot as any).description, /optional derived semantic layer/i)
|
|
34
37
|
|
|
35
38
|
const startLogStream = toolDefinitions.find((tool) => tool.name === 'start_log_stream')
|
|
36
39
|
assert(startLogStream, 'start_log_stream should be registered')
|
|
@@ -60,6 +63,18 @@ async function run() {
|
|
|
60
63
|
assert.match((expectElementVisible as any).description, /selector is the primary input/i)
|
|
61
64
|
assert.match((expectElementVisible as any).description, /Returns structured binary success\/failure only/i)
|
|
62
65
|
|
|
66
|
+
const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
|
|
67
|
+
assert(classifyActionOutcome, 'classify_action_outcome should be registered')
|
|
68
|
+
assert.match((classifyActionOutcome as any).description, /backend\/API activity without a visible UI change/i)
|
|
69
|
+
assert.match((classifyActionOutcome as any).description, /get_network_activity/i)
|
|
70
|
+
assert.match((classifyActionOutcome as any).description, /immediately after the action/i)
|
|
71
|
+
|
|
72
|
+
const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
|
|
73
|
+
assert(getNetworkActivity, 'get_network_activity should be registered')
|
|
74
|
+
assert.match((getNetworkActivity as any).description, /backend\/API activity without a visible UI change/i)
|
|
75
|
+
assert.doesNotMatch((getNetworkActivity as any).description, /Call this only when/i)
|
|
76
|
+
assert.match((getNetworkActivity as any).description, /immediately after an action/i)
|
|
77
|
+
|
|
63
78
|
await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)
|
|
64
79
|
|
|
65
80
|
console.log('server contract tests passed')
|
|
@@ -16,6 +16,7 @@ async function run() {
|
|
|
16
16
|
const originalCaptureScreenshotHandler = (ToolsObserve as any).captureScreenshotHandler
|
|
17
17
|
const originalGetUITreeHandler = (ToolsObserve as any).getUITreeHandler
|
|
18
18
|
const originalGetScreenFingerprintHandler = (ToolsObserve as any).getScreenFingerprintHandler
|
|
19
|
+
const originalCaptureDebugSnapshotHandler = (ToolsObserve as any).captureDebugSnapshotHandler
|
|
19
20
|
|
|
20
21
|
try {
|
|
21
22
|
;(ToolsManage as any).installAppHandler = async () => ({
|
|
@@ -181,6 +182,32 @@ async function run() {
|
|
|
181
182
|
assert.strictEqual(uiTreePayload.resolution.height, 2400)
|
|
182
183
|
assert.strictEqual(uiTreePayload.elements[0].text, 'Login')
|
|
183
184
|
|
|
185
|
+
;(ToolsObserve as any).captureDebugSnapshotHandler = async () => ({
|
|
186
|
+
raw: {
|
|
187
|
+
timestamp: 1710000000000,
|
|
188
|
+
reason: 'manual',
|
|
189
|
+
activity: 'com.example.MainActivity',
|
|
190
|
+
fingerprint: 'fp_raw',
|
|
191
|
+
screenshot: 'base64',
|
|
192
|
+
ui_tree: { screen: 'Home', elements: [] },
|
|
193
|
+
logs: [],
|
|
194
|
+
device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true }
|
|
195
|
+
},
|
|
196
|
+
semantic: {
|
|
197
|
+
screen: 'Home',
|
|
198
|
+
signals: { has_activity: true },
|
|
199
|
+
actions_available: ['open settings'],
|
|
200
|
+
confidence: 0.8,
|
|
201
|
+
warnings: []
|
|
202
|
+
}
|
|
203
|
+
})
|
|
204
|
+
|
|
205
|
+
const snapshotResponse = await handleToolCall('capture_debug_snapshot', { platform: 'android' })
|
|
206
|
+
const snapshotPayload = JSON.parse((snapshotResponse as any).content[0].text)
|
|
207
|
+
assert.strictEqual(snapshotPayload.raw.fingerprint, 'fp_raw')
|
|
208
|
+
assert.strictEqual(snapshotPayload.semantic.screen, 'Home')
|
|
209
|
+
assert.strictEqual(snapshotPayload.semantic.confidence, 0.8)
|
|
210
|
+
|
|
184
211
|
console.log('server response-shape tests passed')
|
|
185
212
|
} finally {
|
|
186
213
|
;(ToolsManage as any).installAppHandler = originalInstallAppHandler
|
|
@@ -193,6 +220,7 @@ async function run() {
|
|
|
193
220
|
;(ToolsObserve as any).captureScreenshotHandler = originalCaptureScreenshotHandler
|
|
194
221
|
;(ToolsObserve as any).getUITreeHandler = originalGetUITreeHandler
|
|
195
222
|
;(ToolsObserve as any).getScreenFingerprintHandler = originalGetScreenFingerprintHandler
|
|
223
|
+
;(ToolsObserve as any).captureDebugSnapshotHandler = originalCaptureDebugSnapshotHandler
|
|
196
224
|
}
|
|
197
225
|
}
|
|
198
226
|
|