mobile-debug-mcp 0.26.1 → 0.26.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +3 -0
- package/dist/interact/index.js +169 -102
- package/dist/server/common.js +14 -1
- package/dist/server/tool-definitions.js +22 -4
- package/dist/server/tool-handlers.js +7 -0
- package/dist/server-core.js +1 -1
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +242 -76
- package/docs/rfcs/005-unified-action-execution-and-verification-model.md +216 -0
- package/docs/rfcs/006-runtime-action-instrumentation-and-binding-layer.md +230 -0
- package/docs/rfcs/007-actionability-resolution-and-executable-target-selection.md +277 -0
- package/docs/specs/mcp-tooling-spec-v1.md +4 -0
- package/docs/tools/interact.md +13 -1
- package/package.json +1 -1
- package/src/interact/index.ts +203 -107
- package/src/server/common.ts +22 -1
- package/src/server/tool-definitions.ts +22 -4
- package/src/server/tool-handlers.ts +7 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +75 -0
- package/test/unit/observe/find_element.test.ts +5 -0
- package/test/unit/server/response_shapes.test.ts +8 -0
|
package/src/server/tool-handlers.ts
CHANGED
@@ -47,6 +47,7 @@ async function handleStartApp(args: ToolCallArgs) {
|
|
|
47
47
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
48
48
|
return wrapResponse(buildActionExecutionResult({
|
|
49
49
|
actionType: 'start_app',
|
|
50
|
+
sourceModule: 'server',
|
|
50
51
|
device: res.device,
|
|
51
52
|
selector: { appId },
|
|
52
53
|
success: !!res.appStarted,
|
|
@@ -82,6 +83,7 @@ async function handleRestartApp(args: ToolCallArgs) {
|
|
|
82
83
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
83
84
|
return wrapResponse(buildActionExecutionResult({
|
|
84
85
|
actionType: 'restart_app',
|
|
86
|
+
sourceModule: 'server',
|
|
85
87
|
device: res.device,
|
|
86
88
|
selector: { appId },
|
|
87
89
|
success: !!res.appRestarted,
|
|
@@ -319,6 +321,7 @@ async function handleTap(args: ToolCallArgs) {
|
|
|
319
321
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
320
322
|
return wrapResponse(buildActionExecutionResult({
|
|
321
323
|
actionType: 'tap',
|
|
324
|
+
sourceModule: 'server',
|
|
322
325
|
selector: { x, y },
|
|
323
326
|
success: !!res.success,
|
|
324
327
|
uiFingerprintBefore,
|
|
@@ -348,6 +351,7 @@ async function handleSwipe(args: ToolCallArgs) {
|
|
|
348
351
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
349
352
|
return wrapResponse(buildActionExecutionResult({
|
|
350
353
|
actionType: 'swipe',
|
|
354
|
+
sourceModule: 'server',
|
|
351
355
|
selector: { x1, y1, x2, y2, duration },
|
|
352
356
|
success: !!res.success,
|
|
353
357
|
uiFingerprintBefore,
|
|
@@ -369,6 +373,7 @@ async function handleScrollToElement(args: ToolCallArgs) {
|
|
|
369
373
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
|
|
370
374
|
return wrapResponse(buildActionExecutionResult({
|
|
371
375
|
actionType: 'scroll_to_element',
|
|
376
|
+
sourceModule: 'server',
|
|
372
377
|
selector: selector ?? null,
|
|
373
378
|
resolved: res?.success && res?.element ? {
|
|
374
379
|
elementId: null,
|
|
@@ -395,6 +400,7 @@ async function handleTypeText(args: ToolCallArgs) {
|
|
|
395
400
|
const uiFingerprintAfter = await captureActionFingerprint('android', deviceId)
|
|
396
401
|
return wrapResponse(buildActionExecutionResult({
|
|
397
402
|
actionType: 'type_text',
|
|
403
|
+
sourceModule: 'server',
|
|
398
404
|
selector: { text },
|
|
399
405
|
success: !!res.success,
|
|
400
406
|
uiFingerprintBefore,
|
|
@@ -411,6 +417,7 @@ async function handlePressBack(args: ToolCallArgs) {
|
|
|
411
417
|
const uiFingerprintAfter = await captureActionFingerprint('android', deviceId)
|
|
412
418
|
return wrapResponse(buildActionExecutionResult({
|
|
413
419
|
actionType: 'press_back',
|
|
420
|
+
sourceModule: 'server',
|
|
414
421
|
selector: { key: 'back' },
|
|
415
422
|
success: !!res.success,
|
|
416
423
|
uiFingerprintBefore,
|
package/src/server-core.ts
CHANGED
package/src/types.ts
CHANGED
|
@@ -254,10 +254,85 @@ export interface ActionTargetResolved {
|
|
|
254
254
|
semantic?: UIElementSemanticMetadata | null;
|
|
255
255
|
}
|
|
256
256
|
|
|
257
|
+
export interface ResolutionAlternate {
|
|
258
|
+
text: string | null;
|
|
259
|
+
resource_id: string | null;
|
|
260
|
+
accessibility_id: string | null;
|
|
261
|
+
class: string | null;
|
|
262
|
+
bounds: {
|
|
263
|
+
left: number;
|
|
264
|
+
top: number;
|
|
265
|
+
right: number;
|
|
266
|
+
bottom: number;
|
|
267
|
+
} | null;
|
|
268
|
+
clickable: boolean;
|
|
269
|
+
enabled: boolean;
|
|
270
|
+
score: number;
|
|
271
|
+
reason: string;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
export interface ResolutionSummary {
|
|
275
|
+
confidence: number;
|
|
276
|
+
reason: string;
|
|
277
|
+
fallback_available: boolean;
|
|
278
|
+
matched_count: number;
|
|
279
|
+
alternates: ResolutionAlternate[];
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
export interface FindElementElement {
|
|
283
|
+
text: string | null;
|
|
284
|
+
resourceId: string | null;
|
|
285
|
+
contentDesc: string | null;
|
|
286
|
+
class: string | null;
|
|
287
|
+
bounds: {
|
|
288
|
+
left: number;
|
|
289
|
+
top: number;
|
|
290
|
+
right: number;
|
|
291
|
+
bottom: number;
|
|
292
|
+
} | null;
|
|
293
|
+
clickable: boolean;
|
|
294
|
+
enabled: boolean;
|
|
295
|
+
stable_id?: string | null;
|
|
296
|
+
role?: string | null;
|
|
297
|
+
test_tag?: string | null;
|
|
298
|
+
selector?: UIResolutionSelector | null;
|
|
299
|
+
semantic?: UIElementSemanticMetadata | null;
|
|
300
|
+
tapCoordinates: {
|
|
301
|
+
x: number;
|
|
302
|
+
y: number;
|
|
303
|
+
} | null;
|
|
304
|
+
telemetry: {
|
|
305
|
+
matchedIndex: number | null;
|
|
306
|
+
matchedInteractable: boolean;
|
|
307
|
+
sliderLike: boolean;
|
|
308
|
+
};
|
|
309
|
+
interactionHint?: {
|
|
310
|
+
kind: 'slider';
|
|
311
|
+
axis: 'horizontal' | 'vertical';
|
|
312
|
+
trackBounds: {
|
|
313
|
+
left: number;
|
|
314
|
+
top: number;
|
|
315
|
+
right: number;
|
|
316
|
+
bottom: number;
|
|
317
|
+
} | null;
|
|
318
|
+
};
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
export interface FindElementResponse {
|
|
322
|
+
found: boolean;
|
|
323
|
+
element?: FindElementElement | null;
|
|
324
|
+
score?: number;
|
|
325
|
+
confidence?: number;
|
|
326
|
+
resolution?: ResolutionSummary | null;
|
|
327
|
+
error?: string;
|
|
328
|
+
}
|
|
329
|
+
|
|
257
330
|
export interface ActionExecutionResult {
|
|
258
331
|
action_id: string;
|
|
259
332
|
timestamp: string;
|
|
260
333
|
action_type: string;
|
|
334
|
+
lifecycle_state?: 'pending_verification' | 'failed';
|
|
335
|
+
source_module?: 'server' | 'interact';
|
|
261
336
|
device?: DeviceInfo;
|
|
262
337
|
target: {
|
|
263
338
|
selector: Record<string, unknown> | null;
|
|
package/test/unit/observe/find_element.test.ts
CHANGED
@@ -73,6 +73,9 @@ async function run() {
|
|
|
73
73
|
process.stdout.write('res4 ' + JSON.stringify(res4, null, 2) + '\n');
|
|
74
74
|
const pass4 = res4.found === true && res4.element && res4.element.clickable === true && res4.element.resourceId === 'btn_generate' && res4.element.tapCoordinates && typeof res4.element.tapCoordinates.x === 'number' && typeof res4.element.tapCoordinates.y === 'number' && typeof res4.confidence === 'number'
|
|
75
75
|
assert.ok(pass4, 'Child text should resolve to a clickable parent ancestor')
|
|
76
|
+
assert.strictEqual(res4.resolution?.reason, 'clickable_parent_preferred')
|
|
77
|
+
assert.strictEqual(res4.resolution?.fallback_available, true)
|
|
78
|
+
assert.ok((res4.resolution?.alternates || []).length >= 1, 'Parent promotion should preserve alternates')
|
|
76
79
|
process.stdout.write('Test 4: ' + (pass4 ? 'PASS' : 'FAIL') + '\n');
|
|
77
80
|
|
|
78
81
|
// Test 5: duration label should resolve to the nearby slider control
|
|
@@ -111,6 +114,8 @@ async function run() {
|
|
|
111
114
|
process.stdout.write('Test 6: ' + (pass6 ? 'PASS' : 'FAIL') + '\n');
|
|
112
115
|
const pass6b = res6.element && res6.element.telemetry && res6.element.telemetry.sliderLike === true && res6.element.interactionHint && res6.element.interactionHint.kind === 'slider'
|
|
113
116
|
assert.ok(pass6b, 'Duration lookup should include slider-specific telemetry')
|
|
117
|
+
assert.strictEqual(res6.resolution?.reason, 'slider_track_preferred')
|
|
118
|
+
assert.strictEqual(res6.resolution?.fallback_available, true)
|
|
114
119
|
process.stdout.write('Test 6b: ' + (pass6b ? 'PASS' : 'FAIL') + '\n');
|
|
115
120
|
|
|
116
121
|
// Test 7: prefer vertical track-like control over a closer text button
|
|
package/test/unit/server/response_shapes.test.ts
CHANGED
@@ -61,6 +61,8 @@ async function run() {
|
|
|
61
61
|
action_id: 'tap_element_1',
|
|
62
62
|
timestamp: '2026-04-23T08:00:00.000Z',
|
|
63
63
|
action_type: 'tap_element',
|
|
64
|
+
lifecycle_state: 'pending_verification',
|
|
65
|
+
source_module: 'interact',
|
|
64
66
|
target: {
|
|
65
67
|
selector: { elementId: 'el_ready' },
|
|
66
68
|
resolved: { elementId: 'el_ready', text: 'Ready', resource_id: null, accessibility_id: null, class: 'Button', bounds: [0, 0, 10, 10], index: 0 }
|
|
@@ -74,6 +76,8 @@ async function run() {
|
|
|
74
76
|
const tapElementPayload = JSON.parse((tapElementResponse as any).content[0].text)
|
|
75
77
|
assert.strictEqual(tapElementPayload.success, true)
|
|
76
78
|
assert.strictEqual(tapElementPayload.action_type, 'tap_element')
|
|
79
|
+
assert.strictEqual(tapElementPayload.lifecycle_state, 'pending_verification')
|
|
80
|
+
assert.strictEqual(tapElementPayload.source_module, 'interact')
|
|
77
81
|
assert.match(tapElementPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
78
82
|
assert.strictEqual(tapElementPayload.target.resolved.elementId, 'el_ready')
|
|
79
83
|
assert.strictEqual(tapElementPayload.ui_fingerprint_before, 'fp_before')
|
|
@@ -84,6 +88,8 @@ async function run() {
|
|
|
84
88
|
const tapPayload = JSON.parse((tapResponse as any).content[0].text)
|
|
85
89
|
assert.strictEqual(tapPayload.success, true)
|
|
86
90
|
assert.strictEqual(tapPayload.action_type, 'tap')
|
|
91
|
+
assert.strictEqual(tapPayload.lifecycle_state, 'pending_verification')
|
|
92
|
+
assert.strictEqual(tapPayload.source_module, 'server')
|
|
87
93
|
assert.match(tapPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
88
94
|
assert.deepStrictEqual(tapPayload.target.selector, { x: 1, y: 2 })
|
|
89
95
|
assert.strictEqual(tapPayload.ui_fingerprint_before, 'fp_mock')
|
|
@@ -107,6 +113,8 @@ async function run() {
|
|
|
107
113
|
const startAppPayload = JSON.parse((startAppResponse as any).content[0].text)
|
|
108
114
|
assert.strictEqual(startAppPayload.success, true)
|
|
109
115
|
assert.strictEqual(startAppPayload.action_type, 'start_app')
|
|
116
|
+
assert.strictEqual(startAppPayload.lifecycle_state, 'pending_verification')
|
|
117
|
+
assert.strictEqual(startAppPayload.source_module, 'server')
|
|
110
118
|
assert.match(startAppPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
|
|
111
119
|
assert.strictEqual(startAppPayload.device.id, 'emulator-5554')
|
|
112
120
|
assert.deepStrictEqual(startAppPayload.target.selector, { appId: 'com.example.app' })
|