@beeos-ai/device-mcp-server 0.2.3 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backends/android-adb.d.ts +147 -6
- package/dist/backends/android-adb.js +776 -40
- package/dist/backends/android-adb.js.map +1 -1
- package/dist/backends/base.d.ts +243 -7
- package/dist/backends/base.js +81 -2
- package/dist/backends/base.js.map +1 -1
- package/dist/backends/desktop.d.ts +3 -2
- package/dist/backends/desktop.js +9 -3
- package/dist/backends/desktop.js.map +1 -1
- package/dist/backends/linux.js +3 -0
- package/dist/backends/linux.js.map +1 -1
- package/dist/backends/mac.d.ts +11 -2
- package/dist/backends/mac.js +39 -1
- package/dist/backends/mac.js.map +1 -1
- package/dist/backends/stubs/windows.js +3 -0
- package/dist/backends/stubs/windows.js.map +1 -1
- package/dist/cli.d.ts +40 -26
- package/dist/cli.js +118 -84
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +9 -6
- package/dist/index.js +9 -6
- package/dist/index.js.map +1 -1
- package/dist/server/app.d.ts +60 -17
- package/dist/server/app.js +182 -138
- package/dist/server/app.js.map +1 -1
- package/dist/server/mcp-server.d.ts +25 -0
- package/dist/server/mcp-server.js +33 -0
- package/dist/server/mcp-server.js.map +1 -0
- package/dist/server/registry.d.ts +111 -0
- package/dist/server/registry.js +191 -0
- package/dist/server/registry.js.map +1 -0
- package/dist/server/stdio.d.ts +29 -0
- package/dist/server/stdio.js +35 -0
- package/dist/server/stdio.js.map +1 -0
- package/dist/server/tool-registry.d.ts +60 -35
- package/dist/server/tool-registry.js +911 -434
- package/dist/server/tool-registry.js.map +1 -1
- package/dist/util/adb-files.d.ts +25 -1
- package/dist/util/adb-files.js +95 -0
- package/dist/util/adb-files.js.map +1 -1
- package/dist/util/locale.d.ts +16 -0
- package/dist/util/locale.js +31 -0
- package/dist/util/locale.js.map +1 -0
- package/dist/util/logger.d.ts +27 -0
- package/dist/util/logger.js +27 -0
- package/dist/util/logger.js.map +1 -0
- package/dist/util/output-path.d.ts +60 -0
- package/dist/util/output-path.js +123 -0
- package/dist/util/output-path.js.map +1 -0
- package/dist/util/package-name.d.ts +26 -0
- package/dist/util/package-name.js +41 -0
- package/dist/util/package-name.js.map +1 -0
- package/package.json +6 -4
- package/dist/backends/stubs/macos.d.ts +0 -13
- package/dist/backends/stubs/macos.js +0 -27
- package/dist/backends/stubs/macos.js.map +0 -1
- package/dist/server/action-mapping.d.ts +0 -21
- package/dist/server/action-mapping.js +0 -153
- package/dist/server/action-mapping.js.map +0 -1
|
@@ -1,42 +1,82 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* SDK-style tool catalogue for device-mcp-server.
|
|
3
3
|
*
|
|
4
|
-
* Each tool
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
4
|
+
* Each tool is a `ToolSpec` describing:
|
|
5
|
+
* - `name` MCP tool identifier (`screenshot`, `tap`, ...)
|
|
6
|
+
* - `description` human-readable verb
|
|
7
|
+
* - `inputShape` zod raw shape — every shape gets an implicit
|
|
8
|
+
* `device?: z.string().optional()` injected by
|
|
9
|
+
* `registerAllTools` so multi-device callers can
|
|
10
|
+
* target a specific backend per call
|
|
11
|
+
* - `annotations` `ToolAnnotations` (read-only / destructive /
|
|
12
|
+
* idempotent / open-world hints)
|
|
13
|
+
* - `requires` MCP tool name the resolved backend must declare
|
|
14
|
+
* in `backend.tools`. Tools with `requires:
|
|
15
|
+
* "registry"` are registry-level (don't touch a
|
|
16
|
+
* backend) — used for `list_available_devices` /
|
|
17
|
+
* `use_device`.
|
|
18
|
+
* - `handler` async `(args, ctx) => CallToolResult`
|
|
8
19
|
*
|
|
9
|
-
*
|
|
10
|
-
* `
|
|
11
|
-
*
|
|
12
|
-
* support gates of its own. `listToolDescriptorsFor(backend)` filters
|
|
13
|
-
* via `backend.tools.has(name)`; `getToolFor(name, backend)` mirrors
|
|
14
|
-
* the same gate so `/mcp/tools/call` returns 404 (instead of 200 +
|
|
15
|
-
* runtime `unsupported`) for any tool the active backend doesn't
|
|
16
|
-
* advertise.
|
|
20
|
+
* `registerAllTools(server, registry)` iterates the catalogue and calls
|
|
21
|
+
* `server.registerTool` for each entry. The handler closure captures the
|
|
22
|
+
* registry so it can resolve the per-call backend via `registry.resolve`.
|
|
17
23
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
24
|
+
* Returning shapes:
|
|
25
|
+
* - Every handler returns `{ content, structuredContent }`. `content`
|
|
26
|
+
* carries the human-friendly text summary (printable in MCP clients
|
|
27
|
+
* like Claude Desktop / Cursor); `structuredContent` carries the
|
|
28
|
+
* typed payload that programmatic callers (`device-agent`) parse.
|
|
20
29
|
*/
|
|
30
|
+
import { z } from "zod";
|
|
31
|
+
import { DeviceError, } from "@beeos-ai/device-common";
|
|
21
32
|
import { formatUiXmlToolOutput } from "../util/ui-xml.js";
|
|
33
|
+
import { assertSafeUrl, PRESS_BUTTON_KEYMAP } from "../backends/android-adb.js";
|
|
34
|
+
import { assertSafeOutputPathWithExt, } from "../util/output-path.js";
|
|
35
|
+
import { assertValidPackageName } from "../util/package-name.js";
|
|
36
|
+
import { assertValidLocale } from "../util/locale.js";
|
|
22
37
|
/* ----------------------------------------------------------------------- */
|
|
23
|
-
/* Helpers
|
|
38
|
+
/* Helpers */
|
|
24
39
|
/* ----------------------------------------------------------------------- */
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
40
|
+
const xy = {
|
|
41
|
+
x: z.number().describe("Click coordinate x (device pixels)"),
|
|
42
|
+
y: z.number().describe("Click coordinate y (device pixels)"),
|
|
43
|
+
};
|
|
44
|
+
const segment = {
|
|
45
|
+
x1: z.number(),
|
|
46
|
+
y1: z.number(),
|
|
47
|
+
x2: z.number(),
|
|
48
|
+
y2: z.number(),
|
|
49
|
+
};
|
|
50
|
+
/**
|
|
51
|
+
* Shared `durationMs` schema for gesture tools (mobile-mcp parity, r3).
|
|
52
|
+
*
|
|
53
|
+
* mobile-mcp clamps duration into `[1, 10000]` ms — without an upper
|
|
54
|
+
* bound a prompt-injected `durationMs: 600000` would wedge an `adb
|
|
55
|
+
* input swipe` for 10 minutes. We pick the same range and require an
|
|
56
|
+
* integer (the underlying `input swipe` rounds anyway, so accepting
|
|
57
|
+
* floats just hides typos).
|
|
58
|
+
*/
|
|
59
|
+
const durationMsField = z.number().int().min(1).max(10000).optional();
|
|
60
|
+
function ok(text, structured) {
|
|
61
|
+
return {
|
|
62
|
+
content: [{ type: "text", text }],
|
|
63
|
+
...(structured !== undefined ? { structuredContent: structured } : {}),
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
function ack(verb) {
|
|
67
|
+
return {
|
|
68
|
+
content: [{ type: "text", text: `${verb} ok` }],
|
|
69
|
+
structuredContent: { ok: true },
|
|
70
|
+
};
|
|
31
71
|
}
|
|
32
|
-
function
|
|
72
|
+
function getStr(args, key) {
|
|
33
73
|
const v = args[key];
|
|
34
74
|
if (typeof v !== "string") {
|
|
35
75
|
throw new TypeError(`argument '${key}' must be a string`);
|
|
36
76
|
}
|
|
37
77
|
return v;
|
|
38
78
|
}
|
|
39
|
-
function
|
|
79
|
+
function optStr(args, key) {
|
|
40
80
|
const v = args[key];
|
|
41
81
|
if (v === undefined || v === null)
|
|
42
82
|
return undefined;
|
|
@@ -44,7 +84,14 @@ function optS(args, key) {
|
|
|
44
84
|
throw new TypeError(`argument '${key}' must be a string`);
|
|
45
85
|
return v;
|
|
46
86
|
}
|
|
47
|
-
function
|
|
87
|
+
function num(args, key) {
|
|
88
|
+
const v = args[key];
|
|
89
|
+
if (typeof v !== "number" || !Number.isFinite(v)) {
|
|
90
|
+
throw new TypeError(`argument '${key}' must be a finite number`);
|
|
91
|
+
}
|
|
92
|
+
return v;
|
|
93
|
+
}
|
|
94
|
+
function optNum(args, key) {
|
|
48
95
|
const v = args[key];
|
|
49
96
|
if (v === undefined || v === null)
|
|
50
97
|
return undefined;
|
|
@@ -53,452 +100,882 @@ function optN(args, key) {
|
|
|
53
100
|
}
|
|
54
101
|
return v;
|
|
55
102
|
}
|
|
56
|
-
const xy = {
|
|
57
|
-
type: "object",
|
|
58
|
-
properties: {
|
|
59
|
-
x: { type: "number" },
|
|
60
|
-
y: { type: "number" },
|
|
61
|
-
},
|
|
62
|
-
required: ["x", "y"],
|
|
63
|
-
};
|
|
64
103
|
/* ----------------------------------------------------------------------- */
|
|
65
|
-
/* Tool
|
|
104
|
+
/* Tool catalogue */
|
|
66
105
|
/* ----------------------------------------------------------------------- */
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
106
|
+
const READ_ONLY = { readOnlyHint: true, destructiveHint: false, openWorldHint: false };
|
|
107
|
+
const MUTATING = { readOnlyHint: false, destructiveHint: false, openWorldHint: false };
|
|
108
|
+
const DESTRUCTIVE = { readOnlyHint: false, destructiveHint: true, openWorldHint: false };
|
|
109
|
+
const OPEN_WORLD_READ_ONLY = { readOnlyHint: true, destructiveHint: false, openWorldHint: true };
|
|
110
|
+
export const TOOL_SPECS = [
|
|
111
|
+
/* ---------------- Registry-level (no backend) ---------------- */
|
|
112
|
+
{
|
|
113
|
+
name: "list_available_devices",
|
|
114
|
+
description: "List every device currently registered in this device-mcp-server. Returns id, OS, default flag, and source (adb / desktop). Pass an `id` from this list as the `device` argument on subsequent tool calls.",
|
|
115
|
+
inputShape: {},
|
|
116
|
+
annotations: { ...READ_ONLY, title: "List available devices" },
|
|
117
|
+
requires: "registry",
|
|
118
|
+
handler: async (_args, { registry }) => {
|
|
119
|
+
const devices = registry.list();
|
|
120
|
+
const text = devices.length
|
|
121
|
+
? devices
|
|
122
|
+
.map((d) => `- ${d.id}${d.isDefault ? " (default)" : ""} os=${d.os} source=${d.source}`)
|
|
123
|
+
.join("\n")
|
|
124
|
+
: "(no devices connected)";
|
|
125
|
+
return ok(text, { devices });
|
|
76
126
|
},
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
name: "use_device",
|
|
130
|
+
description: "Set the session-sticky default device for subsequent tool calls. Only meaningful when the client uses the stdio transport (single persistent session). Stateless HTTP callers should pass `device` on every tool call instead.",
|
|
131
|
+
inputShape: {
|
|
132
|
+
device: z.string().describe("Device id from list_available_devices."),
|
|
133
|
+
},
|
|
134
|
+
annotations: { ...MUTATING, title: "Use device" },
|
|
135
|
+
requires: "registry",
|
|
136
|
+
handler: async (args, { registry, sessionId }) => {
|
|
137
|
+
const device = getStr(args, "device");
|
|
138
|
+
registry.useDevice(sessionId, device);
|
|
139
|
+
return ok(`active device set to ${device}`, { device });
|
|
140
|
+
},
|
|
141
|
+
},
|
|
142
|
+
/* ---------------- Observation ---------------- */
|
|
143
|
+
{
|
|
144
|
+
name: "screenshot",
|
|
145
|
+
description: "Capture a screenshot. Returns base64 image bytes plus the actual encoded image dimensions (width/height) and the click coordinate-system size (deviceWidth/deviceHeight). Optional `displayId` (Android multi-display), `maxEdge` (resize cap; pass `0` for no resize), `quality` (JPEG 1-100), and `format` (`png`/`jpeg`) override the env defaults `GUI_AGENT_SS_MAX_EDGE` / `GUI_AGENT_SS_JPEG_QUALITY` per call — useful when an LLM running against a 4K Android / Retina macOS host needs a smaller image without restarting the server.",
|
|
146
|
+
inputShape: {
|
|
147
|
+
displayId: z
|
|
148
|
+
.number()
|
|
149
|
+
.int()
|
|
150
|
+
.nonnegative()
|
|
151
|
+
.optional()
|
|
152
|
+
.describe("Android-only: capture display id N (Android 10+). Other backends ignore."),
|
|
153
|
+
maxEdge: z
|
|
154
|
+
.number()
|
|
155
|
+
.int()
|
|
156
|
+
.nonnegative()
|
|
157
|
+
.optional()
|
|
158
|
+
.describe("Cap the longest edge to this many pixels (resizes proportionally). Pass `0` to disable resizing for this call. Overrides `GUI_AGENT_SS_MAX_EDGE`."),
|
|
159
|
+
quality: z
|
|
160
|
+
.number()
|
|
161
|
+
.int()
|
|
162
|
+
.min(1)
|
|
163
|
+
.max(100)
|
|
164
|
+
.optional()
|
|
165
|
+
.describe("JPEG quality (1-100). Honoured only on the JPEG re-encode path. Overrides `GUI_AGENT_SS_JPEG_QUALITY`."),
|
|
166
|
+
format: z
|
|
167
|
+
.enum(["png", "jpeg"])
|
|
168
|
+
.optional()
|
|
169
|
+
.describe("Force a specific output codec. `jpeg` triggers re-encode even when the raw capture is under the resize cap (for deterministic payload sizing)."),
|
|
170
|
+
},
|
|
171
|
+
annotations: { ...READ_ONLY, title: "Take a screenshot" },
|
|
172
|
+
requires: "screenshot",
|
|
173
|
+
handler: async (args, { backend }) => {
|
|
174
|
+
const [shot, size, currentApp] = await Promise.all([
|
|
175
|
+
backend.screenshot({
|
|
176
|
+
displayId: optNum(args, "displayId"),
|
|
177
|
+
maxEdge: optNum(args, "maxEdge"),
|
|
178
|
+
quality: optNum(args, "quality"),
|
|
179
|
+
format: optStr(args, "format") ?? undefined,
|
|
180
|
+
}),
|
|
181
|
+
backend.screenSize().catch(() => null),
|
|
182
|
+
backend.getCurrentApp?.().catch(() => "") ?? Promise.resolve(""),
|
|
183
|
+
]);
|
|
184
|
+
const result = {
|
|
87
185
|
b64: shot.data.toString("base64"),
|
|
88
186
|
format: shot.format,
|
|
89
187
|
width: shot.width,
|
|
90
188
|
height: shot.height,
|
|
91
|
-
deviceWidth: size.
|
|
92
|
-
deviceHeight: size.
|
|
189
|
+
deviceWidth: size?.w ?? shot.width,
|
|
190
|
+
deviceHeight: size?.h ?? shot.height,
|
|
191
|
+
...(currentApp ? { currentApp } : {}),
|
|
192
|
+
};
|
|
193
|
+
return {
|
|
194
|
+
content: [
|
|
195
|
+
{
|
|
196
|
+
type: "image",
|
|
197
|
+
data: result.b64,
|
|
198
|
+
mimeType: shot.format === "png" ? "image/png" : "image/jpeg",
|
|
199
|
+
},
|
|
200
|
+
],
|
|
201
|
+
structuredContent: result,
|
|
93
202
|
};
|
|
94
203
|
},
|
|
95
204
|
},
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
},
|
|
106
|
-
},
|
|
107
|
-
},
|
|
205
|
+
{
|
|
206
|
+
name: "ui_dump",
|
|
207
|
+
description: "Dump the active UI hierarchy. Optional `query` filters nodes server-side. `format=raw` returns the original uiautomator XML; `format=compact` (default) returns a one-line-per-node summary.",
|
|
208
|
+
inputShape: {
|
|
209
|
+
query: z.string().optional(),
|
|
210
|
+
format: z.enum(["compact", "raw"]).optional(),
|
|
211
|
+
},
|
|
212
|
+
annotations: { ...READ_ONLY, title: "Dump UI hierarchy" },
|
|
213
|
+
requires: "ui_dump",
|
|
108
214
|
handler: async (args, { backend }) => {
|
|
109
215
|
const out = await backend.uiDump();
|
|
110
|
-
if (!out.supported)
|
|
111
|
-
return { supported: false };
|
|
112
|
-
|
|
113
|
-
const
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
},
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
handler: async (_args, { backend }) => backend.info(),
|
|
216
|
+
if (!out.supported) {
|
|
217
|
+
return ok("UI dump not supported by this backend.", { supported: false });
|
|
218
|
+
}
|
|
219
|
+
const fmt = optStr(args, "format") ?? "compact";
|
|
220
|
+
const query = optStr(args, "query");
|
|
221
|
+
const content = fmt === "raw"
|
|
222
|
+
? (out.content ?? "")
|
|
223
|
+
: formatUiXmlToolOutput(out.content ?? "", query);
|
|
224
|
+
return ok(content, { supported: true, content });
|
|
225
|
+
},
|
|
226
|
+
},
|
|
227
|
+
{
|
|
228
|
+
name: "device_info",
|
|
229
|
+
description: "Return DeviceInfo (type, name, screen size, capabilities, metadata).",
|
|
230
|
+
inputShape: {},
|
|
231
|
+
annotations: { ...READ_ONLY, title: "Device info" },
|
|
232
|
+
requires: "device_info",
|
|
233
|
+
handler: async (_args, { backend }) => {
|
|
234
|
+
const info = await backend.info();
|
|
235
|
+
return ok(`${info.name} (${info.type}) ${info.width}x${info.height}`, info);
|
|
236
|
+
},
|
|
132
237
|
},
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
handler: async (_args, { backend }) =>
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
click: {
|
|
143
|
-
descriptor: {
|
|
144
|
-
name: "click",
|
|
145
|
-
description: "Mouse click at (x, y). Equivalent to tap on touch backends.",
|
|
146
|
-
inputSchema: {
|
|
147
|
-
type: "object",
|
|
148
|
-
properties: {
|
|
149
|
-
...xy.properties,
|
|
150
|
-
button: { type: "string", enum: ["left", "right", "middle"] },
|
|
151
|
-
clicks: { type: "number" },
|
|
152
|
-
},
|
|
153
|
-
required: ["x", "y"],
|
|
154
|
-
},
|
|
155
|
-
},
|
|
156
|
-
handler: async (args, { backend }) => {
|
|
157
|
-
await backend.click(n(args, "x"), n(args, "y"), optS(args, "button") ?? "left", optN(args, "clicks") ?? 1);
|
|
158
|
-
return { ok: true };
|
|
159
|
-
},
|
|
160
|
-
},
|
|
161
|
-
double_click: {
|
|
162
|
-
descriptor: {
|
|
163
|
-
name: "double_click",
|
|
164
|
-
description: "Double click at (x, y).",
|
|
165
|
-
inputSchema: { ...xy },
|
|
166
|
-
},
|
|
167
|
-
handler: async (args, { backend }) => {
|
|
168
|
-
await backend.doubleClick(n(args, "x"), n(args, "y"));
|
|
169
|
-
return { ok: true };
|
|
170
|
-
},
|
|
171
|
-
},
|
|
172
|
-
right_click: {
|
|
173
|
-
descriptor: {
|
|
174
|
-
name: "right_click",
|
|
175
|
-
description: "Right click at (x, y).",
|
|
176
|
-
inputSchema: { ...xy },
|
|
177
|
-
},
|
|
178
|
-
handler: async (args, { backend }) => {
|
|
179
|
-
await backend.rightClick(n(args, "x"), n(args, "y"));
|
|
180
|
-
return { ok: true };
|
|
181
|
-
},
|
|
182
|
-
},
|
|
183
|
-
tap: {
|
|
184
|
-
descriptor: {
|
|
185
|
-
name: "tap",
|
|
186
|
-
description: "Tap at (x, y) on a touch device.",
|
|
187
|
-
inputSchema: { ...xy },
|
|
188
|
-
},
|
|
189
|
-
handler: async (args, { backend }) => {
|
|
190
|
-
await backend.tap(n(args, "x"), n(args, "y"));
|
|
191
|
-
return { ok: true };
|
|
192
|
-
},
|
|
193
|
-
},
|
|
194
|
-
long_press: {
|
|
195
|
-
descriptor: {
|
|
196
|
-
name: "long_press",
|
|
197
|
-
description: "Long-press at (x, y) for `durationMs` (default 3000).",
|
|
198
|
-
inputSchema: {
|
|
199
|
-
type: "object",
|
|
200
|
-
properties: {
|
|
201
|
-
...xy.properties,
|
|
202
|
-
durationMs: { type: "number" },
|
|
203
|
-
},
|
|
204
|
-
required: ["x", "y"],
|
|
205
|
-
},
|
|
206
|
-
},
|
|
207
|
-
handler: async (args, { backend }) => {
|
|
208
|
-
await backend.longPress(n(args, "x"), n(args, "y"), optN(args, "durationMs"));
|
|
209
|
-
return { ok: true };
|
|
210
|
-
},
|
|
211
|
-
},
|
|
212
|
-
drag: {
|
|
213
|
-
descriptor: {
|
|
214
|
-
name: "drag",
|
|
215
|
-
description: "Drag from (x1, y1) to (x2, y2).",
|
|
216
|
-
inputSchema: {
|
|
217
|
-
type: "object",
|
|
218
|
-
properties: {
|
|
219
|
-
x1: { type: "number" },
|
|
220
|
-
y1: { type: "number" },
|
|
221
|
-
x2: { type: "number" },
|
|
222
|
-
y2: { type: "number" },
|
|
223
|
-
durationMs: { type: "number" },
|
|
224
|
-
},
|
|
225
|
-
required: ["x1", "y1", "x2", "y2"],
|
|
226
|
-
},
|
|
227
|
-
},
|
|
228
|
-
handler: async (args, { backend }) => {
|
|
229
|
-
await backend.drag(n(args, "x1"), n(args, "y1"), n(args, "x2"), n(args, "y2"), optN(args, "durationMs"));
|
|
230
|
-
return { ok: true };
|
|
231
|
-
},
|
|
232
|
-
},
|
|
233
|
-
swipe: {
|
|
234
|
-
descriptor: {
|
|
235
|
-
name: "swipe",
|
|
236
|
-
description: "Swipe from (x1, y1) to (x2, y2).",
|
|
237
|
-
inputSchema: {
|
|
238
|
-
type: "object",
|
|
239
|
-
properties: {
|
|
240
|
-
x1: { type: "number" },
|
|
241
|
-
y1: { type: "number" },
|
|
242
|
-
x2: { type: "number" },
|
|
243
|
-
y2: { type: "number" },
|
|
244
|
-
durationMs: { type: "number" },
|
|
245
|
-
},
|
|
246
|
-
required: ["x1", "y1", "x2", "y2"],
|
|
247
|
-
},
|
|
238
|
+
{
|
|
239
|
+
name: "screen_size",
|
|
240
|
+
description: "Return current screen size (w, h) in click coordinates.",
|
|
241
|
+
inputShape: {},
|
|
242
|
+
annotations: { ...READ_ONLY, title: "Screen size" },
|
|
243
|
+
requires: "screen_size",
|
|
244
|
+
handler: async (_args, { backend }) => {
|
|
245
|
+
const size = await backend.screenSize();
|
|
246
|
+
return ok(`${size.w}x${size.h}`, size);
|
|
248
247
|
},
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
name: "list_elements",
|
|
251
|
+
description: "Return a structured list of UI elements (text/contentDesc/resourceId/className + rect + center) from the active screen. Optional `query` filters elements by substring.",
|
|
252
|
+
inputShape: {
|
|
253
|
+
query: z.string().optional(),
|
|
254
|
+
},
|
|
255
|
+
annotations: { ...READ_ONLY, title: "List UI elements" },
|
|
256
|
+
requires: "list_elements",
|
|
249
257
|
handler: async (args, { backend }) => {
|
|
250
|
-
await backend.
|
|
251
|
-
|
|
258
|
+
const elements = await backend.listElements({
|
|
259
|
+
query: optStr(args, "query"),
|
|
260
|
+
});
|
|
261
|
+
const text = elements
|
|
262
|
+
.slice(0, 200)
|
|
263
|
+
.map((e) => `(${e.center.x},${e.center.y}) ${e.className ?? ""} ${e.text ? `"${e.text}"` : ""}${e.contentDesc ? ` desc="${e.contentDesc}"` : ""}${e.clickable ? " *click" : ""}`)
|
|
264
|
+
.join("\n");
|
|
265
|
+
return ok(text || "(no elements)", { elements });
|
|
266
|
+
},
|
|
267
|
+
},
|
|
268
|
+
{
|
|
269
|
+
name: "get_orientation",
|
|
270
|
+
description: "Return the current screen orientation (portrait | landscape | reverse-portrait | reverse-landscape).",
|
|
271
|
+
inputShape: {},
|
|
272
|
+
annotations: { ...READ_ONLY, title: "Get orientation" },
|
|
273
|
+
requires: "get_orientation",
|
|
274
|
+
handler: async (_args, { backend }) => {
|
|
275
|
+
const o = await backend.getOrientation();
|
|
276
|
+
return ok(o, { orientation: o });
|
|
252
277
|
},
|
|
253
278
|
},
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
return { ok: true };
|
|
272
|
-
},
|
|
273
|
-
},
|
|
274
|
-
/* — keyboard — */
|
|
275
|
-
type: {
|
|
276
|
-
descriptor: {
|
|
277
|
-
name: "type",
|
|
278
|
-
description: "Type the given Unicode `text` into the focused field.",
|
|
279
|
-
inputSchema: {
|
|
280
|
-
type: "object",
|
|
281
|
-
properties: { text: { type: "string" } },
|
|
282
|
-
required: ["text"],
|
|
283
|
-
},
|
|
284
|
-
},
|
|
285
|
-
handler: async (args, { backend }) => {
|
|
286
|
-
await backend.typeText(s(args, "text"));
|
|
287
|
-
return { ok: true };
|
|
288
|
-
},
|
|
289
|
-
},
|
|
290
|
-
key: {
|
|
291
|
-
descriptor: {
|
|
292
|
-
name: "key",
|
|
293
|
-
description: "Press a single named key (e.g. `KEYCODE_HOME`, `Escape`).",
|
|
294
|
-
inputSchema: {
|
|
295
|
-
type: "object",
|
|
296
|
-
properties: { key: { type: "string" } },
|
|
297
|
-
required: ["key"],
|
|
298
|
-
},
|
|
299
|
-
},
|
|
300
|
-
handler: async (args, { backend }) => {
|
|
301
|
-
await backend.pressKey(s(args, "key"));
|
|
302
|
-
return { ok: true };
|
|
303
|
-
},
|
|
304
|
-
},
|
|
305
|
-
hotkey: {
|
|
306
|
-
descriptor: {
|
|
307
|
-
name: "hotkey",
|
|
308
|
-
description: "Press a hotkey combo, e.g. ['Cmd', 'Shift', 'P'].",
|
|
309
|
-
inputSchema: {
|
|
310
|
-
type: "object",
|
|
311
|
-
properties: {
|
|
312
|
-
keys: { type: "array", items: { type: "string" } },
|
|
313
|
-
},
|
|
314
|
-
required: ["keys"],
|
|
315
|
-
},
|
|
279
|
+
{
|
|
280
|
+
name: "set_orientation",
|
|
281
|
+
description: "Set the screen orientation. Mobile only.",
|
|
282
|
+
inputShape: {
|
|
283
|
+
orientation: z.enum([
|
|
284
|
+
"portrait",
|
|
285
|
+
"landscape",
|
|
286
|
+
"reverse-portrait",
|
|
287
|
+
"reverse-landscape",
|
|
288
|
+
]),
|
|
289
|
+
},
|
|
290
|
+
annotations: { ...MUTATING, title: "Set orientation" },
|
|
291
|
+
requires: "set_orientation",
|
|
292
|
+
handler: async (args, { backend }) => {
|
|
293
|
+
const o = getStr(args, "orientation");
|
|
294
|
+
await backend.setOrientation(o);
|
|
295
|
+
return ack("set_orientation");
|
|
316
296
|
},
|
|
297
|
+
},
|
|
298
|
+
/* ---------------- Pointer / gesture ---------------- */
|
|
299
|
+
{
|
|
300
|
+
name: "click",
|
|
301
|
+
description: "Mouse click at (x, y). Equivalent to tap on touch backends.",
|
|
302
|
+
inputShape: {
|
|
303
|
+
...xy,
|
|
304
|
+
button: z.enum(["left", "right", "middle"]).optional(),
|
|
305
|
+
clicks: z.number().int().positive().optional(),
|
|
306
|
+
},
|
|
307
|
+
annotations: { ...MUTATING, title: "Click" },
|
|
308
|
+
requires: "click",
|
|
317
309
|
handler: async (args, { backend }) => {
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
310
|
+
await backend.click(num(args, "x"), num(args, "y"), optStr(args, "button") ?? "left", optNum(args, "clicks") ?? 1);
|
|
311
|
+
return ack("click");
|
|
312
|
+
},
|
|
313
|
+
},
|
|
314
|
+
{
|
|
315
|
+
name: "double_click",
|
|
316
|
+
description: "Double click at (x, y).",
|
|
317
|
+
inputShape: { ...xy },
|
|
318
|
+
annotations: { ...MUTATING, title: "Double-click" },
|
|
319
|
+
requires: "double_click",
|
|
320
|
+
handler: async (args, { backend }) => {
|
|
321
|
+
await backend.doubleClick(num(args, "x"), num(args, "y"));
|
|
322
|
+
return ack("double_click");
|
|
323
|
+
},
|
|
324
|
+
},
|
|
325
|
+
{
|
|
326
|
+
name: "right_click",
|
|
327
|
+
description: "Right click at (x, y).",
|
|
328
|
+
inputShape: { ...xy },
|
|
329
|
+
annotations: { ...MUTATING, title: "Right-click" },
|
|
330
|
+
requires: "right_click",
|
|
331
|
+
handler: async (args, { backend }) => {
|
|
332
|
+
await backend.rightClick(num(args, "x"), num(args, "y"));
|
|
333
|
+
return ack("right_click");
|
|
334
|
+
},
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
name: "tap",
|
|
338
|
+
description: "Tap at (x, y) on a touch device.",
|
|
339
|
+
inputShape: { ...xy },
|
|
340
|
+
annotations: { ...MUTATING, title: "Tap" },
|
|
341
|
+
requires: "tap",
|
|
342
|
+
handler: async (args, { backend }) => {
|
|
343
|
+
await backend.tap(num(args, "x"), num(args, "y"));
|
|
344
|
+
return ack("tap");
|
|
345
|
+
},
|
|
346
|
+
},
|
|
347
|
+
{
|
|
348
|
+
name: "double_tap",
|
|
349
|
+
description: "Double-tap at (x, y) on a touch device. Mobile-side alias for `double_click` — the underlying backend method is the same (`doubleClick`); this wire name exists for prompt parity with `mobile_double_tap_on_screen` (mobile-mcp).",
|
|
350
|
+
inputShape: { ...xy },
|
|
351
|
+
annotations: { ...MUTATING, title: "Double-tap" },
|
|
352
|
+
requires: "double_tap",
|
|
353
|
+
handler: async (args, { backend }) => {
|
|
354
|
+
await backend.doubleClick(num(args, "x"), num(args, "y"));
|
|
355
|
+
return ack("double_tap");
|
|
356
|
+
},
|
|
357
|
+
},
|
|
358
|
+
{
|
|
359
|
+
name: "long_press",
|
|
360
|
+
description: "Long-press at (x, y) for `durationMs` (default 3000, clamped to 1-10000 ms — mobile-mcp parity, r3).",
|
|
361
|
+
inputShape: {
|
|
362
|
+
...xy,
|
|
363
|
+
durationMs: durationMsField,
|
|
364
|
+
},
|
|
365
|
+
annotations: { ...MUTATING, title: "Long-press" },
|
|
366
|
+
requires: "long_press",
|
|
367
|
+
handler: async (args, { backend }) => {
|
|
368
|
+
await backend.longPress(num(args, "x"), num(args, "y"), optNum(args, "durationMs"));
|
|
369
|
+
return ack("long_press");
|
|
370
|
+
},
|
|
371
|
+
},
|
|
372
|
+
{
|
|
373
|
+
name: "drag",
|
|
374
|
+
description: "Drag from (x1, y1) to (x2, y2). Optional `durationMs` clamped to 1-10000.",
|
|
375
|
+
inputShape: {
|
|
376
|
+
...segment,
|
|
377
|
+
durationMs: durationMsField,
|
|
378
|
+
},
|
|
379
|
+
annotations: { ...MUTATING, title: "Drag" },
|
|
380
|
+
requires: "drag",
|
|
381
|
+
handler: async (args, { backend }) => {
|
|
382
|
+
await backend.drag(num(args, "x1"), num(args, "y1"), num(args, "x2"), num(args, "y2"), optNum(args, "durationMs"));
|
|
383
|
+
return ack("drag");
|
|
384
|
+
},
|
|
385
|
+
},
|
|
386
|
+
{
|
|
387
|
+
name: "swipe",
|
|
388
|
+
description: "Swipe from (x1, y1) to (x2, y2). Optional `durationMs` clamped to 1-10000.",
|
|
389
|
+
inputShape: {
|
|
390
|
+
...segment,
|
|
391
|
+
durationMs: durationMsField,
|
|
392
|
+
},
|
|
393
|
+
annotations: { ...MUTATING, title: "Swipe" },
|
|
394
|
+
requires: "swipe",
|
|
395
|
+
handler: async (args, { backend }) => {
|
|
396
|
+
await backend.swipe(num(args, "x1"), num(args, "y1"), num(args, "x2"), num(args, "y2"), optNum(args, "durationMs"));
|
|
397
|
+
return ack("swipe");
|
|
398
|
+
},
|
|
399
|
+
},
|
|
400
|
+
{
|
|
401
|
+
name: "scroll",
|
|
402
|
+
description: "Scroll at (x, y) in `direction` (up | down | left | right). `amount` is a multiplier — each unit ≈ 300 px of swipe distance (default 1). Pass `(0, 0)` to auto-pick the centre of the screen.",
|
|
403
|
+
inputShape: {
|
|
404
|
+
x: z.number().optional(),
|
|
405
|
+
y: z.number().optional(),
|
|
406
|
+
direction: z.enum(["up", "down", "left", "right"]),
|
|
407
|
+
amount: z
|
|
408
|
+
.number()
|
|
409
|
+
.positive()
|
|
410
|
+
.optional()
|
|
411
|
+
.describe("Scroll distance multiplier (≈ 300 px per unit). Default 1; pass 2-3 for long pages."),
|
|
412
|
+
},
|
|
413
|
+
annotations: { ...MUTATING, title: "Scroll" },
|
|
414
|
+
requires: "scroll",
|
|
415
|
+
handler: async (args, { backend }) => {
|
|
416
|
+
await backend.scroll(optNum(args, "x") ?? 0, optNum(args, "y") ?? 0, getStr(args, "direction"), optNum(args, "amount"));
|
|
417
|
+
return ack("scroll");
|
|
418
|
+
},
|
|
419
|
+
},
|
|
420
|
+
/* ---------------- Keyboard ---------------- */
|
|
421
|
+
{
|
|
422
|
+
name: "type",
|
|
423
|
+
description: "Type the given Unicode `text` into the focused field. Set `submit: true` to auto-press ENTER after typing — saves a round-trip on chat / search / login forms.",
|
|
424
|
+
inputShape: {
|
|
425
|
+
text: z.string(),
|
|
426
|
+
submit: z
|
|
427
|
+
.boolean()
|
|
428
|
+
.optional()
|
|
429
|
+
.describe("When true, send a press-ENTER (mobile: KEYCODE_ENTER, desktop: Return) after the text — useful for search bars / chat composers."),
|
|
430
|
+
},
|
|
431
|
+
annotations: { ...MUTATING, title: "Type text" },
|
|
432
|
+
requires: "type",
|
|
433
|
+
handler: async (args, { backend }) => {
|
|
434
|
+
await backend.typeText(getStr(args, "text"));
|
|
435
|
+
if (args.submit === true) {
|
|
436
|
+
// Mobile uses Android `KEYCODE_ENTER`; desktop accepts the
|
|
437
|
+
// friendly `Enter` / `Return` strings via their own pressKey
|
|
438
|
+
// mapping. We try `KEYCODE_ENTER` first, fall back to `Enter`
|
|
439
|
+
// for desktop backends that don't speak Android keycodes.
|
|
440
|
+
try {
|
|
441
|
+
await backend.pressKey("KEYCODE_ENTER");
|
|
442
|
+
}
|
|
443
|
+
catch {
|
|
444
|
+
await backend.pressKey("Enter");
|
|
445
|
+
}
|
|
321
446
|
}
|
|
322
|
-
|
|
323
|
-
|
|
447
|
+
return ack("type");
|
|
448
|
+
},
|
|
449
|
+
},
|
|
450
|
+
{
|
|
451
|
+
name: "key",
|
|
452
|
+
description: "Press a single named key (e.g. `KEYCODE_HOME`, `Escape`).",
|
|
453
|
+
inputShape: { key: z.string() },
|
|
454
|
+
annotations: { ...MUTATING, title: "Press key" },
|
|
455
|
+
requires: "key",
|
|
456
|
+
handler: async (args, { backend }) => {
|
|
457
|
+
await backend.pressKey(getStr(args, "key"));
|
|
458
|
+
return ack("key");
|
|
324
459
|
},
|
|
325
460
|
},
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
461
|
+
{
|
|
462
|
+
name: "hotkey",
|
|
463
|
+
description: "Press a hotkey combo, e.g. ['Cmd', 'Shift', 'P'].",
|
|
464
|
+
inputShape: { keys: z.array(z.string()).min(1) },
|
|
465
|
+
annotations: { ...MUTATING, title: "Press hotkey" },
|
|
466
|
+
requires: "hotkey",
|
|
467
|
+
handler: async (args, { backend }) => {
|
|
468
|
+
const keys = args.keys;
|
|
469
|
+
await backend.hotkey(...keys);
|
|
470
|
+
return ack("hotkey");
|
|
332
471
|
},
|
|
472
|
+
},
|
|
473
|
+
/* ---------------- System nav ---------------- */
|
|
474
|
+
{
|
|
475
|
+
name: "back",
|
|
476
|
+
description: "Press the system back button (Android) / browser back.",
|
|
477
|
+
inputShape: {},
|
|
478
|
+
annotations: { ...MUTATING, title: "Back" },
|
|
479
|
+
requires: "back",
|
|
333
480
|
handler: async (_args, { backend }) => {
|
|
334
481
|
await backend.back();
|
|
335
|
-
return
|
|
482
|
+
return ack("back");
|
|
336
483
|
},
|
|
337
484
|
},
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
485
|
+
{
|
|
486
|
+
name: "home",
|
|
487
|
+
description: "Press the system home button (Android).",
|
|
488
|
+
inputShape: {},
|
|
489
|
+
annotations: { ...MUTATING, title: "Home" },
|
|
490
|
+
requires: "home",
|
|
344
491
|
handler: async (_args, { backend }) => {
|
|
345
492
|
await backend.home();
|
|
346
|
-
return
|
|
347
|
-
},
|
|
348
|
-
},
|
|
349
|
-
/* — app — */
|
|
350
|
-
launch_app: {
|
|
351
|
-
descriptor: {
|
|
352
|
-
name: "launch_app",
|
|
353
|
-
description: "Launch app by package or display name.",
|
|
354
|
-
inputSchema: {
|
|
355
|
-
type: "object",
|
|
356
|
-
properties: {
|
|
357
|
-
pkg: { type: "string" },
|
|
358
|
-
activity: { type: "string" },
|
|
359
|
-
},
|
|
360
|
-
required: ["pkg"],
|
|
361
|
-
},
|
|
362
|
-
},
|
|
363
|
-
handler: async (args, { backend }) => {
|
|
364
|
-
await backend.launchApp(s(args, "pkg"), optS(args, "activity"));
|
|
365
|
-
return { ok: true };
|
|
366
|
-
},
|
|
367
|
-
},
|
|
368
|
-
/* — shell / files — */
|
|
369
|
-
execute_command: {
|
|
370
|
-
descriptor: {
|
|
371
|
-
name: "execute_command",
|
|
372
|
-
description: "Run a shell command on the sandbox. Returns stdout/stderr/exitCode.",
|
|
373
|
-
inputSchema: {
|
|
374
|
-
type: "object",
|
|
375
|
-
properties: {
|
|
376
|
-
body: { type: "string" },
|
|
377
|
-
timeoutS: { type: "number" },
|
|
378
|
-
cwd: { type: "string" },
|
|
379
|
-
},
|
|
380
|
-
required: ["body"],
|
|
381
|
-
},
|
|
382
|
-
},
|
|
383
|
-
handler: async (args, { backend }) => backend.executeCommand(s(args, "body"), {
|
|
384
|
-
timeoutS: optN(args, "timeoutS"),
|
|
385
|
-
cwd: optS(args, "cwd"),
|
|
386
|
-
}),
|
|
387
|
-
},
|
|
388
|
-
file_read: {
|
|
389
|
-
descriptor: {
|
|
390
|
-
name: "file_read",
|
|
391
|
-
description: "Read a file from the sandbox. Returns base64 content.",
|
|
392
|
-
inputSchema: {
|
|
393
|
-
type: "object",
|
|
394
|
-
properties: { path: { type: "string" } },
|
|
395
|
-
required: ["path"],
|
|
396
|
-
},
|
|
397
|
-
},
|
|
398
|
-
handler: async (args, { backend }) => {
|
|
399
|
-
const buf = await backend.fileRead(s(args, "path"));
|
|
400
|
-
return { contentB64: buf.toString("base64"), bytes: buf.length };
|
|
401
|
-
},
|
|
402
|
-
},
|
|
403
|
-
file_write: {
|
|
404
|
-
descriptor: {
|
|
405
|
-
name: "file_write",
|
|
406
|
-
description: "Write base64 content to a file in the sandbox.",
|
|
407
|
-
inputSchema: {
|
|
408
|
-
type: "object",
|
|
409
|
-
properties: {
|
|
410
|
-
path: { type: "string" },
|
|
411
|
-
contentB64: { type: "string" },
|
|
412
|
-
},
|
|
413
|
-
required: ["path", "contentB64"],
|
|
414
|
-
},
|
|
415
|
-
},
|
|
416
|
-
handler: async (args, { backend }) => {
|
|
417
|
-
const buf = Buffer.from(s(args, "contentB64"), "base64");
|
|
418
|
-
await backend.fileWrite(s(args, "path"), buf);
|
|
419
|
-
return { ok: true, bytes: buf.length };
|
|
420
|
-
},
|
|
421
|
-
},
|
|
422
|
-
list_directory: {
|
|
423
|
-
descriptor: {
|
|
424
|
-
name: "list_directory",
|
|
425
|
-
description: "List directory entries.",
|
|
426
|
-
inputSchema: {
|
|
427
|
-
type: "object",
|
|
428
|
-
properties: { path: { type: "string" } },
|
|
429
|
-
required: ["path"],
|
|
430
|
-
},
|
|
431
|
-
},
|
|
432
|
-
handler: async (args, { backend }) => {
|
|
433
|
-
const entries = await backend.listDirectory(s(args, "path"));
|
|
434
|
-
return { entries };
|
|
435
|
-
},
|
|
436
|
-
},
|
|
437
|
-
/**
|
|
438
|
-
* `install_package` — backend-agnostic native-package install.
|
|
439
|
-
*
|
|
440
|
-
* Dispatches to whichever installer the backend implements (APK on
|
|
441
|
-
* Android, DMG/PKG on macOS, MSI/EXE on Windows, DEB/RPM on Linux).
|
|
442
|
-
* Wire-compatible alias `install_apk` is kept registered for
|
|
443
|
-
* existing agents that still hardcode the Android verb name.
|
|
444
|
-
*
|
|
445
|
-
* Reference: plan §3.4 — "工具命名上,去掉 backend 私有词".
|
|
446
|
-
*/
|
|
447
|
-
install_package: installToolEntry("install_package"),
|
|
448
|
-
install_apk: installToolEntry("install_apk"),
|
|
449
|
-
};
|
|
450
|
-
function installToolEntry(name) {
|
|
451
|
-
return {
|
|
452
|
-
descriptor: {
|
|
453
|
-
name,
|
|
454
|
-
description: name === "install_package"
|
|
455
|
-
? "Install a native package at `path`. Backend-agnostic — dispatches " +
|
|
456
|
-
"to APK / DMG / MSI / DEB based on the active backend."
|
|
457
|
-
: "Install an APK at `path` (Android only). Alias for `install_package`.",
|
|
458
|
-
inputSchema: {
|
|
459
|
-
type: "object",
|
|
460
|
-
properties: { path: { type: "string" } },
|
|
461
|
-
required: ["path"],
|
|
462
|
-
},
|
|
463
|
-
},
|
|
464
|
-
handler: async (args, { backend }) => {
|
|
465
|
-
await backend.install(s(args, "path"));
|
|
466
|
-
return { ok: true };
|
|
493
|
+
return ack("home");
|
|
467
494
|
},
|
|
468
|
-
}
|
|
469
|
-
|
|
495
|
+
},
|
|
496
|
+
/* ---------------- App lifecycle ---------------- */
|
|
497
|
+
{
|
|
498
|
+
name: "launch_app",
|
|
499
|
+
description: "Launch app by package or display name. Optional `locale` (BCP-47, e.g. `fr-FR`, `zh-Hant-TW`) applies a per-app locale on Android 13+ via `cmd locale set-app-locales` (best-effort — silently no-ops on older OS or non-Android backends). Mirrors mobile-mcp's `mobile_launch_app.locale`.",
|
|
500
|
+
inputShape: {
|
|
501
|
+
pkg: z.string(),
|
|
502
|
+
activity: z.string().optional(),
|
|
503
|
+
locale: z
|
|
504
|
+
.string()
|
|
505
|
+
.optional()
|
|
506
|
+
.describe("BCP-47 locale tag (e.g. `fr-FR`, `de`, `zh-Hant-TW`). Best-effort per-app locale on Android 13+; ignored on older Android, iOS, and desktop."),
|
|
507
|
+
},
|
|
508
|
+
annotations: { ...MUTATING, title: "Launch app", openWorldHint: true },
|
|
509
|
+
requires: "launch_app",
|
|
510
|
+
handler: async (args, { backend }) => {
|
|
511
|
+
// r3: locale must match `[a-zA-Z0-9,\\- ]+` so a prompt-injected
|
|
512
|
+
// `$(...)` value is rejected before it reaches `adb shell`. The
|
|
513
|
+
// sh\`...\` template literal in android-adb already escapes the
|
|
514
|
+
// value, so this is defence-in-depth + a clearer error.
|
|
515
|
+
const rawLocale = optStr(args, "locale");
|
|
516
|
+
const locale = rawLocale ? assertValidLocale(rawLocale) : undefined;
|
|
517
|
+
await backend.launchApp(getStr(args, "pkg"), {
|
|
518
|
+
activity: optStr(args, "activity"),
|
|
519
|
+
...(locale !== undefined ? { locale } : {}),
|
|
520
|
+
});
|
|
521
|
+
return ack("launch_app");
|
|
522
|
+
},
|
|
523
|
+
},
|
|
524
|
+
{
|
|
525
|
+
name: "list_apps",
|
|
526
|
+
description: "List installed apps. Set `includeSystem` to include OS-shipped packages (Android only). Set `launchableOnly` to drop headless system services and return only apps with a launcher activity (mobile-mcp parity, recommended for LLM prompts).",
|
|
527
|
+
inputShape: {
|
|
528
|
+
includeSystem: z.boolean().optional(),
|
|
529
|
+
launchableOnly: z
|
|
530
|
+
.boolean()
|
|
531
|
+
.optional()
|
|
532
|
+
.describe("Android-only: when true, run `cmd package query-activities -a MAIN -c LAUNCHER` and dedupe by package id. Drops headless services / system providers."),
|
|
533
|
+
},
|
|
534
|
+
annotations: { ...READ_ONLY, title: "List apps" },
|
|
535
|
+
requires: "list_apps",
|
|
536
|
+
handler: async (args, { backend }) => {
|
|
537
|
+
const includeSystem = args.includeSystem === true;
|
|
538
|
+
const launchableOnly = args.launchableOnly === true;
|
|
539
|
+
const apps = await backend.listApps({ includeSystem, launchableOnly });
|
|
540
|
+
const text = apps.length
|
|
541
|
+
? apps
|
|
542
|
+
.slice(0, 100)
|
|
543
|
+
.map((a) => `${a.packageId}${a.label ? ` ${a.label}` : ""}`)
|
|
544
|
+
.join("\n")
|
|
545
|
+
: "(no apps)";
|
|
546
|
+
return ok(text, { apps });
|
|
547
|
+
},
|
|
548
|
+
},
|
|
549
|
+
{
|
|
550
|
+
name: "terminate_app",
|
|
551
|
+
description: "Force-stop an app by package id (Android `am force-stop`). `pkg` is validated against the Android package-name grammar (`[a-zA-Z0-9._]+`) at the tool boundary (mobile-mcp parity, r3).",
|
|
552
|
+
inputShape: { pkg: z.string() },
|
|
553
|
+
annotations: { ...DESTRUCTIVE, title: "Terminate app" },
|
|
554
|
+
requires: "terminate_app",
|
|
555
|
+
handler: async (args, { backend }) => {
|
|
556
|
+
// r3: fail malformed package ids EARLY at the tool boundary —
|
|
557
|
+
// before the runner shell-escapes them and `pm` returns a vague
|
|
558
|
+
// "package not found". `terminate_app` does NOT accept display
|
|
559
|
+
// names so a strict validator is safe here.
|
|
560
|
+
const pkg = assertValidPackageName(getStr(args, "pkg"));
|
|
561
|
+
await backend.terminateApp(pkg);
|
|
562
|
+
return ack("terminate_app");
|
|
563
|
+
},
|
|
564
|
+
},
|
|
565
|
+
{
|
|
566
|
+
name: "open_url",
|
|
567
|
+
description: "Open a URL in the platform's default browser / VIEW intent. Mobile uses `am start -a android.intent.action.VIEW`; macOS uses `open`; Linux/Windows are intentionally unsupported on the stub backends. By default only `http`/`https` schemes are accepted — set `BEEOS_DEVICE_ALLOW_UNSAFE_URLS=1` to permit `intent:`/`file:`/custom URLs (mobile-mcp's `MOBILEMCP_ALLOW_UNSAFE_URLS=1` is also honoured).",
|
|
568
|
+
inputShape: { url: z.string().min(1) },
|
|
569
|
+
annotations: { ...OPEN_WORLD_READ_ONLY, title: "Open URL" },
|
|
570
|
+
requires: "open_url",
|
|
571
|
+
handler: async (args, { backend }) => {
|
|
572
|
+
// Validate the URL at the registry layer so backends without
|
|
573
|
+
// native scheme guards (mock desktop / mac / linux / windows)
|
|
574
|
+
// pick up the same protection as Android.
|
|
575
|
+
const url = getStr(args, "url");
|
|
576
|
+
assertSafeUrl(url);
|
|
577
|
+
await backend.openUrl(url);
|
|
578
|
+
return ack("open_url");
|
|
579
|
+
},
|
|
580
|
+
},
|
|
581
|
+
/* ---------------- Shell / files ---------------- */
|
|
582
|
+
{
|
|
583
|
+
name: "execute_command",
|
|
584
|
+
description: "Run a shell command on the sandbox. Returns stdout/stderr/exitCode.",
|
|
585
|
+
inputShape: {
|
|
586
|
+
body: z.string(),
|
|
587
|
+
timeoutS: z.number().positive().optional(),
|
|
588
|
+
cwd: z.string().optional(),
|
|
589
|
+
},
|
|
590
|
+
annotations: { ...DESTRUCTIVE, title: "Execute shell command", openWorldHint: true },
|
|
591
|
+
requires: "execute_command",
|
|
592
|
+
handler: async (args, { backend }) => {
|
|
593
|
+
const out = await backend.executeCommand(getStr(args, "body"), {
|
|
594
|
+
timeoutS: optNum(args, "timeoutS"),
|
|
595
|
+
cwd: optStr(args, "cwd"),
|
|
596
|
+
});
|
|
597
|
+
const text = out.stdout || out.stderr || `exit=${out.exitCode}`;
|
|
598
|
+
return ok(text, out);
|
|
599
|
+
},
|
|
600
|
+
},
|
|
601
|
+
{
|
|
602
|
+
name: "file_read",
|
|
603
|
+
description: "Read a file from the sandbox. Returns base64 content + size.",
|
|
604
|
+
inputShape: { path: z.string() },
|
|
605
|
+
annotations: { ...READ_ONLY, title: "Read file" },
|
|
606
|
+
requires: "file_read",
|
|
607
|
+
handler: async (args, { backend }) => {
|
|
608
|
+
const buf = await backend.fileRead(getStr(args, "path"));
|
|
609
|
+
const result = { contentB64: buf.toString("base64"), bytes: buf.length };
|
|
610
|
+
return ok(`${buf.length} bytes`, result);
|
|
611
|
+
},
|
|
612
|
+
},
|
|
613
|
+
{
|
|
614
|
+
name: "file_write",
|
|
615
|
+
description: "Write base64 content to a file in the sandbox.",
|
|
616
|
+
inputShape: {
|
|
617
|
+
path: z.string(),
|
|
618
|
+
contentB64: z.string(),
|
|
619
|
+
},
|
|
620
|
+
annotations: { ...DESTRUCTIVE, title: "Write file" },
|
|
621
|
+
requires: "file_write",
|
|
622
|
+
handler: async (args, { backend }) => {
|
|
623
|
+
const buf = Buffer.from(getStr(args, "contentB64"), "base64");
|
|
624
|
+
await backend.fileWrite(getStr(args, "path"), buf);
|
|
625
|
+
return ok(`wrote ${buf.length} bytes`, { ok: true, bytes: buf.length });
|
|
626
|
+
},
|
|
627
|
+
},
|
|
628
|
+
{
|
|
629
|
+
name: "list_directory",
|
|
630
|
+
description: "List directory entries.",
|
|
631
|
+
inputShape: { path: z.string() },
|
|
632
|
+
annotations: { ...READ_ONLY, title: "List directory" },
|
|
633
|
+
requires: "list_directory",
|
|
634
|
+
handler: async (args, { backend }) => {
|
|
635
|
+
const entries = await backend.listDirectory(getStr(args, "path"));
|
|
636
|
+
return ok(`${entries.length} entries`, { entries });
|
|
637
|
+
},
|
|
638
|
+
},
|
|
639
|
+
{
|
|
640
|
+
name: "install_package",
|
|
641
|
+
description: "Install a native package at `path`. Backend-agnostic — dispatches to APK / DMG / MSI / DEB based on the active backend.",
|
|
642
|
+
inputShape: { path: z.string() },
|
|
643
|
+
annotations: { ...DESTRUCTIVE, title: "Install package" },
|
|
644
|
+
requires: "install_package",
|
|
645
|
+
handler: async (args, { backend }) => {
|
|
646
|
+
await backend.install(getStr(args, "path"));
|
|
647
|
+
return ack("install_package");
|
|
648
|
+
},
|
|
649
|
+
},
|
|
650
|
+
{
|
|
651
|
+
name: "install_apk",
|
|
652
|
+
description: "Install an APK at `path` (Android only). Wire-compatible alias for `install_package`.",
|
|
653
|
+
inputShape: { path: z.string() },
|
|
654
|
+
annotations: { ...DESTRUCTIVE, title: "Install APK" },
|
|
655
|
+
requires: "install_apk",
|
|
656
|
+
handler: async (args, { backend }) => {
|
|
657
|
+
await backend.install(getStr(args, "path"));
|
|
658
|
+
return ack("install_apk");
|
|
659
|
+
},
|
|
660
|
+
},
|
|
661
|
+
{
|
|
662
|
+
name: "uninstall_app",
|
|
663
|
+
description: "Uninstall an app by package id (Android `adb uninstall`). `pkg` is validated against the Android package-name grammar (`[a-zA-Z0-9._]+`) at the tool boundary (mobile-mcp parity, r3). The dual of `install_apk` — multi-device CI / test flows need both.",
|
|
664
|
+
inputShape: { pkg: z.string() },
|
|
665
|
+
annotations: { ...DESTRUCTIVE, title: "Uninstall app" },
|
|
666
|
+
requires: "uninstall_app",
|
|
667
|
+
handler: async (args, { backend }) => {
|
|
668
|
+
const pkg = assertValidPackageName(getStr(args, "pkg"));
|
|
669
|
+
await backend.uninstallApp(pkg);
|
|
670
|
+
return ack("uninstall_app");
|
|
671
|
+
},
|
|
672
|
+
},
|
|
673
|
+
/* ---------------- Diagnostics (mobile-mcp parity) ---------------- */
|
|
674
|
+
{
|
|
675
|
+
name: "list_crashes",
|
|
676
|
+
description: "List recent crash / ANR / WTF entries from `dumpsys dropbox`. Returns a `CrashSummary[]` — pass each `id` to `get_crash` for the full body.",
|
|
677
|
+
inputShape: {},
|
|
678
|
+
annotations: { ...READ_ONLY, title: "List crashes" },
|
|
679
|
+
requires: "list_crashes",
|
|
680
|
+
handler: async (_args, { backend }) => {
|
|
681
|
+
const crashes = await backend.listCrashes();
|
|
682
|
+
const text = crashes.length
|
|
683
|
+
? crashes
|
|
684
|
+
.slice(0, 50)
|
|
685
|
+
.map((c) => `${c.timestamp} ${c.type} ${c.app ?? "(no-app)"} id=${c.id}${c.headline ? `\n ${c.headline}` : ""}`)
|
|
686
|
+
.join("\n")
|
|
687
|
+
: "(no crashes)";
|
|
688
|
+
return ok(text, { crashes });
|
|
689
|
+
},
|
|
690
|
+
},
|
|
691
|
+
{
|
|
692
|
+
name: "get_crash",
|
|
693
|
+
description: "Fetch the full body (stack trace / log block) of a specific crash entry. `id` comes from `list_crashes`.",
|
|
694
|
+
inputShape: { id: z.string() },
|
|
695
|
+
annotations: { ...READ_ONLY, title: "Get crash details" },
|
|
696
|
+
requires: "get_crash",
|
|
697
|
+
handler: async (args, { backend }) => {
|
|
698
|
+
const id = getStr(args, "id");
|
|
699
|
+
const body = await backend.getCrash(id);
|
|
700
|
+
return ok(body, { id, body });
|
|
701
|
+
},
|
|
702
|
+
},
|
|
703
|
+
/* ---------------- Screen recording (mobile-mcp parity) ---------------- */
|
|
704
|
+
{
|
|
705
|
+
name: "screen_record_start",
|
|
706
|
+
description: "Start recording the device screen. Returns a `ScreenRecordHandle` whose `id` must be passed to `screen_record_stop`. Each recording is hard-capped at `timeLimitS` seconds (1-300, default 300; matches Android `screenrecord --time-limit` ceiling) — a missed `stop` will not leak space on the device. Optional `output` registers the host-side `.mp4` destination at start time (mobile-mcp parity, r3); when set, `screen_record_stop` writes there unless overridden.",
|
|
707
|
+
inputShape: {
|
|
708
|
+
output: z
|
|
709
|
+
.string()
|
|
710
|
+
.optional()
|
|
711
|
+
.describe("Optional host-side destination path (`.mp4`). Sandboxed to cwd / tmpdir / `BEEOS_DEVICE_OUTPUT_DIRS`. When set, `screen_record_stop` writes the pulled MP4 here unless its own `path` overrides."),
|
|
712
|
+
timeLimitS: z
|
|
713
|
+
.number()
|
|
714
|
+
.int()
|
|
715
|
+
.min(1)
|
|
716
|
+
.max(300)
|
|
717
|
+
.optional()
|
|
718
|
+
.describe("Hard cap on recording length in seconds (1-300, mobile-mcp range). Defaults to the backend default (300s on Android)."),
|
|
719
|
+
},
|
|
720
|
+
annotations: { ...MUTATING, title: "Start screen recording" },
|
|
721
|
+
requires: "screen_record_start",
|
|
722
|
+
handler: async (args, { backend }) => {
|
|
723
|
+
// r3: caller may pre-declare the host-side `.mp4` path and the
|
|
724
|
+
// device-side time-limit. Both pass through their respective
|
|
725
|
+
// sandboxes / clamps before reaching the backend.
|
|
726
|
+
const userOutput = optStr(args, "output");
|
|
727
|
+
const localPath = userOutput
|
|
728
|
+
? assertSafeOutputPathWithExt(userOutput, [".mp4"])
|
|
729
|
+
: undefined;
|
|
730
|
+
const handle = await backend.startScreenRecord({
|
|
731
|
+
...(localPath !== undefined ? { localPath } : {}),
|
|
732
|
+
...(args.timeLimitS !== undefined
|
|
733
|
+
? { timeLimitS: optNum(args, "timeLimitS") }
|
|
734
|
+
: {}),
|
|
735
|
+
});
|
|
736
|
+
return ok(`recording started: id=${handle.id} cap=${handle.timeLimitS}s${handle.localPath ? ` -> ${handle.localPath}` : ""}`, handle);
|
|
737
|
+
},
|
|
738
|
+
},
|
|
739
|
+
{
|
|
740
|
+
name: "screen_record_stop",
|
|
741
|
+
description: "Stop the recording started by `screen_record_start`, pull the MP4 to the host filesystem, and return its path / size / duration. Destination resolution: caller-supplied `path` here wins over the start-time `output`; both are sandboxed to cwd / tmpdir / `BEEOS_DEVICE_OUTPUT_DIRS` AND must end in `.mp4` (mobile-mcp parity, r3). When neither is supplied, the backend picks a per-recording temp path.",
|
|
742
|
+
inputShape: {
|
|
743
|
+
id: z.string(),
|
|
744
|
+
path: z
|
|
745
|
+
.string()
|
|
746
|
+
.optional()
|
|
747
|
+
.describe("Optional host-side destination path (`.mp4`). Overrides the start-time `output` when provided. Must resolve under cwd, the system tmpdir, or one of `BEEOS_DEVICE_OUTPUT_DIRS`."),
|
|
748
|
+
},
|
|
749
|
+
annotations: { ...MUTATING, title: "Stop screen recording" },
|
|
750
|
+
requires: "screen_record_stop",
|
|
751
|
+
handler: async (args, { backend }) => {
|
|
752
|
+
// r3: extension allow-list (.mp4) on top of the sandbox check —
|
|
753
|
+
// a screenrecord that lands as `.mov` confuses every player on
|
|
754
|
+
// earth. Default (no path) lets the backend honour the start-
|
|
755
|
+
// time `output` (set via screen_record_start) or fall back to a
|
|
756
|
+
// tmp dir, both already-trusted.
|
|
757
|
+
const userPath = optStr(args, "path");
|
|
758
|
+
const safePath = userPath
|
|
759
|
+
? assertSafeOutputPathWithExt(userPath, [".mp4"])
|
|
760
|
+
: undefined;
|
|
761
|
+
const result = await backend.stopScreenRecord(getStr(args, "id"), {
|
|
762
|
+
path: safePath,
|
|
763
|
+
});
|
|
764
|
+
return ok(`recording saved: ${result.path} (${result.bytes} bytes, ${result.durationMs}ms)`, result);
|
|
765
|
+
},
|
|
766
|
+
},
|
|
767
|
+
/* ---------------- screenshot_to_file ---------------- */
|
|
768
|
+
{
|
|
769
|
+
name: "screenshot_to_file",
|
|
770
|
+
description: "Capture a screenshot and write the encoded bytes directly to a host filesystem `path`. Bypasses the JSON-RPC body cap (16 MB) — use this when an in-band `screenshot` would be too large (4K Android, Retina captures). `path` is sandboxed to cwd / tmpdir / `BEEOS_DEVICE_OUTPUT_DIRS` AND must end in `.png` / `.jpg` / `.jpeg` (mobile-mcp parity, r3).",
|
|
771
|
+
inputShape: {
|
|
772
|
+
path: z
|
|
773
|
+
.string()
|
|
774
|
+
.describe("Host-side destination path (`.png` / `.jpg` / `.jpeg`). Must resolve under cwd, the system tmpdir, or one of `BEEOS_DEVICE_OUTPUT_DIRS` (`,`/`:`-separated)."),
|
|
775
|
+
displayId: z
|
|
776
|
+
.number()
|
|
777
|
+
.int()
|
|
778
|
+
.nonnegative()
|
|
779
|
+
.optional()
|
|
780
|
+
.describe("Android-only: capture display id N (Android 10+, see `getDisplayCount`). Other backends ignore."),
|
|
781
|
+
},
|
|
782
|
+
annotations: { ...READ_ONLY, title: "Save screenshot to file" },
|
|
783
|
+
requires: "screenshot_to_file",
|
|
784
|
+
handler: async (args, { backend }) => {
|
|
785
|
+
// 0.4.1 sandbox + 0.4.1 r3 extension allow-list: caller-supplied
|
|
786
|
+
// destination must resolve under cwd/tmpdir/BEEOS_DEVICE_OUTPUT_DIRS
|
|
787
|
+
// AND end in a screenshot extension. The extension check is
|
|
788
|
+
// mobile-mcp parity (validateFileExtension) — without it, a
|
|
789
|
+
// prompt-injected agent could write `cute.png.exe` (cwd-relative).
|
|
790
|
+
const safePath = assertSafeOutputPathWithExt(getStr(args, "path"), [
|
|
791
|
+
".png",
|
|
792
|
+
".jpg",
|
|
793
|
+
".jpeg",
|
|
794
|
+
]);
|
|
795
|
+
const result = await backend.saveScreenshot(safePath, {
|
|
796
|
+
displayId: optNum(args, "displayId"),
|
|
797
|
+
});
|
|
798
|
+
return ok(`wrote ${result.bytes} bytes to ${result.path} (${result.width}x${result.height} ${result.format})`, result);
|
|
799
|
+
},
|
|
800
|
+
},
|
|
801
|
+
/* ---------------- High-level mobile gestures ---------------- */
|
|
802
|
+
{
|
|
803
|
+
name: "swipe_direction",
|
|
804
|
+
description: "High-level swipe in a cardinal `direction` from an optional anchor point. When `x`/`y` are omitted the centre of the screen is used; `distance` defaults to ~30% of the screen edge. Prefer this over the low-level `swipe(x1,y1,x2,y2)` in agent prompts — it's far easier for an LLM to reason about.",
|
|
805
|
+
inputShape: {
|
|
806
|
+
direction: z.enum(["up", "down", "left", "right"]),
|
|
807
|
+
x: z.number().optional(),
|
|
808
|
+
y: z.number().optional(),
|
|
809
|
+
distance: z
|
|
810
|
+
.number()
|
|
811
|
+
.positive()
|
|
812
|
+
.optional()
|
|
813
|
+
.describe("Pixel distance to swipe (default ≈ 30% of screen edge)."),
|
|
814
|
+
durationMs: durationMsField,
|
|
815
|
+
},
|
|
816
|
+
annotations: { ...MUTATING, title: "Swipe in a direction" },
|
|
817
|
+
requires: "swipe_direction",
|
|
818
|
+
handler: async (args, { backend }) => {
|
|
819
|
+
const direction = getStr(args, "direction");
|
|
820
|
+
// Resolve anchor + distance against the live screen size so the
|
|
821
|
+
// gesture lands inside the device viewport regardless of which
|
|
822
|
+
// device the registry routed to.
|
|
823
|
+
const size = await backend
|
|
824
|
+
.screenSize()
|
|
825
|
+
.catch(() => ({ w: 1080, h: 1920 }));
|
|
826
|
+
const anchorX = optNum(args, "x") ?? Math.round(size.w / 2);
|
|
827
|
+
const anchorY = optNum(args, "y") ?? Math.round(size.h / 2);
|
|
828
|
+
const horizontal = direction === "left" || direction === "right";
|
|
829
|
+
const fallbackDist = Math.max(80, Math.round((horizontal ? size.w : size.h) * 0.3));
|
|
830
|
+
const dist = optNum(args, "distance") ?? fallbackDist;
|
|
831
|
+
const dx = direction === "left" ? -dist : direction === "right" ? dist : 0;
|
|
832
|
+
const dy = direction === "up" ? -dist : direction === "down" ? dist : 0;
|
|
833
|
+
// Clamp endpoints so we don't try to swipe off the viewport.
|
|
834
|
+
const clamp = (v, lo, hi) => Math.max(lo, Math.min(hi, v));
|
|
835
|
+
const x2 = clamp(anchorX + dx, 0, size.w - 1);
|
|
836
|
+
const y2 = clamp(anchorY + dy, 0, size.h - 1);
|
|
837
|
+
await backend.swipe(anchorX, anchorY, x2, y2, optNum(args, "durationMs"));
|
|
838
|
+
return ack("swipe_direction");
|
|
839
|
+
},
|
|
840
|
+
},
|
|
841
|
+
{
|
|
842
|
+
name: "press_button",
|
|
843
|
+
description: "Press a friendly hardware / nav button on the device (BACK / HOME / DPAD_* / VOLUME_* / MEDIA_* / ENTER, …). Mobile-only — desktop backends should use `key`/`hotkey` instead.",
|
|
844
|
+
inputShape: {
|
|
845
|
+
button: z
|
|
846
|
+
.enum(Object.keys(PRESS_BUTTON_KEYMAP))
|
|
847
|
+
.describe("Friendly button name (case-insensitive)."),
|
|
848
|
+
},
|
|
849
|
+
annotations: { ...MUTATING, title: "Press hardware button" },
|
|
850
|
+
requires: "press_button",
|
|
851
|
+
handler: async (args, { backend }) => {
|
|
852
|
+
const name = getStr(args, "button").toUpperCase();
|
|
853
|
+
const keycode = PRESS_BUTTON_KEYMAP[name];
|
|
854
|
+
if (!keycode) {
|
|
855
|
+
throw new DeviceError(`press_button: unknown button '${name}'. Known: ${Object.keys(PRESS_BUTTON_KEYMAP).join(", ")}`, { subtype: "invalid_args", retriable: false });
|
|
856
|
+
}
|
|
857
|
+
await backend.pressKey(keycode);
|
|
858
|
+
return ack("press_button");
|
|
859
|
+
},
|
|
860
|
+
},
|
|
861
|
+
/* ---------------- Desktop pointer / nav ---------------- */
|
|
862
|
+
{
|
|
863
|
+
name: "move",
|
|
864
|
+
description: "Move the pointer to (x, y) without clicking. Desktop only — used for hover triggers, tooltips, and slow-click prelude.",
|
|
865
|
+
inputShape: { ...xy },
|
|
866
|
+
annotations: { ...MUTATING, title: "Move pointer" },
|
|
867
|
+
requires: "move",
|
|
868
|
+
handler: async (args, { backend }) => {
|
|
869
|
+
await backend.move(num(args, "x"), num(args, "y"));
|
|
870
|
+
return ack("move");
|
|
871
|
+
},
|
|
872
|
+
},
|
|
873
|
+
{
|
|
874
|
+
name: "navigate",
|
|
875
|
+
description: "Navigate the active surface — `back`/`forward`/`up`. Mobile maps `back` → KEYCODE_BACK, `up` → KEYCODE_HOME (no `forward` analogue). Desktop maps to browser-style hotkeys when implemented.",
|
|
876
|
+
inputShape: {
|
|
877
|
+
direction: z.enum(["back", "forward", "up"]),
|
|
878
|
+
},
|
|
879
|
+
annotations: { ...MUTATING, title: "Navigate" },
|
|
880
|
+
requires: "navigate",
|
|
881
|
+
handler: async (args, { backend }) => {
|
|
882
|
+
await backend.navigate(getStr(args, "direction"));
|
|
883
|
+
return ack("navigate");
|
|
884
|
+
},
|
|
885
|
+
},
|
|
886
|
+
];
|
|
470
887
|
/* ----------------------------------------------------------------------- */
|
|
471
|
-
/* Public API
|
|
888
|
+
/* Public API */
|
|
472
889
|
/* ----------------------------------------------------------------------- */
|
|
473
890
|
/**
|
|
474
|
-
*
|
|
475
|
-
*
|
|
476
|
-
* Useful for documentation / tests that want to enumerate the catalog.
|
|
477
|
-
* Wire callers SHOULD use `listToolDescriptorsFor(backend)` so the list
|
|
478
|
-
* only contains tools the active backend can actually execute.
|
|
891
|
+
* Look up a tool spec by name. Used by tests / introspection — production
|
|
892
|
+
* code goes through `registerAllTools(server, registry)`.
|
|
479
893
|
*/
|
|
480
|
-
export function
|
|
481
|
-
return
|
|
482
|
-
}
|
|
483
|
-
/** Return tool descriptors filtered by the backend's advertised tool set. */
|
|
484
|
-
export function listToolDescriptorsFor(backend) {
|
|
485
|
-
return Object.values(TOOLS)
|
|
486
|
-
.filter((t) => backend.tools.has(t.descriptor.name))
|
|
487
|
-
.map((t) => t.descriptor);
|
|
894
|
+
export function getToolSpec(name) {
    // Linear scan over the catalog; the first spec whose `name` matches
    // wins, and `undefined` signals "no such tool".
    for (const spec of TOOL_SPECS) {
        if (spec.name === name) {
            return spec;
        }
    }
    return undefined;
}
|
|
489
|
-
/**
|
|
490
|
-
export function
|
|
491
|
-
return
|
|
897
|
+
/**
 * Names of every tool in the catalog, in catalog order.
 *
 * @returns {string[]} A fresh array holding each spec's `name`.
 */
export function allToolNames() {
    const names = [];
    for (const spec of TOOL_SPECS) {
        names.push(spec.name);
    }
    return names;
}
|
|
493
901
|
/**
|
|
494
|
-
*
|
|
495
|
-
*
|
|
496
|
-
*
|
|
497
|
-
*
|
|
902
|
+
* Register every tool spec onto the given `McpServer`. Each handler closure
|
|
903
|
+
* captures the registry so per-call backend resolution stays inside the
|
|
904
|
+
* tool layer (the SDK itself is backend-agnostic).
|
|
905
|
+
*
|
|
906
|
+
* Backend-bound tools are gated on the resolved backend's `tools` set —
|
|
907
|
+
* if the active backend doesn't advertise the tool we throw a
|
|
908
|
+
* `DeviceError("unsupported")` so the SDK returns a structured error to
|
|
909
|
+
* the client. Registry-level tools (`list_available_devices`,
|
|
910
|
+
* `use_device`) skip this gate.
|
|
498
911
|
*/
|
|
499
|
-
export function
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
912
|
+
export function registerAllTools(server, registry) {
    for (const spec of TOOL_SPECS) {
        // Every tool also accepts an optional `device` selector on top of
        // its own fields. When it is absent, `registry.resolve()` decides
        // which backend to use.
        const schemaWithDevice = {
            ...spec.inputShape,
            device: z
                .string()
                .optional()
                .describe("Target device id from list_available_devices. Required when more than one device is connected."),
        };
        const registration = {
            description: spec.description,
            inputSchema: schemaWithDevice,
            annotations: spec.annotations,
        };
        server.registerTool(spec.name, registration, async (args, extra) => {
            const sessionId = extra?.sessionId;
            const callArgs = (args ?? {});
            try {
                // Registry-level tools run without a backend. Everything
                // else resolves one and must find the tool in that
                // backend's advertised `tools` set.
                let backend;
                if (spec.requires !== "registry") {
                    backend = registry.resolve(callArgs, sessionId);
                    if (!backend.tools.has(spec.requires)) {
                        throw new DeviceError(`tool '${spec.name}' is not supported by backend '${backend.os}'`, { subtype: "unsupported", retriable: false });
                    }
                }
                return await spec.handler(callArgs, { backend, registry, sessionId });
            }
            catch (err) {
                const text = err instanceof Error ? err.message : String(err);
                const isDeviceError = err instanceof DeviceError;
                // User-actionable device errors are returned as ordinary
                // text (no `isError`) so the model can read the message
                // and react to it instead of seeing a hard failure.
                if (isDeviceError && err.userActionable) {
                    return {
                        content: [{ type: "text", text }],
                        structuredContent: {
                            actionable: true,
                            subtype: err.subtype,
                            error: text,
                        },
                    };
                }
                // Everything else (non-actionable device errors and any
                // other thrown value) is flagged with `isError: true`.
                return {
                    isError: true,
                    content: [{ type: "text", text }],
                    structuredContent: {
                        error: text,
                        subtype: isDeviceError ? err.subtype : "internal_error",
                    },
                };
            }
        });
    }
}
|
|
504
981
|
//# sourceMappingURL=tool-registry.js.map
|