replicant-mcp 1.6.2 → 1.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/adapters/ui-automator.d.ts +6 -6
  2. package/dist/adapters/ui-automator.js +19 -31
  3. package/dist/adapters/ui-fallback-find.js +44 -4
  4. package/dist/parsers/ui-dump.d.ts +1 -0
  5. package/dist/parsers/ui-dump.js +34 -2
  6. package/dist/schemas/derive.d.ts +8 -0
  7. package/dist/schemas/derive.js +19 -0
  8. package/dist/schemas/inputs.d.ts +9 -0
  9. package/dist/schemas/inputs.js +44 -0
  10. package/dist/server.js +3 -18
  11. package/dist/tools/adb-app.d.ts +6 -30
  12. package/dist/tools/adb-app.js +10 -29
  13. package/dist/tools/adb-device.d.ts +4 -12
  14. package/dist/tools/adb-device.js +4 -12
  15. package/dist/tools/adb-logcat.d.ts +5 -29
  16. package/dist/tools/adb-logcat.js +6 -14
  17. package/dist/tools/adb-shell.d.ts +8 -28
  18. package/dist/tools/adb-shell.js +10 -16
  19. package/dist/tools/cache.d.ts +9 -32
  20. package/dist/tools/cache.js +9 -25
  21. package/dist/tools/emulator-device.d.ts +4 -25
  22. package/dist/tools/emulator-device.js +5 -27
  23. package/dist/tools/gradle-build.d.ts +4 -16
  24. package/dist/tools/gradle-build.js +5 -14
  25. package/dist/tools/gradle-get-details.d.ts +7 -27
  26. package/dist/tools/gradle-get-details.js +11 -27
  27. package/dist/tools/gradle-list.d.ts +4 -13
  28. package/dist/tools/gradle-list.js +5 -13
  29. package/dist/tools/gradle-test.d.ts +4 -20
  30. package/dist/tools/gradle-test.js +9 -19
  31. package/dist/tools/index.d.ts +197 -0
  32. package/dist/tools/index.js +33 -0
  33. package/dist/tools/rtfm.d.ts +4 -12
  34. package/dist/tools/rtfm.js +10 -11
  35. package/dist/tools/ui-action.d.ts +18 -41
  36. package/dist/tools/ui-action.js +179 -35
  37. package/dist/tools/ui-capture.d.ts +7 -26
  38. package/dist/tools/ui-capture.js +9 -21
  39. package/dist/tools/ui-query.d.ts +13 -72
  40. package/dist/tools/ui-query.js +39 -46
  41. package/dist/types/errors.d.ts +1 -0
  42. package/dist/types/errors.js +1 -0
  43. package/dist/types/schemas/ui-output.d.ts +92 -6
  44. package/dist/types/schemas/ui-output.js +20 -0
  45. package/docs/contracts/replicant-mcp.contract.json +241 -57
  46. package/docs/contracts/tool-schema-tokens.json +67 -0
  47. package/package.json +9 -6
@@ -13,3 +13,200 @@ export * from "./ui-query.js";
13
13
  export * from "./ui-action.js";
14
14
  export * from "./ui-capture.js";
15
15
  export * from "./ui-find.js";
16
+ export declare const ALL_TOOL_DEFINITIONS: readonly [{
17
+ name: string;
18
+ description: string;
19
+ inputSchema: {
20
+ type: "object";
21
+ properties?: Record<string, unknown>;
22
+ required?: string[];
23
+ };
24
+ annotations: {
25
+ readOnlyHint: boolean;
26
+ destructiveHint: boolean;
27
+ idempotentHint: boolean;
28
+ openWorldHint: boolean;
29
+ };
30
+ }, {
31
+ name: string;
32
+ description: string;
33
+ inputSchema: {
34
+ type: "object";
35
+ properties?: Record<string, unknown>;
36
+ required?: string[];
37
+ };
38
+ annotations: {
39
+ readOnlyHint: boolean;
40
+ destructiveHint: boolean;
41
+ idempotentHint: boolean;
42
+ openWorldHint: boolean;
43
+ };
44
+ }, {
45
+ name: string;
46
+ description: string;
47
+ inputSchema: {
48
+ type: "object";
49
+ properties?: Record<string, unknown>;
50
+ required?: string[];
51
+ };
52
+ annotations: {
53
+ readOnlyHint: boolean;
54
+ destructiveHint: boolean;
55
+ idempotentHint: boolean;
56
+ openWorldHint: boolean;
57
+ };
58
+ }, {
59
+ name: string;
60
+ description: string;
61
+ inputSchema: {
62
+ type: "object";
63
+ properties?: Record<string, unknown>;
64
+ required?: string[];
65
+ };
66
+ annotations: {
67
+ readOnlyHint: boolean;
68
+ destructiveHint: boolean;
69
+ idempotentHint: boolean;
70
+ openWorldHint: boolean;
71
+ };
72
+ }, {
73
+ name: string;
74
+ description: string;
75
+ inputSchema: {
76
+ type: "object";
77
+ properties?: Record<string, unknown>;
78
+ required?: string[];
79
+ };
80
+ annotations: {
81
+ readOnlyHint: boolean;
82
+ destructiveHint: boolean;
83
+ idempotentHint: boolean;
84
+ openWorldHint: boolean;
85
+ };
86
+ }, {
87
+ name: string;
88
+ description: string;
89
+ inputSchema: {
90
+ type: "object";
91
+ properties?: Record<string, unknown>;
92
+ required?: string[];
93
+ };
94
+ annotations: {
95
+ readOnlyHint: boolean;
96
+ destructiveHint: boolean;
97
+ idempotentHint: boolean;
98
+ openWorldHint: boolean;
99
+ };
100
+ }, {
101
+ name: string;
102
+ description: string;
103
+ inputSchema: {
104
+ type: "object";
105
+ properties?: Record<string, unknown>;
106
+ required?: string[];
107
+ };
108
+ annotations: {
109
+ readOnlyHint: boolean;
110
+ destructiveHint: boolean;
111
+ idempotentHint: boolean;
112
+ openWorldHint: boolean;
113
+ };
114
+ }, {
115
+ name: string;
116
+ description: string;
117
+ inputSchema: {
118
+ type: "object";
119
+ properties?: Record<string, unknown>;
120
+ required?: string[];
121
+ };
122
+ annotations: {
123
+ readOnlyHint: boolean;
124
+ destructiveHint: boolean;
125
+ idempotentHint: boolean;
126
+ openWorldHint: boolean;
127
+ };
128
+ }, {
129
+ name: string;
130
+ description: string;
131
+ inputSchema: {
132
+ type: "object";
133
+ properties?: Record<string, unknown>;
134
+ required?: string[];
135
+ };
136
+ annotations: {
137
+ readOnlyHint: boolean;
138
+ destructiveHint: boolean;
139
+ idempotentHint: boolean;
140
+ openWorldHint: boolean;
141
+ };
142
+ }, {
143
+ name: string;
144
+ description: string;
145
+ inputSchema: {
146
+ type: "object";
147
+ properties?: Record<string, unknown>;
148
+ required?: string[];
149
+ };
150
+ annotations: {
151
+ readOnlyHint: boolean;
152
+ destructiveHint: boolean;
153
+ idempotentHint: boolean;
154
+ openWorldHint: boolean;
155
+ };
156
+ }, {
157
+ name: string;
158
+ description: string;
159
+ inputSchema: {
160
+ type: "object";
161
+ properties?: Record<string, unknown>;
162
+ required?: string[];
163
+ };
164
+ annotations: {
165
+ readOnlyHint: boolean;
166
+ destructiveHint: boolean;
167
+ idempotentHint: boolean;
168
+ openWorldHint: boolean;
169
+ };
170
+ }, {
171
+ name: string;
172
+ description: string;
173
+ inputSchema: {
174
+ type: "object";
175
+ properties?: Record<string, unknown>;
176
+ required?: string[];
177
+ };
178
+ annotations: {
179
+ readOnlyHint: boolean;
180
+ destructiveHint: boolean;
181
+ idempotentHint: boolean;
182
+ openWorldHint: boolean;
183
+ };
184
+ }, {
185
+ name: string;
186
+ description: string;
187
+ inputSchema: {
188
+ type: "object";
189
+ properties?: Record<string, unknown>;
190
+ required?: string[];
191
+ };
192
+ annotations: {
193
+ readOnlyHint: boolean;
194
+ destructiveHint: boolean;
195
+ idempotentHint: boolean;
196
+ openWorldHint: boolean;
197
+ };
198
+ }, {
199
+ name: string;
200
+ description: string;
201
+ inputSchema: {
202
+ type: "object";
203
+ properties?: Record<string, unknown>;
204
+ required?: string[];
205
+ };
206
+ annotations: {
207
+ readOnlyHint: boolean;
208
+ destructiveHint: boolean;
209
+ idempotentHint: boolean;
210
+ openWorldHint: boolean;
211
+ };
212
+ }];
@@ -13,3 +13,36 @@ export * from "./ui-query.js";
13
13
  export * from "./ui-action.js";
14
14
  export * from "./ui-capture.js";
15
15
  export * from "./ui-find.js";
16
+ import { cacheToolDefinition } from "./cache.js";
17
+ import { rtfmToolDefinition } from "./rtfm.js";
18
+ import { adbDeviceToolDefinition } from "./adb-device.js";
19
+ import { adbAppToolDefinition } from "./adb-app.js";
20
+ import { adbLogcatToolDefinition } from "./adb-logcat.js";
21
+ import { adbShellToolDefinition } from "./adb-shell.js";
22
+ import { emulatorDeviceToolDefinition } from "./emulator-device.js";
23
+ import { gradleBuildToolDefinition } from "./gradle-build.js";
24
+ import { gradleTestToolDefinition } from "./gradle-test.js";
25
+ import { gradleListToolDefinition } from "./gradle-list.js";
26
+ import { gradleGetDetailsToolDefinition } from "./gradle-get-details.js";
27
+ import { uiQueryToolDefinition } from "./ui-query.js";
28
+ import { uiActionToolDefinition } from "./ui-action.js";
29
+ import { uiCaptureToolDefinition } from "./ui-capture.js";
30
+ // Single registry of all MCP tool definitions. Consumed by the server at
31
+ // registration time, the contract generator, and the token-snapshot
32
+ // generator — so adding a tool here is enough to keep all three in sync.
33
+ export const ALL_TOOL_DEFINITIONS = [
34
+ cacheToolDefinition,
35
+ rtfmToolDefinition,
36
+ adbDeviceToolDefinition,
37
+ adbAppToolDefinition,
38
+ adbLogcatToolDefinition,
39
+ adbShellToolDefinition,
40
+ emulatorDeviceToolDefinition,
41
+ gradleBuildToolDefinition,
42
+ gradleTestToolDefinition,
43
+ gradleListToolDefinition,
44
+ gradleGetDetailsToolDefinition,
45
+ uiQueryToolDefinition,
46
+ uiActionToolDefinition,
47
+ uiCaptureToolDefinition,
48
+ ];
@@ -2,7 +2,7 @@ import { z } from "zod";
2
2
  export declare const rtfmInputSchema: z.ZodObject<{
3
3
  category: z.ZodOptional<z.ZodString>;
4
4
  tool: z.ZodOptional<z.ZodString>;
5
- }, z.core.$strip>;
5
+ }, z.core.$strict>;
6
6
  export type RtfmInput = z.infer<typeof rtfmInputSchema>;
7
7
  export declare function handleRtfmTool(input: RtfmInput): Promise<{
8
8
  content: string;
@@ -11,17 +11,9 @@ export declare const rtfmToolDefinition: {
11
11
  name: string;
12
12
  description: string;
13
13
  inputSchema: {
14
- type: string;
15
- properties: {
16
- category: {
17
- type: string;
18
- description: string;
19
- };
20
- tool: {
21
- type: string;
22
- description: string;
23
- };
24
- };
14
+ type: "object";
15
+ properties?: Record<string, unknown>;
16
+ required?: string[];
25
17
  };
26
18
  annotations: {
27
19
  readOnlyHint: boolean;
@@ -2,11 +2,16 @@ import { z } from "zod";
2
2
  import { readFile } from "fs/promises";
3
3
  import { join, dirname } from "path";
4
4
  import { fileURLToPath } from "url";
5
+ import { toolSchema } from "../schemas/inputs.js";
6
+ import { toMcpJsonSchema } from "../schemas/derive.js";
5
7
  const __dirname = dirname(fileURLToPath(import.meta.url));
6
8
  const RTFM_DIR = join(__dirname, "../../docs/rtfm");
7
- export const rtfmInputSchema = z.object({
8
- category: z.string().optional(),
9
- tool: z.string().optional(),
9
+ export const rtfmInputSchema = toolSchema({
10
+ category: z
11
+ .string()
12
+ .optional()
13
+ .describe("Category: build, adb, emulator, ui, cache, index"),
14
+ tool: z.string().optional().describe("Tool name (e.g., 'ui-query') for tool-specific docs"),
10
15
  });
11
16
  const TOOL_TO_CATEGORY = {
12
17
  "gradle-build": "build",
@@ -55,14 +60,8 @@ function extractToolSection(content, toolName) {
55
60
  }
56
61
  export const rtfmToolDefinition = {
57
62
  name: "rtfm",
58
- description: "Get documentation. Pass category or tool name.",
59
- inputSchema: {
60
- type: "object",
61
- properties: {
62
- category: { type: "string", description: "Category: build, adb, emulator, ui, cache" },
63
- tool: { type: "string", description: "Tool name for specific docs" },
64
- },
65
- },
63
+ description: "Get documentation. Pass category ('build'|'adb'|'emulator'|'ui'|'cache'|'index') or tool (tool name like 'ui-query').",
64
+ inputSchema: toMcpJsonSchema(rtfmInputSchema),
66
65
  annotations: {
67
66
  readOnlyHint: true,
68
67
  destructiveHint: false,
@@ -1,14 +1,22 @@
1
1
  import { z } from "zod";
2
2
  import { ServerContext } from "../server.js";
3
+ import { UiConfig } from "../types/index.js";
3
4
  export declare const uiActionInputSchema: z.ZodObject<{
4
5
  operation: z.ZodEnum<{
5
6
  tap: "tap";
6
7
  input: "input";
7
8
  scroll: "scroll";
8
9
  }>;
9
- x: z.ZodOptional<z.ZodNumber>;
10
- y: z.ZodOptional<z.ZodNumber>;
11
- elementIndex: z.ZodOptional<z.ZodNumber>;
10
+ x: z.ZodOptional<z.ZodPipe<z.ZodTransform<string | number, unknown>, z.ZodCoercedNumber<unknown>>>;
11
+ y: z.ZodOptional<z.ZodPipe<z.ZodTransform<string | number, unknown>, z.ZodCoercedNumber<unknown>>>;
12
+ elementIndex: z.ZodOptional<z.ZodPipe<z.ZodTransform<string | number, unknown>, z.ZodCoercedNumber<unknown>>>;
13
+ selector: z.ZodOptional<z.ZodPipe<z.ZodTransform<any, unknown>, z.ZodObject<{
14
+ resourceId: z.ZodOptional<z.ZodString>;
15
+ text: z.ZodOptional<z.ZodString>;
16
+ textContains: z.ZodOptional<z.ZodString>;
17
+ className: z.ZodOptional<z.ZodString>;
18
+ nearestTo: z.ZodOptional<z.ZodString>;
19
+ }, z.core.$strict>>>;
12
20
  text: z.ZodOptional<z.ZodString>;
13
21
  direction: z.ZodOptional<z.ZodEnum<{
14
22
  up: "up";
@@ -16,49 +24,18 @@ export declare const uiActionInputSchema: z.ZodObject<{
16
24
  left: "left";
17
25
  right: "right";
18
26
  }>>;
19
- amount: z.ZodOptional<z.ZodNumber>;
20
- deviceSpace: z.ZodOptional<z.ZodBoolean>;
21
- }, z.core.$strip>;
27
+ amount: z.ZodOptional<z.ZodPipe<z.ZodTransform<string | number, unknown>, z.ZodCoercedNumber<unknown>>>;
28
+ deviceSpace: z.ZodOptional<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>;
29
+ }, z.core.$strict>;
22
30
  export type UiActionInput = z.infer<typeof uiActionInputSchema>;
23
- export declare function handleUiActionTool(input: UiActionInput, context: ServerContext): Promise<Record<string, unknown>>;
31
+ export declare function handleUiActionTool(input: UiActionInput, context: ServerContext, uiConfig?: UiConfig): Promise<Record<string, unknown>>;
24
32
  export declare const uiActionToolDefinition: {
25
33
  name: string;
26
34
  description: string;
27
35
  inputSchema: {
28
- type: string;
29
- properties: {
30
- operation: {
31
- type: string;
32
- enum: string[];
33
- };
34
- x: {
35
- type: string;
36
- };
37
- y: {
38
- type: string;
39
- };
40
- elementIndex: {
41
- type: string;
42
- };
43
- text: {
44
- type: string;
45
- };
46
- direction: {
47
- type: string;
48
- enum: string[];
49
- };
50
- amount: {
51
- type: string;
52
- minimum: number;
53
- maximum: number;
54
- description: string;
55
- };
56
- deviceSpace: {
57
- type: string;
58
- description: string;
59
- };
60
- };
61
- required: string[];
36
+ type: "object";
37
+ properties?: Record<string, unknown>;
38
+ required?: string[];
62
39
  };
63
40
  annotations: {
64
41
  readOnlyHint: boolean;
@@ -1,32 +1,145 @@
1
1
  import { z } from "zod";
2
2
  import { ReplicantError, ErrorCode } from "../types/index.js";
3
- import { getElementCenter } from "./ui-find.js";
4
- export const uiActionInputSchema = z.object({
3
+ import { DEFAULT_CONFIG } from "../types/config.js";
4
+ import { getElementCenter, handleFind, isAccessibilityNode } from "./ui-find.js";
5
+ import { flattenTree } from "../parsers/ui-dump.js";
6
+ import { booleanInput, jsonObjectInput, numberInput, toolSchema } from "../schemas/inputs.js";
7
+ import { toMcpJsonSchema } from "../schemas/derive.js";
8
+ export const uiActionInputSchema = toolSchema({
5
9
  operation: z.enum(["tap", "input", "scroll"]),
6
- x: z.number().optional(),
7
- y: z.number().optional(),
8
- elementIndex: z.number().optional(),
10
+ x: numberInput().optional(),
11
+ y: numberInput().optional(),
12
+ elementIndex: numberInput().optional(),
13
+ selector: jsonObjectInput({
14
+ resourceId: z.string().optional(),
15
+ text: z.string().optional(),
16
+ textContains: z.string().optional(),
17
+ className: z.string().optional(),
18
+ nearestTo: z.string().optional(),
19
+ }).optional(),
9
20
  text: z.string().optional(),
10
21
  direction: z.enum(["up", "down", "left", "right"]).optional(),
11
- amount: z.number().min(0).max(1).optional(),
12
- deviceSpace: z.boolean().optional(),
22
+ amount: numberInput({ min: 0, max: 1 })
23
+ .optional()
24
+ .describe("Scroll fraction (0-1, default: 0.5)"),
25
+ deviceSpace: booleanInput()
26
+ .optional()
27
+ .describe("x/y in device-space (default true). Set false only for image-space coords."),
13
28
  });
14
29
  const operations = {
15
30
  tap: handleTap,
16
31
  input: handleInput,
17
32
  scroll: handleScroll,
18
33
  };
19
- export async function handleUiActionTool(input, context) {
34
+ export async function handleUiActionTool(input, context, uiConfig) {
20
35
  const device = await context.deviceState.ensureDevice(context.adb);
36
+ const config = uiConfig ?? DEFAULT_CONFIG.ui;
21
37
  const handler = operations[input.operation];
22
38
  if (!handler) {
23
39
  throw new ReplicantError(ErrorCode.INVALID_OPERATION, `Unknown operation: ${input.operation}`, "Valid operations: tap, input, scroll");
24
40
  }
25
- return handler(input, context, device.id);
41
+ return handler(input, context, config, device.id);
26
42
  }
27
- async function handleTap(input, context, deviceId) {
43
+ function describeMatches(matches) {
44
+ return matches.map((el, index) => {
45
+ const base = { index };
46
+ if (isAccessibilityNode(el)) {
47
+ base.text = el.text || el.contentDesc || undefined;
48
+ base.resourceId = el.resourceId || undefined;
49
+ base.bounds = el.bounds;
50
+ }
51
+ else {
52
+ base.text = el.text;
53
+ base.center = el.center;
54
+ base.bounds = el.bounds;
55
+ }
56
+ return base;
57
+ });
58
+ }
59
+ async function resolveSelector(input, context, config, deviceId) {
60
+ if (!input.selector)
61
+ return { elements: [] };
62
+ const response = await handleFind({ selector: input.selector }, context, config, deviceId);
63
+ return {
64
+ elements: context.lastFindResults,
65
+ candidates: response.candidates,
66
+ visualFallback: response.visualFallback,
67
+ };
68
+ }
69
+ /**
70
+ * Resolves a selector match for an action operation.
71
+ * - 0 matches → ELEMENT_NOT_FOUND, preserving any fallback candidates the
72
+ * resolver already produced (so the caller doesn't pay the
73
+ * screenshot/dump/crop cost twice).
74
+ * - 1 match → take it
75
+ * - 1+ matches with `nearestTo` set → take matches[0]; the find resolver
76
+ * already proximity-sorted them.
77
+ * - 1+ matches without `nearestTo` → AMBIGUOUS_MATCH with candidate list.
78
+ */
79
+ function pickSelectorMatch(resolution, selector, operation) {
80
+ const { elements: matches, candidates, visualFallback } = resolution;
81
+ if (matches.length === 0) {
82
+ const hasFallbackPayload = candidates !== undefined || visualFallback !== undefined;
83
+ throw new ReplicantError(ErrorCode.ELEMENT_NOT_FOUND, `No element matched selector: ${JSON.stringify(selector)}`, hasFallbackPayload
84
+ ? "Inspect the candidates/visualFallback in error details, or refine the selector."
85
+ : operation === "scroll"
86
+ ? "Selector must resolve to an element inside a scrollable container."
87
+ : "Try a broader selector (textContains), or use ui-query find for fallback tiers.", hasFallbackPayload ? { buildResult: { candidates, visualFallback } } : undefined);
88
+ }
89
+ if (matches.length > 1 && !selector.nearestTo) {
90
+ throw new ReplicantError(ErrorCode.AMBIGUOUS_MATCH, `Selector matched ${matches.length} elements; cannot decide which to ${operation}.`, "Disambiguate via 'nearestTo', a tighter resourceId, or use ui-query find + elementIndex.", { buildResult: { matches: describeMatches(matches) } });
91
+ }
92
+ return matches[0];
93
+ }
94
+ function findScrollableAncestor(tree, target) {
95
+ const flat = flattenTree(tree);
96
+ let best = null;
97
+ let smallestArea = Infinity;
98
+ for (const node of flat) {
99
+ if (!isScrollableContainer(node))
100
+ continue;
101
+ const { bounds: b } = node;
102
+ if (target.centerX >= b.left &&
103
+ target.centerX <= b.right &&
104
+ target.centerY >= b.top &&
105
+ target.centerY <= b.bottom) {
106
+ const area = (b.right - b.left) * (b.bottom - b.top);
107
+ if (area < smallestArea) {
108
+ smallestArea = area;
109
+ best = node;
110
+ }
111
+ }
112
+ }
113
+ return best;
114
+ }
115
+ function isScrollableContainer(node) {
116
+ if (node.scrollable !== undefined)
117
+ return node.scrollable;
118
+ const scrollableClassFragments = [
119
+ "ScrollView",
120
+ "RecyclerView",
121
+ "ListView",
122
+ "ViewPager",
123
+ "AndroidComposeView",
124
+ "ComposeView",
125
+ "GridView",
126
+ "Gallery",
127
+ "NumberPicker",
128
+ ];
129
+ return scrollableClassFragments.some((fragment) => node.className.includes(fragment));
130
+ }
131
+ async function handleTap(input, context, config, deviceId) {
28
132
  let x, y;
29
- if (input.elementIndex !== undefined) {
133
+ let usedSelector = false;
134
+ if (input.selector) {
135
+ const resolution = await resolveSelector(input, context, config, deviceId);
136
+ const match = pickSelectorMatch(resolution, input.selector, "tap");
137
+ const center = getElementCenter(match);
138
+ x = center.x;
139
+ y = center.y;
140
+ usedSelector = true;
141
+ }
142
+ else if (input.elementIndex !== undefined) {
30
143
  if (!context.lastFindResults[input.elementIndex]) {
31
144
  throw new ReplicantError(ErrorCode.ELEMENT_NOT_FOUND, `Element at index ${input.elementIndex} not found. Run 'find' first.`, "Use ui-query find to locate elements, then reference them by index");
32
145
  }
@@ -40,46 +153,77 @@ async function handleTap(input, context, deviceId) {
40
153
  y = input.y;
41
154
  }
42
155
  else {
43
- throw new ReplicantError(ErrorCode.INPUT_VALIDATION_FAILED, "Either x/y coordinates or elementIndex is required for tap", "Provide x and y coordinates, or use elementIndex from a previous ui-query find result");
156
+ throw new ReplicantError(ErrorCode.INPUT_VALIDATION_FAILED, "tap requires x/y, elementIndex, or selector", "Provide one of: x+y coords, elementIndex from a prior find, or a selector.");
44
157
  }
45
- await context.ui.tap(deviceId, x, y, input.deviceSpace);
46
- return { tapped: { x, y, deviceSpace: input.deviceSpace ?? false }, deviceId };
158
+ // Selector and elementIndex paths always yield device-space coords (the find
159
+ // result is already in device space). Only the raw x/y path lets the caller
160
+ // override the space; default true matches the new ui-query dump contract.
161
+ const fromResolvedElement = usedSelector || input.elementIndex !== undefined;
162
+ const deviceSpace = fromResolvedElement ? true : (input.deviceSpace ?? true);
163
+ await context.ui.tap(deviceId, x, y, deviceSpace);
164
+ const response = { tapped: { x, y, deviceSpace }, deviceId };
165
+ if (usedSelector)
166
+ response.matchedSelector = input.selector;
167
+ return response;
47
168
  }
48
- async function handleInput(input, context, deviceId) {
169
+ async function handleInput(input, context, config, deviceId) {
49
170
  if (!input.text) {
50
171
  throw new ReplicantError(ErrorCode.INPUT_VALIDATION_FAILED, "text is required for input operation", "Provide the text string to input");
51
172
  }
173
+ if (input.selector) {
174
+ const resolution = await resolveSelector(input, context, config, deviceId);
175
+ const match = pickSelectorMatch(resolution, input.selector, "input");
176
+ const center = getElementCenter(match);
177
+ await context.ui.tap(deviceId, center.x, center.y, true);
178
+ }
52
179
  await context.ui.input(deviceId, input.text);
53
- return { input: input.text, deviceId };
180
+ return {
181
+ input: input.text,
182
+ deviceId,
183
+ ...(input.selector ? { matchedSelector: input.selector } : {}),
184
+ };
54
185
  }
55
- async function handleScroll(input, context, deviceId) {
186
+ async function handleScroll(input, context, config, deviceId) {
56
187
  if (!input.direction) {
57
188
  throw new ReplicantError(ErrorCode.INPUT_VALIDATION_FAILED, "direction is required for scroll operation", "Provide a direction: up, down, left, or right");
58
189
  }
59
190
  const amount = input.amount ?? 0.5;
191
+ if (input.selector) {
192
+ const resolution = await resolveSelector(input, context, config, deviceId);
193
+ const target = pickSelectorMatch(resolution, input.selector, "scroll");
194
+ if (!isAccessibilityNode(target)) {
195
+ // OCR/grid match — fall back to screen-center scroll with a warning.
196
+ await context.ui.scroll(deviceId, input.direction, amount);
197
+ return {
198
+ scrolled: { direction: input.direction, amount },
199
+ deviceId,
200
+ warning: "selector resolved to a non-accessibility match; scrolled the screen center.",
201
+ };
202
+ }
203
+ const tree = await context.ui.dump(deviceId);
204
+ const scrollable = findScrollableAncestor(tree, target);
205
+ if (!scrollable) {
206
+ await context.ui.scroll(deviceId, input.direction, amount);
207
+ return {
208
+ scrolled: { direction: input.direction, amount },
209
+ deviceId,
210
+ warning: "no scrollable container found; scrolled the screen center.",
211
+ };
212
+ }
213
+ await context.ui.scroll(deviceId, input.direction, amount, scrollable.bounds);
214
+ return {
215
+ scrolled: { direction: input.direction, amount, container: scrollable.className },
216
+ deviceId,
217
+ matchedSelector: input.selector,
218
+ };
219
+ }
60
220
  await context.ui.scroll(deviceId, input.direction, amount);
61
221
  return { scrolled: { direction: input.direction, amount }, deviceId };
62
222
  }
63
223
  export const uiActionToolDefinition = {
64
224
  name: "ui-action",
65
- description: "Interact with app UI: tap, input, scroll.",
66
- inputSchema: {
67
- type: "object",
68
- properties: {
69
- operation: {
70
- type: "string",
71
- enum: ["tap", "input", "scroll"],
72
- },
73
- x: { type: "number" },
74
- y: { type: "number" },
75
- elementIndex: { type: "number" },
76
- text: { type: "string" },
77
- direction: { type: "string", enum: ["up", "down", "left", "right"] },
78
- amount: { type: "number", minimum: 0, maximum: 1, description: "Scroll fraction (0-1, default: 0.5)" },
79
- deviceSpace: { type: "boolean", description: "Treat x/y as device coordinates (skip scaling)" },
80
- },
81
- required: ["operation"],
82
- },
225
+ description: "Interact with app UI: tap, input, scroll. Use selector or coords.",
226
+ inputSchema: toMcpJsonSchema(uiActionInputSchema),
83
227
  annotations: {
84
228
  readOnlyHint: false,
85
229
  destructiveHint: true,