replicant-mcp 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/adapters/ui-automator.d.ts +2 -1
- package/dist/adapters/ui-automator.js +45 -12
- package/dist/tools/ui.d.ts +5 -0
- package/dist/tools/ui.js +7 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -40,7 +40,8 @@ replicant-mcp wraps all of this into a clean interface that AI can understand an
|
|
|
40
40
|
| **Device Control** | List connected devices, select active device, query device properties |
|
|
41
41
|
| **App Management** | Install, uninstall, launch, stop apps; clear app data; list installed packages |
|
|
42
42
|
| **Log Analysis** | Filter logcat by package, tag, level, time; configurable line limits |
|
|
43
|
-
| **UI Automation** | Accessibility-first element finding with multi-tier fallback (accessibility → OCR → visual), spatial proximity search (`nearestTo`), grid-based precision tapping, tap, text input, screenshots |
|
|
43
|
+
| **UI Automation** | Accessibility-first element finding with multi-tier fallback (accessibility → OCR → visual), spatial proximity search (`nearestTo`), grid-based precision tapping, tap (with deviceSpace option), text input, screenshots |
|
|
44
|
+
| **Screenshot Scaling** | Auto-resize to 1000px max, JPEG compression (~94% size reduction), transparent coordinate conversion (image ↔ device space) |
|
|
44
45
|
| **Configuration** | YAML config via `REPLICANT_CONFIG` for UI behavior customization |
|
|
45
46
|
| **Utilities** | Response caching with progressive disclosure, on-demand documentation |
|
|
46
47
|
|
|
@@ -63,8 +64,8 @@ replicant-mcp wraps all of this into a clean interface that AI can understand an
|
|
|
63
64
|
| **Developer Experience** | Simplified tool authoring with `defineTool()` helper | Future |
|
|
64
65
|
| | Auto-generate JSON schema from Zod via `zod-to-json-schema` | Future |
|
|
65
66
|
| | Convention-based tool auto-discovery (no manual wiring) | Future |
|
|
66
|
-
| **Screenshot Scaling** | Auto-resize screenshots to prevent API context limits |
|
|
67
|
-
| | Transparent coordinate conversion (image ↔ device space) |
|
|
67
|
+
| **Screenshot Scaling** | Auto-resize screenshots to prevent API context limits | ✅ |
|
|
68
|
+
| | Transparent coordinate conversion (image ↔ device space) | ✅ |
|
|
68
69
|
| | Raw mode for external context management | Planned |
|
|
69
70
|
|
|
70
71
|
---
|
package/dist/adapters/ui-automator.d.ts
CHANGED
|
@@ -21,6 +21,7 @@ export interface ScreenshotResult {
|
|
|
21
21
|
mode: "file" | "inline";
|
|
22
22
|
path?: string;
|
|
23
23
|
base64?: string;
|
|
24
|
+
mimeType?: string;
|
|
24
25
|
sizeBytes?: number;
|
|
25
26
|
device?: {
|
|
26
27
|
width: number;
|
|
@@ -63,7 +64,7 @@ export declare class UiAutomatorAdapter {
|
|
|
63
64
|
textContains?: string;
|
|
64
65
|
className?: string;
|
|
65
66
|
}): Promise<AccessibilityNode[]>;
|
|
66
|
-
tap(deviceId: string, x: number, y: number): Promise<void>;
|
|
67
|
+
tap(deviceId: string, x: number, y: number, deviceSpace?: boolean): Promise<void>;
|
|
67
68
|
tapElement(deviceId: string, element: AccessibilityNode): Promise<void>;
|
|
68
69
|
input(deviceId: string, text: string): Promise<void>;
|
|
69
70
|
scroll(deviceId: string, direction: "up" | "down" | "left" | "right", amount?: number): Promise<void>;
|
package/dist/adapters/ui-automator.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as path from "path";
|
|
2
2
|
import * as fs from "fs";
|
|
3
|
+
import * as os from "os";
|
|
3
4
|
import sharp from "sharp";
|
|
4
5
|
import { AdbAdapter } from "./adb.js";
|
|
5
6
|
import { parseUiDump, findElements, flattenTree } from "../parsers/ui-dump.js";
|
|
@@ -63,11 +64,12 @@ export class UiAutomatorAdapter {
|
|
|
63
64
|
const tree = await this.dump(deviceId);
|
|
64
65
|
return findElements(tree, selector);
|
|
65
66
|
}
|
|
66
|
-
async tap(deviceId, x, y) {
|
|
67
|
+
async tap(deviceId, x, y, deviceSpace) {
|
|
67
68
|
// Convert from image space to device space if scaling is active
|
|
69
|
+
// Skip conversion if deviceSpace=true (coordinates are already in device space)
|
|
68
70
|
let tapX = x;
|
|
69
71
|
let tapY = y;
|
|
70
|
-
if (this.scalingState && this.scalingState.scaleFactor !== 1.0) {
|
|
72
|
+
if (!deviceSpace && this.scalingState && this.scalingState.scaleFactor !== 1.0) {
|
|
71
73
|
const converted = toDeviceSpace(x, y, this.scalingState.scaleFactor);
|
|
72
74
|
tapX = converted.x;
|
|
73
75
|
tapY = converted.y;
|
|
@@ -133,16 +135,47 @@ export class UiAutomatorAdapter {
|
|
|
133
135
|
}
|
|
134
136
|
try {
|
|
135
137
|
if (options.inline) {
|
|
136
|
-
// Inline mode:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
138
|
+
// Inline mode: pull to temp, scale, convert to JPEG, return base64
|
|
139
|
+
const tempPath = path.join(os.tmpdir(), `replicant-inline-${Date.now()}.png`);
|
|
140
|
+
try {
|
|
141
|
+
// Pull to temp file
|
|
142
|
+
await this.adb.pull(deviceId, remotePath, tempPath);
|
|
143
|
+
// Get dimensions
|
|
144
|
+
const metadata = await sharp(tempPath).metadata();
|
|
145
|
+
const deviceWidth = metadata.width;
|
|
146
|
+
const deviceHeight = metadata.height;
|
|
147
|
+
// Calculate scale factor
|
|
148
|
+
const scaleFactor = calculateScaleFactor(deviceWidth, deviceHeight, maxDimension);
|
|
149
|
+
const imageWidth = Math.round(deviceWidth / scaleFactor);
|
|
150
|
+
const imageHeight = Math.round(deviceHeight / scaleFactor);
|
|
151
|
+
// Scale and convert to JPEG
|
|
152
|
+
const buffer = await sharp(tempPath)
|
|
153
|
+
.resize(imageWidth, imageHeight)
|
|
154
|
+
.jpeg({ quality: 70 })
|
|
155
|
+
.toBuffer();
|
|
156
|
+
// Update scaling state (now supported for inline!)
|
|
157
|
+
this.scalingState = {
|
|
158
|
+
scaleFactor,
|
|
159
|
+
deviceWidth,
|
|
160
|
+
deviceHeight,
|
|
161
|
+
imageWidth,
|
|
162
|
+
imageHeight,
|
|
163
|
+
};
|
|
164
|
+
return {
|
|
165
|
+
mode: "inline",
|
|
166
|
+
base64: buffer.toString("base64"),
|
|
167
|
+
mimeType: "image/jpeg",
|
|
168
|
+
sizeBytes: buffer.length,
|
|
169
|
+
device: { width: deviceWidth, height: deviceHeight },
|
|
170
|
+
image: { width: imageWidth, height: imageHeight },
|
|
171
|
+
scaleFactor,
|
|
172
|
+
};
|
|
173
|
+
}
|
|
174
|
+
finally {
|
|
175
|
+
// Clean up temp file
|
|
176
|
+
const fsPromises = await import("fs/promises");
|
|
177
|
+
await fsPromises.unlink(tempPath).catch(() => { });
|
|
178
|
+
}
|
|
146
179
|
}
|
|
147
180
|
else {
|
|
148
181
|
// File mode: pull to local, then optionally scale
|
package/dist/tools/ui.d.ts
CHANGED
|
@@ -28,6 +28,7 @@ export declare const uiInputSchema: z.ZodObject<{
|
|
|
28
28
|
debug: z.ZodOptional<z.ZodBoolean>;
|
|
29
29
|
gridCell: z.ZodOptional<z.ZodNumber>;
|
|
30
30
|
gridPosition: z.ZodOptional<z.ZodNumber>;
|
|
31
|
+
deviceSpace: z.ZodOptional<z.ZodBoolean>;
|
|
31
32
|
maxDimension: z.ZodOptional<z.ZodNumber>;
|
|
32
33
|
raw: z.ZodOptional<z.ZodBoolean>;
|
|
33
34
|
compact: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -113,6 +114,10 @@ export declare const uiToolDefinition: {
|
|
|
113
114
|
maximum: number;
|
|
114
115
|
description: string;
|
|
115
116
|
};
|
|
117
|
+
deviceSpace: {
|
|
118
|
+
type: string;
|
|
119
|
+
description: string;
|
|
120
|
+
};
|
|
116
121
|
maxDimension: {
|
|
117
122
|
type: string;
|
|
118
123
|
description: string;
|
package/dist/tools/ui.js
CHANGED
|
@@ -19,6 +19,7 @@ export const uiInputSchema = z.object({
|
|
|
19
19
|
debug: z.boolean().optional(),
|
|
20
20
|
gridCell: z.number().min(1).max(24).optional(),
|
|
21
21
|
gridPosition: z.number().min(1).max(5).optional(),
|
|
22
|
+
deviceSpace: z.boolean().optional(),
|
|
22
23
|
maxDimension: z.number().optional(),
|
|
23
24
|
raw: z.boolean().optional(),
|
|
24
25
|
compact: z.boolean().optional(),
|
|
@@ -324,8 +325,8 @@ export async function handleUiTool(input, context, uiConfig) {
|
|
|
324
325
|
else {
|
|
325
326
|
throw new Error("Either x/y coordinates or elementIndex is required for tap");
|
|
326
327
|
}
|
|
327
|
-
await context.ui.tap(deviceId, x, y);
|
|
328
|
-
return { tapped: { x, y }, deviceId };
|
|
328
|
+
await context.ui.tap(deviceId, x, y, input.deviceSpace);
|
|
329
|
+
return { tapped: { x, y, deviceSpace: input.deviceSpace ?? false }, deviceId };
|
|
329
330
|
}
|
|
330
331
|
case "input": {
|
|
331
332
|
if (!input.text) {
|
|
@@ -395,6 +396,10 @@ export const uiToolDefinition = {
|
|
|
395
396
|
debug: { type: "boolean", description: "Include source (accessibility/ocr) and confidence in response" },
|
|
396
397
|
gridCell: { type: "number", minimum: 1, maximum: 24, description: "Grid cell number (1-24) for Tier 5 refinement" },
|
|
397
398
|
gridPosition: { type: "number", minimum: 1, maximum: 5, description: "Position within cell (1=TL, 2=TR, 3=Center, 4=BL, 5=BR)" },
|
|
399
|
+
deviceSpace: {
|
|
400
|
+
type: "boolean",
|
|
401
|
+
description: "For tap: treat x/y as device coordinates (skip image→device scaling). Use when coordinates come from adb shell input tap testing.",
|
|
402
|
+
},
|
|
398
403
|
maxDimension: {
|
|
399
404
|
type: "number",
|
|
400
405
|
description: "Max image dimension in pixels (default: 1000). Higher = better quality, more tokens.",
|