replicant-mcp 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -40,7 +40,8 @@ replicant-mcp wraps all of this into a clean interface that AI can understand an
40
40
  | **Device Control** | List connected devices, select active device, query device properties |
41
41
  | **App Management** | Install, uninstall, launch, stop apps; clear app data; list installed packages |
42
42
  | **Log Analysis** | Filter logcat by package, tag, level, time; configurable line limits |
43
- | **UI Automation** | Accessibility-first element finding with multi-tier fallback (accessibility → OCR → visual), spatial proximity search (`nearestTo`), grid-based precision tapping, tap, text input, screenshots |
43
+ | **UI Automation** | Accessibility-first element finding with multi-tier fallback (accessibility → OCR → visual), spatial proximity search (`nearestTo`), grid-based precision tapping, tap (with `deviceSpace` option), text input, screenshots |
44
+ | **Screenshot Scaling** | Auto-resize to 1000px max, JPEG compression (~94% size reduction), transparent coordinate conversion (image ↔ device space) |
44
45
  | **Configuration** | YAML config via `REPLICANT_CONFIG` for UI behavior customization |
45
46
  | **Utilities** | Response caching with progressive disclosure, on-demand documentation |
46
47
 
@@ -63,8 +64,8 @@ replicant-mcp wraps all of this into a clean interface that AI can understand an
63
64
  | **Developer Experience** | Simplified tool authoring with `defineTool()` helper | Future |
64
65
  | | Auto-generate JSON schema from Zod via `zod-to-json-schema` | Future |
65
66
  | | Convention-based tool auto-discovery (no manual wiring) | Future |
66
- | **Screenshot Scaling** | Auto-resize screenshots to prevent API context limits | Planned |
67
- | | Transparent coordinate conversion (image ↔ device space) | Planned |
67
+ | **Screenshot Scaling** | Auto-resize screenshots to prevent API context limits | Done |
68
+ | | Transparent coordinate conversion (image ↔ device space) | Done |
68
69
  | | Raw mode for external context management | Planned |
69
70
 
70
71
  ---
@@ -21,6 +21,7 @@ export interface ScreenshotResult {
21
21
  mode: "file" | "inline";
22
22
  path?: string;
23
23
  base64?: string;
24
+ mimeType?: string;
24
25
  sizeBytes?: number;
25
26
  device?: {
26
27
  width: number;
@@ -63,7 +64,7 @@ export declare class UiAutomatorAdapter {
63
64
  textContains?: string;
64
65
  className?: string;
65
66
  }): Promise<AccessibilityNode[]>;
66
- tap(deviceId: string, x: number, y: number): Promise<void>;
67
+ tap(deviceId: string, x: number, y: number, deviceSpace?: boolean): Promise<void>;
67
68
  tapElement(deviceId: string, element: AccessibilityNode): Promise<void>;
68
69
  input(deviceId: string, text: string): Promise<void>;
69
70
  scroll(deviceId: string, direction: "up" | "down" | "left" | "right", amount?: number): Promise<void>;
@@ -1,5 +1,6 @@
1
1
  import * as path from "path";
2
2
  import * as fs from "fs";
3
+ import * as os from "os";
3
4
  import sharp from "sharp";
4
5
  import { AdbAdapter } from "./adb.js";
5
6
  import { parseUiDump, findElements, flattenTree } from "../parsers/ui-dump.js";
@@ -63,11 +64,12 @@ export class UiAutomatorAdapter {
63
64
  const tree = await this.dump(deviceId);
64
65
  return findElements(tree, selector);
65
66
  }
66
- async tap(deviceId, x, y) {
67
+ async tap(deviceId, x, y, deviceSpace) {
67
68
  // Convert from image space to device space if scaling is active
69
+ // Skip conversion if deviceSpace=true (coordinates are already in device space)
68
70
  let tapX = x;
69
71
  let tapY = y;
70
- if (this.scalingState && this.scalingState.scaleFactor !== 1.0) {
72
+ if (!deviceSpace && this.scalingState && this.scalingState.scaleFactor !== 1.0) {
71
73
  const converted = toDeviceSpace(x, y, this.scalingState.scaleFactor);
72
74
  tapX = converted.x;
73
75
  tapY = converted.y;
@@ -133,16 +135,47 @@ export class UiAutomatorAdapter {
133
135
  }
134
136
  try {
135
137
  if (options.inline) {
136
- // Inline mode: return base64 (no scaling support for inline mode)
137
- // Clear scaling state since inline mode doesn't support coordinate conversion
138
- this.scalingState = null;
139
- const base64Result = await this.adb.shell(deviceId, `base64 ${remotePath}`);
140
- const sizeResult = await this.adb.shell(deviceId, `stat -c%s ${remotePath}`);
141
- return {
142
- mode: "inline",
143
- base64: base64Result.stdout.trim(),
144
- sizeBytes: parseInt(sizeResult.stdout.trim(), 10),
145
- };
138
+ // Inline mode: pull to temp, scale, convert to JPEG, return base64
139
+ const tempPath = path.join(os.tmpdir(), `replicant-inline-${Date.now()}.png`);
140
+ try {
141
+ // Pull to temp file
142
+ await this.adb.pull(deviceId, remotePath, tempPath);
143
+ // Get dimensions
144
+ const metadata = await sharp(tempPath).metadata();
145
+ const deviceWidth = metadata.width;
146
+ const deviceHeight = metadata.height;
147
+ // Calculate scale factor
148
+ const scaleFactor = calculateScaleFactor(deviceWidth, deviceHeight, maxDimension);
149
+ const imageWidth = Math.round(deviceWidth / scaleFactor);
150
+ const imageHeight = Math.round(deviceHeight / scaleFactor);
151
+ // Scale and convert to JPEG
152
+ const buffer = await sharp(tempPath)
153
+ .resize(imageWidth, imageHeight)
154
+ .jpeg({ quality: 70 })
155
+ .toBuffer();
156
+ // Update scaling state (now supported for inline!)
157
+ this.scalingState = {
158
+ scaleFactor,
159
+ deviceWidth,
160
+ deviceHeight,
161
+ imageWidth,
162
+ imageHeight,
163
+ };
164
+ return {
165
+ mode: "inline",
166
+ base64: buffer.toString("base64"),
167
+ mimeType: "image/jpeg",
168
+ sizeBytes: buffer.length,
169
+ device: { width: deviceWidth, height: deviceHeight },
170
+ image: { width: imageWidth, height: imageHeight },
171
+ scaleFactor,
172
+ };
173
+ }
174
+ finally {
175
+ // Clean up temp file
176
+ const fsPromises = await import("fs/promises");
177
+ await fsPromises.unlink(tempPath).catch(() => { });
178
+ }
146
179
  }
147
180
  else {
148
181
  // File mode: pull to local, then optionally scale
@@ -28,6 +28,7 @@ export declare const uiInputSchema: z.ZodObject<{
28
28
  debug: z.ZodOptional<z.ZodBoolean>;
29
29
  gridCell: z.ZodOptional<z.ZodNumber>;
30
30
  gridPosition: z.ZodOptional<z.ZodNumber>;
31
+ deviceSpace: z.ZodOptional<z.ZodBoolean>;
31
32
  maxDimension: z.ZodOptional<z.ZodNumber>;
32
33
  raw: z.ZodOptional<z.ZodBoolean>;
33
34
  compact: z.ZodOptional<z.ZodBoolean>;
@@ -113,6 +114,10 @@ export declare const uiToolDefinition: {
113
114
  maximum: number;
114
115
  description: string;
115
116
  };
117
+ deviceSpace: {
118
+ type: string;
119
+ description: string;
120
+ };
116
121
  maxDimension: {
117
122
  type: string;
118
123
  description: string;
package/dist/tools/ui.js CHANGED
@@ -19,6 +19,7 @@ export const uiInputSchema = z.object({
19
19
  debug: z.boolean().optional(),
20
20
  gridCell: z.number().min(1).max(24).optional(),
21
21
  gridPosition: z.number().min(1).max(5).optional(),
22
+ deviceSpace: z.boolean().optional(),
22
23
  maxDimension: z.number().optional(),
23
24
  raw: z.boolean().optional(),
24
25
  compact: z.boolean().optional(),
@@ -324,8 +325,8 @@ export async function handleUiTool(input, context, uiConfig) {
324
325
  else {
325
326
  throw new Error("Either x/y coordinates or elementIndex is required for tap");
326
327
  }
327
- await context.ui.tap(deviceId, x, y);
328
- return { tapped: { x, y }, deviceId };
328
+ await context.ui.tap(deviceId, x, y, input.deviceSpace);
329
+ return { tapped: { x, y, deviceSpace: input.deviceSpace ?? false }, deviceId };
329
330
  }
330
331
  case "input": {
331
332
  if (!input.text) {
@@ -395,6 +396,10 @@ export const uiToolDefinition = {
395
396
  debug: { type: "boolean", description: "Include source (accessibility/ocr) and confidence in response" },
396
397
  gridCell: { type: "number", minimum: 1, maximum: 24, description: "Grid cell number (1-24) for Tier 5 refinement" },
397
398
  gridPosition: { type: "number", minimum: 1, maximum: 5, description: "Position within cell (1=TL, 2=TR, 3=Center, 4=BL, 5=BR)" },
399
+ deviceSpace: {
400
+ type: "boolean",
401
+ description: "For tap: treat x/y as device coordinates (skip image→device scaling). Use when coordinates come from adb shell input tap testing.",
402
+ },
398
403
  maxDimension: {
399
404
  type: "number",
400
405
  description: "Max image dimension in pixels (default: 1000). Higher = better quality, more tokens.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "replicant-mcp",
3
- "version": "1.3.2",
3
+ "version": "1.4.0",
4
4
  "description": "Android MCP server for AI-assisted Android development",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",