replicant-mcp 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -66,6 +66,7 @@ export declare class UiAutomatorAdapter {
66
66
  tap(deviceId: string, x: number, y: number): Promise<void>;
67
67
  tapElement(deviceId: string, element: AccessibilityNode): Promise<void>;
68
68
  input(deviceId: string, text: string): Promise<void>;
69
+ scroll(deviceId: string, direction: "up" | "down" | "left" | "right", amount?: number): Promise<void>;
69
70
  screenshot(deviceId: string, options?: ScreenshotOptions): Promise<ScreenshotResult>;
70
71
  accessibilityCheck(deviceId: string): Promise<{
71
72
  hasAccessibleElements: boolean;
@@ -82,6 +82,47 @@ export class UiAutomatorAdapter {
82
82
  const escaped = text.replace(/(['"\\$`])/g, "\\$1").replace(/ /g, "%s");
83
83
  await this.adb.shell(deviceId, `input text "${escaped}"`);
84
84
  }
85
+ async scroll(deviceId, direction, amount = 0.5) {
86
+ const screen = await this.getScreenMetadata(deviceId);
87
+ const { width, height } = screen;
88
+ // Calculate scroll distance based on amount (0-1 representing screen percentage)
89
+ const scrollDistance = Math.round((direction === "up" || direction === "down" ? height : width) * amount * 0.8);
90
+ // Center point of the screen
91
+ const centerX = Math.round(width / 2);
92
+ const centerY = Math.round(height / 2);
93
+ // Calculate start and end points based on direction
94
+ // Note: "scroll down" means content moves up, so we swipe up (finger moves from bottom to top)
95
+ let startX, startY, endX, endY;
96
+ switch (direction) {
97
+ case "down": // Scroll down = swipe up = finger moves up
98
+ startX = centerX;
99
+ startY = centerY + scrollDistance / 2;
100
+ endX = centerX;
101
+ endY = centerY - scrollDistance / 2;
102
+ break;
103
+ case "up": // Scroll up = swipe down = finger moves down
104
+ startX = centerX;
105
+ startY = centerY - scrollDistance / 2;
106
+ endX = centerX;
107
+ endY = centerY + scrollDistance / 2;
108
+ break;
109
+ case "right": // Scroll right = swipe left = finger moves left
110
+ startX = centerX + scrollDistance / 2;
111
+ startY = centerY;
112
+ endX = centerX - scrollDistance / 2;
113
+ endY = centerY;
114
+ break;
115
+ case "left": // Scroll left = swipe right = finger moves right
116
+ startX = centerX - scrollDistance / 2;
117
+ startY = centerY;
118
+ endX = centerX + scrollDistance / 2;
119
+ endY = centerY;
120
+ break;
121
+ }
122
+ // Duration in ms - longer for larger scrolls, minimum 100ms
123
+ const duration = Math.max(100, Math.round(scrollDistance / 2));
124
+ await this.adb.shell(deviceId, `input swipe ${startX} ${startY} ${endX} ${endY} ${duration}`);
125
+ }
85
126
  async screenshot(deviceId, options = {}) {
86
127
  const remotePath = "/sdcard/replicant-screenshot.png";
87
128
  const maxDimension = options.maxDimension ?? 1000;
@@ -7,6 +7,7 @@ export declare const uiInputSchema: z.ZodObject<{
7
7
  dump: "dump";
8
8
  tap: "tap";
9
9
  input: "input";
10
+ scroll: "scroll";
10
11
  screenshot: "screenshot";
11
12
  "accessibility-check": "accessibility-check";
12
13
  "visual-snapshot": "visual-snapshot";
@@ -29,6 +30,14 @@ export declare const uiInputSchema: z.ZodObject<{
29
30
  gridPosition: z.ZodOptional<z.ZodNumber>;
30
31
  maxDimension: z.ZodOptional<z.ZodNumber>;
31
32
  raw: z.ZodOptional<z.ZodBoolean>;
33
+ compact: z.ZodOptional<z.ZodBoolean>;
34
+ direction: z.ZodOptional<z.ZodEnum<{
35
+ up: "up";
36
+ down: "down";
37
+ left: "left";
38
+ right: "right";
39
+ }>>;
40
+ amount: z.ZodOptional<z.ZodNumber>;
32
41
  }, z.core.$strip>;
33
42
  export type UiInput = z.infer<typeof uiInputSchema>;
34
43
  export declare function handleUiTool(input: UiInput, context: ServerContext, uiConfig?: UiConfig): Promise<Record<string, unknown>>;
@@ -112,6 +121,21 @@ export declare const uiToolDefinition: {
112
121
  type: string;
113
122
  description: string;
114
123
  };
124
+ compact: {
125
+ type: string;
126
+ description: string;
127
+ };
128
+ direction: {
129
+ type: string;
130
+ enum: string[];
131
+ description: string;
132
+ };
133
+ amount: {
134
+ type: string;
135
+ minimum: number;
136
+ maximum: number;
137
+ description: string;
138
+ };
115
139
  };
116
140
  required: string[];
117
141
  };
package/dist/tools/ui.js CHANGED
@@ -2,7 +2,7 @@ import { z } from "zod";
2
2
  import { CACHE_TTLS } from "../types/index.js";
3
3
  import { flattenTree } from "../parsers/ui-dump.js";
4
4
  export const uiInputSchema = z.object({
5
- operation: z.enum(["dump", "find", "tap", "input", "screenshot", "accessibility-check", "visual-snapshot"]),
5
+ operation: z.enum(["dump", "find", "tap", "input", "scroll", "screenshot", "accessibility-check", "visual-snapshot"]),
6
6
  selector: z.object({
7
7
  resourceId: z.string().optional(),
8
8
  text: z.string().optional(),
@@ -21,6 +21,9 @@ export const uiInputSchema = z.object({
21
21
  gridPosition: z.number().min(1).max(5).optional(),
22
22
  maxDimension: z.number().optional(),
23
23
  raw: z.boolean().optional(),
24
+ compact: z.boolean().optional(),
25
+ direction: z.enum(["up", "down", "left", "right"]).optional(),
26
+ amount: z.number().min(0).max(1).optional(),
24
27
  });
25
28
  // Store last find results for elementIndex reference
26
29
  // Updated to support accessibility, OCR, and grid elements
@@ -103,7 +106,20 @@ export async function handleUiTool(input, context, uiConfig) {
103
106
  // Cache the tree
104
107
  const dumpId = context.cache.generateId("ui-dump");
105
108
  context.cache.set(dumpId, { tree, deviceId }, "ui-dump", CACHE_TTLS.UI_TREE);
106
- // Create a simplified view
109
+ if (input.compact) {
110
+ // Compact mode: flat list of interactive elements only
111
+ const flat = flattenTree(tree);
112
+ const interactive = flat.filter((n) => n.clickable || n.focusable);
113
+ const elements = interactive.map((n) => ({
114
+ text: n.text || n.contentDesc || undefined,
115
+ type: n.className.split(".").pop(),
116
+ x: n.centerX,
117
+ y: n.centerY,
118
+ resourceId: n.resourceId ? n.resourceId.split("/").pop() : undefined,
119
+ }));
120
+ return { dumpId, elements, count: elements.length, deviceId };
121
+ }
122
+ // Full mode: hierarchical tree with all details
107
123
  const simplifyNode = (node, depth = 0) => ({
108
124
  className: node.className.split(".").pop(),
109
125
  text: node.text || undefined,
@@ -318,10 +334,18 @@ export async function handleUiTool(input, context, uiConfig) {
318
334
  await context.ui.input(deviceId, input.text);
319
335
  return { input: input.text, deviceId };
320
336
  }
337
+ case "scroll": {
338
+ if (!input.direction) {
339
+ throw new Error("direction is required for scroll operation");
340
+ }
341
+ const amount = input.amount ?? 0.5; // Default to half-screen scroll
342
+ await context.ui.scroll(deviceId, input.direction, amount);
343
+ return { scrolled: { direction: input.direction, amount }, deviceId };
344
+ }
321
345
  case "screenshot": {
322
346
  const result = await context.ui.screenshot(deviceId, {
323
347
  localPath: input.localPath,
324
- inline: input.inline,
348
+ inline: input.inline ?? true,
325
349
  maxDimension: input.maxDimension ?? config.maxImageDimension,
326
350
  raw: input.raw,
327
351
  });
@@ -343,13 +367,13 @@ export async function handleUiTool(input, context, uiConfig) {
343
367
  }
344
368
  export const uiToolDefinition = {
345
369
  name: "ui",
346
- description: "Interact with app UI via accessibility tree. Auto-selects device if only one connected. Operations: dump, find, tap, input, screenshot, accessibility-check, visual-snapshot.",
370
+ description: "Interact with app UI via accessibility tree. Auto-selects device if only one connected. Operations: dump, find, tap, input, scroll, screenshot, accessibility-check, visual-snapshot.",
347
371
  inputSchema: {
348
372
  type: "object",
349
373
  properties: {
350
374
  operation: {
351
375
  type: "string",
352
- enum: ["dump", "find", "tap", "input", "screenshot", "accessibility-check", "visual-snapshot"],
376
+ enum: ["dump", "find", "tap", "input", "scroll", "screenshot", "accessibility-check", "visual-snapshot"],
353
377
  },
354
378
  selector: {
355
379
  type: "object",
@@ -367,7 +391,7 @@ export const uiToolDefinition = {
367
391
  elementIndex: { type: "number", description: "Element index from last find (for tap)" },
368
392
  text: { type: "string", description: "Text to input" },
369
393
  localPath: { type: "string", description: "Local path for screenshot (default: .replicant/screenshots/screenshot-{timestamp}.png)" },
370
- inline: { type: "boolean", description: "Return base64 instead of file path (token-heavy, use sparingly)" },
394
+ inline: { type: "boolean", description: "Return base64 image data (default: true). Set to false to save to file instead." },
371
395
  debug: { type: "boolean", description: "Include source (accessibility/ocr) and confidence in response" },
372
396
  gridCell: { type: "number", minimum: 1, maximum: 24, description: "Grid cell number (1-24) for Tier 5 refinement" },
373
397
  gridPosition: { type: "number", minimum: 1, maximum: 5, description: "Position within cell (1=TL, 2=TR, 3=Center, 4=BL, 5=BR)" },
@@ -379,6 +403,21 @@ export const uiToolDefinition = {
379
403
  type: "boolean",
380
404
  description: "Skip scaling, return full device resolution. Warning: may exceed API limits.",
381
405
  },
406
+ compact: {
407
+ type: "boolean",
408
+ description: "For dump: return flat list of interactive elements with {text, type, x, y, resourceId} instead of full tree.",
409
+ },
410
+ direction: {
411
+ type: "string",
412
+ enum: ["up", "down", "left", "right"],
413
+ description: "Scroll direction (for scroll operation)",
414
+ },
415
+ amount: {
416
+ type: "number",
417
+ minimum: 0,
418
+ maximum: 1,
419
+ description: "Scroll amount as fraction of screen (0-1, default: 0.5)",
420
+ },
382
421
  },
383
422
  required: ["operation"],
384
423
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "replicant-mcp",
3
- "version": "1.2.1",
3
+ "version": "1.3.0",
4
4
  "description": "Android MCP server for AI-assisted Android development",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -20,7 +20,10 @@
20
20
  "test:device": "tsx scripts/real-device-test.ts",
21
21
  "start": "npm run build && node dist/index.js",
22
22
  "validate": "npm run build && npm run test -- --run",
23
- "prepublishOnly": "npm run build && npm test -- --run"
23
+ "prepublishOnly": "npm run build && npm test -- --run",
24
+ "release": "bash scripts/release.sh",
25
+ "release:minor": "bash scripts/release.sh minor",
26
+ "release:major": "bash scripts/release.sh major"
24
27
  },
25
28
  "keywords": [
26
29
  "mcp",