replicant-mcp 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +138 -25
  2. package/dist/adapters/adb.d.ts +1 -0
  3. package/dist/adapters/adb.js +7 -1
  4. package/dist/adapters/emulator.js +11 -11
  5. package/dist/adapters/ui-automator.d.ts +41 -1
  6. package/dist/adapters/ui-automator.js +256 -8
  7. package/dist/cli/gradle.js +3 -3
  8. package/dist/cli.js +1 -1
  9. package/dist/server.d.ts +3 -1
  10. package/dist/server.js +23 -3
  11. package/dist/services/config.d.ts +16 -0
  12. package/dist/services/config.js +62 -0
  13. package/dist/services/device-state.d.ts +2 -0
  14. package/dist/services/device-state.js +18 -0
  15. package/dist/services/environment.d.ts +18 -0
  16. package/dist/services/environment.js +130 -0
  17. package/dist/services/grid.d.ts +28 -0
  18. package/dist/services/grid.js +98 -0
  19. package/dist/services/icon-patterns.d.ts +10 -0
  20. package/dist/services/icon-patterns.js +51 -0
  21. package/dist/services/index.d.ts +6 -0
  22. package/dist/services/index.js +6 -0
  23. package/dist/services/ocr.d.ts +4 -0
  24. package/dist/services/ocr.js +59 -0
  25. package/dist/services/process-runner.d.ts +6 -0
  26. package/dist/services/process-runner.js +26 -0
  27. package/dist/services/visual-candidates.d.ts +24 -0
  28. package/dist/services/visual-candidates.js +78 -0
  29. package/dist/tools/adb-app.js +3 -2
  30. package/dist/tools/adb-device.d.ts +1 -0
  31. package/dist/tools/adb-device.js +47 -8
  32. package/dist/tools/adb-logcat.js +3 -2
  33. package/dist/tools/adb-shell.js +3 -2
  34. package/dist/tools/emulator-device.d.ts +1 -1
  35. package/dist/tools/gradle-get-details.d.ts +1 -1
  36. package/dist/tools/ui.d.ts +32 -1
  37. package/dist/tools/ui.js +253 -12
  38. package/dist/types/config.d.ts +34 -0
  39. package/dist/types/config.js +11 -0
  40. package/dist/types/errors.d.ts +25 -2
  41. package/dist/types/errors.js +23 -4
  42. package/dist/types/icon-recognition.d.ts +50 -0
  43. package/dist/types/icon-recognition.js +1 -0
  44. package/dist/types/index.d.ts +3 -0
  45. package/dist/types/index.js +3 -0
  46. package/dist/types/ocr.d.ts +21 -0
  47. package/dist/types/ocr.js +1 -0
  48. package/package.json +5 -2
package/dist/tools/ui.js CHANGED
@@ -1,23 +1,99 @@
1
1
  import { z } from "zod";
2
2
  import { CACHE_TTLS } from "../types/index.js";
3
+ import { flattenTree } from "../parsers/ui-dump.js";
3
4
  export const uiInputSchema = z.object({
4
- operation: z.enum(["dump", "find", "tap", "input", "screenshot", "accessibility-check"]),
5
+ operation: z.enum(["dump", "find", "tap", "input", "screenshot", "accessibility-check", "visual-snapshot"]),
5
6
  selector: z.object({
6
7
  resourceId: z.string().optional(),
7
8
  text: z.string().optional(),
8
9
  textContains: z.string().optional(),
9
10
  className: z.string().optional(),
11
+ nearestTo: z.string().optional(),
10
12
  }).optional(),
11
13
  x: z.number().optional(),
12
14
  y: z.number().optional(),
13
15
  elementIndex: z.number().optional(),
14
16
  text: z.string().optional(),
15
17
  localPath: z.string().optional(),
18
+ inline: z.boolean().optional(),
19
+ debug: z.boolean().optional(),
20
+ gridCell: z.number().min(1).max(24).optional(),
21
+ gridPosition: z.number().min(1).max(5).optional(),
16
22
  });
17
23
  // Store last find results for elementIndex reference
24
+ // Updated to support accessibility, OCR, and grid elements
18
25
  let lastFindResults = [];
19
- export async function handleUiTool(input, context) {
20
- const deviceId = context.deviceState.requireCurrentDevice().id;
26
+ // Type guards for different element types
27
+ function isAccessibilityNode(el) {
28
+ return "centerX" in el && "className" in el;
29
+ }
30
+ function isOcrElement(el) {
31
+ return "confidence" in el && "center" in el;
32
+ }
33
+ function isGridElement(el) {
34
+ return "center" in el && "bounds" in el && !("confidence" in el) && !("centerX" in el);
35
+ }
36
+ // Helper to get center coordinates from any element type
37
+ function getElementCenter(element) {
38
+ if (isAccessibilityNode(element)) {
39
+ return { x: element.centerX, y: element.centerY };
40
+ }
41
+ else {
42
+ // OcrElement or GridElement - both have center property
43
+ return element.center;
44
+ }
45
+ }
46
+ // Calculate Euclidean distance between two points
47
+ function calculateDistance(p1, p2) {
48
+ return Math.sqrt(Math.pow(p2.x - p1.x, 2) + Math.pow(p2.y - p1.y, 2));
49
+ }
50
+ // Check if a point is inside element bounds
51
+ function isPointInBounds(point, bounds) {
52
+ return (point.x >= bounds.left &&
53
+ point.x <= bounds.right &&
54
+ point.y >= bounds.top &&
55
+ point.y <= bounds.bottom);
56
+ }
57
+ // Calculate area of bounds
58
+ function boundsArea(bounds) {
59
+ return (bounds.right - bounds.left) * (bounds.bottom - bounds.top);
60
+ }
61
+ // Find targets whose smallest containing ViewGroup also contains the anchor point
62
+ function findContainingSiblingTargets(tree, anchorPoint, targetElements) {
63
+ const flat = flattenTree(tree);
64
+ const containingTargets = [];
65
+ for (const target of targetElements) {
66
+ const targetCenter = { x: target.centerX, y: target.centerY };
67
+ // Find the smallest ViewGroup that contains the target
68
+ let smallestContainerForTarget = null;
69
+ let smallestArea = Infinity;
70
+ for (const node of flat) {
71
+ if (!node.className?.includes("ViewGroup"))
72
+ continue;
73
+ if (!isPointInBounds(targetCenter, node.bounds))
74
+ continue;
75
+ const area = boundsArea(node.bounds);
76
+ if (area < smallestArea) {
77
+ smallestArea = area;
78
+ smallestContainerForTarget = node;
79
+ }
80
+ }
81
+ // Check if that smallest container also contains the anchor point
82
+ if (smallestContainerForTarget && isPointInBounds(anchorPoint, smallestContainerForTarget.bounds)) {
83
+ containingTargets.push(target);
84
+ }
85
+ }
86
+ return containingTargets;
87
+ }
88
+ export async function handleUiTool(input, context, uiConfig) {
89
+ const device = await context.deviceState.ensureDevice(context.adb);
90
+ const deviceId = device.id;
91
+ // Get config - use provided or defaults
92
+ const config = uiConfig ?? {
93
+ visualModePackages: [],
94
+ autoFallbackScreenshot: true,
95
+ includeBase64: false,
96
+ };
21
97
  switch (input.operation) {
22
98
  case "dump": {
23
99
  const tree = await context.ui.dump(deviceId);
@@ -43,9 +119,149 @@ export async function handleUiTool(input, context) {
43
119
  if (!input.selector) {
44
120
  throw new Error("selector is required for find operation");
45
121
  }
122
+ const debug = input.debug ?? false;
123
+ const nearestTo = input.selector.nearestTo;
124
+ // Use findWithFallbacks for text-based selectors
125
+ if (input.selector.text || input.selector.textContains) {
126
+ // If nearestTo is specified, first find the anchor element
127
+ let anchorCenter = null;
128
+ if (nearestTo) {
129
+ const anchorResult = await context.ui.findWithFallbacks(deviceId, { text: nearestTo }, {
130
+ debug: false,
131
+ includeVisualFallback: false,
132
+ });
133
+ if (anchorResult.elements.length > 0) {
134
+ anchorCenter = getElementCenter(anchorResult.elements[0]);
135
+ }
136
+ }
137
+ const result = await context.ui.findWithFallbacks(deviceId, input.selector, {
138
+ debug,
139
+ includeVisualFallback: config.autoFallbackScreenshot,
140
+ includeBase64: config.includeBase64,
141
+ gridCell: input.gridCell,
142
+ gridPosition: input.gridPosition,
143
+ });
144
+ // If we have an anchor, use containment-based matching
145
+ let usedContainment = false;
146
+ if (anchorCenter && result.elements.length > 0) {
147
+ // Filter to AccessibilityNode elements for containment check
148
+ const accessibilityElements = result.elements.filter(isAccessibilityNode);
149
+ if (accessibilityElements.length > 0) {
150
+ // Get the full tree for containment analysis
151
+ const tree = await context.ui.dump(deviceId);
152
+ // Find elements whose parent container contains the anchor point
153
+ const containingMatches = findContainingSiblingTargets(tree, anchorCenter, accessibilityElements);
154
+ if (containingMatches.length > 0) {
155
+ // Prioritize containment matches, then sort remaining by distance
156
+ usedContainment = true;
157
+ const containingCenters = new Set(containingMatches.map((el) => `${el.centerX},${el.centerY}`));
158
+ result.elements.sort((a, b) => {
159
+ const aCenter = getElementCenter(a);
160
+ const bCenter = getElementCenter(b);
161
+ const aContains = containingCenters.has(`${aCenter.x},${aCenter.y}`);
162
+ const bContains = containingCenters.has(`${bCenter.x},${bCenter.y}`);
163
+ // Containment matches come first
164
+ if (aContains && !bContains)
165
+ return -1;
166
+ if (!aContains && bContains)
167
+ return 1;
168
+ // Within same group, sort by distance
169
+ const distA = calculateDistance(aCenter, anchorCenter);
170
+ const distB = calculateDistance(bCenter, anchorCenter);
171
+ return distA - distB;
172
+ });
173
+ }
174
+ else {
175
+ // Fallback to pure distance sorting if no containment matches
176
+ result.elements.sort((a, b) => {
177
+ const distA = calculateDistance(getElementCenter(a), anchorCenter);
178
+ const distB = calculateDistance(getElementCenter(b), anchorCenter);
179
+ return distA - distB;
180
+ });
181
+ }
182
+ }
183
+ }
184
+ lastFindResults = result.elements;
185
+ const response = {
186
+ elements: result.elements.map((el, index) => {
187
+ if (isAccessibilityNode(el)) {
188
+ return {
189
+ index,
190
+ text: el.text,
191
+ resourceId: el.resourceId,
192
+ className: el.className,
193
+ centerX: el.centerX,
194
+ centerY: el.centerY,
195
+ bounds: el.bounds,
196
+ clickable: el.clickable,
197
+ };
198
+ }
199
+ else if (isOcrElement(el)) {
200
+ return {
201
+ index,
202
+ text: el.text,
203
+ center: el.center,
204
+ bounds: el.bounds,
205
+ confidence: debug ? el.confidence : undefined,
206
+ };
207
+ }
208
+ else {
209
+ // GridElement
210
+ return {
211
+ index,
212
+ center: el.center,
213
+ bounds: el.bounds,
214
+ };
215
+ }
216
+ }),
217
+ count: result.elements.length,
218
+ deviceId,
219
+ };
220
+ // Always include tier and confidence when available
221
+ if (result.tier !== undefined)
222
+ response.tier = result.tier;
223
+ if (result.confidence)
224
+ response.confidence = result.confidence;
225
+ if (debug) {
226
+ response.source = result.source;
227
+ if (result.fallbackReason) {
228
+ response.fallbackReason = result.fallbackReason;
229
+ }
230
+ }
231
+ // Include nearestTo info when used
232
+ if (nearestTo && anchorCenter) {
233
+ response.sortedByProximityTo = {
234
+ query: nearestTo,
235
+ anchor: anchorCenter,
236
+ method: usedContainment ? "containment" : "distance",
237
+ };
238
+ }
239
+ else if (nearestTo && !anchorCenter) {
240
+ response.nearestToWarning = `Could not find anchor element: "${nearestTo}"`;
241
+ }
242
+ // Include Tier 4 visual candidates if present
243
+ if (result.candidates) {
244
+ response.candidates = result.candidates;
245
+ if (result.truncated)
246
+ response.truncated = result.truncated;
247
+ if (result.totalCandidates)
248
+ response.totalCandidates = result.totalCandidates;
249
+ }
250
+ // Include Tier 5 grid fields if present
251
+ if (result.gridImage)
252
+ response.gridImage = result.gridImage;
253
+ if (result.gridPositions)
254
+ response.gridPositions = result.gridPositions;
255
+ // Include visual fallback if present (when count is 0 and autoFallbackScreenshot is enabled)
256
+ if (result.visualFallback) {
257
+ response.visualFallback = result.visualFallback;
258
+ }
259
+ return response;
260
+ }
261
+ // Non-text selectors use regular find (no OCR fallback)
46
262
  const elements = await context.ui.find(deviceId, input.selector);
47
263
  lastFindResults = elements;
48
- return {
264
+ const response = {
49
265
  elements: elements.map((el, index) => ({
50
266
  index,
51
267
  text: el.text,
@@ -59,6 +275,17 @@ export async function handleUiTool(input, context) {
59
275
  count: elements.length,
60
276
  deviceId,
61
277
  };
278
+ // Include visual fallback for non-text selectors when no results and config allows
279
+ if (elements.length === 0 && config.autoFallbackScreenshot) {
280
+ const snapshot = await context.ui.visualSnapshot(deviceId, {
281
+ includeBase64: config.includeBase64,
282
+ });
283
+ response.visualFallback = {
284
+ ...snapshot,
285
+ hint: "No elements matched selector. Use screenshot to identify tap coordinates.",
286
+ };
287
+ }
288
+ return response;
62
289
  }
63
290
  case "tap": {
64
291
  let x, y;
@@ -67,8 +294,9 @@ export async function handleUiTool(input, context) {
67
294
  throw new Error(`Element at index ${input.elementIndex} not found. Run 'find' first.`);
68
295
  }
69
296
  const element = lastFindResults[input.elementIndex];
70
- x = element.centerX;
71
- y = element.centerY;
297
+ const center = getElementCenter(element);
298
+ x = center.x;
299
+ y = center.y;
72
300
  }
73
301
  else if (input.x !== undefined && input.y !== undefined) {
74
302
  x = input.x;
@@ -88,27 +316,35 @@ export async function handleUiTool(input, context) {
88
316
  return { input: input.text, deviceId };
89
317
  }
90
318
  case "screenshot": {
91
- const localPath = input.localPath || `/tmp/screenshot-${Date.now()}.png`;
92
- await context.ui.screenshot(deviceId, localPath);
93
- return { path: localPath, deviceId };
319
+ const result = await context.ui.screenshot(deviceId, {
320
+ localPath: input.localPath,
321
+ inline: input.inline,
322
+ });
323
+ return { ...result, deviceId };
94
324
  }
95
325
  case "accessibility-check": {
96
326
  const result = await context.ui.accessibilityCheck(deviceId);
97
327
  return { ...result, deviceId };
98
328
  }
329
+ case "visual-snapshot": {
330
+ const snapshot = await context.ui.visualSnapshot(deviceId, {
331
+ includeBase64: input.inline ?? config.includeBase64,
332
+ });
333
+ return { ...snapshot, deviceId };
334
+ }
99
335
  default:
100
336
  throw new Error(`Unknown operation: ${input.operation}`);
101
337
  }
102
338
  }
103
339
  export const uiToolDefinition = {
104
340
  name: "ui",
105
- description: "Interact with app UI via accessibility tree. Operations: dump, find, tap, input, screenshot, accessibility-check.",
341
+ description: "Interact with app UI via accessibility tree. Auto-selects device if only one connected. Operations: dump, find, tap, input, screenshot, accessibility-check, visual-snapshot.",
106
342
  inputSchema: {
107
343
  type: "object",
108
344
  properties: {
109
345
  operation: {
110
346
  type: "string",
111
- enum: ["dump", "find", "tap", "input", "screenshot", "accessibility-check"],
347
+ enum: ["dump", "find", "tap", "input", "screenshot", "accessibility-check", "visual-snapshot"],
112
348
  },
113
349
  selector: {
114
350
  type: "object",
@@ -117,6 +353,7 @@ export const uiToolDefinition = {
117
353
  text: { type: "string" },
118
354
  textContains: { type: "string" },
119
355
  className: { type: "string" },
356
+ nearestTo: { type: "string", description: "Find elements nearest to this text (spatial proximity)" },
120
357
  },
121
358
  description: "Element selector (for find)",
122
359
  },
@@ -124,7 +361,11 @@ export const uiToolDefinition = {
124
361
  y: { type: "number", description: "Y coordinate (for tap)" },
125
362
  elementIndex: { type: "number", description: "Element index from last find (for tap)" },
126
363
  text: { type: "string", description: "Text to input" },
127
- localPath: { type: "string", description: "Local path for screenshot" },
364
+ localPath: { type: "string", description: "Local path for screenshot (default: .replicant/screenshots/screenshot-{timestamp}.png)" },
365
+ inline: { type: "boolean", description: "Return base64 instead of file path (token-heavy, use sparingly)" },
366
+ debug: { type: "boolean", description: "Include source (accessibility/ocr) and confidence in response" },
367
+ gridCell: { type: "number", minimum: 1, maximum: 24, description: "Grid cell number (1-24) for Tier 5 refinement" },
368
+ gridPosition: { type: "number", minimum: 1, maximum: 5, description: "Position within cell (1=TL, 2=TR, 3=Center, 4=BL, 5=BR)" },
128
369
  },
129
370
  required: ["operation"],
130
371
  },
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Configuration types for replicant-mcp
3
+ * Loaded from REPLICANT_CONFIG environment variable path
4
+ */
5
+ export interface UiConfig {
6
+ /** Always skip accessibility and use visual mode for these packages */
7
+ visualModePackages: string[];
8
+ /** Auto-include screenshot when find returns no results (default: true) */
9
+ autoFallbackScreenshot: boolean;
10
+ /** Include base64-encoded screenshot in response (default: false) */
11
+ includeBase64: boolean;
12
+ }
13
+ export interface ReplicantConfig {
14
+ ui: UiConfig;
15
+ }
16
+ export declare const DEFAULT_CONFIG: ReplicantConfig;
17
+ /**
18
+ * Visual snapshot response returned when accessibility fails
19
+ * or when visual-snapshot operation is explicitly requested
20
+ */
21
+ export interface VisualSnapshot {
22
+ screenshotPath: string;
23
+ screenshotBase64?: string;
24
+ screen: {
25
+ width: number;
26
+ height: number;
27
+ density: number;
28
+ };
29
+ app: {
30
+ packageName: string;
31
+ activityName: string;
32
+ };
33
+ hint?: string;
34
+ }
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Configuration types for replicant-mcp
3
+ * Loaded from REPLICANT_CONFIG environment variable path
4
+ */
5
+ export const DEFAULT_CONFIG = {
6
+ ui: {
7
+ visualModePackages: [],
8
+ autoFallbackScreenshot: true,
9
+ includeBase64: false,
10
+ },
11
+ };
@@ -9,13 +9,30 @@ export declare const ErrorCode: {
9
9
  readonly PACKAGE_NOT_FOUND: "PACKAGE_NOT_FOUND";
10
10
  readonly INSTALL_FAILED: "INSTALL_FAILED";
11
11
  readonly AVD_NOT_FOUND: "AVD_NOT_FOUND";
12
+ readonly EMULATOR_NOT_FOUND: "EMULATOR_NOT_FOUND";
12
13
  readonly EMULATOR_START_FAILED: "EMULATOR_START_FAILED";
13
14
  readonly SNAPSHOT_NOT_FOUND: "SNAPSHOT_NOT_FOUND";
14
15
  readonly COMMAND_BLOCKED: "COMMAND_BLOCKED";
15
16
  readonly TIMEOUT: "TIMEOUT";
16
17
  readonly CACHE_MISS: "CACHE_MISS";
18
+ readonly SDK_NOT_FOUND: "SDK_NOT_FOUND";
19
+ readonly ADB_NOT_FOUND: "ADB_NOT_FOUND";
20
+ readonly ADB_NOT_EXECUTABLE: "ADB_NOT_EXECUTABLE";
21
+ readonly ADB_SERVER_ERROR: "ADB_SERVER_ERROR";
22
+ readonly NO_DEVICES: "NO_DEVICES";
23
+ readonly MULTIPLE_DEVICES: "MULTIPLE_DEVICES";
24
+ readonly SCREENSHOT_FAILED: "SCREENSHOT_FAILED";
25
+ readonly PULL_FAILED: "PULL_FAILED";
26
+ readonly HEALTH_CHECK_FAILED: "HEALTH_CHECK_FAILED";
17
27
  };
18
28
  export type ErrorCode = (typeof ErrorCode)[keyof typeof ErrorCode];
29
+ export interface ErrorContext {
30
+ command?: string;
31
+ exitCode?: number;
32
+ stderr?: string;
33
+ checkedPaths?: string[];
34
+ [key: string]: unknown;
35
+ }
19
36
  export interface ToolError {
20
37
  error: ErrorCode;
21
38
  message: string;
@@ -25,7 +42,13 @@ export interface ToolError {
25
42
  export declare class ReplicantError extends Error {
26
43
  readonly code: ErrorCode;
27
44
  readonly suggestion?: string | undefined;
28
- readonly details?: Record<string, unknown> | undefined;
29
- constructor(code: ErrorCode, message: string, suggestion?: string | undefined, details?: Record<string, unknown> | undefined);
45
+ readonly context?: ErrorContext | undefined;
46
+ constructor(code: ErrorCode, message: string, suggestion?: string | undefined, context?: ErrorContext | undefined);
47
+ toJSON(): {
48
+ error: ErrorCode;
49
+ message: string;
50
+ suggestion: string | undefined;
51
+ context: ErrorContext | undefined;
52
+ };
30
53
  toToolError(): ToolError;
31
54
  }
@@ -13,6 +13,7 @@ export const ErrorCode = {
13
13
  INSTALL_FAILED: "INSTALL_FAILED",
14
14
  // Emulator errors
15
15
  AVD_NOT_FOUND: "AVD_NOT_FOUND",
16
+ EMULATOR_NOT_FOUND: "EMULATOR_NOT_FOUND",
16
17
  EMULATOR_START_FAILED: "EMULATOR_START_FAILED",
17
18
  SNAPSHOT_NOT_FOUND: "SNAPSHOT_NOT_FOUND",
18
19
  // Safety errors
@@ -20,24 +21,42 @@ export const ErrorCode = {
20
21
  TIMEOUT: "TIMEOUT",
21
22
  // Cache errors
22
23
  CACHE_MISS: "CACHE_MISS",
24
+ // New "Just Works" UX error codes
25
+ SDK_NOT_FOUND: "SDK_NOT_FOUND",
26
+ ADB_NOT_FOUND: "ADB_NOT_FOUND",
27
+ ADB_NOT_EXECUTABLE: "ADB_NOT_EXECUTABLE",
28
+ ADB_SERVER_ERROR: "ADB_SERVER_ERROR",
29
+ NO_DEVICES: "NO_DEVICES",
30
+ MULTIPLE_DEVICES: "MULTIPLE_DEVICES",
31
+ SCREENSHOT_FAILED: "SCREENSHOT_FAILED",
32
+ PULL_FAILED: "PULL_FAILED",
33
+ HEALTH_CHECK_FAILED: "HEALTH_CHECK_FAILED",
23
34
  };
24
35
  export class ReplicantError extends Error {
25
36
  code;
26
37
  suggestion;
27
- details;
28
- constructor(code, message, suggestion, details) {
38
+ context;
39
+ constructor(code, message, suggestion, context) {
29
40
  super(message);
30
41
  this.code = code;
31
42
  this.suggestion = suggestion;
32
- this.details = details;
43
+ this.context = context;
33
44
  this.name = "ReplicantError";
34
45
  }
46
+ toJSON() {
47
+ return {
48
+ error: this.code,
49
+ message: this.message,
50
+ suggestion: this.suggestion,
51
+ context: this.context,
52
+ };
53
+ }
35
54
  toToolError() {
36
55
  return {
37
56
  error: this.code,
38
57
  message: this.message,
39
58
  suggestion: this.suggestion,
40
- details: this.details,
59
+ details: this.context,
41
60
  };
42
61
  }
43
62
  }
@@ -0,0 +1,50 @@
1
+ export type ConfidenceLevel = "high" | "medium" | "low";
2
+ export type FindSource = "accessibility" | "ocr" | "visual" | "grid";
3
+ export type FindTier = 1 | 2 | 3 | 4 | 5;
4
+ export interface VisualCandidate {
5
+ index: number;
6
+ bounds: string;
7
+ center: {
8
+ x: number;
9
+ y: number;
10
+ };
11
+ image: string;
12
+ }
13
+ export interface GridPosition {
14
+ cell: number;
15
+ position: 1 | 2 | 3 | 4 | 5;
16
+ x: number;
17
+ y: number;
18
+ }
19
+ import { AccessibilityNode } from "../parsers/ui-dump.js";
20
+ import { OcrElement } from "./ocr.js";
21
+ export interface GridElement {
22
+ index: number;
23
+ bounds: string;
24
+ center: {
25
+ x: number;
26
+ y: number;
27
+ };
28
+ }
29
+ export type FindElement = AccessibilityNode | OcrElement | GridElement;
30
+ export interface FindWithFallbacksResult {
31
+ elements: FindElement[];
32
+ source: FindSource;
33
+ tier?: FindTier;
34
+ confidence?: ConfidenceLevel;
35
+ fallbackReason?: string;
36
+ candidates?: VisualCandidate[];
37
+ truncated?: boolean;
38
+ totalCandidates?: number;
39
+ gridImage?: string;
40
+ gridCell?: number;
41
+ gridPositions?: string[];
42
+ visualFallback?: import("./config.js").VisualSnapshot;
43
+ }
44
+ export interface FindOptions {
45
+ debug?: boolean;
46
+ includeVisualFallback?: boolean;
47
+ includeBase64?: boolean;
48
+ gridCell?: number;
49
+ gridPosition?: 1 | 2 | 3 | 4 | 5;
50
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -1,3 +1,6 @@
1
1
  export * from "./errors.js";
2
2
  export * from "./cache.js";
3
3
  export * from "./device.js";
4
+ export * from "./ocr.js";
5
+ export * from "./config.js";
6
+ export * from "./icon-recognition.js";
@@ -1,3 +1,6 @@
1
1
  export * from "./errors.js";
2
2
  export * from "./cache.js";
3
3
  export * from "./device.js";
4
+ export * from "./ocr.js";
5
+ export * from "./config.js";
6
+ export * from "./icon-recognition.js";
@@ -0,0 +1,21 @@
1
+ export interface OcrBounds {
2
+ x0: number;
3
+ y0: number;
4
+ x1: number;
5
+ y1: number;
6
+ }
7
+ export interface OcrResult {
8
+ text: string;
9
+ confidence: number;
10
+ bounds: OcrBounds;
11
+ }
12
+ export interface OcrElement {
13
+ index: number;
14
+ text: string;
15
+ bounds: string;
16
+ center: {
17
+ x: number;
18
+ y: number;
19
+ };
20
+ confidence: number;
21
+ }
@@ -0,0 +1 @@
1
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "replicant-mcp",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Android MCP server for AI-assisted Android development",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -20,7 +20,6 @@
20
20
  "test:device": "tsx scripts/real-device-test.ts",
21
21
  "start": "node dist/index.js",
22
22
  "validate": "npm run build && npm run test -- --run",
23
- "install-skill": "bash scripts/install-skill.sh",
24
23
  "prepublishOnly": "npm run build && npm test -- --run"
25
24
  },
26
25
  "keywords": [
@@ -52,10 +51,14 @@
52
51
  "@modelcontextprotocol/sdk": "^1.25.3",
53
52
  "commander": "^14.0.2",
54
53
  "execa": "^9.6.1",
54
+ "sharp": "^0.34.5",
55
+ "tesseract.js": "^7.0.0",
56
+ "yaml": "^2.8.2",
55
57
  "zod": "^4.3.5"
56
58
  },
57
59
  "devDependencies": {
58
60
  "@types/node": "^25.0.9",
61
+ "@types/sharp": "^0.31.1",
59
62
  "@vitest/coverage-v8": "^4.0.17",
60
63
  "tsx": "^4.21.0",
61
64
  "typescript": "^5.9.3",