replicant-mcp 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -25
- package/dist/adapters/adb.d.ts +1 -0
- package/dist/adapters/adb.js +7 -1
- package/dist/adapters/emulator.js +11 -11
- package/dist/adapters/ui-automator.d.ts +41 -1
- package/dist/adapters/ui-automator.js +256 -8
- package/dist/cli/gradle.js +3 -3
- package/dist/cli.js +1 -1
- package/dist/server.d.ts +3 -1
- package/dist/server.js +23 -3
- package/dist/services/config.d.ts +16 -0
- package/dist/services/config.js +62 -0
- package/dist/services/device-state.d.ts +2 -0
- package/dist/services/device-state.js +18 -0
- package/dist/services/environment.d.ts +18 -0
- package/dist/services/environment.js +130 -0
- package/dist/services/grid.d.ts +28 -0
- package/dist/services/grid.js +98 -0
- package/dist/services/icon-patterns.d.ts +10 -0
- package/dist/services/icon-patterns.js +51 -0
- package/dist/services/index.d.ts +6 -0
- package/dist/services/index.js +6 -0
- package/dist/services/ocr.d.ts +4 -0
- package/dist/services/ocr.js +59 -0
- package/dist/services/process-runner.d.ts +6 -0
- package/dist/services/process-runner.js +26 -0
- package/dist/services/visual-candidates.d.ts +24 -0
- package/dist/services/visual-candidates.js +78 -0
- package/dist/tools/adb-app.js +3 -2
- package/dist/tools/adb-device.d.ts +1 -0
- package/dist/tools/adb-device.js +47 -8
- package/dist/tools/adb-logcat.js +3 -2
- package/dist/tools/adb-shell.js +3 -2
- package/dist/tools/emulator-device.d.ts +1 -1
- package/dist/tools/gradle-get-details.d.ts +1 -1
- package/dist/tools/ui.d.ts +32 -1
- package/dist/tools/ui.js +253 -12
- package/dist/types/config.d.ts +34 -0
- package/dist/types/config.js +11 -0
- package/dist/types/errors.d.ts +25 -2
- package/dist/types/errors.js +23 -4
- package/dist/types/icon-recognition.d.ts +50 -0
- package/dist/types/icon-recognition.js +1 -0
- package/dist/types/index.d.ts +3 -0
- package/dist/types/index.js +3 -0
- package/dist/types/ocr.d.ts +21 -0
- package/dist/types/ocr.js +1 -0
- package/package.json +5 -2
package/README.md
CHANGED
|
@@ -10,6 +10,12 @@ replicant-mcp is a [Model Context Protocol](https://modelcontextprotocol.io/) se
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## Demo
|
|
14
|
+
|
|
15
|
+

|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
13
19
|
## Why replicant-mcp?
|
|
14
20
|
|
|
15
21
|
Android development involves juggling a lot: Gradle builds, emulator management, ADB commands, logcat filtering, UI testing. Each has its own CLI, flags, and quirks.
|
|
@@ -25,6 +31,41 @@ replicant-mcp wraps all of this into a clean interface that AI can understand an
|
|
|
25
31
|
|
|
26
32
|
---
|
|
27
33
|
|
|
34
|
+
## Current Features
|
|
35
|
+
|
|
36
|
+
| Category | Capabilities |
|
|
37
|
+
|----------|-------------|
|
|
38
|
+
| **Build & Test** | Build APKs/bundles, run unit and instrumented tests, list modules/variants/tasks, fetch detailed build logs |
|
|
39
|
+
| **Emulator** | Create, start, stop, wipe emulators; save/load/delete snapshots |
|
|
40
|
+
| **Device Control** | List connected devices, select active device, query device properties |
|
|
41
|
+
| **App Management** | Install, uninstall, launch, stop apps; clear app data; list installed packages |
|
|
42
|
+
| **Log Analysis** | Filter logcat by package, tag, level, time; configurable line limits |
|
|
43
|
+
| **UI Automation** | Accessibility-first element finding with multi-tier fallback (accessibility → OCR → visual), spatial proximity search (`nearestTo`), grid-based precision tapping, tap, text input, screenshots |
|
|
44
|
+
| **Configuration** | YAML config via `REPLICANT_CONFIG` for UI behavior customization |
|
|
45
|
+
| **Utilities** | Response caching with progressive disclosure, on-demand documentation |
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Future Roadmap
|
|
50
|
+
|
|
51
|
+
| Feature | Item | Status |
|
|
52
|
+
|---------|------|--------|
|
|
53
|
+
| **Visual Fallback** | Icon recognition (pattern + visual + grid fallback) | ✅ |
|
|
54
|
+
| | Semantic image search (LLM-assisted visual understanding) | Future |
|
|
55
|
+
| **Custom Build Commands** | Skill override for project-specific builds | Planned |
|
|
56
|
+
| | Auto-detect gradlew vs gradle | Planned |
|
|
57
|
+
| | Configurable default variant | Planned |
|
|
58
|
+
| | Extend skill override to test/lint operations | Future |
|
|
59
|
+
| **Video Capture** | Start/stop recording | Planned |
|
|
60
|
+
| | Duration-based capture | Planned |
|
|
61
|
+
| | Configurable output directory and quality | Planned |
|
|
62
|
+
| | WebM/GIF conversion (ffmpeg) | Future |
|
|
63
|
+
| **Developer Experience** | Simplified tool authoring with `defineTool()` helper | Future |
|
|
64
|
+
| | Auto-generate JSON schema from Zod via `zod-to-json-schema` | Future |
|
|
65
|
+
| | Convention-based tool auto-discovery (no manual wiring) | Future |
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
28
69
|
## Quick Start
|
|
29
70
|
|
|
30
71
|
### Prerequisites
|
|
@@ -43,18 +84,17 @@ emulator -version # Should show Android emulator version
|
|
|
43
84
|
|
|
44
85
|
### Installation
|
|
45
86
|
|
|
87
|
+
**Option 1: npm (recommended)**
|
|
88
|
+
```bash
|
|
89
|
+
npm install -g replicant-mcp
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**Option 2: From source**
|
|
46
93
|
```bash
|
|
47
|
-
# Clone the repo
|
|
48
94
|
git clone https://github.com/thecombatwombat/replicant-mcp.git
|
|
49
95
|
cd replicant-mcp
|
|
50
|
-
|
|
51
|
-
# Install dependencies
|
|
52
96
|
npm install
|
|
53
|
-
|
|
54
|
-
# Build
|
|
55
97
|
npm run build
|
|
56
|
-
|
|
57
|
-
# Verify everything works
|
|
58
98
|
npm test
|
|
59
99
|
```
|
|
60
100
|
|
|
@@ -75,31 +115,59 @@ Add this to your Claude Desktop config (`~/Library/Application Support/Claude/cl
|
|
|
75
115
|
|
|
76
116
|
Restart Claude Desktop. You should see "replicant" in the MCP servers list.
|
|
77
117
|
|
|
78
|
-
###
|
|
118
|
+
### Connect to Claude Code
|
|
79
119
|
|
|
80
|
-
|
|
120
|
+
Add the MCP server with environment variables for Android SDK:
|
|
81
121
|
|
|
82
|
-
**Option 1: Via Plugin Marketplace (Recommended)**
|
|
83
122
|
```bash
|
|
84
|
-
|
|
85
|
-
/
|
|
123
|
+
claude mcp add replicant \
|
|
124
|
+
-e ANDROID_HOME=$HOME/Library/Android/sdk \
|
|
125
|
+
-e PATH="$HOME/Library/Android/sdk/platform-tools:$HOME/Library/Android/sdk/emulator:$HOME/Library/Android/sdk/cmdline-tools/latest/bin:$PATH" \
|
|
126
|
+
-- node $(npm root -g)/replicant-mcp/dist/index.js
|
|
86
127
|
```
|
|
87
128
|
|
|
88
|
-
**
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
129
|
+
> **Note:** Adjust `ANDROID_HOME` if your Android SDK is in a different location. On Linux, it's typically `$HOME/Android/Sdk`.
|
|
130
|
+
|
|
131
|
+
Restart Claude Code to load the MCP server.
|
|
132
|
+
|
|
133
|
+
### Reducing Permission Prompts (Optional)
|
|
134
|
+
|
|
135
|
+
By default, Claude Code asks for permission on each tool call. To auto-approve replicant-mcp tools, add this to your `.claude/settings.json`:
|
|
136
|
+
|
|
137
|
+
```json
|
|
138
|
+
{
|
|
139
|
+
"permissions": {
|
|
140
|
+
"allow": [
|
|
141
|
+
"mcp__replicant__*"
|
|
142
|
+
]
|
|
143
|
+
}
|
|
144
|
+
}
|
|
92
145
|
```
|
|
93
146
|
|
|
94
|
-
|
|
147
|
+
This is especially useful for agentic workflows where human intervention is limited.
|
|
95
148
|
|
|
96
|
-
|
|
149
|
+
### PR Automation (Optional)
|
|
97
150
|
|
|
98
|
-
|
|
99
|
-
-
|
|
100
|
-
-
|
|
151
|
+
This project includes a Claude Code skill for automated PR handling. When invoked, it:
|
|
152
|
+
- Creates a branch and PR from your current changes
|
|
153
|
+
- Polls for Greptile and human reviews every 2 minutes (max 5 cycles)
|
|
154
|
+
- Automatically addresses Greptile feedback
|
|
155
|
+
- Merges when a human approves
|
|
101
156
|
|
|
102
|
-
|
|
157
|
+
To use:
|
|
158
|
+
```
|
|
159
|
+
/pr-with-review --branch feature/my-feature --title "My PR" --body "Description" --commit-message "feat: add feature"
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Or let Claude invoke it automatically when creating PRs.
|
|
163
|
+
|
|
164
|
+
### Output Directory
|
|
165
|
+
|
|
166
|
+
replicant-mcp stores screenshots in `.replicant/screenshots/` within your current working directory. Add this to your `.gitignore`:
|
|
167
|
+
|
|
168
|
+
```gitignore
|
|
169
|
+
.replicant/
|
|
170
|
+
```
|
|
103
171
|
|
|
104
172
|
---
|
|
105
173
|
|
|
@@ -175,7 +243,7 @@ Claude: Let me check the error logs.
|
|
|
175
243
|
Want me to look at that file?
|
|
176
244
|
```
|
|
177
245
|
|
|
178
|
-
### UI Automation (
|
|
246
|
+
### UI Automation (Smart Element Finding)
|
|
179
247
|
|
|
180
248
|
```
|
|
181
249
|
You: "Tap the Login button"
|
|
@@ -187,7 +255,26 @@ Claude: I'll find and tap the Login button.
|
|
|
187
255
|
Tapped "Login" at coordinates (540, 1847)
|
|
188
256
|
```
|
|
189
257
|
|
|
190
|
-
|
|
258
|
+
**Spatial proximity search** — find elements near other elements:
|
|
259
|
+
```
|
|
260
|
+
You: "Tap the edit icon next to John's name"
|
|
261
|
+
|
|
262
|
+
Claude: [Calls ui with operation: "find", selector: { textContains: "edit", nearestTo: "John" }]
|
|
263
|
+
Found edit button nearest to "John" at (892, 340)
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
**Multi-tier fallback** — when accessibility data isn't available:
|
|
267
|
+
1. **Accessibility tree** — fast, reliable, text-based
|
|
268
|
+
2. **OCR fallback** — Tesseract extracts text from screenshot
|
|
269
|
+
3. **Visual fallback** — returns screenshot + metadata for AI vision
|
|
270
|
+
|
|
271
|
+
**Grid-based precision** — tap icons without text labels:
|
|
272
|
+
```
|
|
273
|
+
Claude: [Calls ui with operation: "tap", gridCell: 5, gridPosition: 3]
|
|
274
|
+
// Taps center of cell 5 in a 24-cell grid overlay
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
This approach is faster, cheaper, and more reliable than pure screenshot-based automation.
|
|
191
278
|
|
|
192
279
|
---
|
|
193
280
|
|
|
@@ -219,7 +306,7 @@ replicant-mcp provides 12 tools organized into categories:
|
|
|
219
306
|
### UI Automation
|
|
220
307
|
| Tool | Description |
|
|
221
308
|
|------|-------------|
|
|
222
|
-
| `ui` |
|
|
309
|
+
| `ui` | Element finding with fallback chain, spatial search (`nearestTo`), tap (coordinates or grid), input text, screenshot, accessibility-check, visual-snapshot |
|
|
223
310
|
|
|
224
311
|
### Utilities
|
|
225
312
|
| Tool | Description |
|
|
@@ -286,6 +373,32 @@ The `adb-shell` tool blocks dangerous commands like `rm -rf /`, `reboot`, and `s
|
|
|
286
373
|
|
|
287
374
|
---
|
|
288
375
|
|
|
376
|
+
## Configuration
|
|
377
|
+
|
|
378
|
+
replicant-mcp can be configured via a YAML file. Set the `REPLICANT_CONFIG` environment variable to the path:
|
|
379
|
+
|
|
380
|
+
```bash
|
|
381
|
+
export REPLICANT_CONFIG=/path/to/config.yaml
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
**Example config.yaml:**
|
|
385
|
+
```yaml
|
|
386
|
+
ui:
|
|
387
|
+
# Always use visual mode (skip accessibility) for these packages
|
|
388
|
+
visualModePackages:
|
|
389
|
+
- com.example.legacy.app
|
|
390
|
+
|
|
391
|
+
# Auto-include screenshot when find returns no results (default: true)
|
|
392
|
+
autoFallbackScreenshot: true
|
|
393
|
+
|
|
394
|
+
# Include base64-encoded screenshot in responses (default: false)
|
|
395
|
+
includeBase64: false
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
Most users won't need a config file—the defaults work well for typical Android apps.
|
|
399
|
+
|
|
400
|
+
---
|
|
401
|
+
|
|
289
402
|
## Development
|
|
290
403
|
|
|
291
404
|
### Project Structure
|
package/dist/adapters/adb.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export declare class AdbAdapter {
|
|
|
11
11
|
stop(deviceId: string, packageName: string): Promise<void>;
|
|
12
12
|
clearData(deviceId: string, packageName: string): Promise<void>;
|
|
13
13
|
shell(deviceId: string, command: string, timeoutMs?: number): Promise<RunResult>;
|
|
14
|
+
pull(deviceId: string, remotePath: string, localPath: string): Promise<void>;
|
|
14
15
|
logcat(deviceId: string, options: {
|
|
15
16
|
lines?: number;
|
|
16
17
|
filter?: string;
|
package/dist/adapters/adb.js
CHANGED
|
@@ -45,6 +45,12 @@ export class AdbAdapter {
|
|
|
45
45
|
async shell(deviceId, command, timeoutMs) {
|
|
46
46
|
return this.adb(["-s", deviceId, "shell", command], timeoutMs);
|
|
47
47
|
}
|
|
48
|
+
async pull(deviceId, remotePath, localPath) {
|
|
49
|
+
const result = await this.adb(["-s", deviceId, "pull", remotePath, localPath]);
|
|
50
|
+
if (result.exitCode !== 0) {
|
|
51
|
+
throw new ReplicantError(ErrorCode.PULL_FAILED, `Failed to pull ${remotePath} to ${localPath}`, result.stderr || "Check device connection and file paths");
|
|
52
|
+
}
|
|
53
|
+
}
|
|
48
54
|
async logcat(deviceId, options) {
|
|
49
55
|
const args = ["-s", deviceId, "logcat", "-d"];
|
|
50
56
|
if (options.lines) {
|
|
@@ -70,6 +76,6 @@ export class AdbAdapter {
|
|
|
70
76
|
return props;
|
|
71
77
|
}
|
|
72
78
|
async adb(args, timeoutMs) {
|
|
73
|
-
return this.runner.
|
|
79
|
+
return this.runner.runAdb(args, { timeoutMs });
|
|
74
80
|
}
|
|
75
81
|
}
|
|
@@ -8,8 +8,8 @@ export class EmulatorAdapter {
|
|
|
8
8
|
}
|
|
9
9
|
async list() {
|
|
10
10
|
const [avdResult, runningResult] = await Promise.all([
|
|
11
|
-
this.runner.
|
|
12
|
-
this.runner.
|
|
11
|
+
this.runner.runAvdManager(["list", "avd"]),
|
|
12
|
+
this.runner.runEmulator(["-list-avds"]),
|
|
13
13
|
]);
|
|
14
14
|
return {
|
|
15
15
|
available: parseAvdList(avdResult.stdout),
|
|
@@ -17,7 +17,7 @@ export class EmulatorAdapter {
|
|
|
17
17
|
};
|
|
18
18
|
}
|
|
19
19
|
async create(name, device, systemImage) {
|
|
20
|
-
const result = await this.runner.
|
|
20
|
+
const result = await this.runner.runAvdManager([
|
|
21
21
|
"create", "avd",
|
|
22
22
|
"-n", name,
|
|
23
23
|
"-k", systemImage,
|
|
@@ -31,7 +31,7 @@ export class EmulatorAdapter {
|
|
|
31
31
|
async start(avdName) {
|
|
32
32
|
// Start emulator in background - don't wait for it
|
|
33
33
|
// Returns immediately, emulator boots in background
|
|
34
|
-
this.runner.
|
|
34
|
+
this.runner.runEmulator([
|
|
35
35
|
"-avd", avdName,
|
|
36
36
|
"-no-snapshot-load",
|
|
37
37
|
"-no-boot-anim",
|
|
@@ -41,7 +41,7 @@ export class EmulatorAdapter {
|
|
|
41
41
|
// Give it a moment to register
|
|
42
42
|
await new Promise((r) => setTimeout(r, 2000));
|
|
43
43
|
// Find the new emulator ID
|
|
44
|
-
const result = await this.runner.
|
|
44
|
+
const result = await this.runner.runAdb(["devices"]);
|
|
45
45
|
const match = result.stdout.match(/emulator-\d+/);
|
|
46
46
|
if (!match) {
|
|
47
47
|
throw new ReplicantError(ErrorCode.EMULATOR_START_FAILED, `Emulator ${avdName} failed to start`, "Check the AVD name and try again");
|
|
@@ -49,24 +49,24 @@ export class EmulatorAdapter {
|
|
|
49
49
|
return match[0];
|
|
50
50
|
}
|
|
51
51
|
async kill(emulatorId) {
|
|
52
|
-
await this.runner.
|
|
52
|
+
await this.runner.runAdb(["-s", emulatorId, "emu", "kill"]);
|
|
53
53
|
}
|
|
54
54
|
async wipe(avdName) {
|
|
55
|
-
await this.runner.
|
|
55
|
+
await this.runner.runEmulator(["-avd", avdName, "-wipe-data", "-no-window"], { timeoutMs: 5000 }).catch(() => {
|
|
56
56
|
// Expected behavior
|
|
57
57
|
});
|
|
58
58
|
}
|
|
59
59
|
async snapshotSave(emulatorId, name) {
|
|
60
|
-
await this.runner.
|
|
60
|
+
await this.runner.runAdb(["-s", emulatorId, "emu", "avd", "snapshot", "save", name]);
|
|
61
61
|
}
|
|
62
62
|
async snapshotLoad(emulatorId, name) {
|
|
63
|
-
await this.runner.
|
|
63
|
+
await this.runner.runAdb(["-s", emulatorId, "emu", "avd", "snapshot", "load", name]);
|
|
64
64
|
}
|
|
65
65
|
async snapshotList(emulatorId) {
|
|
66
|
-
const result = await this.runner.
|
|
66
|
+
const result = await this.runner.runAdb(["-s", emulatorId, "emu", "avd", "snapshot", "list"]);
|
|
67
67
|
return parseSnapshotList(result.stdout);
|
|
68
68
|
}
|
|
69
69
|
async snapshotDelete(emulatorId, name) {
|
|
70
|
-
await this.runner.
|
|
70
|
+
await this.runner.runAdb(["-s", emulatorId, "emu", "avd", "snapshot", "delete", name]);
|
|
71
71
|
}
|
|
72
72
|
}
|
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
import { AdbAdapter } from "./adb.js";
|
|
2
2
|
import { AccessibilityNode } from "../parsers/ui-dump.js";
|
|
3
|
+
import { VisualSnapshot } from "../types/index.js";
|
|
4
|
+
import { FindWithFallbacksResult, FindOptions as IconFindOptions } from "../types/icon-recognition.js";
|
|
5
|
+
/**
 * Display characteristics of a device, as returned by
 * `UiAutomatorAdapter.getScreenMetadata`.
 */
export interface ScreenMetadata {
    /** Screen width in pixels. */
    width: number;
    /** Screen height in pixels. */
    height: number;
    /** Density multiplier relative to the 160 dpi baseline (dpi / 160). */
    density: number;
}
|
|
10
|
+
/**
 * Foreground application, as returned by `UiAutomatorAdapter.getCurrentApp`.
 * Both fields are the literal string "unknown" when detection fails.
 */
export interface CurrentApp {
    /** Package part of the `package/activity` component string. */
    packageName: string;
    /** Activity part of the `package/activity` component string. */
    activityName: string;
}
|
|
14
|
+
/** Options accepted by `UiAutomatorAdapter.screenshot`. */
export interface ScreenshotOptions {
    /**
     * Local destination for the pulled PNG (file mode). When omitted, a
     * timestamped path under `.replicant/screenshots` is generated.
     */
    localPath?: string;
    /** When true, return the image as base64 instead of pulling it to a local file. */
    inline?: boolean;
}
|
|
18
|
+
/** Result of `UiAutomatorAdapter.screenshot`. */
export interface ScreenshotResult {
    /** "file" when the image was pulled to disk, "inline" when returned as base64. */
    mode: "file" | "inline";
    /** Local path of the pulled image; set in file mode. */
    path?: string;
    /** Base64-encoded PNG data; set in inline mode. */
    base64?: string;
    /** Size of the image in bytes; set in inline mode. */
    sizeBytes?: number;
}
|
|
24
|
+
/**
 * Alias of `FindWithFallbacksResult`.
 * NOTE(review): presumably retained so existing `findWithOcrFallback` callers keep compiling — confirm.
 */
export type FindWithOcrResult = FindWithFallbacksResult;
/** The icon-recognition `FindOptions` type, re-exported under this module's name. */
export type FindOptions = IconFindOptions;
|
|
3
26
|
export declare class UiAutomatorAdapter {
|
|
4
27
|
private adb;
|
|
5
28
|
constructor(adb?: AdbAdapter);
|
|
@@ -13,11 +36,28 @@ export declare class UiAutomatorAdapter {
|
|
|
13
36
|
tap(deviceId: string, x: number, y: number): Promise<void>;
|
|
14
37
|
tapElement(deviceId: string, element: AccessibilityNode): Promise<void>;
|
|
15
38
|
input(deviceId: string, text: string): Promise<void>;
|
|
16
|
-
screenshot(deviceId: string,
|
|
39
|
+
screenshot(deviceId: string, options?: ScreenshotOptions): Promise<ScreenshotResult>;
|
|
17
40
|
accessibilityCheck(deviceId: string): Promise<{
|
|
18
41
|
hasAccessibleElements: boolean;
|
|
19
42
|
clickableCount: number;
|
|
20
43
|
textCount: number;
|
|
21
44
|
totalElements: number;
|
|
22
45
|
}>;
|
|
46
|
+
getScreenMetadata(deviceId: string): Promise<ScreenMetadata>;
|
|
47
|
+
getCurrentApp(deviceId: string): Promise<CurrentApp>;
|
|
48
|
+
visualSnapshot(deviceId: string, options?: {
|
|
49
|
+
includeBase64?: boolean;
|
|
50
|
+
}): Promise<VisualSnapshot>;
|
|
51
|
+
findWithFallbacks(deviceId: string, selector: {
|
|
52
|
+
resourceId?: string;
|
|
53
|
+
text?: string;
|
|
54
|
+
textContains?: string;
|
|
55
|
+
className?: string;
|
|
56
|
+
}, options?: FindOptions): Promise<FindWithFallbacksResult>;
|
|
57
|
+
findWithOcrFallback(deviceId: string, selector: {
|
|
58
|
+
resourceId?: string;
|
|
59
|
+
text?: string;
|
|
60
|
+
textContains?: string;
|
|
61
|
+
className?: string;
|
|
62
|
+
}, options?: FindOptions): Promise<FindWithFallbacksResult>;
|
|
23
63
|
}
|
|
@@ -1,5 +1,21 @@
|
|
|
1
|
+
import * as path from "path";
|
|
2
|
+
import * as fs from "fs";
|
|
1
3
|
import { AdbAdapter } from "./adb.js";
|
|
2
4
|
import { parseUiDump, findElements, flattenTree } from "../parsers/ui-dump.js";
|
|
5
|
+
import { ReplicantError, ErrorCode } from "../types/index.js";
|
|
6
|
+
import { extractText, searchText } from "../services/ocr.js";
|
|
7
|
+
import { matchIconPattern, matchesResourceId } from "../services/icon-patterns.js";
|
|
8
|
+
import { filterIconCandidates, formatBounds, cropCandidateImage } from "../services/visual-candidates.js";
|
|
9
|
+
import { calculateGridCellBounds, calculatePositionCoordinates, createGridOverlay, POSITION_LABELS, } from "../services/grid.js";
|
|
10
|
+
/**
 * Build a timestamped PNG path inside `<cwd>/.replicant/screenshots`.
 *
 * Side effect: the directory (and any missing parents) is created
 * synchronously if it does not exist yet.
 *
 * @returns {string} Path of the form `<cwd>/.replicant/screenshots/screenshot-<epoch ms>.png`.
 */
function getDefaultScreenshotPath() {
    const screenshotsDir = path.join(process.cwd(), ".replicant", "screenshots");
    fs.mkdirSync(screenshotsDir, { recursive: true });
    const fileName = `screenshot-${Date.now()}.png`;
    return path.join(screenshotsDir, fileName);
}
|
|
3
19
|
export class UiAutomatorAdapter {
|
|
4
20
|
adb;
|
|
5
21
|
constructor(adb = new AdbAdapter()) {
|
|
@@ -29,14 +45,35 @@ export class UiAutomatorAdapter {
|
|
|
29
45
|
const escaped = text.replace(/(['"\\$`])/g, "\\$1").replace(/ /g, "%s");
|
|
30
46
|
await this.adb.shell(deviceId, `input text "${escaped}"`);
|
|
31
47
|
}
|
|
32
|
-
async screenshot(deviceId,
|
|
33
|
-
const remotePath = "/sdcard/screenshot.png";
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
48
|
+
async screenshot(deviceId, options = {}) {
|
|
49
|
+
const remotePath = "/sdcard/replicant-screenshot.png";
|
|
50
|
+
// Capture screenshot on device
|
|
51
|
+
const captureResult = await this.adb.shell(deviceId, `screencap -p ${remotePath}`);
|
|
52
|
+
if (captureResult.exitCode !== 0) {
|
|
53
|
+
throw new ReplicantError(ErrorCode.SCREENSHOT_FAILED, "Failed to capture screenshot", captureResult.stderr || "Ensure device screen is on and unlocked");
|
|
54
|
+
}
|
|
55
|
+
try {
|
|
56
|
+
if (options.inline) {
|
|
57
|
+
// Inline mode: return base64
|
|
58
|
+
const base64Result = await this.adb.shell(deviceId, `base64 ${remotePath}`);
|
|
59
|
+
const sizeResult = await this.adb.shell(deviceId, `stat -c%s ${remotePath}`);
|
|
60
|
+
return {
|
|
61
|
+
mode: "inline",
|
|
62
|
+
base64: base64Result.stdout.trim(),
|
|
63
|
+
sizeBytes: parseInt(sizeResult.stdout.trim(), 10),
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
// File mode (default): pull to local
|
|
68
|
+
const localPath = options.localPath || getDefaultScreenshotPath();
|
|
69
|
+
await this.adb.pull(deviceId, remotePath, localPath);
|
|
70
|
+
return { mode: "file", path: localPath };
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
finally {
|
|
74
|
+
// Always clean up remote file
|
|
75
|
+
await this.adb.shell(deviceId, `rm -f ${remotePath}`);
|
|
76
|
+
}
|
|
40
77
|
}
|
|
41
78
|
async accessibilityCheck(deviceId) {
|
|
42
79
|
const tree = await this.dump(deviceId);
|
|
@@ -50,4 +87,215 @@ export class UiAutomatorAdapter {
|
|
|
50
87
|
totalElements: flat.length,
|
|
51
88
|
};
|
|
52
89
|
}
|
|
90
|
+
async getScreenMetadata(deviceId) {
|
|
91
|
+
// Get screen size via wm size
|
|
92
|
+
const sizeResult = await this.adb.shell(deviceId, "wm size");
|
|
93
|
+
const sizeMatch = sizeResult.stdout.match(/Physical size:\s*(\d+)x(\d+)/);
|
|
94
|
+
let width = 1080;
|
|
95
|
+
let height = 1920;
|
|
96
|
+
if (sizeMatch) {
|
|
97
|
+
width = parseInt(sizeMatch[1], 10);
|
|
98
|
+
height = parseInt(sizeMatch[2], 10);
|
|
99
|
+
}
|
|
100
|
+
// Get density via wm density
|
|
101
|
+
const densityResult = await this.adb.shell(deviceId, "wm density");
|
|
102
|
+
const densityMatch = densityResult.stdout.match(/Physical density:\s*(\d+)/);
|
|
103
|
+
// Convert DPI to density multiplier (baseline is 160 dpi)
|
|
104
|
+
let density = 2.75; // Default reasonable value
|
|
105
|
+
if (densityMatch) {
|
|
106
|
+
const dpi = parseInt(densityMatch[1], 10);
|
|
107
|
+
density = dpi / 160;
|
|
108
|
+
}
|
|
109
|
+
return { width, height, density };
|
|
110
|
+
}
|
|
111
|
+
async getCurrentApp(deviceId) {
|
|
112
|
+
// Get current focused activity
|
|
113
|
+
const result = await this.adb.shell(deviceId, "dumpsys activity activities | grep mResumedActivity");
|
|
114
|
+
// Parse: mResumedActivity: ActivityRecord{... com.example/.MainActivity t123}
|
|
115
|
+
const match = result.stdout.match(/([a-zA-Z0-9_.]+)\/([a-zA-Z0-9_.]+)\s+/);
|
|
116
|
+
if (match) {
|
|
117
|
+
return {
|
|
118
|
+
packageName: match[1],
|
|
119
|
+
activityName: match[2],
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
// Fallback to simpler approach
|
|
123
|
+
const fallbackResult = await this.adb.shell(deviceId, "dumpsys window | grep mCurrentFocus");
|
|
124
|
+
const fallbackMatch = fallbackResult.stdout.match(/([a-zA-Z0-9_.]+)\/([a-zA-Z0-9_.]+)/);
|
|
125
|
+
if (fallbackMatch) {
|
|
126
|
+
return {
|
|
127
|
+
packageName: fallbackMatch[1],
|
|
128
|
+
activityName: fallbackMatch[2],
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
return {
|
|
132
|
+
packageName: "unknown",
|
|
133
|
+
activityName: "unknown",
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
async visualSnapshot(deviceId, options = {}) {
|
|
137
|
+
// Always get file-based screenshot first
|
|
138
|
+
const [screenshotResult, screen, app] = await Promise.all([
|
|
139
|
+
this.screenshot(deviceId, {}),
|
|
140
|
+
this.getScreenMetadata(deviceId),
|
|
141
|
+
this.getCurrentApp(deviceId),
|
|
142
|
+
]);
|
|
143
|
+
const snapshot = {
|
|
144
|
+
screenshotPath: screenshotResult.path,
|
|
145
|
+
screen,
|
|
146
|
+
app,
|
|
147
|
+
};
|
|
148
|
+
// Optionally also get base64 encoding from local file
|
|
149
|
+
if (options.includeBase64 && screenshotResult.path) {
|
|
150
|
+
const fs = await import("fs/promises");
|
|
151
|
+
const buffer = await fs.readFile(screenshotResult.path);
|
|
152
|
+
snapshot.screenshotBase64 = buffer.toString("base64");
|
|
153
|
+
}
|
|
154
|
+
return snapshot;
|
|
155
|
+
}
|
|
156
|
+
async findWithFallbacks(deviceId, selector, options = {}) {
|
|
157
|
+
// Handle Tier 5 grid refinement FIRST (when gridCell and gridPosition are provided)
|
|
158
|
+
if (options.gridCell !== undefined && options.gridPosition !== undefined) {
|
|
159
|
+
const screen = await this.getScreenMetadata(deviceId);
|
|
160
|
+
const cellBounds = calculateGridCellBounds(options.gridCell, screen.width, screen.height);
|
|
161
|
+
const coords = calculatePositionCoordinates(options.gridPosition, cellBounds);
|
|
162
|
+
return {
|
|
163
|
+
elements: [
|
|
164
|
+
{
|
|
165
|
+
index: 0,
|
|
166
|
+
bounds: `[${cellBounds.x0},${cellBounds.y0}][${cellBounds.x1},${cellBounds.y1}]`,
|
|
167
|
+
center: coords,
|
|
168
|
+
},
|
|
169
|
+
],
|
|
170
|
+
source: "grid",
|
|
171
|
+
tier: 5,
|
|
172
|
+
confidence: "low",
|
|
173
|
+
};
|
|
174
|
+
}
|
|
175
|
+
// Tier 1: Accessibility text match
|
|
176
|
+
const accessibilityResults = await this.find(deviceId, selector);
|
|
177
|
+
if (accessibilityResults.length > 0) {
|
|
178
|
+
return {
|
|
179
|
+
elements: accessibilityResults,
|
|
180
|
+
source: "accessibility",
|
|
181
|
+
tier: 1,
|
|
182
|
+
confidence: "high",
|
|
183
|
+
};
|
|
184
|
+
}
|
|
185
|
+
// Tier 2: ResourceId pattern match (for text-based queries)
|
|
186
|
+
if (selector.text || selector.textContains) {
|
|
187
|
+
const query = selector.text || selector.textContains;
|
|
188
|
+
const patterns = matchIconPattern(query);
|
|
189
|
+
if (patterns) {
|
|
190
|
+
const tree = await this.dump(deviceId);
|
|
191
|
+
const flat = flattenTree(tree);
|
|
192
|
+
const patternMatches = flat.filter((node) => node.resourceId && matchesResourceId(node.resourceId, patterns));
|
|
193
|
+
if (patternMatches.length > 0) {
|
|
194
|
+
return {
|
|
195
|
+
elements: patternMatches,
|
|
196
|
+
source: "accessibility",
|
|
197
|
+
tier: 2,
|
|
198
|
+
confidence: "high",
|
|
199
|
+
fallbackReason: options.debug
|
|
200
|
+
? "no text match, found via resourceId pattern"
|
|
201
|
+
: undefined,
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
// Tier 3: OCR (existing logic)
|
|
207
|
+
if (selector.text || selector.textContains) {
|
|
208
|
+
const searchTerm = selector.text || selector.textContains;
|
|
209
|
+
// Take screenshot for OCR
|
|
210
|
+
const screenshotResult = await this.screenshot(deviceId, {});
|
|
211
|
+
try {
|
|
212
|
+
// Run OCR
|
|
213
|
+
const ocrResults = await extractText(screenshotResult.path);
|
|
214
|
+
const matches = searchText(ocrResults, searchTerm);
|
|
215
|
+
if (matches.length > 0) {
|
|
216
|
+
return {
|
|
217
|
+
elements: matches,
|
|
218
|
+
source: "ocr",
|
|
219
|
+
tier: 3,
|
|
220
|
+
confidence: "high",
|
|
221
|
+
fallbackReason: options.debug
|
|
222
|
+
? "no accessibility or pattern match, found via OCR"
|
|
223
|
+
: undefined,
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
// Tier 4: Visual candidates (unlabeled clickables)
|
|
227
|
+
const tree = await this.dump(deviceId);
|
|
228
|
+
const flat = flattenTree(tree);
|
|
229
|
+
const iconCandidates = filterIconCandidates(flat);
|
|
230
|
+
if (iconCandidates.length > 0) {
|
|
231
|
+
const candidates = await Promise.all(iconCandidates.map(async (node, index) => ({
|
|
232
|
+
index,
|
|
233
|
+
bounds: formatBounds(node),
|
|
234
|
+
center: { x: node.centerX, y: node.centerY },
|
|
235
|
+
image: await cropCandidateImage(screenshotResult.path, node.bounds),
|
|
236
|
+
})));
|
|
237
|
+
const allUnlabeled = flat.filter((n) => n.clickable && !n.text && !n.contentDesc);
|
|
238
|
+
return {
|
|
239
|
+
elements: [],
|
|
240
|
+
source: "visual",
|
|
241
|
+
tier: 4,
|
|
242
|
+
confidence: "medium",
|
|
243
|
+
candidates,
|
|
244
|
+
truncated: iconCandidates.length < allUnlabeled.length,
|
|
245
|
+
totalCandidates: allUnlabeled.length,
|
|
246
|
+
fallbackReason: options.debug
|
|
247
|
+
? "no text/pattern/OCR match, showing visual candidates"
|
|
248
|
+
: undefined,
|
|
249
|
+
};
|
|
250
|
+
}
|
|
251
|
+
// Tier 5: Grid fallback (empty or unusable accessibility tree)
|
|
252
|
+
const screen = await this.getScreenMetadata(deviceId);
|
|
253
|
+
const gridImage = await createGridOverlay(screenshotResult.path);
|
|
254
|
+
return {
|
|
255
|
+
elements: [],
|
|
256
|
+
source: "grid",
|
|
257
|
+
tier: 5,
|
|
258
|
+
confidence: "low",
|
|
259
|
+
gridImage,
|
|
260
|
+
gridPositions: POSITION_LABELS,
|
|
261
|
+
fallbackReason: options.debug
|
|
262
|
+
? "no usable elements, showing grid for coordinate selection"
|
|
263
|
+
: undefined,
|
|
264
|
+
};
|
|
265
|
+
}
|
|
266
|
+
finally {
|
|
267
|
+
// Always clean up screenshot - Tier 3/4/5 all embed base64 data in response
|
|
268
|
+
if (screenshotResult.path) {
|
|
269
|
+
const fs = await import("fs/promises");
|
|
270
|
+
await fs.unlink(screenshotResult.path).catch(() => { });
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
// No text selector - return empty with visual fallback if requested
|
|
275
|
+
if (options.includeVisualFallback) {
|
|
276
|
+
const snapshot = await this.visualSnapshot(deviceId, {
|
|
277
|
+
includeBase64: options.includeBase64,
|
|
278
|
+
});
|
|
279
|
+
return {
|
|
280
|
+
elements: [],
|
|
281
|
+
source: "accessibility",
|
|
282
|
+
tier: 1,
|
|
283
|
+
confidence: "high",
|
|
284
|
+
visualFallback: {
|
|
285
|
+
...snapshot,
|
|
286
|
+
hint: "No elements matched selector. Use screenshot to identify tap coordinates.",
|
|
287
|
+
},
|
|
288
|
+
};
|
|
289
|
+
}
|
|
290
|
+
return {
|
|
291
|
+
elements: [],
|
|
292
|
+
source: "accessibility",
|
|
293
|
+
tier: 1,
|
|
294
|
+
confidence: "high",
|
|
295
|
+
};
|
|
296
|
+
}
|
|
297
|
+
// Backward compatible alias
|
|
298
|
+
async findWithOcrFallback(deviceId, selector, options = {}) {
|
|
299
|
+
return this.findWithFallbacks(deviceId, selector, options);
|
|
300
|
+
}
|
|
53
301
|
}
|