@xiuchang-midscene/android 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,290 @@
1
+ import { AbstractInterface } from '@midscene/core/device';
2
+ import type { ActionParam } from '@midscene/core';
3
+ import type { ActionReturn } from '@midscene/core';
4
+ import { ADB } from 'appium-adb';
5
+ import { Agent } from '@midscene/core/agent';
6
+ import { AgentOpt } from '@midscene/core/agent';
7
+ import { AndroidDeviceInputOpt } from '@midscene/core/device';
8
+ import { AndroidDeviceOpt } from '@midscene/core/device';
9
+ import { BaseMCPServer } from '@midscene/shared/mcp';
10
+ import { BaseMidsceneTools } from '@midscene/shared/mcp';
11
+ import { DeviceAction } from '@midscene/core';
12
+ import type { ElementInfo } from '@midscene/shared/extractor';
13
+ import { InterfaceType } from '@midscene/core';
14
+ import { LaunchMCPServerOptions } from '@midscene/shared/mcp';
15
+ import { LaunchMCPServerResult } from '@midscene/shared/mcp';
16
+ import { Point } from '@midscene/core';
17
+ import { Size } from '@midscene/core';
18
+ import { Tool } from '@midscene/shared/mcp';
19
+ import { ToolDefinition } from '@midscene/shared/mcp';
20
+ /** Argument tuple for an action: `[]` when `[ActionParam<T>]` extends `[undefined]`, otherwise the one-element tuple `[ActionParam<T>]`. Used by WrappedAction as its rest-parameter type. */
21
+ declare type ActionArgs<T extends DeviceAction> = [ActionParam<T>] extends [undefined] ? [] : [ActionParam<T>];
22
+ /** Agent specialised for an AndroidDevice: declares the back/home/recentApps system actions plus launch() and runAdbShell(). */
23
+ declare class AndroidAgent extends Agent<AndroidDevice> {
24
+ /**
25
+ * Trigger the system back operation on the Android device (typed as a WrappedAction, so the call returns a Promise)
26
+ */
27
+ back: WrappedAction<DeviceActionAndroidBackButton>;
28
+ /**
29
+ * Trigger the system home operation on the Android device (typed as a WrappedAction, so the call returns a Promise)
30
+ */
31
+ home: WrappedAction<DeviceActionAndroidHomeButton>;
32
+ /**
33
+ * Trigger the system recent apps operation on the Android device (typed as a WrappedAction, so the call returns a Promise)
34
+ */
35
+ recentApps: WrappedAction<DeviceActionAndroidRecentAppsButton>;
36
+ /**
37
+ * User-provided app name to package name mapping (presumably taken from AndroidAgentOpt.appNameMapping; confirm in the constructor implementation)
38
+ */
39
+ private appNameMapping;
40
+ constructor(device: AndroidDevice, opts?: AndroidAgentOpt);
41
+ /**
42
+ * Launch an Android app or URL; the returned Promise carries no value
43
+ * @param uri - App package name, URL, or app name to launch
44
+ */
45
+ launch(uri: string): Promise<void>;
46
+ /**
47
+ * Execute an ADB shell command on the Android device; resolves to a string (presumably the command output; confirm against the implementation)
48
+ * @param command - ADB shell command to execute
49
+ */
50
+ runAdbShell(command: string): Promise<string>;
51
+ private createActionWrapper;
52
+ }
53
+ /** Options type of the AndroidAgent constructor's `opts` parameter: every AgentOpt field plus an optional `appNameMapping`. */
54
+ declare type AndroidAgentOpt = AgentOpt & {
55
+ /**
56
+ * Custom mapping of app names (keys) to package names (values)
57
+ * User-provided mappings will take precedence over default mappings
58
+ */
59
+ appNameMapping?: Record<string, string>;
60
+ };
61
+ /** AbstractInterface implementation for one Android device, constructed from a device id; connect()/getAdb() resolve to an appium-adb ADB handle. */
62
+ declare class AndroidDevice implements AbstractInterface {
63
+ private deviceId;
64
+ private yadbPushed;
65
+ private devicePixelRatio;
66
+ private devicePixelRatioInitialized;
67
+ private adb;
68
+ private connectingAdb;
69
+ private destroyed;
70
+ private description;
71
+ private customActions?;
72
+ private cachedScreenSize;
73
+ private cachedOrientation;
74
+ private cachedPhysicalDisplayId;
75
+ private scrcpyAdapter;
76
+ private appNameMapping;
77
+ private cachedAdjustScale;
78
+ private takeScreenshotFailCount;
79
+ private static readonly TAKE_SCREENSHOT_FAIL_THRESHOLD;
80
+ interfaceType: InterfaceType;
81
+ uri: string | undefined;
82
+ /** Set by AndroidAgent to provide AI-based verification for IME fallback. Receives the text and resolves to a boolean. */
83
+ inputVerifyFn?: (text: string) => Promise<boolean>;
84
+ options?: AndroidDeviceOpt;
85
+ actionSpace(): DeviceAction<any>[];
86
+ constructor(deviceId: string, options?: AndroidDeviceOpt);
87
+ describe(): string;
88
+ connect(): Promise<ADB>;
89
+ getAdb(): Promise<ADB>;
90
+ private createAdbProxy;
91
+ /**
92
+ * Get or create the scrcpy adapter (lazy initialization)
93
+ */
94
+ private getScrcpyAdapter;
95
+ /**
96
+ * Get device physical info needed by scrcpy adapter
97
+ */
98
+ private getDevicePhysicalInfo;
99
+ /**
100
+ * Set the app name to package name mapping
101
+ */
102
+ setAppNameMapping(mapping: Record<string, string>): void;
103
+ /**
104
+ * Resolve app name to package name using the mapping
105
+ * Comparison is case-insensitive and ignores spaces, dashes, and underscores.
106
+ * Keys in appNameMapping are pre-normalized, so we only need to normalize the input.
107
+ * @param appName The app name to resolve
108
+ */
109
+ private resolvePackageName;
110
+ launch(uri: string): Promise<AndroidDevice>;
111
+ execYadb(keyboardContent: string): Promise<void>;
112
+ /**
113
+ * Write text to the device clipboard using yadb's -writeClipboard command.
114
+ * Requires yadb v1.1.0+.
115
+ */
116
+ execYadbWriteClipboard(text: string): Promise<void>;
117
+ /**
118
+ * Input text via ADBKeyboard IME.
119
+ * Flow:
120
+ * 1. Record the current default IME.
121
+ * 2. Enable and switch to ADBKeyboard, wait for activation.
122
+ * 3. Send text via broadcast (base64-encoded to handle any Unicode safely).
123
+ * 4. Restore the original IME.
124
+ *
125
+ * Requires ADBKeyboard (com.android.adbkeyboard) to be installed on the device.
126
+ */
127
+ private typeViaAdbKeyboard;
128
+ getElementsInfo(): Promise<ElementInfo[]>;
129
+ getElementsNodeTree(): Promise<any>; // NOTE(review): resolves to `any`, so callers get no type checking on the node tree
130
+ getScreenSize(): Promise<{
131
+ override: string;
132
+ physical: string;
133
+ orientation: number;
134
+ isCurrentOrientation?: boolean;
135
+ }>;
136
+ private initializeDevicePixelRatio;
137
+ getDisplayDensity(): Promise<number>;
138
+ getDisplayOrientation(): Promise<number>;
139
+ /**
140
+ * Get physical screen dimensions adjusted for current orientation.
141
+ * Swaps width/height when the device is in landscape and the reported
142
+ * dimensions do not already reflect the current orientation.
143
+ */
144
+ private getOrientedPhysicalSize;
145
+ size(): Promise<Size>;
146
+ /**
147
+ * Compute and cache the coordinate adjustment scale by comparing
148
+ * physical dimensions with logical dimensions from size().
149
+ * Cached after first call; invalidated on destroy().
150
+ */
151
+ private getAdjustScale;
152
+ /**
153
+ * Convert logical coordinates (from AI) back to physical coordinates (for ADB).
154
+ * The ratio is derived from size(), so overriding size() alone is sufficient.
155
+ */
156
+ private adjustCoordinates;
157
+ /**
158
+ * Calculate the end point for scroll operations based on start point, scroll delta, and screen boundaries.
159
+ * This method ensures that scroll operations stay within screen bounds and maintain a minimum scroll distance
160
+ * for effective scrolling gestures on Android devices.
161
+ *
162
+ * @param start - The starting point of the scroll gesture
163
+ * @param deltaX - The horizontal scroll distance (positive = scroll right, negative = scroll left)
164
+ * @param deltaY - The vertical scroll distance (positive = scroll down, negative = scroll up)
165
+ * @param maxWidth - The maximum width boundary (screen width)
166
+ * @param maxHeight - The maximum height boundary (screen height)
167
+ * @returns The calculated end point for the scroll gesture
168
+ */
169
+ private calculateScrollEndPoint;
170
+ screenshotBase64(): Promise<string>;
171
+ clearInput(element?: ElementInfo): Promise<void>;
172
+ forceScreenshot(path: string): Promise<void>;
173
+ url(): Promise<string>;
174
+ scrollUntilTop(startPoint?: Point): Promise<void>;
175
+ scrollUntilBottom(startPoint?: Point): Promise<void>;
176
+ scrollUntilLeft(startPoint?: Point): Promise<void>;
177
+ scrollUntilRight(startPoint?: Point): Promise<void>;
178
+ scrollUp(distance?: number, startPoint?: Point): Promise<void>;
179
+ scrollDown(distance?: number, startPoint?: Point): Promise<void>;
180
+ scrollLeft(distance?: number, startPoint?: Point): Promise<void>;
181
+ scrollRight(distance?: number, startPoint?: Point): Promise<void>;
182
+ ensureYadb(): Promise<void>;
183
+ /**
184
+ * Check if text contains characters that may cause issues with ADB inputText.
185
+ * appium-adb's inputText has known bugs with certain characters:
186
+ * - Backslash causes broken shell quoting
187
+ * - Backtick is not escaped at all
188
+ * - Text containing both " and ' throws an error
189
+ * - Dollar sign can cause variable expansion issues
190
+ *
191
+ * For these characters, we route through yadb which handles them correctly
192
+ * via escapeForShell + double-quoted shell context.
193
+ */
194
+ private shouldUseYadbForText;
195
+ /**
196
+ * Execute text input using a specific strategy.
197
+ * Pure execution — no keyboard-dismiss logic, no strategy resolution.
198
+ */
199
+ private _typeWithStrategy;
200
+ keyboardType(text: string, options?: AndroidDeviceInputOpt): Promise<void>;
201
+ private normalizeKeyName;
202
+ keyboardPress(key: string): Promise<void>;
203
+ mouseClick(x: number, y: number): Promise<void>;
204
+ mouseDoubleClick(x: number, y: number): Promise<void>;
205
+ mouseMove(): Promise<void>; // NOTE(review): declared without coordinates, unlike mouseClick/mouseDoubleClick
206
+ mouseDrag(from: {
207
+ x: number;
208
+ y: number;
209
+ }, to: {
210
+ x: number;
211
+ y: number;
212
+ }, duration?: number): Promise<void>;
213
+ scroll(deltaX: number, deltaY: number, duration?: number): Promise<void>;
214
+ destroy(): Promise<void>;
215
+ /**
216
+ * Get the current time from the Android device.
217
+ * Returns the device's current timestamp in milliseconds.
218
+ * This is useful when the local system time and the device time are not synchronized.
219
+ */
220
+ getTimestamp(): Promise<number>;
221
+ back(): Promise<void>;
222
+ home(): Promise<void>;
223
+ recentApps(): Promise<void>;
224
+ longPress(x: number, y: number, duration?: number): Promise<void>;
225
+ pullDown(startPoint?: Point, distance?: number, duration?: number): Promise<void>;
226
+ pullDrag(from: {
227
+ x: number;
228
+ y: number;
229
+ }, to: {
230
+ x: number;
231
+ y: number;
232
+ }, duration: number): Promise<void>; // NOTE(review): duration is required here but optional on mouseDrag/pullDown/pullUp; confirm this is intentional
233
+ pullUp(startPoint?: Point, distance?: number, duration?: number): Promise<void>;
234
+ private getDisplayArg;
235
+ getPhysicalDisplayId(): Promise<string | null>;
236
+ hideKeyboard(options?: AndroidDeviceInputOpt, timeoutMs?: number): Promise<boolean>;
237
+ }
238
+
239
+ /**
240
+ * Android MCP Server (extends BaseMCPServer)
241
+ * Provides MCP tools for Android automation through ADB; an AndroidMidsceneTools instance may optionally be passed to the constructor
242
+ */
243
+ export declare class AndroidMCPServer extends BaseMCPServer {
244
+ constructor(toolsManager?: AndroidMidsceneTools);
245
+ protected createToolsManager(): AndroidMidsceneTools;
246
+ }
247
+
248
+ /**
249
+ * Android-specific tools manager, parameterised over AndroidAgent
250
+ * Extends BaseMidsceneTools to provide Android ADB device connection tools
251
+ */
252
+ declare class AndroidMidsceneTools extends BaseMidsceneTools<AndroidAgent> {
253
+ protected createTemporaryDevice(): AndroidDevice;
254
+ protected ensureAgent(deviceId?: string): Promise<AndroidAgent>;
255
+ /**
256
+ * Provide Android-specific platform tools as a list of ToolDefinition entries
257
+ */
258
+ protected preparePlatformTools(): ToolDefinition[];
259
+ }
260
+ /** Action types behind AndroidAgent.back / home / recentApps (this alias and the two that follow); each is DeviceAction<undefined, void>. */
261
+ declare type DeviceActionAndroidBackButton = DeviceAction<undefined, void>;
262
+
263
+ declare type DeviceActionAndroidHomeButton = DeviceAction<undefined, void>;
264
+
265
+ declare type DeviceActionAndroidRecentAppsButton = DeviceAction<undefined, void>;
266
+
267
+ /**
268
+ * Create an MCP kit for a specific Android agent; resolves to an object holding a `description` string and a `tools` array of Tool
269
+ */
270
+ export declare function mcpKitForAgent(agent: Agent | AndroidAgent): Promise<{
271
+ description: string;
272
+ tools: Tool[];
273
+ }>;
274
+
275
+ /**
276
+ * Create an MCP server launcher for a specific Android agent; the returned object exposes launch() (options optional) and launchHttp() (options required), both resolving to LaunchMCPServerResult
277
+ */
278
+ export declare function mcpServerForAgent(agent: Agent | AndroidAgent): {
279
+ launch(options?: {
280
+ verbose?: boolean;
281
+ }): Promise<LaunchMCPServerResult>;
282
+ launchHttp(options: LaunchMCPServerOptions): Promise<LaunchMCPServerResult>;
283
+ };
284
+
285
+ /**
286
+ * Helper type that converts a DeviceAction into a wrapped method signature: it accepts ActionArgs<T> as rest arguments and returns Promise<ActionReturn<T>>
287
+ */
288
+ declare type WrappedAction<T extends DeviceAction> = (...args: ActionArgs<T>) => Promise<ActionReturn<T>>;
289
+
290
+ export { }
package/package.json ADDED
@@ -0,0 +1,70 @@
1
+ {
2
+ "name": "@xiuchang-midscene/android",
3
+ "version": "1.6.0",
4
+ "description": "Android automation library for Midscene",
5
+ "keywords": [
6
+ "Android UI automation",
7
+ "Android AI testing",
8
+ "Android automation library",
9
+ "Android automation tool",
10
+ "Android use"
11
+ ],
12
+ "main": "./dist/lib/index.js",
13
+ "module": "./dist/es/index.mjs",
14
+ "types": "./dist/types/index.d.ts",
15
+ "bin": {
16
+ "midscene-android": "./bin/midscene-android"
17
+ },
18
+ "files": ["bin", "dist", "README.md"],
19
+ "exports": {
20
+ ".": {
21
+ "types": "./dist/types/index.d.ts",
22
+ "import": "./dist/es/index.mjs",
23
+ "require": "./dist/lib/index.js"
24
+ },
25
+ "./mcp-server": {
26
+ "types": "./dist/types/mcp-server.d.ts",
27
+ "import": "./dist/es/mcp-server.mjs",
28
+ "require": "./dist/lib/mcp-server.js"
29
+ },
30
+ "./package.json": "./package.json"
31
+ },
32
+ "scripts": {
33
+ "dev": "npm run build:watch",
34
+ "prebuild": "node scripts/download-scrcpy-server.mjs && node scripts/download-yadb.mjs",
35
+ "build": "rslib build",
36
+ "build:watch": "rslib build --watch --no-clean",
37
+ "prepack": "node scripts/download-scrcpy-server.mjs && node scripts/download-yadb.mjs",
38
+ "playground": "DEBUG=midscene:* tsx demo/playground.ts",
39
+ "test": "vitest --run",
40
+ "test:u": "vitest --run -u",
41
+ "test:ai": "AI_TEST_TYPE=android npm run test",
42
+ "test:ai:cache": "MIDSCENE_CACHE=true AI_TEST_TYPE=android npm run test"
43
+ },
44
+ "dependencies": {
45
+ "@midscene/core": "workspace:*",
46
+ "@midscene/shared": "workspace:*",
47
+ "@yume-chan/adb": "2.5.1",
48
+ "@yume-chan/adb-scrcpy": "2.3.2",
49
+ "@yume-chan/adb-server-node-tcp": "2.5.2",
50
+ "@yume-chan/scrcpy": "2.3.0",
51
+ "@yume-chan/stream-extra": "2.1.0",
52
+ "appium-adb": "12.12.1",
53
+ "sharp": "^0.34.3"
54
+ },
55
+ "optionalDependencies": {
56
+ "@ffmpeg-installer/ffmpeg": "^1.1.0"
57
+ },
58
+ "devDependencies": {
59
+ "@midscene/playground": "workspace:*",
60
+ "@rslib/core": "^0.18.3",
61
+ "@types/node": "^18.0.0",
62
+ "dotenv": "^16.4.5",
63
+ "gh-release-fetch": "^4.0.3",
64
+ "typescript": "^5.8.3",
65
+ "tsx": "^4.19.2",
66
+ "vitest": "3.0.5",
67
+ "zod": "3.24.3"
68
+ },
69
+ "license": "MIT"
70
+ }