@xiuchang-midscene/android 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ export { }
@@ -0,0 +1,465 @@
1
+ import { AbstractInterface } from '@midscene/core/device';
2
+ import type { ActionParam } from '@midscene/core';
3
+ import type { ActionReturn } from '@midscene/core';
4
+ import { ADB } from 'appium-adb';
5
+ import type { Adb } from '@yume-chan/adb';
6
+ import { Agent } from '@midscene/core/agent';
7
+ import { AgentOpt } from '@midscene/core/agent';
8
+ import { AndroidDeviceInputOpt } from '@midscene/core/device';
9
+ import { AndroidDeviceOpt } from '@midscene/core/device';
10
+ import { BaseMidsceneTools } from '@midscene/shared/mcp';
11
+ import { Device } from 'appium-adb';
12
+ import { DeviceAction } from '@midscene/core';
13
+ import type { ElementInfo } from '@midscene/shared/extractor';
14
+ import { InterfaceType } from '@midscene/core';
15
+ import { overrideAIConfig } from '@midscene/shared/env';
16
+ import { Point } from '@midscene/core';
17
+ import { Size } from '@midscene/core';
18
+ import { ToolDefinition } from '@midscene/shared/mcp';
19
+
20
+ declare type ActionArgs<T extends DeviceAction> = [ActionParam<T>] extends [undefined] ? [] : [ActionParam<T>];
21
+
22
+ export declare function agentFromAdbDevice(deviceId?: string, opts?: AndroidAgentOpt & AndroidDeviceOpt): Promise<AndroidAgent>;
23
+
24
+ export declare class AndroidAgent extends Agent<AndroidDevice> {
25
+ /**
26
+ * Trigger the system back operation on Android devices
27
+ */
28
+ back: WrappedAction<DeviceActionAndroidBackButton>;
29
+ /**
30
+ * Trigger the system home operation on Android devices
31
+ */
32
+ home: WrappedAction<DeviceActionAndroidHomeButton>;
33
+ /**
34
+ * Trigger the system recent apps operation on Android devices
35
+ */
36
+ recentApps: WrappedAction<DeviceActionAndroidRecentAppsButton>;
37
+ /**
38
+ * User-provided app name to package name mapping
39
+ */
40
+ private appNameMapping;
41
+ constructor(device: AndroidDevice, opts?: AndroidAgentOpt);
42
+ /**
43
+ * Launch an Android app or URL
44
+ * @param uri - App package name, URL, or app name to launch
45
+ */
46
+ launch(uri: string): Promise<void>;
47
+ /**
48
+ * Execute ADB shell command on Android device
49
+ * @param command - ADB shell command to execute
50
+ */
51
+ runAdbShell(command: string): Promise<string>;
52
+ private createActionWrapper;
53
+ }
54
+
55
+ export declare type AndroidAgentOpt = AgentOpt & {
56
+ /**
57
+ * Custom mapping of app names to package names.
58
+ * User-provided mappings take precedence over the default mappings.
59
+ */
60
+ appNameMapping?: Record<string, string>;
61
+ };
62
+
63
+ export declare interface AndroidConnectedDevice extends Device {
64
+ model?: string;
65
+ brand?: string;
66
+ resolution?: string;
67
+ density?: number;
68
+ }
69
+
70
+ export declare class AndroidDevice implements AbstractInterface {
71
+ private deviceId;
72
+ private yadbPushed;
73
+ private devicePixelRatio;
74
+ private devicePixelRatioInitialized;
75
+ private adb;
76
+ private connectingAdb;
77
+ private destroyed;
78
+ private description;
79
+ private customActions?;
80
+ private cachedScreenSize;
81
+ private cachedOrientation;
82
+ private cachedPhysicalDisplayId;
83
+ private scrcpyAdapter;
84
+ private appNameMapping;
85
+ private cachedAdjustScale;
86
+ private takeScreenshotFailCount;
87
+ private static readonly TAKE_SCREENSHOT_FAIL_THRESHOLD;
88
+ interfaceType: InterfaceType;
89
+ uri: string | undefined;
90
+ /** Set by AndroidAgent to provide AI-based verification for IME fallback. */
91
+ inputVerifyFn?: (text: string) => Promise<boolean>;
92
+ options?: AndroidDeviceOpt;
93
+ actionSpace(): DeviceAction<any>[];
94
+ constructor(deviceId: string, options?: AndroidDeviceOpt);
95
+ describe(): string;
96
+ connect(): Promise<ADB>;
97
+ getAdb(): Promise<ADB>;
98
+ private createAdbProxy;
99
+ /**
100
+ * Get or create the scrcpy adapter (lazy initialization)
101
+ */
102
+ private getScrcpyAdapter;
103
+ /**
104
+ * Get device physical info needed by scrcpy adapter
105
+ */
106
+ private getDevicePhysicalInfo;
107
+ /**
108
+ * Set the app name to package name mapping
109
+ */
110
+ setAppNameMapping(mapping: Record<string, string>): void;
111
+ /**
112
+ * Resolve app name to package name using the mapping
113
+ * Comparison is case-insensitive and ignores spaces, dashes, and underscores.
114
+ * Keys in appNameMapping are pre-normalized, so we only need to normalize the input.
115
+ * @param appName The app name to resolve
116
+ */
117
+ private resolvePackageName;
118
+ launch(uri: string): Promise<AndroidDevice>;
119
+ execYadb(keyboardContent: string): Promise<void>;
120
+ /**
121
+ * Write text to the device clipboard using yadb's -writeClipboard command.
122
+ * Requires yadb v1.1.0+.
123
+ */
124
+ execYadbWriteClipboard(text: string): Promise<void>;
125
+ /**
126
+ * Input text via ADBKeyboard IME.
127
+ * Flow:
128
+ * 1. Record the current default IME.
129
+ * 2. Enable and switch to ADBKeyboard, wait for activation.
130
+ * 3. Send text via broadcast (base64-encoded to handle any Unicode safely).
131
+ * 4. Restore the original IME.
132
+ *
133
+ * Requires ADBKeyboard (com.android.adbkeyboard) to be installed on the device.
134
+ */
135
+ private typeViaAdbKeyboard;
136
+ getElementsInfo(): Promise<ElementInfo[]>;
137
+ getElementsNodeTree(): Promise<any>;
138
+ getScreenSize(): Promise<{
139
+ override: string;
140
+ physical: string;
141
+ orientation: number;
142
+ isCurrentOrientation?: boolean;
143
+ }>;
144
+ private initializeDevicePixelRatio;
145
+ getDisplayDensity(): Promise<number>;
146
+ getDisplayOrientation(): Promise<number>;
147
+ /**
148
+ * Get physical screen dimensions adjusted for current orientation.
149
+ * Swaps width/height when the device is in landscape and the reported
150
+ * dimensions do not already reflect the current orientation.
151
+ */
152
+ private getOrientedPhysicalSize;
153
+ size(): Promise<Size>;
154
+ /**
155
+ * Compute and cache the coordinate adjustment scale by comparing
156
+ * physical dimensions with logical dimensions from size().
157
+ * Cached after first call; invalidated on destroy().
158
+ */
159
+ private getAdjustScale;
160
+ /**
161
+ * Convert logical coordinates (from AI) back to physical coordinates (for ADB).
162
+ * The ratio is derived from size(), so overriding size() alone is sufficient.
163
+ */
164
+ private adjustCoordinates;
165
+ /**
166
+ * Calculate the end point for scroll operations based on start point, scroll delta, and screen boundaries.
167
+ * This method ensures that scroll operations stay within screen bounds and maintain a minimum scroll distance
168
+ * for effective scrolling gestures on Android devices.
169
+ *
170
+ * @param start - The starting point of the scroll gesture
171
+ * @param deltaX - The horizontal scroll distance (positive = scroll right, negative = scroll left)
172
+ * @param deltaY - The vertical scroll distance (positive = scroll down, negative = scroll up)
173
+ * @param maxWidth - The maximum width boundary (screen width)
174
+ * @param maxHeight - The maximum height boundary (screen height)
175
+ * @returns The calculated end point for the scroll gesture
176
+ */
177
+ private calculateScrollEndPoint;
178
+ screenshotBase64(): Promise<string>;
179
+ clearInput(element?: ElementInfo): Promise<void>;
180
+ forceScreenshot(path: string): Promise<void>;
181
+ url(): Promise<string>;
182
+ scrollUntilTop(startPoint?: Point): Promise<void>;
183
+ scrollUntilBottom(startPoint?: Point): Promise<void>;
184
+ scrollUntilLeft(startPoint?: Point): Promise<void>;
185
+ scrollUntilRight(startPoint?: Point): Promise<void>;
186
+ scrollUp(distance?: number, startPoint?: Point): Promise<void>;
187
+ scrollDown(distance?: number, startPoint?: Point): Promise<void>;
188
+ scrollLeft(distance?: number, startPoint?: Point): Promise<void>;
189
+ scrollRight(distance?: number, startPoint?: Point): Promise<void>;
190
+ ensureYadb(): Promise<void>;
191
+ /**
192
+ * Check if text contains characters that may cause issues with ADB inputText.
193
+ * appium-adb's inputText has known bugs with certain characters:
194
+ * - Backslash causes broken shell quoting
195
+ * - Backtick is not escaped at all
196
+ * - Text containing both " and ' throws an error
197
+ * - Dollar sign can cause variable expansion issues
198
+ *
199
+ * For these characters, we route through yadb which handles them correctly
200
+ * via escapeForShell + double-quoted shell context.
201
+ */
202
+ private shouldUseYadbForText;
203
+ /**
204
+ * Execute text input using a specific strategy.
205
+ * Pure execution — no keyboard-dismiss logic, no strategy resolution.
206
+ */
207
+ private _typeWithStrategy;
208
+ keyboardType(text: string, options?: AndroidDeviceInputOpt): Promise<void>;
209
+ private normalizeKeyName;
210
+ keyboardPress(key: string): Promise<void>;
211
+ mouseClick(x: number, y: number): Promise<void>;
212
+ mouseDoubleClick(x: number, y: number): Promise<void>;
213
+ mouseMove(): Promise<void>;
214
+ mouseDrag(from: {
215
+ x: number;
216
+ y: number;
217
+ }, to: {
218
+ x: number;
219
+ y: number;
220
+ }, duration?: number): Promise<void>;
221
+ scroll(deltaX: number, deltaY: number, duration?: number): Promise<void>;
222
+ destroy(): Promise<void>;
223
+ /**
224
+ * Get the current time from the Android device.
225
+ * Returns the device's current timestamp in milliseconds.
226
+ * This is useful when the host (local system) clock and the device clock are not synchronized.
227
+ */
228
+ getTimestamp(): Promise<number>;
229
+ back(): Promise<void>;
230
+ home(): Promise<void>;
231
+ recentApps(): Promise<void>;
232
+ longPress(x: number, y: number, duration?: number): Promise<void>;
233
+ pullDown(startPoint?: Point, distance?: number, duration?: number): Promise<void>;
234
+ pullDrag(from: {
235
+ x: number;
236
+ y: number;
237
+ }, to: {
238
+ x: number;
239
+ y: number;
240
+ }, duration: number): Promise<void>;
241
+ pullUp(startPoint?: Point, distance?: number, duration?: number): Promise<void>;
242
+ private getDisplayArg;
243
+ getPhysicalDisplayId(): Promise<string | null>;
244
+ hideKeyboard(options?: AndroidDeviceInputOpt, timeoutMs?: number): Promise<boolean>;
245
+ }
246
+
247
+ /**
248
+ * Android-specific tools manager
249
+ * Extends BaseMidsceneTools to provide Android ADB device connection tools
250
+ */
251
+ export declare class AndroidMidsceneTools extends BaseMidsceneTools<AndroidAgent> {
252
+ protected createTemporaryDevice(): AndroidDevice;
253
+ protected ensureAgent(deviceId?: string): Promise<AndroidAgent>;
254
+ /**
255
+ * Provide Android-specific platform tools
256
+ */
257
+ protected preparePlatformTools(): ToolDefinition[];
258
+ }
259
+
260
+ declare type DeviceActionAndroidBackButton = DeviceAction<undefined, void>;
261
+
262
+ declare type DeviceActionAndroidHomeButton = DeviceAction<undefined, void>;
263
+
264
+ declare type DeviceActionAndroidRecentAppsButton = DeviceAction<undefined, void>;
265
+
266
+ declare interface DevicePhysicalInfo {
267
+ physicalWidth: number;
268
+ physicalHeight: number;
269
+ dpr: number;
270
+ orientation: number;
271
+ isCurrentOrientation?: boolean;
272
+ }
273
+
274
+ export declare function getConnectedDevices(): Promise<Device[]>;
275
+
276
+ export declare function getConnectedDevicesWithDetails(): Promise<AndroidConnectedDevice[]>;
277
+
278
+ export { overrideAIConfig }
279
+
280
+ declare interface ResolvedScrcpyConfig {
281
+ enabled: boolean;
282
+ maxSize: number;
283
+ videoBitRate: number;
284
+ idleTimeoutMs: number;
285
+ }
286
+
287
+ declare interface ScrcpyConfig {
288
+ enabled?: boolean;
289
+ maxSize?: number;
290
+ videoBitRate?: number;
291
+ idleTimeoutMs?: number;
292
+ }
293
+
294
+ /**
295
+ * Adapter that encapsulates all scrcpy-related logic for AndroidDevice.
296
+ * Handles config normalization, manager lifecycle, screenshot, and resolution.
297
+ */
298
+ export declare class ScrcpyDeviceAdapter {
299
+ private deviceId;
300
+ private scrcpyConfig;
301
+ private manager;
302
+ private resolvedConfig;
303
+ private initFailed;
304
+ constructor(deviceId: string, scrcpyConfig: ScrcpyConfig | undefined);
305
+ isEnabled(): boolean;
306
+ /**
307
+ * Initialize scrcpy connection. Called once during device.connect().
308
+ * If initialization fails, marks scrcpy as permanently disabled (no further retries).
309
+ */
310
+ initialize(deviceInfo: DevicePhysicalInfo): Promise<void>;
311
+ /**
312
+ * Resolve scrcpy config.
313
+ * maxSize defaults to 0 (no scaling, full physical resolution) so the Agent layer
314
+ * receives the highest quality image for AI processing.
315
+ * videoBitRate is auto-scaled based on physical pixel count to ensure
316
+ * sufficient quality for all-I-frame H.264 encoding.
317
+ */
318
+ resolveConfig(deviceInfo: DevicePhysicalInfo): ResolvedScrcpyConfig;
319
+ /**
320
+ * Get or create the ScrcpyScreenshotManager.
321
+ * Uses dynamic import for @yume-chan packages (ESM-only, must use await import in CJS builds).
322
+ */
323
+ ensureManager(deviceInfo: DevicePhysicalInfo): Promise<ScrcpyScreenshotManager>;
324
+ /**
325
+ * Take a screenshot via scrcpy, returns base64 string.
326
+ * Throws on failure (caller should fallback to ADB).
327
+ */
328
+ screenshotBase64(deviceInfo: DevicePhysicalInfo): Promise<string>;
329
+ /**
330
+ * Get scrcpy's actual video resolution.
331
+ * Returns null if scrcpy is not connected yet.
332
+ */
333
+ getResolution(): {
334
+ width: number;
335
+ height: number;
336
+ } | null;
337
+ /**
338
+ * Compute size from scrcpy resolution.
339
+ * Returns null if scrcpy is not connected.
340
+ */
341
+ getSize(deviceInfo: DevicePhysicalInfo): Size | null;
342
+ /**
343
+ * Calculate the scaling ratio from physical to scrcpy resolution.
344
+ */
345
+ getScalingRatio(physicalWidth: number): number | null;
346
+ disconnect(): Promise<void>;
347
+ }
348
+
349
+ declare class ScrcpyScreenshotManager {
350
+ private adb;
351
+ private scrcpyClient;
352
+ private videoStream;
353
+ private spsHeader;
354
+ private idleTimer;
355
+ private isConnecting;
356
+ private isInitialized;
357
+ private options;
358
+ private ffmpegAvailable;
359
+ private keyframeResolvers;
360
+ private lastRawKeyframe;
361
+ private videoResolution;
362
+ private streamReader;
363
+ constructor(adb: Adb, options?: ScrcpyScreenshotOptions);
364
+ /**
365
+ * Validate environment prerequisites (ffmpeg, scrcpy-server, etc.)
366
+ * Must be called once after construction, before any screenshot operations.
367
+ * Throws if prerequisites are not met.
368
+ */
369
+ validateEnvironment(): Promise<void>;
370
+ /**
371
+ * Ensure scrcpy connection is active
372
+ */
373
+ ensureConnected(): Promise<void>;
374
+ /**
375
+ * Resolve path to scrcpy server binary
376
+ */
377
+ private resolveServerBinPath;
378
+ /**
379
+ * Get ffmpeg executable path
380
+ * Priority: @ffmpeg-installer/ffmpeg > system ffmpeg
381
+ */
382
+ private getFfmpegPath;
383
+ /**
384
+ * Consume video frames and keep latest frame
385
+ */
386
+ private startFrameConsumer;
387
+ /**
388
+ * Main frame consumption loop
389
+ * Includes busy-loop detection: if reader.read() resolves too fast
390
+ * (e.g. broken stream returning immediately), we throttle to prevent 100% CPU.
391
+ */
392
+ private consumeFramesLoop;
393
+ /**
394
+ * Process a single video packet from the scrcpy stream.
395
+ * With sendFrameMeta: true, the stream emits properly framed packets:
396
+ * - "configuration" packets contain SPS/PPS header data
397
+ * - "data" packets contain complete video frames with correct boundaries
398
+ * This avoids the frame-splitting issue that occurs with sendFrameMeta: false
399
+ * at high resolutions where raw chunks may not align with frame boundaries.
400
+ */
401
+ private processFrame;
402
+ /**
403
+ * Get screenshot as JPEG.
404
+ * Tries to get a fresh frame within a short timeout. If the screen is static
405
+ * (no new frames arrive), falls back to the latest cached keyframe.
406
+ */
407
+ getScreenshotJpeg(): Promise<Buffer>;
408
+ /**
409
+ * Get the actual video stream resolution
410
+ * Returns null if scrcpy is not connected yet
411
+ */
412
+ getResolution(): {
413
+ width: number;
414
+ height: number;
415
+ } | null;
416
+ /**
417
+ * Notify all pending keyframe waiters
418
+ */
419
+ private notifyKeyframeWaiters;
420
+ /**
421
+ * Wait for the next keyframe to arrive
422
+ */
423
+ private waitForNextKeyframe;
424
+ /**
425
+ * Ensure ffmpeg is available for H.264-to-JPEG conversion
426
+ */
427
+ private ensureFfmpegAvailable;
428
+ /**
429
+ * Wait for first keyframe with SPS/PPS header
430
+ */
431
+ private waitForKeyframe;
432
+ /**
433
+ * Check if ffmpeg is available in the system
434
+ */
435
+ private checkFfmpegAvailable;
436
+ /**
437
+ * Decode H.264 data to JPEG using ffmpeg
438
+ */
439
+ private decodeH264ToJpeg;
440
+ /**
441
+ * Reset idle timeout timer
442
+ */
443
+ private resetIdleTimer;
444
+ /**
445
+ * Disconnect scrcpy
446
+ */
447
+ disconnect(): Promise<void>;
448
+ /**
449
+ * Check if scrcpy is initialized and connected
450
+ */
451
+ isConnected(): boolean;
452
+ }
453
+
454
+ declare interface ScrcpyScreenshotOptions {
455
+ maxSize?: number;
456
+ videoBitRate?: number;
457
+ idleTimeoutMs?: number;
458
+ }
459
+
460
+ /**
461
+ * Helper type that converts a DeviceAction into its wrapped method signature: a function taking ActionArgs<T> and returning Promise<ActionReturn<T>>.
462
+ */
463
+ declare type WrappedAction<T extends DeviceAction> = (...args: ActionArgs<T>) => Promise<ActionReturn<T>>;
464
+
465
+ export { }