@aiscene/android 1.7.15 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1784 @@
1
+ import { ADB } from 'appium-adb';
2
+ import type { CreateOpenAIClientFn } from '@midscene/shared/env';
3
+ import type { ElementNode } from '@midscene/shared/extractor';
4
+ import { IModelConfig } from '@midscene/shared/env';
5
+ import { LaunchMCPServerOptions } from '@aiscene/shared/mcp';
6
+ import { LaunchMCPServerResult } from '@aiscene/shared/mcp';
7
+ import type { LocateResultElement } from '@midscene/shared/types';
8
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
9
+ import { ModelConfigManager } from '@midscene/shared/env';
10
+ import { Point } from '@midscene/shared/types';
11
+ import { Rect } from '@midscene/shared/types';
12
+ import { Size } from '@midscene/shared/types';
13
+ import type { TModelConfig } from '@midscene/shared/env';
14
+ import { z } from './lib';
15
+
16
+ /** Contract a device/page driver implements so that an Agent can operate it. Only `interfaceType`, `screenshotBase64`, `size` and `actionSpace` are mandatory; every member marked `?` is an optional capability. */ declare abstract class AbstractInterface {
17
+ abstract interfaceType: string;
18
+ /** Resolves to a screenshot as a string (base64 per the method name; whether a data-URL prefix is included is not visible here — TODO confirm). */ abstract screenshotBase64(): Promise<string>;
19
+ abstract size(): Promise<Size>;
20
+ /** Synchronously lists the actions this interface supports. */ abstract actionSpace(): DeviceAction[];
21
+ abstract cacheFeatureForPoint?(center: [number, number], options?: {
22
+ targetDescription?: string;
23
+ modelConfig?: IModelConfig;
24
+ }): Promise<ElementCacheFeature>;
25
+ abstract rectMatchesCacheFeature?(feature: ElementCacheFeature): Promise<Rect>;
26
+ abstract destroy?(): Promise<void>;
27
+ abstract describe?(): string;
28
+ /** Optional hook; presumably awaited before an action named `actionName` runs — confirm in the caller. */ abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;
29
+ /** Optional hook; presumably awaited after an action named `actionName` runs — confirm in the caller. */ abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;
30
+ /** Registers a file-chooser handler; resolves to a handle exposing `dispose()` and `getError()`. */ registerFileChooserListener?(handler: (chooser: FileChooserHandler) => Promise<void>): Promise<{
31
+ dispose: () => void;
32
+ getError: () => Error | undefined;
33
+ }>;
34
+ abstract getElementsNodeTree?: () => Promise<ElementNode>;
35
+ abstract url?: () => string | Promise<string>;
36
+ abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;
37
+ /**
38
+ * Get the current time from the device.
39
+ * Returns the device's current timestamp in milliseconds.
40
+ * This is useful when the system time and device time are not synchronized.
41
+ */
42
+ getTimestamp?(): Promise<number>;
43
+ /** URL of native MJPEG stream for real-time screen preview (e.g. WDA MJPEG server) */
44
+ mjpegStreamUrl?: string;
45
+ }
46
+
47
+ /** Argument tuple for invoking action `T`: empty when its parameter type is `undefined`, otherwise a one-element tuple holding that parameter. The `[X] extends [undefined]` wrapping keeps the conditional from distributing over unions. */ declare type ActionArgs<T extends DeviceAction> = [ActionParam<T>] extends [undefined] ? [] : [ActionParam<T>];
48
+
49
+ /**
50
+ * Type utilities for extracting types from DeviceAction definitions
51
+ */
52
+ /**
53
+ * Extract the parameter type `P` from a `DeviceAction<P, R>`; resolves to `never` when `Action` does not match that shape.
54
+ */
55
+ declare type ActionParam<Action extends DeviceAction<any, any>> = Action extends DeviceAction<infer P, any> ? P : never;
56
+
57
+ /**
58
+ * Extract the return type `R` from a `DeviceAction<P, R>`; resolves to `never` when `Action` does not match that shape.
59
+ */
60
+ declare type ActionReturn<Action extends DeviceAction<any, any>> = Action extends DeviceAction<any, infer R> ? R : never;
61
+
62
+ /** Parameters of a scroll action; every field is optional. */ declare type ActionScrollParam = {
63
+ direction?: 'down' | 'up' | 'right' | 'left';
64
+ scrollType?: ScrollType;
65
+ /** Scroll distance; `null` is accepted in addition to omitting the field (unit not visible here — TODO confirm). */ distance?: number | null;
66
+ /** Previously located element; presumably the scroll anchor — confirm in the action implementation. */ locate?: LocateResultElement;
67
+ };
68
+
69
+ /**
70
+ * Action space item definition: the minimal description of one action (name plus optional description, args and zod parameter schema).
71
+ * Note: Intentionally no index signature to maintain compatibility with DeviceAction
72
+ */
73
+ declare interface ActionSpaceItem {
74
+ name: string;
75
+ description?: string;
76
+ args?: Record<string, unknown>;
77
+ /** Zod schema for the action parameter (any zod type is accepted). */ paramSchema?: z.ZodTypeAny;
78
+ }
79
+
80
+ /** AI automation agent bound to one `AbstractInterface` implementation. Exposes `ai*` helpers for acting, querying, asserting and waiting, plus dump/report output and cache flushing. */ declare class Agent<InterfaceType extends AbstractInterface = AbstractInterface> {
81
+ interface: InterfaceType;
82
+ service: Service;
83
+ dump: GroupedActionDump;
84
+ reportFile?: string | null;
85
+ reportFileName?: string;
86
+ taskExecutor: TaskExecutor;
87
+ opts: AgentOpt;
88
+ /**
89
+ * If true, the agent will not perform any actions
90
+ */
91
+ dryMode: boolean;
92
+ onTaskStartTip?: OnTaskStartTip;
93
+ taskCache?: TaskCache;
94
+ private dumpUpdateListeners;
95
+ get onDumpUpdate(): ((dump: string, executionDump?: ExecutionDump) => void) | undefined;
96
+ set onDumpUpdate(callback: ((dump: string, executionDump?: ExecutionDump) => void) | undefined);
97
+ destroyed: boolean;
98
+ modelConfigManager: ModelConfigManager;
99
+ /**
100
+ * Frozen page context for consistent AI operations
101
+ */
102
+ private frozenUIContext?;
103
+ private get aiActContext();
104
+ /**
105
+ * Flag to track if VL model warning has been shown
106
+ */
107
+ private hasWarnedNonVLModel;
108
+ private executionDumpIndexByRunner;
109
+ private fullActionSpace;
110
+ private reportGenerator;
111
+ /** Typed as the same `InterfaceType` as `interface`; presumably an alias of it — confirm in the implementation. */ get page(): InterfaceType;
112
+ /**
113
+ * Ensures VL model warning is shown once when needed
114
+ */
115
+ private ensureVLModelWarning;
116
+ private resolveReplanningCycleLimit;
117
+ constructor(interfaceInstance: InterfaceType, opts?: AgentOpt);
118
+ getActionSpace(): Promise<DeviceAction[]>;
119
+ private static readonly CONTEXT_RETRY_MAX;
120
+ private static readonly CONTEXT_RETRY_DELAY_MS;
121
+ /**
122
+ * Override in subclasses to indicate which errors are transient and should
123
+ * trigger an automatic retry when building the UI context.
124
+ * Returns `false` by default (no retry).
125
+ */
126
+ protected isRetryableContextError(_error: unknown): boolean;
127
+ getUIContext(action?: ServiceAction): Promise<UIContext>;
128
+ _snapshotContext(): Promise<UIContext>;
129
+ /**
130
+ * @deprecated Use {@link setAIActContext} instead.
131
+ */
132
+ setAIActionContext(prompt: string): Promise<void>;
133
+ setAIActContext(prompt: string): Promise<void>;
134
+ resetDump(): GroupedActionDump;
135
+ appendExecutionDump(execution: ExecutionDump, runner?: TaskRunner): void;
136
+ dumpDataString(opt?: {
137
+ inlineScreenshots?: boolean;
138
+ }): string;
139
+ reportHTMLString(opt?: {
140
+ inlineScreenshots?: boolean;
141
+ }): string;
142
+ private lastExecutionDump?;
143
+ writeOutActionDumps(executionDump?: ExecutionDump): void;
144
+ private getGroupMeta;
145
+ private callbackOnTaskStartTip;
146
+ wrapActionInActionSpace<T extends DeviceAction>(name: string): (param: ActionParam<T>) => Promise<ActionReturn<T>>;
147
+ callActionInActionSpace<T = any>(type: string, opt?: T): Promise<any>;
148
+ aiTap(locatePrompt: TUserPrompt, opt?: LocateOption & {
149
+ fileChooserAccept?: string | string[];
150
+ }): Promise<any>;
151
+ aiRightClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
152
+ aiDoubleClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
153
+ aiHover(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
154
+ aiInput(locatePrompt: TUserPrompt, opt: LocateOption & {
155
+ value: string | number;
156
+ } & {
157
+ autoDismissKeyboard?: boolean;
158
+ } & {
159
+ mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
160
+ }): Promise<any>;
161
+ /**
162
+ * @deprecated Use aiInput(locatePrompt, opt) instead where opt contains the value
163
+ */
164
+ aiInput(value: string | number, locatePrompt: TUserPrompt, opt?: LocateOption & {
165
+ autoDismissKeyboard?: boolean;
166
+ } & {
167
+ mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
168
+ }): Promise<any>;
169
+ aiKeyboardPress(locatePrompt: TUserPrompt, opt: LocateOption & {
170
+ keyName: string;
171
+ }): Promise<any>;
172
+ /**
173
+ * @deprecated Use aiKeyboardPress(locatePrompt, opt) instead where opt contains the keyName
174
+ */
175
+ aiKeyboardPress(keyName: string, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
176
+ aiScroll(locatePrompt: TUserPrompt | undefined, opt: LocateOption & ScrollParam): Promise<any>;
177
+ /**
178
+ * @deprecated Use aiScroll(locatePrompt, opt) instead where opt contains the scroll parameters
179
+ */
180
+ aiScroll(scrollParam: ScrollParam, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
181
+ aiPinch(locatePrompt: TUserPrompt | undefined, opt: LocateOption & {
182
+ direction: 'in' | 'out';
183
+ distance?: number;
184
+ duration?: number;
185
+ }): Promise<any>;
186
+ aiAct(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
187
+ /**
188
+ * @deprecated Use {@link Agent.aiAct} instead.
189
+ */
190
+ aiAction(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
191
+ aiQuery<ReturnType = any>(demand: ServiceExtractParam, opt?: ServiceExtractOption): Promise<ReturnType>;
192
+ aiBoolean(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<boolean>;
193
+ aiNumber(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<number>;
194
+ aiString(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
195
+ aiAsk(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
196
+ describeElementAtPoint(center: [number, number], opt?: {
197
+ verifyPrompt?: boolean;
198
+ retryLimit?: number;
199
+ deepLocate?: boolean;
200
+ } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
201
+ verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
202
+ aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "rect" | "center">>;
203
+ aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & ServiceExtractOption): Promise<{
204
+ pass: boolean;
205
+ thought: string | undefined;
206
+ message: string | undefined;
207
+ } | undefined>;
208
+ aiWaitFor(assertion: TUserPrompt, opt?: AgentWaitForOpt): Promise<void>;
209
+ /** Takes the same argument list and has the same return type as `aiAct` (presumably a shorthand for it — confirm in the implementation). The tuple is derived from the instance method through an indexed access type; `typeof Agent.aiAct` would query a static member that this class does not declare. */ ai(...args: Parameters<Agent['aiAct']>): Promise<string | undefined>;
210
+ runYaml(yamlScriptContent: string): Promise<{
211
+ result: Record<string, any>;
212
+ }>;
213
+ evaluateJavaScript(script: string): Promise<any>;
214
+ /**
215
+ * Add a dump update listener
216
+ * @param listener Listener function
217
+ * @returns A remove function that can be called to remove this listener
218
+ */
219
+ addDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): () => void;
220
+ /**
221
+ * Remove a dump update listener
222
+ * @param listener The listener function to remove
223
+ */
224
+ removeDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): void;
225
+ /**
226
+ * Clear all dump update listeners
227
+ */
228
+ clearDumpUpdateListeners(): void;
229
+ destroy(): Promise<void>;
230
+ recordToReport(title?: string, opt?: {
231
+ content: string;
232
+ }): Promise<void>;
233
+ /**
234
+ * @deprecated Use {@link Agent.recordToReport} instead.
235
+ */
236
+ logScreenshot(title?: string, opt?: {
237
+ content: string;
238
+ }): Promise<void>;
239
+ _unstableLogContent(): {
240
+ groupName: string;
241
+ groupDescription: string | undefined;
242
+ executions: ExecutionDump[];
243
+ };
244
+ /**
245
+ * Freezes the current page context to be reused in subsequent AI operations
246
+ * This avoids recalculating page context for each operation
247
+ */
248
+ freezePageContext(): Promise<void>;
249
+ /**
250
+ * Unfreezes the page context, allowing AI operations to calculate context dynamically
251
+ */
252
+ unfreezePageContext(): Promise<void>;
253
+ /**
254
+ * Process cache configuration and return normalized cache settings
255
+ */
256
+ private processCacheConfig;
257
+ private normalizeFilePaths;
258
+ private normalizeFileInput;
259
+ /**
260
+ * Manually flush cache to file
261
+ * @param options - Optional configuration
262
+ * @param options.cleanUnused - If true, removes unused cache records before flushing
263
+ */
264
+ flushCache(options?: {
265
+ cleanUnused?: boolean;
266
+ }): Promise<void>;
267
+ }
268
+
269
+ /** Extra options accepted by `Agent.aiAssert` (intersected there with `ServiceExtractOption`). */ declare interface AgentAssertOpt {
270
+ /** NOTE(review): presumably makes `aiAssert` resolve with the `{ pass, thought, message }` object rather than throwing on failure — confirm in the implementation. */ keepRawResponse?: boolean;
271
+ }
272
+
273
+ /** Result type of `Agent.describeElementAtPoint`. */ declare interface AgentDescribeElementAtPointResult {
274
+ prompt: string;
275
+ deepLocate: boolean;
276
+ /** Optional validation outcome; presumably only set when `verifyPrompt` was requested — TODO confirm. */ verifyResult?: LocateValidatorResult;
277
+ }
278
+
279
+ /** Options accepted as the second argument of the `Agent` constructor; every field is optional. */ declare interface AgentOpt {
280
+ testId?: string;
281
+ cacheId?: string;
282
+ groupName?: string;
283
+ groupDescription?: string;
284
+ generateReport?: boolean;
285
+ autoPrintReportMsg?: boolean;
286
+ /**
287
+ * Use directory-based report format with separate image files.
288
+ *
289
+ * When enabled:
290
+ * - Screenshots are saved as PNG files in a `screenshots/` subdirectory
291
+ * - Report is generated as `index.html` with relative image paths
292
+ * - Reduces memory usage and report file size
293
+ *
294
+ * IMPORTANT: 'html-and-external-assets' reports must be served via HTTP server
295
+ * (e.g., `npx serve ./report-dir`). The file:// protocol will not
296
+ * work due to browser CORS restrictions.
297
+ *
298
+ * @default 'single-html'
299
+ */
300
+ outputFormat?: 'single-html' | 'html-and-external-assets';
301
+ onTaskStartTip?: OnTaskStartTip;
302
+ aiActContext?: string;
303
+ /** NOTE(review): presumably the legacy spelling of `aiActContext` (compare the deprecated `Agent.setAIActionContext`) — confirm before relying on it. */ aiActionContext?: string;
304
+ reportFileName?: string;
305
+ modelConfig?: TModelConfig;
306
+ cache?: Cache_2;
307
+ /**
308
+ * Maximum number of replanning cycles for aiAct.
309
+ * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.
310
+ * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.
311
+ */
312
+ replanningCycleLimit?: number;
313
+ /**
314
+ * Wait time in milliseconds after each action execution.
315
+ * This allows the UI to settle and stabilize before the next action.
316
+ * Defaults to 300ms when not provided.
317
+ */
318
+ waitAfterAction?: number;
319
+ /**
320
+ * When set to true, Midscene will use the target device's time (Android/iOS)
321
+ * instead of the system time. Useful when the device time differs from the
322
+ * host machine. Default: false
323
+ */
324
+ useDeviceTimestamp?: boolean;
325
+ /**
326
+ * Custom screenshot shrink factor to reduce AI token usage.
327
+ * When set, the screenshot will be scaled down by this factor from the physical resolution.
328
+ *
329
+ * Example:
330
+ * - Physical screen width: 3000px, dpr=6
331
+ * - Logical width: 500px
332
+ * - screenshotShrinkFactor: 2
333
+ * - Actual shrunk screenshot width: 3000 / 2 = 1500px
334
+ * - AI analyzes the 1500px screenshot
335
+ * - Coordinates are transformed back to logical (500px) before actions execute
336
+ *
337
+ * Benefits:
338
+ * - Reduces token usage for high-resolution screenshots
339
+ * - Maintains accuracy by scaling coordinates appropriately
340
+ *
341
+ * Must be >= 1 (shrinking only, enlarging is not supported).
342
+ *
343
+ * @default 1 (no shrinking, uses original physical screenshot)
344
+ */
345
+ screenshotShrinkFactor?: number;
346
+ /**
347
+ * Custom OpenAI client factory function
348
+ *
349
+ * If provided, this function will be called to create OpenAI client instances
350
+ * for each AI call, allowing you to:
351
+ * - Wrap clients with observability tools (langsmith, langfuse)
352
+ * - Use custom OpenAI-compatible clients
353
+ * - Apply different configurations based on intent
354
+ *
355
+ * @param config - Resolved model configuration (NOTE(review): the example below takes `(openai, opts)`; confirm the actual parameter list against `CreateOpenAIClientFn`)
356
+ * @returns OpenAI client instance (original or wrapped)
357
+ *
358
+ * @example
359
+ * ```typescript
360
+ * createOpenAIClient: async (openai, opts) => {
361
+ * // Wrap with langsmith for planning tasks
362
+ * if (opts.baseURL?.includes('planning')) {
363
+ * return wrapOpenAI(openai, { metadata: { task: 'planning' } });
364
+ * }
365
+ *
366
+ * return openai;
367
+ * }
368
+ * ```
369
+ */
370
+ createOpenAIClient?: CreateOpenAIClientFn;
371
+ }
372
+
373
+ /** Options for `Agent.aiWaitFor`; adds polling controls on top of `ServiceExtractOption`. */ declare interface AgentWaitForOpt extends ServiceExtractOption {
374
+ /** Interval between checks, in milliseconds (per the `Ms` suffix); default not visible here. */ checkIntervalMs?: number;
375
+ /** Overall timeout, in milliseconds (per the `Ms` suffix); default not visible here. */ timeoutMs?: number;
376
+ }
377
+
378
+ /** Options accepted by `Agent.aiAct` and the deprecated `Agent.aiAction`; every field is optional. */ declare type AiActOptions = {
379
+ cacheable?: boolean;
380
+ fileChooserAccept?: string | string[];
381
+ deepThink?: DeepThinkOption;
382
+ deepLocate?: boolean;
383
+ /** Standard `AbortSignal`; presumably lets the caller cancel an in-flight `aiAct` — confirm in the implementation. */ abortSignal?: AbortSignal;
384
+ };
385
+
386
+ /** Shape of a describe-element response: a required `description` plus an optional `error` string. */ declare interface AIDescribeElementResponse {
387
+ description: string;
388
+ error?: string;
389
+ }
390
+
391
+ /** Usage metadata (token counts, time cost, model name, intent, request id). Each named key is required but may hold `undefined`, and arbitrary extra keys are allowed through the `Record<string, any>` part. */ declare type AIUsageInfo = Record<string, any> & {
392
+ prompt_tokens: number | undefined;
393
+ completion_tokens: number | undefined;
394
+ total_tokens: number | undefined;
395
+ cached_input: number | undefined;
396
+ /** Unit is not stated in this declaration — TODO confirm (milliseconds?). */ time_cost: number | undefined;
397
+ model_name: string | undefined;
398
+ model_description: string | undefined;
399
+ intent: string | undefined;
400
+ request_id: string | undefined;
401
+ };
402
+
403
+ /** `Agent` specialised for `AndroidDevice`; adds Android system-button actions, app launching and raw ADB shell access. */ declare class AndroidAgent extends Agent<AndroidDevice> {
404
+ /**
405
+ * Trigger the system back operation on Android devices
406
+ */
407
+ back: WrappedAction<DeviceActionAndroidBackButton>;
408
+ /**
409
+ * Trigger the system home operation on Android devices
410
+ */
411
+ home: WrappedAction<DeviceActionAndroidHomeButton>;
412
+ /**
413
+ * Trigger the system recent apps operation on Android devices
414
+ */
415
+ recentApps: WrappedAction<DeviceActionAndroidRecentAppsButton>;
416
+ /**
417
+ * User-provided app name to package name mapping
418
+ */
419
+ private appNameMapping;
420
+ constructor(device: AndroidDevice, opts?: AndroidAgentOpt);
421
+ /**
422
+ * Launch an Android app or URL
423
+ * @param uri - App package name, URL, or app name to launch
424
+ */
425
+ launch(uri: string): Promise<void>;
426
+ /**
427
+ * Execute ADB shell command on Android device
428
+ * @param command - ADB shell command to execute
429
+ * @returns A string; presumably the command output — confirm in the implementation
430
+ */
431
+ runAdbShell(command: string): Promise<string>;
432
+ private createActionWrapper;
433
+ }
433
+
434
+ /** Options for the `AndroidAgent` constructor: all of `AgentOpt` plus an optional app-name mapping. */ declare type AndroidAgentOpt = AgentOpt & {
435
+ /**
436
+ * Custom mapping of app names to package names
437
+ * User-provided mappings will take precedence over default mappings
438
+ */
439
+ appNameMapping?: Record<string, string>;
440
+ };
441
+
442
+ /** `AbstractInterface` implementation for one Android device identified by `deviceId`. Exposes ADB access (appium-adb `ADB`), screenshots, text/key input, tap/drag/scroll gestures and system buttons. */ declare class AndroidDevice implements AbstractInterface {
443
+ private deviceId;
444
+ private yadbPushed;
445
+ private devicePixelRatio;
446
+ private devicePixelRatioInitialized;
447
+ private adb;
448
+ private connectingAdb;
449
+ private destroyed;
450
+ private description;
451
+ private customActions?;
452
+ private cachedScreenSize;
453
+ private cachedOrientation;
454
+ private cachedPhysicalDisplayId;
455
+ private scrcpyAdapter;
456
+ private appNameMapping;
457
+ private scalingRatio;
458
+ private takeScreenshotFailCount;
459
+ private static readonly TAKE_SCREENSHOT_FAIL_THRESHOLD;
460
+ interfaceType: InterfaceType;
461
+ uri: string | undefined;
462
+ options?: AndroidDeviceOpt;
463
+ actionSpace(): DeviceAction<any>[];
464
+ constructor(deviceId: string, options?: AndroidDeviceOpt);
465
+ describe(): string;
466
+ connect(): Promise<ADB>;
467
+ getAdb(): Promise<ADB>;
468
+ private createAdbProxy;
469
+ /**
470
+ * Get or create the scrcpy adapter (lazy initialization)
471
+ */
472
+ private getScrcpyAdapter;
473
+ /**
474
+ * Get device physical info needed by scrcpy adapter
475
+ */
476
+ private getDevicePhysicalInfo;
477
+ /**
478
+ * Set the app name to package name mapping
479
+ */
480
+ setAppNameMapping(mapping: Record<string, string>): void;
481
+ /**
482
+ * Resolve app name to package name using the mapping
483
+ * Comparison is case-insensitive and ignores spaces, dashes, and underscores.
484
+ * Keys in appNameMapping are pre-normalized, so we only need to normalize the input.
485
+ * @param appName The app name to resolve
486
+ */
487
+ private resolvePackageName;
488
+ /** Unlike `AndroidAgent.launch`, which resolves to `void`, this resolves to an `AndroidDevice` (presumably `this`, for chaining — confirm). */ launch(uri: string): Promise<AndroidDevice>;
489
+ execYadb(keyboardContent: string): Promise<void>;
490
+ getElementsInfo(): Promise<ElementInfo[]>;
491
+ getElementsNodeTree(): Promise<any>;
492
+ /** Resolves to the raw size fields as strings (`override`, `physical`) plus a numeric `orientation`; the string format is not visible here — TODO confirm. */ getScreenSize(): Promise<{
493
+ override: string;
494
+ physical: string;
495
+ orientation: number;
496
+ isCurrentOrientation?: boolean;
497
+ }>;
498
+ private initializeDevicePixelRatio;
499
+ getDisplayDensity(): Promise<number>;
500
+ getDisplayOrientation(): Promise<number>;
501
+ /**
502
+ * Get physical screen dimensions adjusted for current orientation.
503
+ * Swaps width/height when the device is in landscape and the reported
504
+ * dimensions do not already reflect the current orientation.
505
+ */
506
+ private getOrientedPhysicalSize;
507
+ size(): Promise<Size>;
508
+ cacheFeatureForPoint(center: [number, number]): Promise<{
509
+ centerX: number;
510
+ centerY: number;
511
+ screenSize: {
512
+ width: number;
513
+ height: number;
514
+ };
515
+ }>;
516
+ rectMatchesCacheFeature(feature: {
517
+ centerX: number;
518
+ centerY: number;
519
+ screenSize: {
520
+ width: number;
521
+ height: number;
522
+ };
523
+ }): Promise<{
524
+ left: number;
525
+ top: number;
526
+ width: number;
527
+ height: number;
528
+ }>;
529
+ /**
530
+ * Convert logical coordinates (from AI) back to physical coordinates (for ADB).
531
+ * The ratio is derived from size(), so overriding size() alone is sufficient.
532
+ */
533
+ private adjustCoordinates;
534
+ /**
535
+ * Calculate the end point for scroll operations based on start point, scroll delta, and screen boundaries.
536
+ * This method ensures that scroll operations stay within screen bounds and maintain a minimum scroll distance
537
+ * for effective scrolling gestures on Android devices.
538
+ *
539
+ * @param start - The starting point of the scroll gesture
540
+ * @param deltaX - The horizontal scroll distance (positive = scroll right, negative = scroll left)
541
+ * @param deltaY - The vertical scroll distance (positive = scroll down, negative = scroll up)
542
+ * @param maxWidth - The maximum width boundary (screen width)
543
+ * @param maxHeight - The maximum height boundary (screen height)
544
+ * @returns The calculated end point for the scroll gesture
545
+ */
546
+ private calculateScrollEndPoint;
547
+ screenshotBase64(): Promise<string>;
548
+ clearInput(element?: ElementInfo): Promise<void>;
549
+ forceScreenshot(path: string): Promise<void>;
550
+ url(): Promise<string>;
551
+ scrollUntilTop(startPoint?: Point): Promise<void>;
552
+ scrollUntilBottom(startPoint?: Point): Promise<void>;
553
+ scrollUntilLeft(startPoint?: Point): Promise<void>;
554
+ scrollUntilRight(startPoint?: Point): Promise<void>;
555
+ scrollUp(distance?: number, startPoint?: Point): Promise<void>;
556
+ scrollDown(distance?: number, startPoint?: Point): Promise<void>;
557
+ scrollLeft(distance?: number, startPoint?: Point): Promise<void>;
558
+ scrollRight(distance?: number, startPoint?: Point): Promise<void>;
559
+ ensureYadb(): Promise<void>;
560
+ /**
561
+ * Check if text contains characters that may cause issues with ADB inputText.
562
+ * appium-adb's inputText has known bugs with certain characters:
563
+ * - Backslash causes broken shell quoting
564
+ * - Backtick is not escaped at all
565
+ * - Text containing both " and ' throws an error
566
+ * - Dollar sign can cause variable expansion issues
567
+ *
568
+ * For these characters, we route through yadb which handles them correctly
569
+ * via escapeForShell + double-quoted shell context.
570
+ */
571
+ private shouldUseYadbForText;
572
+ keyboardType(text: string, options?: AndroidDeviceInputOpt): Promise<void>;
573
+ private normalizeKeyName;
574
+ keyboardPress(key: string): Promise<void>;
575
+ mouseClick(x: number, y: number): Promise<void>;
576
+ mouseDoubleClick(x: number, y: number): Promise<void>;
577
+ /** Declared without coordinates; NOTE(review): presumably a no-op on touch devices — confirm in the implementation. */ mouseMove(): Promise<void>;
578
+ mouseDrag(from: {
579
+ x: number;
580
+ y: number;
581
+ }, to: {
582
+ x: number;
583
+ y: number;
584
+ }, duration?: number): Promise<void>;
585
+ scroll(deltaX: number, deltaY: number, duration?: number): Promise<void>;
586
+ destroy(): Promise<void>;
587
+ /**
588
+ * Get the current time from the Android device.
589
+ * Returns the device's current timestamp in milliseconds.
590
+ * This is useful when the system time and device time are not synchronized.
591
+ */
592
+ getTimestamp(): Promise<number>;
593
+ back(): Promise<void>;
594
+ home(): Promise<void>;
595
+ recentApps(): Promise<void>;
596
+ longPress(x: number, y: number, duration?: number): Promise<void>;
597
+ pullDown(startPoint?: Point, distance?: number, duration?: number): Promise<void>;
598
+ /** Note that `duration` is required here, whereas it is optional on `mouseDrag`. */ pullDrag(from: {
599
+ x: number;
600
+ y: number;
601
+ }, to: {
602
+ x: number;
603
+ y: number;
604
+ }, duration: number): Promise<void>;
605
+ pullUp(startPoint?: Point, distance?: number, duration?: number): Promise<void>;
606
+ private getDisplayArg;
607
+ /** Resolves to the physical display id as a string, or `null` (presumably when it cannot be determined — confirm). */ getPhysicalDisplayId(): Promise<string | null>;
608
+ /** Resolves to a boolean; presumably `true` when the keyboard was dismissed — confirm in the implementation. */ hideKeyboard(options?: AndroidDeviceInputOpt, timeoutMs?: number): Promise<boolean>;
609
+ }
610
+
611
+ /**
612
+ * Android device input options (accepted by `AndroidDevice.keyboardType` and `AndroidDevice.hideKeyboard`, and included in `AndroidDeviceOpt`)
613
+ */
614
+ declare type AndroidDeviceInputOpt = {
615
+ /** Automatically dismiss the keyboard after input is completed */
616
+ autoDismissKeyboard?: boolean;
617
+ /** Strategy for dismissing the keyboard: 'esc-first' tries ESC before BACK, 'back-first' tries BACK before ESC */
618
+ keyboardDismissStrategy?: 'esc-first' | 'back-first';
619
+ };
620
+
621
+ /**
622
+ * Android device options accepted by the `AndroidDevice` constructor; also carries every `AndroidDeviceInputOpt` field through the trailing intersection.
623
+ */
624
+ declare type AndroidDeviceOpt = {
625
+ /** Path to the ADB executable */
626
+ androidAdbPath?: string;
627
+ /** Remote ADB host address */
628
+ remoteAdbHost?: string;
629
+ /** Remote ADB port */
630
+ remoteAdbPort?: number;
631
+ /** Input method editor strategy: 'always-yadb' always uses yadb, 'yadb-for-non-ascii' uses yadb only for non-ASCII characters */
632
+ imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
633
+ /** Display ID to use for this device */
634
+ displayId?: number;
635
+ /** Use physical display ID for screenshot operations */
636
+ usePhysicalDisplayIdForScreenshot?: boolean;
637
+ /** Use physical display ID when looking up display information */
638
+ usePhysicalDisplayIdForDisplayLookup?: boolean;
639
+ /** Custom device actions to register */
640
+ customActions?: DeviceAction<any>[];
641
+ /**
642
+ * @deprecated Use `screenshotShrinkFactor` in AgentOpt instead.
643
+ * This option no longer affects screenshot size sent to AI model.
644
+ */
645
+ screenshotResizeScale?: number;
646
+ /** Always fetch screen info on each call; if false, cache the first result */
647
+ alwaysRefreshScreenInfo?: boolean;
648
+ /** Minimum screenshot buffer size in bytes (default: 10240 = 10KB). Set to 0 to disable validation. */
649
+ minScreenshotBufferSize?: number;
650
+ /**
651
+ * Scrcpy screenshot configuration for high-performance screen capture.
652
+ *
653
+ * Scrcpy provides 6-8x faster screenshots by streaming H.264 video from the device.
654
+ * When enabled, scrcpy will:
655
+ * 1. Start a video stream from the device on first screenshot request
656
+ * 2. Keep the connection alive for subsequent screenshots (16-50ms each)
657
+ * 3. Automatically disconnect after idle timeout to save resources
658
+ * 4. Fallback to standard ADB mode if unavailable
659
+ *
660
+ * @example
661
+ * ```typescript
662
+ * // Enable scrcpy for high-performance screenshots
663
+ * const device = new AndroidDevice(deviceId, {
664
+ * scrcpyConfig: {
665
+ * enabled: true,
666
+ * },
667
+ * });
668
+ *
669
+ * // Custom configuration
670
+ * const device = new AndroidDevice(deviceId, {
671
+ * scrcpyConfig: {
672
+ * enabled: true,
673
+ * maxSize: 0, // 0 = no scaling
674
+ * idleTimeoutMs: 30000,
675
+ * videoBitRate: 8_000_000,
676
+ * },
677
+ * });
678
+ * ```
679
+ */
680
+ scrcpyConfig?: {
681
+ /**
682
+ * Enable scrcpy for high-performance screenshots.
683
+ * @default false
684
+ */
685
+ enabled?: boolean;
686
+ /**
687
+ * Maximum video dimension (width or height).
688
+ * Video stream will be scaled down if device resolution exceeds this value.
689
+ * Lower values reduce bandwidth but may affect image quality.
690
+ *
691
+ * @default 0 (no scaling, use original resolution)
692
+ * @example
693
+ * { maxSize: 1024 } // Always scale to 1024
694
+ */
695
+ maxSize?: number;
696
+ /**
697
+ * Idle timeout in milliseconds before disconnecting scrcpy.
698
+ * Connection auto-closes after this period of inactivity to save resources.
699
+ * Set to 0 to disable auto-disconnect.
700
+ * @default 30000 (30 seconds)
701
+ */
702
+ idleTimeoutMs?: number;
703
+ /**
704
+ * Video bit rate for H.264 encoding in bits per second.
705
+ * Higher values improve quality but increase bandwidth usage.
706
+ * @default 2000000 (2 Mbps)
707
+ */
708
+ videoBitRate?: number;
709
+ };
710
+ } & AndroidDeviceInputOpt;
711
+
712
+ /**
713
+ * Android MCP Server (the only exported declaration in the visible part of this file)
714
+ * Provides MCP tools for Android automation through ADB; a tools manager may be injected through the constructor, otherwise `createToolsManager()` supplies one
715
+ */
716
+ export declare class AndroidMCPServer extends BaseMCPServer {
717
+ constructor(toolsManager?: AndroidMidsceneTools);
718
+ protected createToolsManager(): AndroidMidsceneTools;
719
+ }
720
+
721
+ /**
722
+ * Android-specific tools manager
723
+ * Extends BaseMidsceneTools (with `AndroidAgent` as the agent type) to provide Android ADB device connection tools
724
+ */
725
+ declare class AndroidMidsceneTools extends BaseMidsceneTools<AndroidAgent> {
726
+ protected createTemporaryDevice(): AndroidDevice;
727
+ protected ensureAgent(deviceId?: string): Promise<AndroidAgent>;
728
+ /**
729
+ * Provide Android-specific platform tools
730
+ */
731
+ protected preparePlatformTools(): ToolDefinition[];
732
+ }
733
+
734
+ /**
735
+ * Base agent interface: the minimal structural contract the MCP tools layer needs from an agent (only `getActionSpace` is required)
736
+ * Represents a platform-specific agent (Android, iOS, Web)
737
+ * Note: Return types use `unknown` for compatibility with platform-specific implementations
738
+ */
739
+ declare interface BaseAgent {
740
+ getActionSpace(): Promise<ActionSpaceItem[]>;
741
+ destroy?(): Promise<void>;
742
+ page?: {
743
+ screenshotBase64(): Promise<string>;
744
+ };
745
+ /** NOTE(review): `Agent.aiAction` is marked deprecated in favour of `aiAct`, yet only `aiAction` is listed here — confirm which one the tools layer should call. */ aiAction?: (description: string, params?: Record<string, unknown>) => Promise<unknown>;
746
+ aiWaitFor?: (assertion: string, options: Record<string, unknown>) => Promise<unknown>;
747
+ }
748
+
749
+ /**
750
+ * Base device interface for temporary device instances: a synchronous `actionSpace()` plus an optional async `destroy()`
751
+ */
752
+ declare interface BaseDevice {
753
+ actionSpace(): ActionSpaceItem[];
754
+ destroy?(): Promise<void>;
755
+ }
756
+
757
+ /**
758
+ * Base MCP Server class with programmatic launch() (stdio) and launchHttp() (HTTP) APIs
759
+ * Each platform extends this to provide their own tools manager
760
+ */
761
+ declare abstract class BaseMCPServer {
762
+ protected mcpServer: McpServer;
763
+ protected toolsManager?: IMidsceneTools;
764
+ protected config: BaseMCPServerConfig;
765
+ protected providedToolsManager?: IMidsceneTools;
766
+ constructor(config: BaseMCPServerConfig, toolsManager?: IMidsceneTools);
767
+ /**
768
+ * Platform-specific: create tools manager instance
769
+ * This is only called if no tools manager was provided in constructor
770
+ */
771
+ protected abstract createToolsManager(): IMidsceneTools;
772
+ /**
773
+ * Initialize tools manager and attach to MCP server
774
+ */
775
+ private initializeToolsManager;
776
+ /**
777
+ * Perform cleanup on shutdown
778
+ */
779
+ private performCleanup;
780
+ /**
781
+ * Initialize and launch the MCP server with stdio transport
782
+ */
783
+ launch(): Promise<LaunchMCPServerResult_2>;
784
+ /**
785
+ * Launch MCP server with HTTP transport
786
+ * Supports stateful sessions for web applications and service integration
787
+ */
788
+ launchHttp(options: HttpLaunchOptions): Promise<LaunchMCPServerResult_2>;
789
+ /**
790
+ * Create a new HTTP session with transport
791
+ */
792
+ private createHttpSession;
793
+ /**
794
+ * Start periodic session cleanup for inactive sessions
795
+ */
796
+ private startSessionCleanup;
797
+ /**
798
+ * Setup shutdown handlers for HTTP server
799
+ */
800
+ private setupHttpShutdownHandlers;
801
+ /**
802
+ * Get the underlying MCP server instance
803
+ */
804
+ getServer(): McpServer;
805
+ /**
806
+ * Get the tools manager instance (typed as possibly `undefined`; presumably unset until initialization — confirm)
807
+ */
808
+ getToolsManager(): IMidsceneTools | undefined;
809
+ }
810
+
811
+ /** Identity metadata passed to the `BaseMCPServer` constructor; all three strings are required. */ declare interface BaseMCPServerConfig {
812
+ name: string;
813
+ version: string;
814
+ description: string;
815
+ }
816
+
817
+ /**
818
+ * Base class for platform-specific MCP tools; implements IMidsceneTools.
819
+ * Generic type TAgent (defaults to BaseAgent) allows subclasses to use their specific agent types
820
+ */
821
+ declare abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseAgent> implements IMidsceneTools {
822
+ /** MCP server this tools manager is attached to, if any (see attachToServer). */ protected mcpServer?: McpServer;
823
+ /** Current agent, if one has been set or created (see setAgent / ensureAgent). */ protected agent?: TAgent;
824
+ /** Tool definitions held by this manager (see getToolDefinitions). */ protected toolDefinitions: ToolDefinition[];
825
+ /**
826
+ * Ensure agent is initialized and ready for use.
827
+ * Must be implemented by subclasses to create platform-specific agent.
828
+ * @param initParam Optional initialization parameter (platform-specific, e.g., URL, device ID)
829
+ * @returns Promise resolving to initialized agent instance
830
+ * @throws Error if agent initialization fails
831
+ */
832
+ protected abstract ensureAgent(initParam?: string): Promise<TAgent>;
833
+ /**
834
+ * Optional: prepare platform-specific tools (e.g., device connection); returns their definitions.
835
+ */
836
+ protected preparePlatformTools(): ToolDefinition[];
837
+ /**
838
+ * Must be implemented by subclasses to create a temporary device instance
839
+ * This allows getting real actionSpace without connecting to device
840
+ */
841
+ protected abstract createTemporaryDevice(): BaseDevice;
842
+ /**
843
+ * Initialize all tools by querying actionSpace
844
+ * Uses two-layer fallback strategy:
845
+ * 1. Try to get actionSpace from connected agent (if available)
846
+ * 2. Create temporary device instance to read actionSpace (always succeeds)
847
+ */
848
+ initTools(): Promise<void>;
849
+ /**
850
+ * Attach to MCP server and register all tools
851
+ */
852
+ attachToServer(server: McpServer): void;
853
+ /**
854
+ * Cleanup method - destroy agent and release resources
855
+ */
856
+ destroy(): Promise<void>;
857
+ /**
858
+ * Get tool definitions
859
+ */
860
+ getToolDefinitions(): ToolDefinition[];
861
+ /**
862
+ * Set agent for the tools manager
863
+ */
864
+ setAgent(agent: TAgent): void;
865
+ /**
866
+ * Helper: Convert base64 screenshot to image content array
867
+ * (each entry has type "image", a data string and a mimeType string).
868
+ */
869
+ protected buildScreenshotContent(screenshot: string): {
869
+ type: "image";
870
+ data: string;
871
+ mimeType: string;
872
+ }[];
873
+ /**
874
+ * Helper: Build a simple text result for tool responses
875
+ */
876
+ protected buildTextResult(text: string): {
877
+ content: {
878
+ type: "text";
879
+ text: string;
880
+ }[];
881
+ };
882
+ /**
883
+ * Create a disconnect handler for releasing platform resources
884
+ * @param platformName Human-readable platform name for the response message
885
+ * @returns Handler function that destroys the agent and returns appropriate response
886
+ */
887
+ protected createDisconnectHandler(platformName: string): () => Promise<{
888
+ content: {
889
+ type: "text";
890
+ text: string;
891
+ }[];
892
+ }>;
893
+ }
894
+
895
+ /** Cache option: either a boolean or a CacheConfig object. */ declare type Cache_2 = false | true | CacheConfig;
896
+
897
+ /**
898
+ * Cache settings object: a required `id` plus an optional `strategy` of 'read-only', 'read-write' or 'write-only'.
899
+ */
900
+ declare type CacheConfig = {
901
+ strategy?: 'read-only' | 'read-write' | 'write-only';
902
+ id: string;
903
+ };
904
+
905
+ /** Shape of the cache held by TaskCache: a version string, the cache id, and a list of plan/locate records. */ declare type CacheFileContent = {
906
+ midsceneVersion: string;
907
+ cacheId: string;
908
+ caches: Array<PlanningCache | LocateCache>;
909
+ };
910
+
911
+ /** Tri-state option: the literal 'unset', or an explicit boolean. */ declare type DeepThinkOption = 'unset' | true | false;
912
+
913
+ /** LocateOption without `deepThink` and without the TMultimodalPrompt keys, and with `prompt` made required. */ declare interface DetailedLocateParam extends Omit<LocateOption, 'deepThink' | keyof TMultimodalPrompt> {
914
+ prompt: TUserPrompt;
915
+ }
916
+
917
+ /** Describes one action: its name, optional metadata and Zod param schema, and the `call` function that runs it. */ declare interface DeviceAction<TParam = any, TReturn = any> {
918
+ name: string;
919
+ description?: string;
920
+ interfaceAlias?: string;
921
+ /** Optional Zod schema describing TParam. */ paramSchema?: z.ZodType<TParam>;
922
+ /** Runs the action with the param and executor context; may return synchronously or as a promise. */ call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn;
923
+ /** NOTE(review): presumably a delay applied after the action runs; unit not stated here — confirm. */ delayAfterRunner?: number;
924
+ /**
925
+ * An example param object for this action.
926
+ * Locate fields with { prompt } will automatically get bbox injected when needed.
927
+ */
928
+ sample?: {
929
+ [K in keyof TParam]?: any;
930
+ };
931
+ }
932
+
933
+ /** DeviceAction that takes no param (undefined) and returns nothing. */ declare type DeviceActionAndroidBackButton = DeviceAction<undefined, void>;
934
+
935
+ /** DeviceAction that takes no param (undefined) and returns nothing. */ declare type DeviceActionAndroidHomeButton = DeviceAction<undefined, void>;
936
+
937
+ /** DeviceAction that takes no param (undefined) and returns nothing. */ declare type DeviceActionAndroidRecentAppsButton = DeviceAction<undefined, void>;
938
+
939
+ /** Common metadata extended by IExecutionDump and ServiceDump. */ declare interface DumpMeta {
940
+ /** Numeric log time (NOTE(review): presumably an epoch timestamp — confirm unit). */ logTime: number;
941
+ }
942
+
943
+ /** Opaque string-keyed record; its contents are not constrained at the type level. */ declare type ElementCacheFeature = Record<string, unknown>;
944
+
945
+ /** Describes one element: identifiers, optional xpaths, attributes, node type, content, rectangle, center and visibility. */ declare interface ElementInfo {
946
+ id: string;
947
+ indexId: number;
948
+ nodeHashId: string;
949
+ xpaths?: string[];
950
+ /** String-valued attributes; `nodeType` is always present. */ attributes: {
951
+ nodeType: NodeType;
952
+ [key: string]: string;
953
+ };
954
+ nodeType: NodeType;
955
+ content: string;
956
+ /** Rectangle given as left/top origin plus width/height. */ rect: {
957
+ left: number;
958
+ top: number;
959
+ width: number;
960
+ height: number;
961
+ };
962
+ /** Two-number tuple (NOTE(review): presumably [x, y] — confirm). */ center: [number, number];
963
+ isVisible: boolean;
964
+ }
965
+
966
+ /**
967
+ * ExecutionDump class for serializing and deserializing execution dumps; implements IExecutionDump.
968
+ */
969
+ declare class ExecutionDump implements IExecutionDump {
970
+ id?: string;
971
+ logTime: number;
972
+ name: string;
973
+ description?: string;
974
+ tasks: ExecutionTask[];
975
+ aiActContext?: string;
976
+ /** Builds an instance from plain IExecutionDump data. */ constructor(data: IExecutionDump);
977
+ /**
978
+ * Serialize the ExecutionDump to a JSON string (optional `indents` argument).
979
+ */
980
+ serialize(indents?: number): string;
981
+ /**
982
+ * Convert to a plain object for JSON serialization
983
+ */
984
+ toJSON(): IExecutionDump;
985
+ /**
986
+ * Create an ExecutionDump instance from a serialized JSON string
987
+ */
988
+ static fromSerializedString(serialized: string): ExecutionDump;
989
+ /**
990
+ * Create an ExecutionDump instance from a plain object
991
+ */
992
+ static fromJSON(data: IExecutionDump): ExecutionDump;
993
+ /**
994
+ * Collect all ScreenshotItem instances from tasks.
995
+ * Scans through uiContext and recorder items to find screenshots.
996
+ *
997
+ * @returns Array of ScreenshotItem instances
998
+ */
999
+ collectScreenshots(): ScreenshotItem[];
1000
+ }
1001
+
1002
+ /** One recorder entry attached to a task result; the only declared `type` is 'screenshot'. */ declare interface ExecutionRecorderItem {
1003
+ type: 'screenshot';
1004
+ /** Numeric timestamp (unit not stated here). */ ts: number;
1005
+ screenshot?: ScreenshotItem;
1006
+ timing?: string;
1007
+ }
1008
+
1009
+ /** Result returned by TaskExecutor operations: the output, an optional thought string, and the TaskRunner involved. */ declare interface ExecutionResult<OutputType = any> {
1010
+ output: OutputType;
1011
+ thought?: string;
1012
+ runner: TaskRunner;
1013
+ }
1014
+
1015
+ /** A task definition E combined with its ExecutionTaskReturn (output/log types inferred from E) plus bookkeeping: id, status, error details, timing marks and usage. */ declare type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
1016
+ taskId: string;
1017
+ status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
1018
+ error?: Error;
1019
+ errorMessage?: string;
1020
+ errorStack?: string;
1021
+ /** Numeric marks for each phase; only `start` is required (units not stated here). */ timing?: {
1022
+ start: number;
1023
+ getUiContextStart?: number;
1024
+ getUiContextEnd?: number;
1025
+ callAiStart?: number;
1026
+ callAiEnd?: number;
1027
+ beforeInvokeActionHookStart?: number;
1028
+ beforeInvokeActionHookEnd?: number;
1029
+ callActionStart?: number;
1030
+ callActionEnd?: number;
1031
+ afterInvokeActionHookStart?: number;
1032
+ afterInvokeActionHookEnd?: number;
1033
+ captureAfterCallingSnapshotStart?: number;
1034
+ captureAfterCallingSnapshotEnd?: number;
1035
+ end?: number;
1036
+ cost?: number;
1037
+ };
1038
+ usage?: AIUsageInfo;
1039
+ searchAreaUsage?: AIUsageInfo;
1040
+ reasoning_content?: string;
1041
+ };
1042
+
1043
+ /** Definition of a task to run: its type, optional sub-type/param/thought/UI context, and the executor function. */ declare interface ExecutionTaskApply<Type extends ExecutionTaskType = any, TaskParam = any, TaskOutput = any, TaskLog = any> {
1044
+ type: Type;
1045
+ subType?: string;
1046
+ param?: TaskParam;
1047
+ thought?: string;
1048
+ uiContext?: UIContext;
1049
+ /** Runs the task; may return (or resolve to) an ExecutionTaskReturn, or nothing. */ executor: (param: TaskParam, context: ExecutorContext) => Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void> | undefined | void;
1050
+ }
1051
+
1052
+ /** Optional `hitBy` payload of ExecutionTaskReturn: a source label plus free-form context (semantics not shown here). */ declare interface ExecutionTaskHitBy {
1053
+ from: string;
1054
+ context: Record<string, any>;
1055
+ }
1056
+
1057
+ /** Progress callbacks for task execution. */ declare interface ExecutionTaskProgressOptions {
1058
+ /** Optional callback receiving the task; may be sync or async. */ onTaskStart?: (task: ExecutionTask) => Promise<void> | void;
1059
+ }
1060
+
1061
+ /** What a task executor may return; every field is optional. */ declare interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
1062
+ output?: TaskOutput;
1063
+ log?: TaskLog;
1064
+ recorder?: ExecutionRecorderItem[];
1065
+ hitBy?: ExecutionTaskHitBy;
1066
+ }
1067
+
1068
+ /** Allowed values for ExecutionTaskApply's `type` (note that 'Action Space' contains a space). */ declare type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';
1069
+
1070
+ /** Context passed as the second argument to task executors and DeviceAction.call. */ declare interface ExecutorContext {
1071
+ task: ExecutionTask;
1072
+ /** Located element, if any; may be explicitly null. */ element?: LocateResultElement | null;
1073
+ uiContext?: UIContext;
1074
+ }
1075
+
1076
+ /** Handle for a file chooser; exposes a single `accept` operation. */ declare interface FileChooserHandler {
1077
+ /** Accepts the given list of file strings (presumably paths — confirm); resolves with no value. */ accept(files: string[]): Promise<void>;
1078
+ }
1079
+
1080
+ /**
1081
+ * GroupedActionDump class for serializing and deserializing grouped action dumps; implements IGroupedActionDump.
1082
+ */
1083
+ declare class GroupedActionDump implements IGroupedActionDump {
1084
+ sdkVersion: string;
1085
+ groupName: string;
1086
+ groupDescription?: string;
1087
+ modelBriefs: ModelBrief[];
1088
+ /** Note: typed as ExecutionDump instances here, whereas IGroupedActionDump declares IExecutionDump[]. */ executions: ExecutionDump[];
1089
+ deviceType?: string;
1090
+ /** Builds an instance from plain IGroupedActionDump data. */ constructor(data: IGroupedActionDump);
1091
+ /**
1092
+ * Serialize the GroupedActionDump to a JSON string
1093
+ * Uses compact { $screenshot: id } format
1094
+ */
1095
+ serialize(indents?: number): string;
1096
+ /**
1097
+ * Serialize the GroupedActionDump with inline screenshots to a JSON string.
1098
+ * Each ScreenshotItem is replaced with { base64: "...", capturedAt }.
1099
+ */
1100
+ serializeWithInlineScreenshots(indents?: number): string;
1101
+ /**
1102
+ * Convert to a plain object for JSON serialization
1103
+ */
1104
+ toJSON(): IGroupedActionDump;
1105
+ /**
1106
+ * Create a GroupedActionDump instance from a serialized JSON string
1107
+ */
1108
+ static fromSerializedString(serialized: string): GroupedActionDump;
1109
+ /**
1110
+ * Create a GroupedActionDump instance from a plain object
1111
+ */
1112
+ static fromJSON(data: IGroupedActionDump): GroupedActionDump;
1113
+ /**
1114
+ * Collect all ScreenshotItem instances from all executions.
1115
+ *
1116
+ * @returns Array of all ScreenshotItem instances across all executions
1117
+ */
1118
+ collectAllScreenshots(): ScreenshotItem[];
1119
+ /**
1120
+ * Serialize the dump to files with screenshots as separate PNG files.
1121
+ * Creates:
1122
+ * - {basePath} - dump JSON with { $screenshot: id } references
1123
+ * - {basePath}.screenshots/ - PNG files (NOTE(review): ScreenshotItem.format can also be 'jpeg' — confirm the extension actually written)
1124
+ * - {basePath}.screenshots.json - ID to path mapping
1125
+ *
1126
+ * @param basePath - Base path for the dump file
1127
+ */
1128
+ serializeToFiles(basePath: string): void;
1129
+ /**
1130
+ * Read dump from files and return JSON string with inline screenshots.
1131
+ * Reads the dump JSON and screenshot files, then inlines the base64 data.
1132
+ *
1133
+ * @param basePath - Base path for the dump file
1134
+ * @returns JSON string with inline screenshots ({ base64: "..." } format)
1135
+ */
1136
+ static fromFilesAsInlineJson(basePath: string): string;
1137
+ /**
1138
+ * Clean up all files associated with a serialized dump.
1139
+ *
1140
+ * @param basePath - Base path for the dump file
1141
+ */
1142
+ static cleanupFiles(basePath: string): void;
1143
+ /**
1144
+ * Get all file paths associated with a serialized dump.
1145
+ *
1146
+ * @param basePath - Base path for the dump file
1147
+ * @returns Array of all associated file paths
1148
+ */
1149
+ static getFilePaths(basePath: string): string[];
1150
+ }
1151
+
1152
+ /** Options for BaseMCPServer.launchHttp: a required port and an optional host. */ declare interface HttpLaunchOptions {
1153
+ port: number;
1154
+ host?: string;
1155
+ }
1156
+
1157
+ /** Plain-object shape of an execution dump (DumpMeta plus name, tasks and optional fields); implemented by ExecutionDump. */ declare interface IExecutionDump extends DumpMeta {
1158
+ /** Stable unique identifier for this execution run */
1159
+ id?: string;
1160
+ name: string;
1161
+ description?: string;
1162
+ tasks: ExecutionTask[];
1163
+ aiActContext?: string;
1164
+ }
1165
+
1166
+ /** Plain-object shape of a grouped dump: SDK version, group naming, model briefs and the executions; implemented by GroupedActionDump. */ declare interface IGroupedActionDump {
1167
+ sdkVersion: string;
1168
+ groupName: string;
1169
+ groupDescription?: string;
1170
+ modelBriefs: ModelBrief[];
1171
+ executions: IExecutionDump[];
1172
+ deviceType?: string;
1173
+ }
1174
+
1175
+ /**
1176
+ * Interface for platform-specific MCP tools manager (implemented by BaseMidsceneTools).
1177
+ */
1178
+ declare interface IMidsceneTools {
1179
+ /** Attaches this manager to the given MCP server. */ attachToServer(server: McpServer): void;
1180
+ /** Asynchronously initializes the tools. */ initTools(): Promise<void>;
1181
+ /** Optional asynchronous cleanup. */ destroy?(): Promise<void>;
1182
+ }
1183
+
1184
+ /** Known interface type names; because the union ends in `string`, any string is accepted by the type checker. */ declare type InterfaceType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android' | string;
1185
+
1186
+ /** Result resolved by BaseMCPServer.launch() and launchHttp(). */ declare interface LaunchMCPServerResult_2 {
1187
+ /**
1188
+ * The MCP server port (for HTTP mode)
1189
+ */
1190
+ port?: number;
1191
+ /**
1192
+ * The server host (for HTTP mode)
1193
+ */
1194
+ host?: string;
1195
+ /**
1196
+ * Function to gracefully shutdown the MCP server
1197
+ */
1198
+ close: () => Promise<void>;
1199
+ }
1200
+
1201
+ /** Cache record with type 'locate': the prompt plus an optional cached element feature. */ declare interface LocateCache {
1202
+ type: 'locate';
1203
+ prompt: TUserPrompt;
1204
+ cache?: ElementCacheFeature;
1205
+ /** @deprecated kept for backward compatibility */
1206
+ xpaths?: string[];
1207
+ }
1208
+
1209
+ /** Options for locating an element; also accepts every TMultimodalPrompt field as optional. */ declare interface LocateOption extends Partial<TMultimodalPrompt> {
1210
+ prompt?: TUserPrompt;
1211
+ deepLocate?: boolean;
1212
+ /** @deprecated Use `deepLocate` instead. Kept for backward compatibility. */
1213
+ deepThink?: boolean;
1214
+ cacheable?: boolean;
1215
+ xpath?: string;
1216
+ uiContext?: UIContext;
1217
+ /** A single string or a list of strings. */ fileChooserAccept?: string | string[];
1218
+ }
1219
+
1220
+ /** Second argument of Service.locate: an optional UI context and an optional already-located element. */ declare interface LocateOpts {
1221
+ context?: UIContext;
1222
+ planLocatedElement?: LocateResultElement;
1223
+ }
1224
+
1225
+ /** Outcome of a locate call: the element (or null) and an optional rectangle. */ declare interface LocateResult {
1226
+ element: LocateResultElement | null;
1227
+ rect?: Rect;
1228
+ }
1229
+
1230
+ /** LocateResult combined with the `dump` field of ServiceResultBase; resolved by Service.locate. */ declare type LocateResultWithDump = LocateResult & ServiceResultBase;
1231
+
1232
+ /** Result of a locate validation: pass flag, rectangle, center tuple and an optional center distance. */ declare interface LocateValidatorResult {
1233
+ pass: boolean;
1234
+ rect: Rect;
1235
+ center: [number, number];
1236
+ centerDistance?: number;
1237
+ }
1238
+
1239
+ /** Validator option holding an optional numeric center-distance threshold (unit not stated here). */ declare interface LocatorValidatorOption {
1240
+ centerDistanceThreshold?: number;
1241
+ }
1242
+
1243
+ /** Value returned by the TaskCache match methods: the matched record, a usability flag and an update function. */ declare interface MatchCacheResult<T extends PlanningCache | LocateCache> {
1244
+ cacheContent: T;
1245
+ cacheUsable: boolean;
1246
+ /** Takes a callback that receives the cache record (the callback returns nothing). */ updateFn: (cb: (cache: T) => void) => void;
1247
+ }
1248
+
1249
+ /**
1250
+ * Create MCP kit for a specific Android Agent; resolves to a description string and a list of tools.
1251
+ */
1252
+ export declare function mcpKitForAgent(agent: Agent | AndroidAgent): Promise<{
1253
+ description: string;
1254
+ tools: Tool[];
1255
+ }>;
1256
+
1257
+ /**
1258
+ * Create an MCP server launcher for a specific Android Agent; the returned object exposes launch() and launchHttp().
1259
+ */
1260
+ export declare function mcpServerForAgent(agent: Agent | AndroidAgent): {
1261
+ launch(options?: {
1262
+ verbose?: boolean;
1263
+ }): Promise<LaunchMCPServerResult>;
1264
+ launchHttp(options: LaunchMCPServerOptions): Promise<LaunchMCPServerResult>;
1265
+ };
1266
+
1267
+ /** Union of the flow item shapes declared in this file (AI action, assert, wait-for, JavaScript, sleep, log screenshot). */ declare type MidsceneYamlFlowItem = MidsceneYamlFlowItemAIAction | MidsceneYamlFlowItemAIAssert | MidsceneYamlFlowItemAIWaitFor | MidsceneYamlFlowItemEvaluateJavaScript | MidsceneYamlFlowItemSleep | MidsceneYamlFlowItemLogScreenshot;
1268
+
1269
+ /** Flow item for an AI action; `aiAction`, `ai` and `aiAct` are all optional strings (presumably alternative spellings — confirm), and extra keys are allowed. */ declare interface MidsceneYamlFlowItemAIAction {
1270
+ aiAction?: string;
1271
+ ai?: string;
1272
+ aiAct?: string;
1273
+ aiActionProgressTips?: string[];
1274
+ cacheable?: boolean;
1275
+ [key: string]: unknown;
1276
+ }
1277
+
1278
+ /** Flow item for an AI assertion: required `aiAssert` string plus the ServiceExtractOption fields. */ declare interface MidsceneYamlFlowItemAIAssert extends ServiceExtractOption {
1279
+ aiAssert: string;
1280
+ errorMessage?: string;
1281
+ name?: string;
1282
+ }
1283
+
1284
+ /** Flow item for an AI wait: required `aiWaitFor` string, optional numeric timeout (unit not stated here), plus ServiceExtractOption fields. */ declare interface MidsceneYamlFlowItemAIWaitFor extends ServiceExtractOption {
1285
+ aiWaitFor: string;
1286
+ timeout?: number;
1287
+ }
1288
+
1289
+ /** Flow item carrying a JavaScript source string and an optional name. */ declare interface MidsceneYamlFlowItemEvaluateJavaScript {
1290
+ javascript: string;
1291
+ name?: string;
1292
+ }
1293
+
1294
+ /** Flow item for logging a screenshot; all three string fields are optional. */ declare interface MidsceneYamlFlowItemLogScreenshot {
1295
+ logScreenshot?: string;
1296
+ recordToReport?: string;
1297
+ content?: string;
1298
+ }
1299
+
1300
+ /** Flow item holding a numeric sleep duration (unit not stated here). */ declare interface MidsceneYamlFlowItemSleep {
1301
+ sleep: number;
1302
+ }
1303
+
1304
+ /** Short description of a model call, listed in grouped dumps as `modelBriefs`; every field is optional. */ declare interface ModelBrief {
1305
+ /**
1306
+ * The intent/category of the model call, for example "planning" or "insight".
1307
+ */
1308
+ intent?: string;
1309
+ /**
1310
+ * The model name returned by usage metadata, for example "gpt-4o".
1311
+ */
1312
+ name?: string;
1313
+ /**
1314
+ * Optional human-readable model description, for example "qwen2.5-vl mode".
1315
+ */
1316
+ modelDescription?: string;
1317
+ }
1318
+
1319
+ /** String-valued enum of node kinds, used by ElementInfo.nodeType. Note that `A` maps to "Anchor Node" rather than "A Node". */ declare enum NodeType {
1320
+ CONTAINER = "CONTAINER Node",
1321
+ FORM_ITEM = "FORM_ITEM Node",
1322
+ BUTTON = "BUTTON Node",
1323
+ A = "Anchor Node",
1324
+ IMG = "IMG Node",
1325
+ TEXT = "TEXT Node",
1326
+ POSITION = "POSITION Node"
1327
+ }
1328
+
1329
+ /**
1330
+ * Callback that receives a tip string; it may return a promise or nothing.
1331
+ */
1332
+ declare type OnTaskStartTip = (tip: string) => Promise<void> | void;
1333
+
1334
+ /** One planned action: a type string and its param, with optional thought and log text. */ declare interface PlanningAction<ParamType = any> {
1335
+ thought?: string;
1336
+ log?: string;
1337
+ type: string;
1338
+ param: ParamType;
1339
+ }
1340
+
1341
+ /** AgentWaitForOpt intersected with an empty object type; used as the options argument of TaskExecutor.waitFor. */ declare type PlanningActionParamWaitFor = AgentWaitForOpt & {};
1342
+
1343
+ /** Cache record with type 'plan': the prompt string and a YAML workflow string. */ declare interface PlanningCache {
1344
+ type: 'plan';
1345
+ prompt: string;
1346
+ yamlWorkflow: string;
1347
+ }
1348
+
1349
+ /**
1350
+ * Locate parameter used in planning: DetailedLocateParam plus an optional four-number `bbox` tuple.
1351
+ *
1352
+ */
1353
+ declare interface PlanningLocateParam extends DetailedLocateParam {
1354
+ bbox?: [number, number, number, number];
1355
+ }
1356
+
1357
+ /**
1358
+ * ScreenshotItem encapsulates screenshot data.
1359
+ *
1360
+ * Supports lazy loading after memory release:
1361
+ * - inline mode: reads from HTML file using streaming (extractImageByIdSync)
1362
+ * - directory mode: reads from file on disk
1363
+ *
1364
+ * After persistence, memory is released but the screenshot can be recovered
1365
+ * on-demand from disk, making it safe to release memory at any time.
1366
+ */
1367
+ declare class ScreenshotItem {
1368
+ private _id;
1369
+ private _base64;
1370
+ private _format;
1371
+ private _capturedAt;
1372
+ private _persistedAs;
1373
+ private _persistedPath;
1374
+ private _persistedHtmlPath;
1375
+ /** Private: instances are obtained through the static create() factory. */ private constructor();
1376
+ /** Create a new ScreenshotItem from base64 data */
1377
+ static create(base64: string, capturedAt: number): ScreenshotItem;
1378
+ /** Identifier of this screenshot. */ get id(): string;
1379
+ /** Get the image format (png or jpeg) */
1380
+ get format(): 'png' | 'jpeg';
1381
+ /** Get the file extension for this screenshot */
1382
+ get extension(): string;
1383
+ /** Get screenshot capture timestamp in milliseconds */
1384
+ get capturedAt(): number;
1385
+ /** Base64 data of the screenshot (NOTE(review): presumably reloaded lazily after release, per the class notes — confirm). */ get base64(): string;
1386
+ /** Check if base64 data is still available in memory (not yet released) */
1387
+ hasBase64(): boolean;
1388
+ /**
1389
+ * Mark as persisted to HTML (inline mode).
1390
+ * Releases base64 memory, but keeps HTML path for lazy loading recovery.
1391
+ * @param htmlPath - absolute path to the HTML file containing the image
1392
+ */
1393
+ markPersistedInline(htmlPath: string): void;
1394
+ /**
1395
+ * Mark as persisted to file (directory mode).
1396
+ * Releases base64 memory, but keeps file path for lazy loading recovery.
1397
+ * @param relativePath - relative path for serialization (e.g., "./screenshots/id.jpeg")
1398
+ * @param absolutePath - absolute path for lazy loading recovery
1399
+ */
1400
+ markPersistedToPath(relativePath: string, absolutePath: string): void;
1401
+ /** Serialize for JSON - format depends on persistence state */
1402
+ toSerializable(): ScreenshotSerializeFormat;
1403
+ /** Check if a value is a serialized ScreenshotItem reference (inline or directory mode) */
1404
+ static isSerialized(value: unknown): value is ScreenshotSerializeFormat;
1405
+ /**
1406
+ * Get base64 data without the data URI prefix.
1407
+ * Useful for writing raw binary data to files.
1408
+ */
1409
+ get rawBase64(): string;
1410
+ }
1411
+
1412
+ /**
1413
+ * Serialization format for ScreenshotItem; both variants also carry a numeric `capturedAt`.
1414
+ * - { $screenshot: "id" } - inline mode, references imageMap in HTML
1415
+ * - { base64: "path" } - directory mode, references external file path
1416
+ */
1417
+ declare type ScreenshotSerializeFormat = {
1418
+ $screenshot: string;
1419
+ capturedAt: number;
1420
+ } | {
1421
+ base64: string;
1422
+ capturedAt: number;
1423
+ };
1424
+
1425
+ /** ActionScrollParam with its `locate` property removed. */ declare type ScrollParam = Omit<ActionScrollParam, 'locate'>;
1426
+
1427
+ /** Accepted scroll type names (NOTE(review): 'once'/'until*' look like alternative spellings of 'singleAction'/'scrollTo*' — confirm which set is preferred). */ declare type ScrollType = 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft' | 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
1428
+
1429
+ /** Service exposing locate, extract and describe operations over a UIContext. */ declare class Service {
1430
+ /** Function returning the UI context, synchronously or as a promise. */ contextRetrieverFn: () => Promise<UIContext> | UIContext;
1431
+ taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;
1432
+ /** @param context a UIContext value or a function producing one. @param opt optional service options. */ constructor(context: UIContext | (() => Promise<UIContext> | UIContext), opt?: ServiceOptions);
1433
+ /** Locates an element for `query`; resolves to the locate result together with its dump. */ locate(query: PlanningLocateParam, opt: LocateOpts, modelConfig: IModelConfig, abortSignal?: AbortSignal): Promise<LocateResultWithDump>;
1434
+ /** Extracts data described by `dataDemand`; resolves to a ServiceExtractResult typed by T. */ extract<T>(dataDemand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, pageDescription?: string, multimodalPrompt?: TMultimodalPrompt, context?: UIContext): Promise<ServiceExtractResult<T>>;
1435
+ /** Describes the target given as a Rect or a two-number tuple; resolves to an object with `description`. */ describe(target: Rect | [number, number], modelConfig: IModelConfig, opt?: {
1436
+ deepLocate?: boolean;
1437
+ }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
1438
+ }
1439
+
1440
+ /** Names of service actions. */ declare type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';
1441
+
1442
+ /** Dump record of a service call (DumpMeta plus query, matches, data, assertion info, task info and error). */ declare interface ServiceDump extends DumpMeta {
1443
+ /** NOTE(review): ServiceAction also lists 'describe', which is absent here — confirm this is intentional. */ type: 'locate' | 'extract' | 'assert';
1444
+ logId: string;
1445
+ /** The original query; all three fields are optional. */ userQuery: {
1446
+ element?: TUserPrompt;
1447
+ dataDemand?: ServiceExtractParam;
1448
+ assertion?: TUserPrompt;
1449
+ };
1450
+ matchedElement: LocateResultElement[];
1451
+ matchedRect?: Rect;
1452
+ deepLocate?: boolean;
1453
+ data: any;
1454
+ assertionPass?: boolean;
1455
+ assertionThought?: string;
1456
+ taskInfo: ServiceTaskInfo;
1457
+ error?: string;
1458
+ output?: any;
1459
+ }
1460
+
1461
+ /** Options for extraction; extra keys are allowed through the index signature. */ declare interface ServiceExtractOption {
1462
+ /** A boolean, or the literal 'visible-only'. */ domIncluded?: boolean | 'visible-only';
1463
+ screenshotIncluded?: boolean;
1464
+ [key: string]: unknown;
1465
+ }
1466
+
1467
+ /** Data demand for extraction: a single string, or a record mapping string keys to strings. */ declare type ServiceExtractParam = string | Record<string, string>;
1468
+
1469
+ /** Result of Service.extract: the extracted data plus optional thought, usage and reasoning text, along with the dump. */ declare interface ServiceExtractResult<T> extends ServiceResultBase {
1470
+ data: T;
1471
+ thought?: string;
1472
+ usage?: AIUsageInfo;
1473
+ reasoning_content?: string;
1474
+ }
1475
+
1476
+ /** Optional second argument of the Service constructor. */ declare interface ServiceOptions {
1477
+ /** ServiceTaskInfo without `durationMs`. */ taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;
1478
+ }
1479
+
1480
+ /** Base shape shared by service results: each carries a ServiceDump. */ declare interface ServiceResultBase {
1481
+ dump: ServiceDump;
1482
+ }
1483
+
1484
+ /** Per-call task information: a required duration plus optional responses, usage and search-area details. */ declare interface ServiceTaskInfo {
1485
+ durationMs: number;
1486
+ formatResponse?: string;
1487
+ rawResponse?: string;
1488
+ usage?: AIUsageInfo;
1489
+ searchArea?: Rect;
1490
+ searchAreaRawResponse?: string;
1491
+ searchAreaUsage?: AIUsageInfo;
1492
+ reasoning_content?: string;
1493
+ }
1494
+
1495
+ /** Cache of plan and locate records identified by `cacheId`, with optional file persistence and read-only / write-only modes. */ declare class TaskCache {
1496
+ cacheId: string;
1497
+ cacheFilePath?: string;
1498
+ cache: CacheFileContent;
1499
+ isCacheResultUsed: boolean;
1500
+ cacheOriginalLength: number;
1501
+ readOnlyMode: boolean;
1502
+ writeOnlyMode: boolean;
1503
+ private matchedCacheIndices;
1504
+ /** @param options optional `readOnly` / `writeOnly` flags. */ constructor(cacheId: string, isCacheResultUsed: boolean, cacheFilePath?: string, options?: {
1505
+ readOnly?: boolean;
1506
+ writeOnly?: boolean;
1507
+ });
1508
+ /** Looks up a record of the given type for `prompt`; returns undefined when there is no match. */ matchCache(prompt: TUserPrompt, type: 'plan' | 'locate'): MatchCacheResult<PlanningCache | LocateCache> | undefined;
1509
+ /** Plan-typed lookup; returns undefined when there is no match. */ matchPlanCache(prompt: string): MatchCacheResult<PlanningCache> | undefined;
1510
+ /** Locate-typed lookup; returns undefined when there is no match. */ matchLocateCache(prompt: TUserPrompt): MatchCacheResult<LocateCache> | undefined;
1511
+ appendCache(cache: PlanningCache | LocateCache): void;
1512
+ /** Returns the loaded file content, or undefined. */ loadCacheFromFile(): CacheFileContent | undefined;
1513
+ /** @param options optional `cleanUnused` flag. */ flushCacheToFile(options?: {
1514
+ cleanUnused?: boolean;
1515
+ }): void;
1516
+ updateOrAppendCacheRecord(newRecord: PlanningCache | LocateCache, cachedRecord?: MatchCacheResult<PlanningCache | LocateCache>): void;
1517
+ }
1518
+
1519
+ /** Error subclass that also carries the TaskRunner and the failing task (or null). */ declare class TaskExecutionError extends Error {
1520
+ runner: TaskRunner;
1521
+ errorTask: ExecutionTask | null;
1522
+ /** @param options optional `cause` value. */ constructor(message: string, runner: TaskRunner, errorTask: ExecutionTask | null, options?: {
1523
+ cause?: unknown;
1524
+ });
1525
+ }
1526
+
1527
+ /** Executes plans, actions, typed queries and wait-for checks against an AbstractInterface using a Service. */ declare class TaskExecutor {
1528
+ interface: AbstractInterface;
1529
+ service: Service;
1530
+ taskCache?: TaskCache;
1531
+ private readonly providedActionSpace;
1532
+ private readonly taskBuilder;
1533
+ onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
1534
+ private readonly hooks?;
1535
+ replanningCycleLimit?: number;
1536
+ waitAfterAction?: number;
1537
+ useDeviceTimestamp?: boolean;
1538
+ /** Getter typed as AbstractInterface (NOTE(review): presumably an alias of `interface` — confirm). */ get page(): AbstractInterface;
1539
+ /** @param opts `actionSpace` is required; every other option is optional. */ constructor(interfaceInstance: AbstractInterface, service: Service, opts: {
1540
+ taskCache?: TaskCache;
1541
+ onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
1542
+ replanningCycleLimit?: number;
1543
+ waitAfterAction?: number;
1544
+ useDeviceTimestamp?: boolean;
1545
+ hooks?: TaskExecutorHooks;
1546
+ actionSpace: DeviceAction[];
1547
+ });
1548
+ private createExecutionSession;
1549
+ private getActionSpace;
1550
+ /**
1551
+ * Get a readable time string using device time when configured.
1552
+ * This method respects the useDeviceTimestamp configuration.
1553
+ * @param format - Optional format string
1554
+ * @returns A formatted time string
1555
+ */
1556
+ private getTimeString;
1557
+ /** Converts planning actions into task definitions; resolves to `{ tasks }`. */ convertPlanToExecutable(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: {
1558
+ cacheable?: boolean;
1559
+ deepLocate?: boolean;
1560
+ abortSignal?: AbortSignal;
1561
+ }): Promise<{
1562
+ tasks: ExecutionTaskApply[];
1563
+ }>;
1564
+ /** Takes a user instruction and a YAML string; resolves to `{ runner }`. */ loadYamlFlowAsPlanning(userInstruction: string, yamlString: string): Promise<{
1565
+ runner: TaskRunner;
1566
+ }>;
1567
+ runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
1568
+ /** Runs an action for `userPrompt`; the resolved output, when present, may hold a `yamlFlow` and an `output` string. */ action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: DeepThinkOption, fileChooserAccept?: string[], deepLocate?: boolean, abortSignal?: AbortSignal): Promise<ExecutionResult<{
1569
+ yamlFlow?: MidsceneYamlFlowItem[];
1570
+ output?: string;
1571
+ } | undefined>>;
1572
+ private runAction;
1573
+ private createTypeQueryTask;
1574
+ /** Runs a typed query ('Query', 'Boolean', 'Number', 'String' or 'Assert') for `demand`. */ createTypeQueryExecution<T>(type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert', demand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<ExecutionResult<T>>;
1575
+ waitFor(assertion: TUserPrompt, opt: PlanningActionParamWaitFor, modelConfig: IModelConfig): Promise<ExecutionResult<void>>;
1576
+ }
1577
+
1578
+ /** Optional hooks accepted by the TaskExecutor constructor. */ declare interface TaskExecutorHooks {
1579
+ /** Receives the runner and, optionally, a TaskExecutionError; may be sync or async. */ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void;
1580
+ }
1581
+
1582
+ /** Holds a named list of tasks with an overall status, and exposes append/flush operations and a dump. */ declare class TaskRunner {
1583
+ readonly id: string;
1584
+ name: string;
1585
+ tasks: ExecutionTask[];
1586
+ status: 'init' | 'pending' | 'running' | 'completed' | 'error';
1587
+ onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
1588
+ private readonly uiContextBuilder;
1589
+ private readonly onTaskUpdate?;
1590
+ private readonly executionLogTime;
1591
+ /** @param uiContextBuilder async function producing a UIContext. @param options optional init options. */ constructor(name: string, uiContextBuilder: () => Promise<UIContext>, options?: TaskRunnerInitOptions);
1592
+ private emitOnTaskUpdate;
1593
+ private lastUiContext?;
1594
+ private getUiContext;
1595
+ private captureScreenshot;
1596
+ private attachRecorderItem;
1597
+ private markTaskAsPending;
1598
+ private normalizeStatusFromError;
1599
+ /** Accepts one task definition or an array of them. */ append(task: ExecutionTaskApply[] | ExecutionTaskApply, options?: TaskRunnerOperationOptions): Promise<void>;
1600
+ /** Resolves to `{ output, thought? }` or undefined. */ appendAndFlush(task: ExecutionTaskApply[] | ExecutionTaskApply, options?: TaskRunnerOperationOptions): Promise<{
1601
+ output: any;
1602
+ thought?: string;
1603
+ } | undefined>;
1604
+ /** Resolves to `{ output, thought? }` or undefined. */ flush(options?: TaskRunnerOperationOptions): Promise<{
1605
+ output: any;
1606
+ thought?: string;
1607
+ } | undefined>;
1608
+ isInErrorState(): boolean;
1609
+ /** Returns an ExecutionTask, or null. */ latestErrorTask(): ExecutionTask | null;
1610
+ /** Returns an ExecutionDump. */ dump(): ExecutionDump;
1611
+ /** Resolves to an object with `output: undefined` and a TaskRunner. */ appendErrorPlan(errorMsg: string): Promise<{
1612
+ output: undefined;
1613
+ runner: TaskRunner;
1614
+ }>;
1615
+ }
1616
+
1617
+ /** Options for the TaskRunner constructor: ExecutionTaskProgressOptions plus optional initial tasks and an update callback. */ declare type TaskRunnerInitOptions = ExecutionTaskProgressOptions & {
1618
+ tasks?: ExecutionTaskApply[];
1619
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void;
1620
+ };
1621
+
1622
+ /** Options for TaskRunner append/flush operations (NOTE(review): `allowWhenError` presumably permits the call while the runner is in an error state — confirm). */ declare type TaskRunnerOperationOptions = {
1623
+ allowWhenError?: boolean;
1624
+ };
1625
+
1626
+ /** Type inferred from TMultimodalPromptSchema. */ declare type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;
1627
+
1628
+ /** Zod object schema with an optional `images` array of { name, url } strings and an optional `convertHttpImage2Base64` boolean. */ declare const TMultimodalPromptSchema: z.ZodObject<{
1629
+ images: z.ZodOptional<z.ZodArray<z.ZodObject<{
1630
+ name: z.ZodString;
1631
+ url: z.ZodString;
1632
+ }, "strip", z.ZodTypeAny, {
1633
+ name: string;
1634
+ url: string;
1635
+ }, {
1636
+ name: string;
1637
+ url: string;
1638
+ }>, "many">>;
1639
+ convertHttpImage2Base64: z.ZodOptional<z.ZodBoolean>;
1640
+ }, "strip", z.ZodTypeAny, {
1641
+ images?: {
1642
+ name: string;
1643
+ url: string;
1644
+ }[] | undefined;
1645
+ convertHttpImage2Base64?: boolean | undefined;
1646
+ }, {
1647
+ images?: {
1648
+ name: string;
1649
+ url: string;
1650
+ }[] | undefined;
1651
+ convertHttpImage2Base64?: boolean | undefined;
1652
+ }>;
1653
+
1654
+ /**
1655
+ * Tool type for mcpKitForAgent return value (an alias of ToolDefinition).
1656
+ */
1657
+ declare type Tool = ToolDefinition;
1658
+
1659
+ /**
1660
+ * Tool definition for MCP server: name, description, a Zod-based schema and the handler taking args of type T.
1661
+ */
1662
+ declare interface ToolDefinition<T = Record<string, unknown>> {
1663
+ name: string;
1664
+ description: string;
1665
+ schema: ToolSchema;
1666
+ handler: ToolHandler<T>;
1667
+ }
1668
+
1669
+ /**
1670
+ * Tool handler function type
1671
+ * Takes parsed arguments and returns a promise of a ToolResult
1672
+ */
1673
+ declare type ToolHandler<T = Record<string, unknown>> = (args: T) => Promise<ToolResult>;
1674
+
1675
+ /**
1676
+ * Result type for tool execution (MCP compatible): required `content`, optional `isError` and `_meta`, plus arbitrary extra keys.
1677
+ */
1678
+ declare interface ToolResult {
1679
+ [x: string]: unknown;
1680
+ content: ToolResultContent[];
1681
+ isError?: boolean;
1682
+ _meta?: Record<string, unknown>;
1683
+ }
1684
+
1685
+ /**
1686
+ * Content item types for tool results (MCP compatible), discriminated by `type`: 'text', 'image', 'audio' or 'resource'.
1687
+ */
1688
+ declare type ToolResultContent = {
1689
+ type: 'text';
1690
+ text: string;
1691
+ } | {
1692
+ type: 'image';
1693
+ data: string;
1694
+ mimeType: string;
1695
+ } | {
1696
+ type: 'audio';
1697
+ data: string;
1698
+ mimeType: string;
1699
+ } | {
1700
+ type: 'resource';
1701
+ /** Either a text resource (text + uri) or a blob resource (uri + blob); mimeType is optional in both. */ resource: {
1702
+ text: string;
1703
+ uri: string;
1704
+ mimeType?: string;
1705
+ } | {
1706
+ uri: string;
1707
+ blob: string;
1708
+ mimeType?: string;
1709
+ };
1710
+ };
1711
+
1712
+ /**
1713
+ * Tool schema type using Zod: a record mapping string keys to Zod types.
1714
+ */
1715
+ declare type ToolSchema = Record<string, z.ZodTypeAny>;
1716
+
1717
+ /** Type inferred from TUserPromptSchema. */ declare type TUserPrompt = z.infer<typeof TUserPromptSchema>;
1718
+
1719
+ /** Zod union: a plain string, or an object with a `prompt` string intersected with optional `images` and `convertHttpImage2Base64` fields. */ declare const TUserPromptSchema: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
1720
+ prompt: z.ZodString;
1721
+ }, "strip", z.ZodTypeAny, {
1722
+ prompt: string;
1723
+ }, {
1724
+ prompt: string;
1725
+ }>, z.ZodObject<{
1726
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
1727
+ name: z.ZodString;
1728
+ url: z.ZodString;
1729
+ }, "strip", z.ZodTypeAny, {
1730
+ name: string;
1731
+ url: string;
1732
+ }, {
1733
+ name: string;
1734
+ url: string;
1735
+ }>, "many">>>;
1736
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
1737
+ }, "strip", z.ZodTypeAny, {
1738
+ images?: {
1739
+ name: string;
1740
+ url: string;
1741
+ }[] | undefined;
1742
+ convertHttpImage2Base64?: boolean | undefined;
1743
+ }, {
1744
+ images?: {
1745
+ name: string;
1746
+ url: string;
1747
+ }[] | undefined;
1748
+ convertHttpImage2Base64?: boolean | undefined;
1749
+ }>>]>;
1750
+
1751
+ /**
1752
+ * Abstract description of a captured UI state: the screenshot, its size, and the ratio for mapping back to logical coordinates.
1753
+ */
1754
+ declare abstract class UIContext {
1755
+ /**
1756
+ * Screenshot of the current UI state; its size is shotSize (already shrunk by screenshotShrinkFactor).
1757
+ */
1758
+ abstract screenshot: ScreenshotItem;
1759
+ /**
1760
+ * screenshot size after shrinking
1761
+ */
1762
+ abstract shotSize: Size;
1763
+ /**
1764
+ * The ratio for converting shrunk screenshot coordinates to logical coordinates.
1765
+ *
1766
+ * Example:
1767
+ * - Physical screen width: 3000px, dpr=6
1768
+ * - Logical width: 500px
1769
+ * - User-defined screenshotShrinkFactor: 2
1770
+ * - Actual shrunk screenshot width: 3000 / 2 = 1500px
1771
+ * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3
1772
+ * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px
1773
+ */
1774
+ abstract shrunkShotToLogicalRatio: number;
1775
+ abstract _isFrozen?: boolean;
1776
+ abstract deprecatedDpr?: number;
1777
+ }
1778
+
1779
+ /**
1780
+ * Helper type to convert DeviceAction to wrapped method signature: takes ActionArgs<T> and returns a promise of ActionReturn<T>.
1781
+ */
1782
+ declare type WrappedAction<T extends DeviceAction> = (...args: ActionArgs<T>) => Promise<ActionReturn<T>>;
1783
+
1784
+ export { }