npm - @aiscene/android - Versions diffs - 1.7.15 → 1.8.1 - Mend

@aiscene/android 1.7.15 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/es/cli.mjs +1954 -3
package/dist/es/index.mjs +1952 -6
package/dist/es/mcp-server.mjs +1954 -5
package/dist/lib/cli.js +1975 -10
package/dist/lib/index.js +1965 -17
package/dist/lib/mcp-server.js +1988 -37
package/dist/types/cli.d.ts +1 -0
package/dist/types/index.d.ts +1448 -16
package/dist/types/mcp-server.d.ts +1784 -0
package/package.json +4 -8

package/dist/types/mcp-server.d.ts ADDED Viewed

@@ -0,0 +1,1784 @@
+import { ADB } from 'appium-adb';
+import type { CreateOpenAIClientFn } from '@midscene/shared/env';
+import type { ElementNode } from '@midscene/shared/extractor';
+import { IModelConfig } from '@midscene/shared/env';
+import { LaunchMCPServerOptions } from '@aiscene/shared/mcp';
+import { LaunchMCPServerResult } from '@aiscene/shared/mcp';
+import type { LocateResultElement } from '@midscene/shared/types';
+import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { ModelConfigManager } from '@midscene/shared/env';
+import { Point } from '@midscene/shared/types';
+import { Rect } from '@midscene/shared/types';
+import { Size } from '@midscene/shared/types';
+import type { TModelConfig } from '@midscene/shared/env';
+import { z } from './lib';
+declare abstract class AbstractInterface { // Contract a device/page must satisfy to be driven by Agent; AndroidDevice implements it.
+    abstract interfaceType: string;
+    abstract screenshotBase64(): Promise<string>;
+    abstract size(): Promise<Size>;
+    abstract actionSpace(): DeviceAction[]; // the actions this interface can perform
+    abstract cacheFeatureForPoint?(center: [number, number], options?: { // optional member (`?`); `center` is a two-number tuple
+        targetDescription?: string;
+        modelConfig?: IModelConfig;
+    }): Promise<ElementCacheFeature>;
+    abstract rectMatchesCacheFeature?(feature: ElementCacheFeature): Promise<Rect>; // optional; resolves a cache feature to a Rect
+    abstract destroy?(): Promise<void>; // optional
+    abstract describe?(): string; // optional
+    abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>; // optional hook; presumably runs before each action — confirm in Agent implementation
+    abstract afterInvokeAction?(actionName: string, param: any): Promise<void>; // optional hook; presumably runs after each action — confirm in Agent implementation
+    registerFileChooserListener?(handler: (chooser: FileChooserHandler) => Promise<void>): Promise<{ // optional, non-abstract; resolves to a handle with dispose/getError
+        dispose: () => void;
+        getError: () => Error | undefined;
+    }>;
+    abstract getElementsNodeTree?: () => Promise<ElementNode>; // optional; declared as a function-typed property, not a method
+    abstract url?: () => string | Promise<string>; // optional; may be synchronous or asynchronous
+    abstract evaluateJavaScript?<T = any>(script: string): Promise<T>; // optional
+    /**
+     * Get the current time from the device.
+     * Returns the device's current timestamp in milliseconds.
+     * This is useful when the system time and device time are not synchronized.
+     */
+    getTimestamp?(): Promise<number>;
+    /** URL of native MJPEG stream for real-time screen preview (e.g. WDA MJPEG server) */
+    mjpegStreamUrl?: string;
+}
+declare type ActionArgs<T extends DeviceAction> = [ActionParam<T>] extends [undefined] ? [] : [ActionParam<T>]; // Argument tuple for action T: empty when its param type is `undefined`, else a one-element tuple. The `[X] extends [undefined]` wrapping keeps the conditional from distributing over unions.
+/**
+ * Type utilities for extracting types from DeviceAction definitions
+ */
+/**
+ * Extract parameter type from a DeviceAction
+ * Resolves to `P` in `DeviceAction<P, R>`, inferred from `Action`.
+ * The `never` branch is the conditional's fallback for a non-matching `Action`.
+ */
+declare type ActionParam<Action extends DeviceAction<any, any>> = Action extends DeviceAction<infer P, any> ? P : never;
+/**
+ * Extract return type from a DeviceAction
+ * Resolves to `R` in `DeviceAction<P, R>`, inferred from `Action`.
+ * The `never` branch is the conditional's fallback for a non-matching `Action`.
+ */
+declare type ActionReturn<Action extends DeviceAction<any, any>> = Action extends DeviceAction<any, infer R> ? R : never;
+declare type ActionScrollParam = { // Parameters for a scroll action; every field is optional.
+    direction?: 'down' | 'up' | 'right' | 'left';
+    scrollType?: ScrollType; // ScrollType is declared outside this excerpt
+    distance?: number | null; // `null` is accepted in addition to omission; units not stated here — TODO confirm
+    locate?: LocateResultElement; // presumably the element to scroll on — confirm against the scroll action implementation
+};
+/**
+ * Action space item definition
+ * Note: Intentionally no index signature to maintain compatibility with DeviceAction
+ * Element type returned by `BaseAgent.getActionSpace()` and `BaseDevice.actionSpace()`.
+ */
+declare interface ActionSpaceItem {
+    name: string; // the only required field
+    description?: string;
+    args?: Record<string, unknown>;
+    paramSchema?: z.ZodTypeAny; // a zod schema; presumably describes the action's parameters — confirm
+}
+/**
+ * Agent bound to a single interface instance (`InterfaceType`, any AbstractInterface).
+ * Declares the `ai*` methods (tap, input, scroll, query, assert, wait, ...) together with
+ * dump/report, listener, cache and page-context helpers.
+ */
+declare class Agent<InterfaceType extends AbstractInterface = AbstractInterface> {
+    /** The interface instance passed to the constructor type parameter `InterfaceType`. */
+    interface: InterfaceType;
+    service: Service;
+    dump: GroupedActionDump;
+    reportFile?: string | null;
+    reportFileName?: string;
+    taskExecutor: TaskExecutor;
+    opts: AgentOpt;
+    /**
+     * If true, the agent will not perform any actions
+     */
+    dryMode: boolean;
+    onTaskStartTip?: OnTaskStartTip;
+    taskCache?: TaskCache;
+    private dumpUpdateListeners;
+    /** Accessor pair for a single dump-update callback; `undefined` means no callback. */
+    get onDumpUpdate(): ((dump: string, executionDump?: ExecutionDump) => void) | undefined;
+    set onDumpUpdate(callback: ((dump: string, executionDump?: ExecutionDump) => void) | undefined);
+    destroyed: boolean;
+    modelConfigManager: ModelConfigManager;
+    /**
+     * Frozen page context for consistent AI operations
+     */
+    private frozenUIContext?;
+    private get aiActContext();
+    /**
+     * Flag to track if VL model warning has been shown
+     */
+    private hasWarnedNonVLModel;
+    private executionDumpIndexByRunner;
+    private fullActionSpace;
+    private reportGenerator;
+    /** Typed as `InterfaceType`, the same type as {@link Agent.interface}. */
+    get page(): InterfaceType;
+    /**
+     * Ensures VL model warning is shown once when needed
+     */
+    private ensureVLModelWarning;
+    private resolveReplanningCycleLimit;
+    /**
+     * @param interfaceInstance - The interface this agent operates on
+     * @param opts - Optional agent options (see AgentOpt)
+     */
+    constructor(interfaceInstance: InterfaceType, opts?: AgentOpt);
+    getActionSpace(): Promise<DeviceAction[]>;
+    private static readonly CONTEXT_RETRY_MAX;
+    private static readonly CONTEXT_RETRY_DELAY_MS;
+    /**
+     * Override in subclasses to indicate which errors are transient and should
+     * trigger an automatic retry when building the UI context.
+     * Returns `false` by default (no retry).
+     */
+    protected isRetryableContextError(_error: unknown): boolean;
+    getUIContext(action?: ServiceAction): Promise<UIContext>;
+    _snapshotContext(): Promise<UIContext>;
+    /**
+     * @deprecated Use {@link setAIActContext} instead.
+     */
+    setAIActionContext(prompt: string): Promise<void>;
+    setAIActContext(prompt: string): Promise<void>;
+    resetDump(): GroupedActionDump;
+    appendExecutionDump(execution: ExecutionDump, runner?: TaskRunner): void;
+    dumpDataString(opt?: {
+        inlineScreenshots?: boolean;
+    }): string;
+    reportHTMLString(opt?: {
+        inlineScreenshots?: boolean;
+    }): string;
+    private lastExecutionDump?;
+    writeOutActionDumps(executionDump?: ExecutionDump): void;
+    private getGroupMeta;
+    private callbackOnTaskStartTip;
+    /**
+     * Returns a function typed with the parameter and return types of action `T`
+     * (see ActionParam / ActionReturn).
+     */
+    wrapActionInActionSpace<T extends DeviceAction>(name: string): (param: ActionParam<T>) => Promise<ActionReturn<T>>;
+    callActionInActionSpace<T = any>(type: string, opt?: T): Promise<any>;
+    aiTap(locatePrompt: TUserPrompt, opt?: LocateOption & {
+        fileChooserAccept?: string | string[];
+    }): Promise<any>;
+    aiRightClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
+    aiDoubleClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
+    aiHover(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
+    /**
+     * Current signature: the value to input is carried in `opt.value`.
+     */
+    aiInput(locatePrompt: TUserPrompt, opt: LocateOption & {
+        value: string | number;
+    } & {
+        autoDismissKeyboard?: boolean;
+    } & {
+        mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
+    }): Promise<any>;
+    /**
+     * @deprecated Use aiInput(locatePrompt, opt) instead where opt contains the value
+     */
+    aiInput(value: string | number, locatePrompt: TUserPrompt, opt?: LocateOption & {
+        autoDismissKeyboard?: boolean;
+    } & {
+        mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
+    }): Promise<any>;
+    /**
+     * Current signature: the key to press is carried in `opt.keyName`.
+     */
+    aiKeyboardPress(locatePrompt: TUserPrompt, opt: LocateOption & {
+        keyName: string;
+    }): Promise<any>;
+    /**
+     * @deprecated Use aiKeyboardPress(locatePrompt, opt) instead where opt contains the keyName
+     */
+    aiKeyboardPress(keyName: string, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
+    /**
+     * Current signature: scroll parameters are carried in `opt`; `locatePrompt` may be `undefined`.
+     */
+    aiScroll(locatePrompt: TUserPrompt | undefined, opt: LocateOption & ScrollParam): Promise<any>;
+    /**
+     * @deprecated Use aiScroll(locatePrompt, opt) instead where opt contains the scroll parameters
+     */
+    aiScroll(scrollParam: ScrollParam, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
+    aiPinch(locatePrompt: TUserPrompt | undefined, opt: LocateOption & {
+        direction: 'in' | 'out';
+        distance?: number;
+        duration?: number;
+    }): Promise<any>;
+    aiAct(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
+    /**
+     * @deprecated Use {@link Agent.aiAct} instead.
+     */
+    aiAction(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
+    aiQuery<ReturnType = any>(demand: ServiceExtractParam, opt?: ServiceExtractOption): Promise<ReturnType>;
+    aiBoolean(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<boolean>;
+    aiNumber(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<number>;
+    aiString(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
+    aiAsk(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
+    describeElementAtPoint(center: [number, number], opt?: {
+        verifyPrompt?: boolean;
+        retryLimit?: number;
+        deepLocate?: boolean;
+    } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
+    verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
+    aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "rect" | "center">>;
+    /**
+     * Resolves to a `{ pass, thought, message }` record, or to `undefined`.
+     */
+    aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & ServiceExtractOption): Promise<{
+        pass: boolean;
+        thought: string | undefined;
+        message: string | undefined;
+    } | undefined>;
+    aiWaitFor(assertion: TUserPrompt, opt?: AgentWaitForOpt): Promise<void>;
+    /**
+     * Takes the same argument list as {@link Agent.aiAct} and resolves to the same type
+     * (presumably a shorthand that forwards to it; confirm in the implementation).
+     *
+     * The parameter tuple is derived through the indexed access type `Agent['aiAct']`,
+     * which reads the instance method. A `typeof Agent.aiAct` query would look the name
+     * up on the constructor (static side), where no such member is declared.
+     */
+    ai(...args: Parameters<Agent['aiAct']>): Promise<string | undefined>;
+    runYaml(yamlScriptContent: string): Promise<{
+        result: Record<string, any>;
+    }>;
+    evaluateJavaScript(script: string): Promise<any>;
+    /**
+     * Add a dump update listener
+     * @param listener Listener function
+     * @returns A remove function that can be called to remove this listener
+     */
+    addDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): () => void;
+    /**
+     * Remove a dump update listener
+     * @param listener The listener function to remove
+     */
+    removeDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): void;
+    /**
+     * Clear all dump update listeners
+     */
+    clearDumpUpdateListeners(): void;
+    destroy(): Promise<void>;
+    recordToReport(title?: string, opt?: {
+        content: string;
+    }): Promise<void>;
+    /**
+     * @deprecated Use {@link Agent.recordToReport} instead.
+     */
+    logScreenshot(title?: string, opt?: {
+        content: string;
+    }): Promise<void>;
+    _unstableLogContent(): {
+        groupName: string;
+        groupDescription: string | undefined;
+        executions: ExecutionDump[];
+    };
+    /**
+     * Freezes the current page context to be reused in subsequent AI operations
+     * This avoids recalculating page context for each operation
+     */
+    freezePageContext(): Promise<void>;
+    /**
+     * Unfreezes the page context, allowing AI operations to calculate context dynamically
+     */
+    unfreezePageContext(): Promise<void>;
+    /**
+     * Process cache configuration and return normalized cache settings
+     */
+    private processCacheConfig;
+    private normalizeFilePaths;
+    private normalizeFileInput;
+    /**
+     * Manually flush cache to file
+     * @param options - Optional configuration
+     * @param options.cleanUnused - If true, removes unused cache records before flushing
+     */
+    flushCache(options?: {
+        cleanUnused?: boolean;
+    }): Promise<void>;
+}
+declare interface AgentAssertOpt { // Extra options for Agent.aiAssert, intersected there with ServiceExtractOption.
+    keepRawResponse?: boolean;
+}
+declare interface AgentDescribeElementAtPointResult { // Resolved value of Agent.describeElementAtPoint.
+    prompt: string;
+    deepLocate: boolean;
+    verifyResult?: LocateValidatorResult; // optional; same type that Agent.verifyLocator resolves to
+}
+declare interface AgentOpt { // Options accepted by the Agent constructor; every field is optional.
+    testId?: string;
+    cacheId?: string;
+    groupName?: string;
+    groupDescription?: string;
+    generateReport?: boolean;
+    autoPrintReportMsg?: boolean;
+    /**
+     * Use directory-based report format with separate image files.
+     *
+     * When enabled:
+     * - Screenshots are saved as PNG files in a `screenshots/` subdirectory
+     * - Report is generated as `index.html` with relative image paths
+     * - Reduces memory usage and report file size
+     *
+     * IMPORTANT: 'html-and-external-assets' reports must be served via HTTP server
+     * (e.g., `npx serve ./report-dir`). The file:// protocol will not
+     * work due to browser CORS restrictions.
+     *
+     * @default 'single-html'
+     */
+    outputFormat?: 'single-html' | 'html-and-external-assets';
+    onTaskStartTip?: OnTaskStartTip;
+    aiActContext?: string;
+    aiActionContext?: string; // NOTE(review): presumably the legacy spelling of aiActContext (cf. deprecated Agent.setAIActionContext) — confirm
+    reportFileName?: string;
+    modelConfig?: TModelConfig;
+    cache?: Cache_2; // Cache_2 is declared outside this excerpt
+    /**
+     * Maximum number of replanning cycles for aiAct.
+     * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.
+     * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.
+     */
+    replanningCycleLimit?: number;
+    /**
+     * Wait time in milliseconds after each action execution.
+     * This allows the UI to settle and stabilize before the next action.
+     * Defaults to 300ms when not provided.
+     */
+    waitAfterAction?: number;
+    /**
+     * When set to true, Midscene will use the target device's time (Android/iOS)
+     * instead of the system time. Useful when the device time differs from the
+     * host machine. Default: false
+     */
+    useDeviceTimestamp?: boolean;
+    /**
+     * Custom screenshot shrink factor to reduce AI token usage.
+     * When set, the screenshot will be scaled down by this factor from the physical resolution.
+     *
+     * Example:
+     * - Physical screen width: 3000px, dpr=6
+     * - Logical width: 500px
+     * - screenshotShrinkFactor: 2
+     * - Actual shrunk screenshot width: 3000 / 2 = 1500px
+     * - AI analyzes the 1500px screenshot
+     * - Coordinates are transformed back to logical (500px) before actions execute
+     *
+     * Benefits:
+     * - Reduces token usage for high-resolution screenshots
+     * - Maintains accuracy by scaling coordinates appropriately
+     *
+     * Must be >= 1 (shrinking only, enlarging is not supported).
+     *
+     * @default 1 (no shrinking, uses original physical screenshot)
+     */
+    screenshotShrinkFactor?: number;
+    /**
+     * Custom OpenAI client factory function
+     *
+     * If provided, this function will be called to create OpenAI client instances
+     * for each AI call, allowing you to:
+     * - Wrap clients with observability tools (langsmith, langfuse)
+     * - Use custom OpenAI-compatible clients
+     * - Apply different configurations based on intent
+     *
+     * NOTE(review): the `@param config` tag below names a single argument, while the
+     * example passes `(openai, opts)` — confirm the real signature against `CreateOpenAIClientFn`.
+     *
+     * @param config - Resolved model configuration
+     * @returns OpenAI client instance (original or wrapped)
+     *
+     * @example
+     * ```typescript
+     * createOpenAIClient: async (openai, opts) => {
+     *   // Wrap with langsmith for planning tasks
+     *   if (opts.baseURL?.includes('planning')) {
+     *     return wrapOpenAI(openai, { metadata: { task: 'planning' } });
+     *   }
+     *
+     *   return openai;
+     * }
+     * ```
+     */
+    createOpenAIClient?: CreateOpenAIClientFn;
+}
+declare interface AgentWaitForOpt extends ServiceExtractOption { // Options for Agent.aiWaitFor; adds polling fields to ServiceExtractOption.
+    checkIntervalMs?: number; // milliseconds, per the `Ms` suffix; default not stated here
+    timeoutMs?: number; // milliseconds, per the `Ms` suffix; default not stated here
+}
+declare type AiActOptions = { // Options for Agent.aiAct / Agent.aiAction; every field is optional.
+    cacheable?: boolean;
+    fileChooserAccept?: string | string[]; // a single string or a list; the same option shape appears on Agent.aiTap
+    deepThink?: DeepThinkOption; // DeepThinkOption is declared outside this excerpt
+    deepLocate?: boolean;
+    abortSignal?: AbortSignal; // standard AbortSignal; presumably cancels the running task — confirm
+};
+declare interface AIDescribeElementResponse { // Response shape: a required description plus an optional error string.
+    description: string;
+    error?: string;
+}
+declare type AIUsageInfo = Record<string, any> & { // Open record (arbitrary extra keys allowed) intersected with the known keys below.
+    prompt_tokens: number | undefined; // each known key is required to be present, but its value may be `undefined`
+    completion_tokens: number | undefined;
+    total_tokens: number | undefined;
+    cached_input: number | undefined;
+    time_cost: number | undefined; // unit not stated here — TODO confirm
+    model_name: string | undefined;
+    model_description: string | undefined;
+    intent: string | undefined;
+    request_id: string | undefined;
+};
+declare class AndroidAgent extends Agent<AndroidDevice> { // Agent specialised to AndroidDevice as its interface type.
+    /**
+     * Trigger the system back operation on Android devices
+     */
+    back: WrappedAction<DeviceActionAndroidBackButton>;
+    /**
+     * Trigger the system home operation on Android devices
+     */
+    home: WrappedAction<DeviceActionAndroidHomeButton>;
+    /**
+     * Trigger the system recent apps operation on Android devices
+     */
+    recentApps: WrappedAction<DeviceActionAndroidRecentAppsButton>;
+    /**
+     * User-provided app name to package name mapping
+     */
+    private appNameMapping;
+    constructor(device: AndroidDevice, opts?: AndroidAgentOpt); // opts is AgentOpt plus the optional appNameMapping
+    /**
+     * Launch an Android app or URL
+     * @param uri - App package name, URL, or app name to launch
+     */
+    launch(uri: string): Promise<void>;
+    /**
+     * Execute ADB shell command on Android device
+     * @param command - ADB shell command to execute
+     * @returns Promise resolving to a string (presumably the command output — confirm)
+     */
+    runAdbShell(command: string): Promise<string>;
+    private createActionWrapper;
+}
+declare type AndroidAgentOpt = AgentOpt & { // AndroidAgent constructor options: all of AgentOpt plus the field below.
+    /**
+     * Custom mapping of app names to package names
+     * User-provided mappings will take precedence over default mappings
+     */
+    appNameMapping?: Record<string, string>;
+};
+declare class AndroidDevice implements AbstractInterface { // AbstractInterface implementation for one Android device, constructed from a deviceId.
+    private deviceId;
+    private yadbPushed;
+    private devicePixelRatio;
+    private devicePixelRatioInitialized;
+    private adb;
+    private connectingAdb;
+    private destroyed;
+    private description;
+    private customActions?;
+    private cachedScreenSize;
+    private cachedOrientation;
+    private cachedPhysicalDisplayId;
+    private scrcpyAdapter;
+    private appNameMapping;
+    private scalingRatio;
+    private takeScreenshotFailCount;
+    private static readonly TAKE_SCREENSHOT_FAIL_THRESHOLD;
+    interfaceType: InterfaceType; // here `InterfaceType` is a type name declared outside this excerpt, not Agent's type parameter
+    uri: string | undefined;
+    options?: AndroidDeviceOpt;
+    actionSpace(): DeviceAction<any>[];
+    constructor(deviceId: string, options?: AndroidDeviceOpt);
+    describe(): string;
+    connect(): Promise<ADB>; // resolves to an appium-adb ADB instance
+    getAdb(): Promise<ADB>; // resolves to an appium-adb ADB instance
+    private createAdbProxy;
+    /**
+     * Get or create the scrcpy adapter (lazy initialization)
+     */
+    private getScrcpyAdapter;
+    /**
+     * Get device physical info needed by scrcpy adapter
+     */
+    private getDevicePhysicalInfo;
+    /**
+     * Set the app name to package name mapping
+     */
+    setAppNameMapping(mapping: Record<string, string>): void;
+    /**
+     * Resolve app name to package name using the mapping
+     * Comparison is case-insensitive and ignores spaces, dashes, and underscores.
+     * Keys in appNameMapping are pre-normalized, so we only need to normalize the input.
+     * @param appName The app name to resolve
+     */
+    private resolvePackageName;
+    launch(uri: string): Promise<AndroidDevice>; // resolves to an AndroidDevice (presumably this instance, for chaining — confirm)
+    execYadb(keyboardContent: string): Promise<void>;
+    getElementsInfo(): Promise<ElementInfo[]>;
+    getElementsNodeTree(): Promise<any>; // typed `any` here; AbstractInterface declares Promise<ElementNode>
+    getScreenSize(): Promise<{ // note: `override` and `physical` are strings, not numeric sizes
+        override: string;
+        physical: string;
+        orientation: number;
+        isCurrentOrientation?: boolean;
+    }>;
+    private initializeDevicePixelRatio;
+    getDisplayDensity(): Promise<number>;
+    getDisplayOrientation(): Promise<number>;
+    /**
+     * Get physical screen dimensions adjusted for current orientation.
+     * Swaps width/height when the device is in landscape and the reported
+     * dimensions do not already reflect the current orientation.
+     */
+    private getOrientedPhysicalSize;
+    size(): Promise<Size>;
+    cacheFeatureForPoint(center: [number, number]): Promise<{ // takes only `center`; the optional `options` of AbstractInterface is not declared here
+        centerX: number;
+        centerY: number;
+        screenSize: {
+            width: number;
+            height: number;
+        };
+    }>;
+    rectMatchesCacheFeature(feature: { // `feature` has the same shape that cacheFeatureForPoint resolves to
+        centerX: number;
+        centerY: number;
+        screenSize: {
+            width: number;
+            height: number;
+        };
+    }): Promise<{
+        left: number;
+        top: number;
+        width: number;
+        height: number;
+    }>;
+    /**
+     * Convert logical coordinates (from AI) back to physical coordinates (for ADB).
+     * The ratio is derived from size(), so overriding size() alone is sufficient.
+     */
+    private adjustCoordinates;
+    /**
+     * Calculate the end point for scroll operations based on start point, scroll delta, and screen boundaries.
+     * This method ensures that scroll operations stay within screen bounds and maintain a minimum scroll distance
+     * for effective scrolling gestures on Android devices.
+     *
+     * @param start - The starting point of the scroll gesture
+     * @param deltaX - The horizontal scroll distance (positive = scroll right, negative = scroll left)
+     * @param deltaY - The vertical scroll distance (positive = scroll down, negative = scroll up)
+     * @param maxWidth - The maximum width boundary (screen width)
+     * @param maxHeight - The maximum height boundary (screen height)
+     * @returns The calculated end point for the scroll gesture
+     */
+    private calculateScrollEndPoint;
+    screenshotBase64(): Promise<string>;
+    clearInput(element?: ElementInfo): Promise<void>;
+    forceScreenshot(path: string): Promise<void>;
+    url(): Promise<string>;
+    scrollUntilTop(startPoint?: Point): Promise<void>;
+    scrollUntilBottom(startPoint?: Point): Promise<void>;
+    scrollUntilLeft(startPoint?: Point): Promise<void>;
+    scrollUntilRight(startPoint?: Point): Promise<void>;
+    scrollUp(distance?: number, startPoint?: Point): Promise<void>;
+    scrollDown(distance?: number, startPoint?: Point): Promise<void>;
+    scrollLeft(distance?: number, startPoint?: Point): Promise<void>;
+    scrollRight(distance?: number, startPoint?: Point): Promise<void>;
+    ensureYadb(): Promise<void>;
+    /**
+     * Check if text contains characters that may cause issues with ADB inputText.
+     * appium-adb's inputText has known bugs with certain characters:
+     * - Backslash causes broken shell quoting
+     * - Backtick is not escaped at all
+     * - Text containing both " and ' throws an error
+     * - Dollar sign can cause variable expansion issues
+     *
+     * For these characters, we route through yadb which handles them correctly
+     * via escapeForShell + double-quoted shell context.
+     */
+    private shouldUseYadbForText;
+    keyboardType(text: string, options?: AndroidDeviceInputOpt): Promise<void>;
+    private normalizeKeyName;
+    keyboardPress(key: string): Promise<void>;
+    mouseClick(x: number, y: number): Promise<void>;
+    mouseDoubleClick(x: number, y: number): Promise<void>;
+    mouseMove(): Promise<void>; // declared without coordinates
+    mouseDrag(from: {
+        x: number;
+        y: number;
+    }, to: {
+        x: number;
+        y: number;
+    }, duration?: number): Promise<void>;
+    scroll(deltaX: number, deltaY: number, duration?: number): Promise<void>;
+    destroy(): Promise<void>;
+    /**
+     * Get the current time from the Android device.
+     * Returns the device's current timestamp in milliseconds.
+     * This is useful when the system time and device time are not synchronized.
+     */
+    getTimestamp(): Promise<number>;
+    back(): Promise<void>;
+    home(): Promise<void>;
+    recentApps(): Promise<void>;
+    longPress(x: number, y: number, duration?: number): Promise<void>;
+    pullDown(startPoint?: Point, distance?: number, duration?: number): Promise<void>;
+    pullDrag(from: {
+        x: number;
+        y: number;
+    }, to: {
+        x: number;
+        y: number;
+    }, duration: number): Promise<void>; // `duration` is required here, unlike mouseDrag
+    pullUp(startPoint?: Point, distance?: number, duration?: number): Promise<void>;
+    private getDisplayArg;
+    getPhysicalDisplayId(): Promise<string | null>; // may resolve to null
+    hideKeyboard(options?: AndroidDeviceInputOpt, timeoutMs?: number): Promise<boolean>; // meaning of the boolean is not stated here — TODO confirm
+}
+/**
+ * Android device input options
+ * Accepted by AndroidDevice.keyboardType and AndroidDevice.hideKeyboard, and
+ * intersected into AndroidDeviceOpt. Both fields are optional.
+ */
+declare type AndroidDeviceInputOpt = {
+    /** Automatically dismiss the keyboard after input is completed */
+    autoDismissKeyboard?: boolean;
+    /** Strategy for dismissing the keyboard: 'esc-first' tries ESC before BACK, 'back-first' tries BACK before ESC */
+    keyboardDismissStrategy?: 'esc-first' | 'back-first';
+};
+/**
+ * Android device options
+ * Second argument of the AndroidDevice constructor. Every field is optional, and the
+ * type also includes every AndroidDeviceInputOpt field via the trailing intersection.
+ */
+declare type AndroidDeviceOpt = {
+    /** Path to the ADB executable */
+    androidAdbPath?: string;
+    /** Remote ADB host address */
+    remoteAdbHost?: string;
+    /** Remote ADB port */
+    remoteAdbPort?: number;
+    /** Input method editor strategy: 'always-yadb' always uses yadb, 'yadb-for-non-ascii' uses yadb only for non-ASCII characters */
+    imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
+    /** Display ID to use for this device */
+    displayId?: number;
+    /** Use physical display ID for screenshot operations */
+    usePhysicalDisplayIdForScreenshot?: boolean;
+    /** Use physical display ID when looking up display information */
+    usePhysicalDisplayIdForDisplayLookup?: boolean;
+    /** Custom device actions to register */
+    customActions?: DeviceAction<any>[];
+    /**
+     * @deprecated Use `screenshotShrinkFactor` in AgentOpt instead.
+     * This option no longer affects screenshot size sent to AI model.
+     */
+    screenshotResizeScale?: number;
+    /** Always fetch screen info on each call; if false, cache the first result */
+    alwaysRefreshScreenInfo?: boolean;
+    /** Minimum screenshot buffer size in bytes (default: 10240 = 10KB). Set to 0 to disable validation. */
+    minScreenshotBufferSize?: number;
+    /**
+     * Scrcpy screenshot configuration for high-performance screen capture.
+     *
+     * Scrcpy provides 6-8x faster screenshots by streaming H.264 video from the device.
+     * When enabled, scrcpy will:
+     * 1. Start a video stream from the device on first screenshot request
+     * 2. Keep the connection alive for subsequent screenshots (16-50ms each)
+     * 3. Automatically disconnect after idle timeout to save resources
+     * 4. Fallback to standard ADB mode if unavailable
+     *
+     * @example
+     * ```typescript
+     * // Enable scrcpy for high-performance screenshots
+     * const device = new AndroidDevice(deviceId, {
+     *   scrcpyConfig: {
+     *     enabled: true,
+     *   },
+     * });
+     *
+     * // Custom configuration
+     * const device = new AndroidDevice(deviceId, {
+     *   scrcpyConfig: {
+     *     enabled: true,
+     *     maxSize: 0,        // 0 = no scaling
+     *     idleTimeoutMs: 30000,
+     *     videoBitRate: 8_000_000,
+     *   },
+     * });
+     * ```
+     */
+    scrcpyConfig?: {
+        /**
+         * Enable scrcpy for high-performance screenshots.
+         * @default false
+         */
+        enabled?: boolean;
+        /**
+         * Maximum video dimension (width or height).
+         * Video stream will be scaled down if device resolution exceeds this value.
+         * Lower values reduce bandwidth but may affect image quality.
+         *
+         * @default 0 (no scaling, use original resolution)
+         * @example
+         * { maxSize: 1024 } // Always scale to 1024
+         */
+        maxSize?: number;
+        /**
+         * Idle timeout in milliseconds before disconnecting scrcpy.
+         * Connection auto-closes after this period of inactivity to save resources.
+         * Set to 0 to disable auto-disconnect.
+         * @default 30000 (30 seconds)
+         */
+        idleTimeoutMs?: number;
+        /**
+         * Video bit rate for H.264 encoding in bits per second.
+         * Higher values improve quality but increase bandwidth usage.
+         * @default 2000000 (2 Mbps)
+         */
+        videoBitRate?: number;
+    };
+} & AndroidDeviceInputOpt;
+/**
+ * Android MCP Server
+ * Provides MCP tools for Android automation through ADB
+ * Concrete subclass of BaseMCPServer that supplies the abstract createToolsManager().
+ */
+export declare class AndroidMCPServer extends BaseMCPServer {
+    constructor(toolsManager?: AndroidMidsceneTools); // takes no config argument, unlike the BaseMCPServer constructor
+    protected createToolsManager(): AndroidMidsceneTools; // return type narrowed from IMidsceneTools
+}
+/**
+ * Android-specific tools manager
+ * Extends BaseMidsceneTools to provide Android ADB device connection tools
+ * Binds the base class's `TAgent` type parameter to AndroidAgent.
+ */
+declare class AndroidMidsceneTools extends BaseMidsceneTools<AndroidAgent> {
+    protected createTemporaryDevice(): AndroidDevice; // return type narrowed from BaseDevice
+    protected ensureAgent(deviceId?: string): Promise<AndroidAgent>; // the base class's generic `initParam` is named `deviceId` here
+    /**
+     * Provide Android-specific platform tools
+     */
+    protected preparePlatformTools(): ToolDefinition[];
+}
+/**
+ * Base agent interface
+ * Represents a platform-specific agent (Android, iOS, Web)
+ * Note: Return types use `unknown` for compatibility with platform-specific implementations
+ * Only `getActionSpace` is required; every other member is optional.
+ * Serves as the constraint for the `TAgent` type parameter of BaseMidsceneTools.
+ */
+declare interface BaseAgent {
+    getActionSpace(): Promise<ActionSpaceItem[]>;
+    destroy?(): Promise<void>;
+    page?: { // only the screenshot method of the page is required by this interface
+        screenshotBase64(): Promise<string>;
+    };
+    aiAction?: (description: string, params?: Record<string, unknown>) => Promise<unknown>;
+    aiWaitFor?: (assertion: string, options: Record<string, unknown>) => Promise<unknown>; // `options` is a required argument here
+}
+/**
+ * Base device interface for temporary device instances
+ * Return type of BaseMidsceneTools.createTemporaryDevice(); `actionSpace()` is synchronous
+ * and `destroy` is optional.
+ */
+declare interface BaseDevice {
+    actionSpace(): ActionSpaceItem[];
+    destroy?(): Promise<void>;
+}
+/**
+ * Base MCP Server class with programmatic launch() API
+ * Each platform extends this to provide their own tools manager
+ * Subclasses must implement the abstract createToolsManager(); AndroidMCPServer is
+ * the subclass visible in this file.
+ */
+declare abstract class BaseMCPServer {
+    protected mcpServer: McpServer;
+    protected toolsManager?: IMidsceneTools;
+    protected config: BaseMCPServerConfig;
+    protected providedToolsManager?: IMidsceneTools; // presumably the manager passed to the constructor, if any — confirm
+    constructor(config: BaseMCPServerConfig, toolsManager?: IMidsceneTools);
+    /**
+     * Platform-specific: create tools manager instance
+     * This is only called if no tools manager was provided in constructor
+     */
+    protected abstract createToolsManager(): IMidsceneTools;
+    /**
+     * Initialize tools manager and attach to MCP server
+     */
+    private initializeToolsManager;
+    /**
+     * Perform cleanup on shutdown
+     */
+    private performCleanup;
+    /**
+     * Initialize and launch the MCP server with stdio transport
+     */
+    launch(): Promise<LaunchMCPServerResult_2>; // LaunchMCPServerResult_2 is declared outside this excerpt
+    /**
+     * Launch MCP server with HTTP transport
+     * Supports stateful sessions for web applications and service integration
+     */
+    launchHttp(options: HttpLaunchOptions): Promise<LaunchMCPServerResult_2>; // HttpLaunchOptions is declared outside this excerpt
+    /**
+     * Create a new HTTP session with transport
+     */
+    private createHttpSession;
+    /**
+     * Start periodic session cleanup for inactive sessions
+     */
+    private startSessionCleanup;
+    /**
+     * Setup shutdown handlers for HTTP server
+     */
+    private setupHttpShutdownHandlers;
+    /**
+     * Get the underlying MCP server instance
+     */
+    getServer(): McpServer;
+    /**
+     * Get the tools manager instance
+     * May be `undefined`, per the return type.
+     */
+    getToolsManager(): IMidsceneTools | undefined;
+}
+declare interface BaseMCPServerConfig { // First argument of the BaseMCPServer constructor; all three fields are required.
+    name: string;
+    version: string;
+    description: string;
+}
+/**
+ * Base class for platform-specific MCP tools
+ * Generic type TAgent allows subclasses to use their specific agent types
+ */
+declare abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseAgent> implements IMidsceneTools {
+    protected mcpServer?: McpServer; // optional; presumably assigned by attachToServer() — confirm in implementation
+    protected agent?: TAgent; // optional; can be supplied through setAgent()
+    protected toolDefinitions: ToolDefinition[]; // presumably the list returned by getToolDefinitions() — confirm
+    /**
+     * Ensure agent is initialized and ready for use.
+     * Must be implemented by subclasses to create platform-specific agent.
+     * @param initParam Optional initialization parameter (platform-specific, e.g., URL, device ID)
+     * @returns Promise resolving to initialized agent instance
+     * @throws Error if agent initialization fails
+     */
+    protected abstract ensureAgent(initParam?: string): Promise<TAgent>;
+    /**
+     * Optional: prepare platform-specific tools (e.g., device connection)
+     */
+    protected preparePlatformTools(): ToolDefinition[]; // not abstract, so subclasses may leave it un-overridden
+    /**
+     * Must be implemented by subclasses to create a temporary device instance
+     * This allows getting real actionSpace without connecting to device
+     */
+    protected abstract createTemporaryDevice(): BaseDevice;
+    /**
+     * Initialize all tools by querying actionSpace
+     * Uses two-layer fallback strategy:
+     * 1. Try to get actionSpace from connected agent (if available)
+     * 2. Create temporary device instance to read actionSpace (always succeeds)
+     */
+    initTools(): Promise<void>;
+    /**
+     * Attach to MCP server and register all tools
+     */
+    attachToServer(server: McpServer): void;
+    /**
+     * Cleanup method - destroy agent and release resources
+     */
+    destroy(): Promise<void>; // required here although IMidsceneTools declares destroy as optional
+    /**
+     * Get tool definitions
+     */
+    getToolDefinitions(): ToolDefinition[];
+    /**
+     * Set agent for the tools manager
+     */
+    setAgent(agent: TAgent): void;
+    /**
+     * Helper: Convert base64 screenshot to image content array
+     */
+    protected buildScreenshotContent(screenshot: string): {
+        type: "image";
+        data: string;
+        mimeType: string;
+    }[];
+    /**
+     * Helper: Build a simple text result for tool responses
+     */
+    protected buildTextResult(text: string): {
+        content: {
+            type: "text";
+            text: string;
+        }[];
+    };
+    /**
+     * Create a disconnect handler for releasing platform resources
+     * @param platformName Human-readable platform name for the response message
+     * @returns Handler function that destroys the agent and returns appropriate response
+     */
+    protected createDisconnectHandler(platformName: string): () => Promise<{
+        content: {
+            type: "text";
+            text: string;
+        }[];
+    }>;
+}
+declare type Cache_2 = false | true | CacheConfig; // i.e. boolean | CacheConfig: a plain on/off flag or an explicit config object
+/**
+ * Object form of the cache setting (the non-boolean arm of Cache_2): a required id plus an optional strategy.
+ */
+declare type CacheConfig = {
+    strategy?: 'read-only' | 'read-write' | 'write-only'; // optional; the default applied when omitted is not visible in this declaration
+    id: string;
+};
+declare type CacheFileContent = { // shape held in TaskCache.cache and returned by TaskCache.loadCacheFromFile()
+    midsceneVersion: string;
+    cacheId: string;
+    caches: Array<PlanningCache | LocateCache>; // mixed list; entries are told apart by their `type` field ('plan' | 'locate')
+};
+declare type DeepThinkOption = 'unset' | true | false; // tri-state: the literal string 'unset' or a boolean
+declare interface DetailedLocateParam extends Omit<LocateOption, 'deepThink' | keyof TMultimodalPrompt> { // LocateOption minus deepThink, images and convertHttpImage2Base64
+    prompt: TUserPrompt; // required here, whereas LocateOption.prompt is optional
+}
+declare interface DeviceAction<TParam = any, TReturn = any> { // one callable action; both type parameters default to any
+    name: string;
+    description?: string;
+    interfaceAlias?: string;
+    paramSchema?: z.ZodType<TParam>; // optional Zod schema typed to the same TParam that `call` receives
+    call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn; // may be sync or async
+    delayAfterRunner?: number; // NOTE(review): unit is not stated in this declaration — presumably milliseconds, confirm
+    /**
+     * An example param object for this action.
+     * Locate fields with { prompt } will automatically get bbox injected when needed.
+     */
+    sample?: {
+        [K in keyof TParam]?: any;
+    };
+}
+declare type DeviceActionAndroidBackButton = DeviceAction<undefined, void>; // action whose param type is undefined and whose result is void
+declare type DeviceActionAndroidHomeButton = DeviceAction<undefined, void>; // same shape as the back-button action: no param, no result
+declare type DeviceActionAndroidRecentAppsButton = DeviceAction<undefined, void>; // same shape as the back-button action: no param, no result
+declare interface DumpMeta { // common base extended by IExecutionDump and ServiceDump
+    logTime: number; // NOTE(review): presumably an epoch timestamp; unit not shown here — confirm
+}
+declare type ElementCacheFeature = Record<string, unknown>; // opaque string-keyed bag; values are unknown and must be narrowed before use
+declare interface ElementInfo { // description of one UI element: identity, type, text content and geometry
+    id: string;
+    indexId: number;
+    nodeHashId: string;
+    xpaths?: string[];
+    attributes: {
+        nodeType: NodeType; // also present as the top-level `nodeType` field below
+        [key: string]: string; // every other attribute value is a string
+    };
+    nodeType: NodeType;
+    content: string;
+    rect: {
+        left: number;
+        top: number;
+        width: number;
+        height: number;
+    };
+    center: [number, number]; // two-number tuple — presumably [x, y], confirm against producer
+    isVisible: boolean;
+}
+/**
+ * ExecutionDump class for serializing and deserializing execution dumps
+ */
+declare class ExecutionDump implements IExecutionDump {
+    id?: string; // optional; IExecutionDump describes it as a stable unique identifier for the run
+    logTime: number; // required through IExecutionDump's base, DumpMeta
+    name: string;
+    description?: string;
+    tasks: ExecutionTask[];
+    aiActContext?: string;
+    constructor(data: IExecutionDump);
+    /**
+     * Serialize the ExecutionDump to a JSON string
+     */
+    serialize(indents?: number): string;
+    /**
+     * Convert to a plain object for JSON serialization
+     */
+    toJSON(): IExecutionDump;
+    /**
+     * Create an ExecutionDump instance from a serialized JSON string
+     */
+    static fromSerializedString(serialized: string): ExecutionDump;
+    /**
+     * Create an ExecutionDump instance from a plain object
+     */
+    static fromJSON(data: IExecutionDump): ExecutionDump;
+    /**
+     * Collect all ScreenshotItem instances from tasks.
+     * Scans through uiContext and recorder items to find screenshots.
+     *
+     * @returns Array of ScreenshotItem instances
+     */
+    collectScreenshots(): ScreenshotItem[];
+}
+declare interface ExecutionRecorderItem { // one entry of ExecutionTaskReturn.recorder
+    type: 'screenshot'; // the only literal currently allowed
+    ts: number; // NOTE(review): presumably a timestamp; unit not shown here — confirm
+    screenshot?: ScreenshotItem;
+    timing?: string; // NOTE(review): a string, not a number — presumably a label for when the capture happened; confirm
+}
+declare interface ExecutionResult<OutputType = any> { // value resolved by TaskExecutor's runPlans / action / waitFor / createTypeQueryExecution
+    output: OutputType;
+    thought?: string;
+    runner: TaskRunner; // the runner that produced this result
+}
+declare type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & { // E, plus its return fields (output/log types inferred from E), plus the bookkeeping fields below
+    taskId: string;
+    status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
+    error?: Error;
+    errorMessage?: string;
+    errorStack?: string;
+    timing?: { // all values are numbers; only `start` is required once `timing` is present
+        start: number;
+        getUiContextStart?: number;
+        getUiContextEnd?: number;
+        callAiStart?: number;
+        callAiEnd?: number;
+        beforeInvokeActionHookStart?: number;
+        beforeInvokeActionHookEnd?: number;
+        callActionStart?: number;
+        callActionEnd?: number;
+        afterInvokeActionHookStart?: number;
+        afterInvokeActionHookEnd?: number;
+        captureAfterCallingSnapshotStart?: number;
+        captureAfterCallingSnapshotEnd?: number;
+        end?: number;
+        cost?: number; // NOTE(review): presumably end - start; not derivable from this declaration — confirm
+    };
+    usage?: AIUsageInfo;
+    searchAreaUsage?: AIUsageInfo;
+    reasoning_content?: string;
+};
+declare interface ExecutionTaskApply<Type extends ExecutionTaskType = any, TaskParam = any, TaskOutput = any, TaskLog = any> { // task description as submitted to a TaskRunner
+    type: Type;
+    subType?: string;
+    param?: TaskParam;
+    thought?: string;
+    uiContext?: UIContext;
+    executor: (param: TaskParam, context: ExecutorContext) => Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void> | undefined | void; // may be sync or async, and may return nothing
+}
+declare interface ExecutionTaskHitBy { // type of ExecutionTaskReturn.hitBy
+    from: string; // NOTE(review): presumably names the source that satisfied the task (e.g. a cache) — confirm
+    context: Record<string, any>; // free-form payload; values are untyped
+}
+declare interface ExecutionTaskProgressOptions { // progress hooks; mixed into TaskRunnerInitOptions
+    onTaskStart?: (task: ExecutionTask) => Promise<void> | void; // optional callback, sync or async
+}
+declare interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> { // what an executor may return; every field is optional
+    output?: TaskOutput;
+    log?: TaskLog;
+    recorder?: ExecutionRecorderItem[];
+    hitBy?: ExecutionTaskHitBy;
+}
+declare type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log'; // four task categories; note that 'Action Space' contains a space
+declare interface ExecutorContext { // second argument passed to DeviceAction.call and ExecutionTaskApply.executor
+    task: ExecutionTask;
+    element?: LocateResultElement | null; // may be absent or explicitly null
+    uiContext?: UIContext;
+}
+declare interface FileChooserHandler { // async handler that is given a list of files to accept
+    accept(files: string[]): Promise<void>; // NOTE(review): entries are presumably file paths — confirm
+}
+/**
+ * GroupedActionDump class for serializing and deserializing grouped action dumps
+ */
+declare class GroupedActionDump implements IGroupedActionDump {
+    sdkVersion: string;
+    groupName: string;
+    groupDescription?: string;
+    modelBriefs: ModelBrief[];
+    executions: ExecutionDump[]; // class instances here, whereas IGroupedActionDump.executions is typed as plain IExecutionDump[]
+    deviceType?: string;
+    constructor(data: IGroupedActionDump);
+    /**
+     * Serialize the GroupedActionDump to a JSON string
+     * Uses compact { $screenshot: id } format
+     */
+    serialize(indents?: number): string;
+    /**
+     * Serialize the GroupedActionDump with inline screenshots to a JSON string.
+     * Each ScreenshotItem is replaced with { base64: "...", capturedAt }.
+     */
+    serializeWithInlineScreenshots(indents?: number): string;
+    /**
+     * Convert to a plain object for JSON serialization
+     */
+    toJSON(): IGroupedActionDump;
+    /**
+     * Create a GroupedActionDump instance from a serialized JSON string
+     */
+    static fromSerializedString(serialized: string): GroupedActionDump;
+    /**
+     * Create a GroupedActionDump instance from a plain object
+     */
+    static fromJSON(data: IGroupedActionDump): GroupedActionDump;
+    /**
+     * Collect all ScreenshotItem instances from all executions.
+     *
+     * @returns Array of all ScreenshotItem instances across all executions
+     */
+    collectAllScreenshots(): ScreenshotItem[];
+    /**
+     * Serialize the dump to files with screenshots as separate PNG files.
+     * Creates:
+     * - {basePath} - dump JSON with { $screenshot: id } references
+     * - {basePath}.screenshots/ - PNG files
+     * - {basePath}.screenshots.json - ID to path mapping
+     *
+     * @param basePath - Base path for the dump file
+     */
+    serializeToFiles(basePath: string): void; // typed void rather than Promise, i.e. declared as a synchronous call
+    /**
+     * Read dump from files and return JSON string with inline screenshots.
+     * Reads the dump JSON and screenshot files, then inlines the base64 data.
+     *
+     * @param basePath - Base path for the dump file
+     * @returns JSON string with inline screenshots ({ base64: "..." } format)
+     */
+    static fromFilesAsInlineJson(basePath: string): string; // returns the JSON text itself, not a GroupedActionDump instance
+    /**
+     * Clean up all files associated with a serialized dump.
+     *
+     * @param basePath - Base path for the dump file
+     */
+    static cleanupFiles(basePath: string): void;
+    /**
+     * Get all file paths associated with a serialized dump.
+     *
+     * @param basePath - Base path for the dump file
+     * @returns Array of all associated file paths
+     */
+    static getFilePaths(basePath: string): string[];
+}
+declare interface HttpLaunchOptions { // argument of BaseMCPServer.launchHttp()
+    port: number; // required
+    host?: string; // optional; the default used when omitted is not visible in this declaration
+}
+declare interface IExecutionDump extends DumpMeta { // plain-object form of ExecutionDump; inherits the required logTime from DumpMeta
+    /** Stable unique identifier for this execution run */
+    id?: string;
+    name: string;
+    description?: string;
+    tasks: ExecutionTask[];
+    aiActContext?: string;
+}
+declare interface IGroupedActionDump { // plain-object form of GroupedActionDump (constructor input and toJSON() output)
+    sdkVersion: string;
+    groupName: string;
+    groupDescription?: string;
+    modelBriefs: ModelBrief[];
+    executions: IExecutionDump[];
+    deviceType?: string;
+}
+/**
+ * Interface for platform-specific MCP tools manager
+ */
+declare interface IMidsceneTools {
+    attachToServer(server: McpServer): void;
+    initTools(): Promise<void>;
+    destroy?(): Promise<void>; // optional in the interface; BaseMidsceneTools declares it as required
+}
+declare type InterfaceType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android' | string; // the trailing `| string` widens this to any string; the literals only document known values
+declare interface LaunchMCPServerResult_2 { // NOTE(review): the `_2` suffix presumably comes from the declaration bundler avoiding a clash with the imported LaunchMCPServerResult — confirm
+    /**
+     * The MCP server port (for HTTP mode)
+     */
+    port?: number;
+    /**
+     * The server host (for HTTP mode)
+     */
+    host?: string;
+    /**
+     * Function to gracefully shutdown the MCP server
+     */
+    close: () => Promise<void>; // the only required member
+}
+declare interface LocateCache { // cache record for a locate; its counterpart PlanningCache uses type 'plan'
+    type: 'locate'; // discriminant
+    prompt: TUserPrompt;
+    cache?: ElementCacheFeature;
+    /** @deprecated kept for backward compatibility */
+    xpaths?: string[];
+}
+declare interface LocateOption extends Partial<TMultimodalPrompt> { // every field is optional, including the inherited images / convertHttpImage2Base64
+    prompt?: TUserPrompt;
+    deepLocate?: boolean;
+    /** @deprecated Use `deepLocate` instead. Kept for backward compatibility. */
+    deepThink?: boolean;
+    cacheable?: boolean;
+    xpath?: string;
+    uiContext?: UIContext;
+    fileChooserAccept?: string | string[]; // a single string or a list of strings
+}
+declare interface LocateOpts { // second argument of Service.locate(); the argument is required but both fields are optional
+    context?: UIContext;
+    planLocatedElement?: LocateResultElement;
+}
+declare interface LocateResult { // base of LocateResultWithDump
+    element: LocateResultElement | null; // always present, but may be null
+    rect?: Rect;
+}
+declare type LocateResultWithDump = LocateResult & ServiceResultBase; // a LocateResult that also carries `dump: ServiceDump`; resolved by Service.locate()
+declare interface LocateValidatorResult { // outcome of validating a located element
+    pass: boolean;
+    rect: Rect;
+    center: [number, number]; // two-number tuple — presumably [x, y], confirm against producer
+    centerDistance?: number; // optional; presumably compared against LocatorValidatorOption.centerDistanceThreshold — confirm
+}
+declare interface LocatorValidatorOption { // options for the locate validator
+    centerDistanceThreshold?: number; // optional; unit and default are not visible in this declaration
+}
+declare interface MatchCacheResult<T extends PlanningCache | LocateCache> { // returned by TaskCache.matchCache / matchPlanCache / matchLocateCache
+    cacheContent: T;
+    cacheUsable: boolean;
+    updateFn: (cb: (cache: T) => void) => void; // takes a callback that is handed the cached record of type T
+}
+/**
+ * Create MCP kit for a specific Android Agent; resolves to a description string plus the tool definitions.
+ */
+export declare function mcpKitForAgent(agent: Agent | AndroidAgent): Promise<{
+    description: string;
+    tools: Tool[]; // Tool is an alias of ToolDefinition
+}>;
+/**
+ * Create an MCP server launcher for a specific Android Agent; the launcher object is returned synchronously.
+ */
+export declare function mcpServerForAgent(agent: Agent | AndroidAgent): {
+    launch(options?: {
+        verbose?: boolean;
+    }): Promise<LaunchMCPServerResult>; // uses the imported LaunchMCPServerResult, not the local LaunchMCPServerResult_2
+    launchHttp(options: LaunchMCPServerOptions): Promise<LaunchMCPServerResult>; // takes the imported LaunchMCPServerOptions, not the local HttpLaunchOptions
+};
+declare type MidsceneYamlFlowItem = MidsceneYamlFlowItemAIAction | MidsceneYamlFlowItemAIAssert | MidsceneYamlFlowItemAIWaitFor | MidsceneYamlFlowItemEvaluateJavaScript | MidsceneYamlFlowItemSleep | MidsceneYamlFlowItemLogScreenshot; // union of the six flow-step shapes declared below
+declare interface MidsceneYamlFlowItemAIAction { // all named fields are optional and extra keys are allowed, so any object satisfies this type
+    aiAction?: string; // NOTE(review): aiAction / ai / aiAct are presumably alternative spellings of the same instruction — confirm
+    ai?: string;
+    aiAct?: string;
+    aiActionProgressTips?: string[];
+    cacheable?: boolean;
+    [key: string]: unknown; // accepts arbitrary additional keys
+}
+declare interface MidsceneYamlFlowItemAIAssert extends ServiceExtractOption { // inherits domIncluded / screenshotIncluded and the open index signature
+    aiAssert: string; // required
+    errorMessage?: string;
+    name?: string;
+}
+declare interface MidsceneYamlFlowItemAIWaitFor extends ServiceExtractOption { // inherits domIncluded / screenshotIncluded and the open index signature
+    aiWaitFor: string; // required
+    timeout?: number; // optional; unit not stated here — presumably milliseconds, confirm
+}
+declare interface MidsceneYamlFlowItemEvaluateJavaScript { // flow step carrying a JavaScript source string
+    javascript: string; // required
+    name?: string;
+}
+declare interface MidsceneYamlFlowItemLogScreenshot { // every field is an optional string
+    logScreenshot?: string; // NOTE(review): logScreenshot and recordToReport are presumably alternative keys for the same step — confirm
+    recordToReport?: string;
+    content?: string;
+}
+declare interface MidsceneYamlFlowItemSleep { // flow step with a single required duration
+    sleep: number; // unit not stated here — presumably milliseconds, confirm
+}
+declare interface ModelBrief { // element type of IGroupedActionDump.modelBriefs; every field is optional
+    /**
+     * The intent/category of the model call, for example "planning" or "insight".
+     */
+    intent?: string;
+    /**
+     * The model name returned by usage metadata, for example "gpt-4o".
+     */
+    name?: string;
+    /**
+     * Optional human-readable model description, for example "qwen2.5-vl mode".
+     */
+    modelDescription?: string;
+}
+declare enum NodeType { // string enum; values follow "<MEMBER> Node" except A, whose value is "Anchor Node"
+    CONTAINER = "CONTAINER Node",
+    FORM_ITEM = "FORM_ITEM Node",
+    BUTTON = "BUTTON Node",
+    A = "Anchor Node",
+    IMG = "IMG Node",
+    TEXT = "TEXT Node",
+    POSITION = "POSITION Node"
+}
+/**
+ * Callback that receives a tip string; it may be synchronous or return a Promise.
+ */
+declare type OnTaskStartTip = (tip: string) => Promise<void> | void;
+declare interface PlanningAction<ParamType = any> { // one planned step, as consumed by TaskExecutor.convertPlanToExecutable / runPlans
+    thought?: string;
+    log?: string;
+    type: string; // required
+    param: ParamType; // required; typed any unless ParamType is specified
+}
+declare type PlanningActionParamWaitFor = AgentWaitForOpt & {}; // the `& {}` adds no members; AgentWaitForOpt is declared outside this excerpt
+declare interface PlanningCache { // cache record for a plan; its counterpart LocateCache uses type 'locate'
+    type: 'plan'; // discriminant
+    prompt: string; // plain string here, unlike LocateCache.prompt which is TUserPrompt
+    yamlWorkflow: string;
+}
+/**
+ * Locate parameter used during planning: a DetailedLocateParam with an optional bounding box.
+ * Accepted as the `query` argument of Service.locate().
+ */
+declare interface PlanningLocateParam extends DetailedLocateParam {
+    bbox?: [number, number, number, number]; // four-number tuple; coordinate order is not stated in this declaration — confirm
+}
+/**
+ * ScreenshotItem encapsulates screenshot data.
+ *
+ * Supports lazy loading after memory release:
+ * - inline mode: reads from HTML file using streaming (extractImageByIdSync)
+ * - directory mode: reads from file on disk
+ *
+ * After persistence, memory is released but the screenshot can be recovered
+ * on-demand from disk, making it safe to release memory at any time.
+ */
+declare class ScreenshotItem {
+    private _id;
+    private _base64;
+    private _format;
+    private _capturedAt;
+    private _persistedAs;
+    private _persistedPath;
+    private _persistedHtmlPath;
+    private constructor(); // private: instances cannot be created with `new` from outside; use ScreenshotItem.create()
+    /** Create a new ScreenshotItem from base64 data */
+    static create(base64: string, capturedAt: number): ScreenshotItem;
+    get id(): string;
+    /** Get the image format (png or jpeg) */
+    get format(): 'png' | 'jpeg';
+    /** Get the file extension for this screenshot */
+    get extension(): string;
+    /** Get screenshot capture timestamp in milliseconds */
+    get capturedAt(): number;
+    get base64(): string; // NOTE(review): per the class comment this may reload from disk after memory release — confirm in implementation
+    /** Check if base64 data is still available in memory (not yet released) */
+    hasBase64(): boolean;
+    /**
+     * Mark as persisted to HTML (inline mode).
+     * Releases base64 memory, but keeps HTML path for lazy loading recovery.
+     * @param htmlPath - absolute path to the HTML file containing the image
+     */
+    markPersistedInline(htmlPath: string): void;
+    /**
+     * Mark as persisted to file (directory mode).
+     * Releases base64 memory, but keeps file path for lazy loading recovery.
+     * @param relativePath - relative path for serialization (e.g., "./screenshots/id.jpeg")
+     * @param absolutePath - absolute path for lazy loading recovery
+     */
+    markPersistedToPath(relativePath: string, absolutePath: string): void;
+    /** Serialize for JSON - format depends on persistence state */
+    toSerializable(): ScreenshotSerializeFormat;
+    /** Check if a value is a serialized ScreenshotItem reference (inline or directory mode) */
+    static isSerialized(value: unknown): value is ScreenshotSerializeFormat; // type guard: narrows `value` when it returns true
+    /**
+     * Get base64 data without the data URI prefix.
+     * Useful for writing raw binary data to files.
+     */
+    get rawBase64(): string;
+}
+/**
+ * Serialization format for ScreenshotItem (both variants also carry a numeric capturedAt)
+ * - { $screenshot: "id" } - inline mode, references imageMap in HTML
+ * - { base64: "path" } - directory mode, references external file path
+ */
+declare type ScreenshotSerializeFormat = {
+    $screenshot: string;
+    capturedAt: number;
+} | {
+    base64: string;
+    capturedAt: number;
+};
+declare type ScrollParam = Omit<ActionScrollParam, 'locate'>; // ActionScrollParam (declared outside this excerpt) without its `locate` key
+declare type ScrollType = 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft' | 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft'; // ten accepted literals; NOTE(review): 'once'/'until*' presumably mirror 'singleAction'/'scrollTo*' — confirm
+declare class Service { // exposes locate / extract / describe, each taking an explicit IModelConfig
+    contextRetrieverFn: () => Promise<UIContext> | UIContext; // may be sync or async
+    taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;
+    constructor(context: UIContext | (() => Promise<UIContext> | UIContext), opt?: ServiceOptions); // accepts either a ready UIContext or a function producing one
+    locate(query: PlanningLocateParam, opt: LocateOpts, modelConfig: IModelConfig, abortSignal?: AbortSignal): Promise<LocateResultWithDump>; // `opt` itself is required even though all its fields are optional
+    extract<T>(dataDemand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, pageDescription?: string, multimodalPrompt?: TMultimodalPrompt, context?: UIContext): Promise<ServiceExtractResult<T>>; // T is chosen by the caller; nothing in this signature validates it
+    describe(target: Rect | [number, number], modelConfig: IModelConfig, opt?: {
+        deepLocate?: boolean;
+    }): Promise<Pick<AIDescribeElementResponse, 'description'>>; // resolves to only the `description` field
+}
+declare type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe'; // four literals; ServiceDump.type accepts only the first three (no 'describe')
+declare interface ServiceDump extends DumpMeta { // record attached to service results via ServiceResultBase.dump; inherits logTime
+    type: 'locate' | 'extract' | 'assert';
+    logId: string;
+    userQuery: { // all three members are optional; presumably one is set depending on `type` — confirm
+        element?: TUserPrompt;
+        dataDemand?: ServiceExtractParam;
+        assertion?: TUserPrompt;
+    };
+    matchedElement: LocateResultElement[]; // required array
+    matchedRect?: Rect;
+    deepLocate?: boolean;
+    data: any; // required but untyped
+    assertionPass?: boolean;
+    assertionThought?: string;
+    taskInfo: ServiceTaskInfo;
+    error?: string;
+    output?: any;
+}
+declare interface ServiceExtractOption { // options for Service.extract(); also the base of the AIAssert / AIWaitFor flow items
+    domIncluded?: boolean | 'visible-only'; // a boolean or the literal 'visible-only'
+    screenshotIncluded?: boolean;
+    [key: string]: unknown; // accepts arbitrary additional keys
+}
+declare type ServiceExtractParam = string | Record<string, string>; // either a single string or a string-to-string map
+declare interface ServiceExtractResult<T> extends ServiceResultBase { // resolved by Service.extract(); inherits the required `dump`
+    data: T;
+    thought?: string;
+    usage?: AIUsageInfo;
+    reasoning_content?: string;
+}
+declare interface ServiceOptions { // optional second argument of the Service constructor
+    taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>; // same type as the Service.taskInfo property
+}
+declare interface ServiceResultBase { // shared base of LocateResultWithDump and ServiceExtractResult
+    dump: ServiceDump; // required
+}
+declare interface ServiceTaskInfo { // type of ServiceDump.taskInfo; only durationMs is required
+    durationMs: number; // omitted from the Service.taskInfo / ServiceOptions.taskInfo variants via Omit
+    formatResponse?: string;
+    rawResponse?: string;
+    usage?: AIUsageInfo;
+    searchArea?: Rect;
+    searchAreaRawResponse?: string;
+    searchAreaUsage?: AIUsageInfo;
+    reasoning_content?: string;
+}
+declare class TaskCache { // cache of plan/locate records with match, append and file load/flush operations
+    cacheId: string;
+    cacheFilePath?: string;
+    cache: CacheFileContent;
+    isCacheResultUsed: boolean;
+    cacheOriginalLength: number;
+    readOnlyMode: boolean; // NOTE(review): presumably derived from options.readOnly — confirm in implementation
+    writeOnlyMode: boolean; // NOTE(review): presumably derived from options.writeOnly — confirm in implementation
+    private matchedCacheIndices;
+    constructor(cacheId: string, isCacheResultUsed: boolean, cacheFilePath?: string, options?: {
+        readOnly?: boolean;
+        writeOnly?: boolean;
+    });
+    matchCache(prompt: TUserPrompt, type: 'plan' | 'locate'): MatchCacheResult<PlanningCache | LocateCache> | undefined; // undefined when nothing matches
+    matchPlanCache(prompt: string): MatchCacheResult<PlanningCache> | undefined;
+    matchLocateCache(prompt: TUserPrompt): MatchCacheResult<LocateCache> | undefined;
+    appendCache(cache: PlanningCache | LocateCache): void;
+    loadCacheFromFile(): CacheFileContent | undefined; // typed as a synchronous call; may yield undefined
+    flushCacheToFile(options?: {
+        cleanUnused?: boolean;
+    }): void;
+    updateOrAppendCacheRecord(newRecord: PlanningCache | LocateCache, cachedRecord?: MatchCacheResult<PlanningCache | LocateCache>): void;
+}
+declare class TaskExecutionError extends Error { // Error subclass that carries the runner and the offending task
+    runner: TaskRunner;
+    errorTask: ExecutionTask | null; // may be null
+    constructor(message: string, runner: TaskRunner, errorTask: ExecutionTask | null, options?: {
+        cause?: unknown;
+    });
+}
+declare class TaskExecutor { // combines a device interface, a Service and an optional TaskCache to run plans and queries
+    interface: AbstractInterface;
+    service: Service;
+    taskCache?: TaskCache;
+    private readonly providedActionSpace;
+    private readonly taskBuilder;
+    onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
+    private readonly hooks?;
+    replanningCycleLimit?: number;
+    waitAfterAction?: number;
+    useDeviceTimestamp?: boolean;
+    get page(): AbstractInterface; // NOTE(review): same type as `interface`; presumably an alias for it — confirm
+    constructor(interfaceInstance: AbstractInterface, service: Service, opts: {
+        taskCache?: TaskCache;
+        onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
+        replanningCycleLimit?: number;
+        waitAfterAction?: number;
+        useDeviceTimestamp?: boolean;
+        hooks?: TaskExecutorHooks;
+        actionSpace: DeviceAction[]; // the only required option
+    });
+    private createExecutionSession;
+    private getActionSpace;
+    /**
+     * Get a readable time string using device time when configured.
+     * This method respects the useDeviceTimestamp configuration.
+     * @param format - Optional format string
+     * @returns A formatted time string
+     */
+    private getTimeString;
+    convertPlanToExecutable(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: {
+        cacheable?: boolean;
+        deepLocate?: boolean;
+        abortSignal?: AbortSignal;
+    }): Promise<{
+        tasks: ExecutionTaskApply[];
+    }>;
+    loadYamlFlowAsPlanning(userInstruction: string, yamlString: string): Promise<{
+        runner: TaskRunner;
+    }>;
+    runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
+    action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: DeepThinkOption, fileChooserAccept?: string[], deepLocate?: boolean, abortSignal?: AbortSignal): Promise<ExecutionResult<{ // 12 positional parameters (first 4 required); later ones need explicit undefined placeholders
+        yamlFlow?: MidsceneYamlFlowItem[];
+        output?: string;
+    } | undefined>>;
+    private runAction;
+    private createTypeQueryTask;
+    createTypeQueryExecution<T>(type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert', demand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<ExecutionResult<T>>;
+    waitFor(assertion: TUserPrompt, opt: PlanningActionParamWaitFor, modelConfig: IModelConfig): Promise<ExecutionResult<void>>;
+}
+declare interface TaskExecutorHooks { // passed as opts.hooks to the TaskExecutor constructor
+    onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void; // optional, sync or async; `error` is itself optional
+}
+declare class TaskRunner { // holds a named list of tasks with append / flush / dump operations
+    readonly id: string;
+    name: string;
+    tasks: ExecutionTask[];
+    status: 'init' | 'pending' | 'running' | 'completed' | 'error'; // runner-level status; a different literal set from ExecutionTask.status
+    onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
+    private readonly uiContextBuilder;
+    private readonly onTaskUpdate?;
+    private readonly executionLogTime;
+    constructor(name: string, uiContextBuilder: () => Promise<UIContext>, options?: TaskRunnerInitOptions); // uiContextBuilder must be async here, unlike Service's context function
+    private emitOnTaskUpdate;
+    private lastUiContext?;
+    private getUiContext;
+    private captureScreenshot;
+    private attachRecorderItem;
+    private markTaskAsPending;
+    private normalizeStatusFromError;
+    append(task: ExecutionTaskApply[] | ExecutionTaskApply, options?: TaskRunnerOperationOptions): Promise<void>; // accepts one task or an array
+    appendAndFlush(task: ExecutionTaskApply[] | ExecutionTaskApply, options?: TaskRunnerOperationOptions): Promise<{
+        output: any;
+        thought?: string;
+    } | undefined>;
+    flush(options?: TaskRunnerOperationOptions): Promise<{
+        output: any;
+        thought?: string;
+    } | undefined>; // may resolve to undefined
+    isInErrorState(): boolean;
+    latestErrorTask(): ExecutionTask | null; // may return null
+    dump(): ExecutionDump;
+    appendErrorPlan(errorMsg: string): Promise<{
+        output: undefined;
+        runner: TaskRunner;
+    }>;
+}
+declare type TaskRunnerInitOptions = ExecutionTaskProgressOptions & { // onTaskStart (from ExecutionTaskProgressOptions) plus the fields below; all optional
+    tasks?: ExecutionTaskApply[];
+    onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void; // same signature as TaskExecutorHooks.onTaskUpdate
+};
+declare type TaskRunnerOperationOptions = { // options for TaskRunner.append / appendAndFlush / flush
+    allowWhenError?: boolean; // NOTE(review): presumably permits the operation while the runner is in its error state — confirm
+};
+declare type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>; // inferred as { images?: { name: string; url: string }[]; convertHttpImage2Base64?: boolean }
+declare const TMultimodalPromptSchema: z.ZodObject<{ // Zod object schema with two optional keys: images and convertHttpImage2Base64
+    images: z.ZodOptional<z.ZodArray<z.ZodObject<{
+        name: z.ZodString;
+        url: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        name: string;
+        url: string;
+    }, {
+        name: string;
+        url: string;
+    }>, "many">>;
+    convertHttpImage2Base64: z.ZodOptional<z.ZodBoolean>;
+}, "strip", z.ZodTypeAny, { // "strip" is the schema's unknown-keys type parameter; the next two object types are the output and input shapes
+    images?: {
+        name: string;
+        url: string;
+    }[] | undefined;
+    convertHttpImage2Base64?: boolean | undefined;
+}, {
+    images?: {
+        name: string;
+        url: string;
+    }[] | undefined;
+    convertHttpImage2Base64?: boolean | undefined;
+}>;
+/**
+ * Tool type for mcpKitForAgent return value; a plain alias of ToolDefinition with its default type argument.
+ */
+declare type Tool = ToolDefinition;
+/**
+ * Tool definition for MCP server
+ */
+declare interface ToolDefinition<T = Record<string, unknown>> {
+    name: string;
+    description: string;
+    schema: ToolSchema; // map of argument name to Zod type
+    handler: ToolHandler<T>; // receives arguments typed as T
+}
+/**
+ * Tool handler function type
+ * Takes parsed arguments and returns a Promise of a tool result (always async)
+ */
+declare type ToolHandler<T = Record<string, unknown>> = (args: T) => Promise<ToolResult>;
+/**
+ * Result type for tool execution (MCP compatible)
+ */
+declare interface ToolResult {
+    [x: string]: unknown; // accepts arbitrary additional keys
+    content: ToolResultContent[]; // the only required member
+    isError?: boolean;
+    _meta?: Record<string, unknown>;
+}
+/**
+ * Content item types for tool results (MCP compatible).
+ * Four variants discriminated by `type`: text, image, audio, resource.
+ */
+declare type ToolResultContent = {
+    type: 'text';
+    text: string;
+} | {
+    type: 'image';
+    data: string;
+    mimeType: string;
+} | {
+    type: 'audio';
+    data: string;
+    mimeType: string;
+} | {
+    type: 'resource';
+    resource: { // either a text resource (text + uri) or a blob resource (uri + blob)
+        text: string;
+        uri: string;
+        mimeType?: string;
+    } | {
+        uri: string;
+        blob: string;
+        mimeType?: string;
+    };
+};
+/**
+ * Tool schema type using Zod: a map from argument name to any Zod type.
+ */
+declare type ToolSchema = Record<string, z.ZodTypeAny>;
+declare type TUserPrompt = z.infer<typeof TUserPromptSchema>; // either a plain string, or an object with required `prompt` plus optional images / convertHttpImage2Base64
+declare const TUserPromptSchema: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{ // union of: a string, or ({ prompt } intersected with the optional multimodal fields)
+    prompt: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    prompt: string;
+}, {
+    prompt: string;
+}>, z.ZodObject<{
+    images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{ // NOTE(review): doubly-wrapped ZodOptional — presumably an already-optional schema made optional again; the inferred type is unchanged
+        name: z.ZodString;
+        url: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        name: string;
+        url: string;
+    }, {
+        name: string;
+        url: string;
+    }>, "many">>>;
+    convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
+}, "strip", z.ZodTypeAny, {
+    images?: {
+        name: string;
+        url: string;
+    }[] | undefined;
+    convertHttpImage2Base64?: boolean | undefined;
+}, {
+    images?: {
+        name: string;
+        url: string;
+    }[] | undefined;
+    convertHttpImage2Base64?: boolean | undefined;
+}>>]>;
+/**
+ * Abstract snapshot of the UI: a screenshot, its size, and the ratio needed to map it back to logical coordinates.
+ */
+declare abstract class UIContext {
+    /**
+     * Screenshot of the current UI state; its size is shotSize (i.e. already shrunk by screenshotShrinkFactor).
+     */
+    abstract screenshot: ScreenshotItem;
+    /**
+     * screenshot size after shrinking
+     */
+    abstract shotSize: Size;
+    /**
+     * The ratio for converting shrunk screenshot coordinates to logical coordinates.
+     *
+     * Example:
+     * - Physical screen width: 3000px, dpr=6
+     * - Logical width: 500px
+     * - User-defined screenshotShrinkFactor: 2
+     * - Actual shrunk screenshot width: 3000 / 2 = 1500px
+     * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3
+     * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px
+     */
+    abstract shrunkShotToLogicalRatio: number;
+    abstract _isFrozen?: boolean; // optional flag; its semantics are not visible in this declaration
+    abstract deprecatedDpr?: number; // optional; NOTE(review): the name suggests a legacy DPR value kept for compatibility — confirm
+}
+/**
+ * Helper type to convert DeviceAction to wrapped method signature: always returns a Promise (ActionArgs / ActionReturn are declared outside this excerpt)
+ */
+declare type WrappedAction<T extends DeviceAction> = (...args: ActionArgs<T>) => Promise<ActionReturn<T>>;
+export { }