@midscene/core 1.9.5-beta-20260611045217.0 → 1.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"device/index.mjs","sources":["../../../src/device/index.ts"],"sourcesContent":["import type { ModelRuntime } from '@/ai-model/models';\nimport { getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport interface FileChooserRegistration {\n dispose: () => void;\n getError: () => Error | undefined | Promise<Error | undefined>;\n}\n\nexport interface MjpegStreamFrame {\n /** Raw base64-encoded image bytes WITHOUT a `data:image/...;base64,` prefix. */\n data: string;\n contentType?: string;\n}\n\nexport interface MjpegStreamHandle {\n stop(): void | Promise<void>;\n}\n\nexport interface MjpegStreamOptions {\n signal?: AbortSignal;\n onFrame(frame: MjpegStreamFrame): void;\n onError?(error: unknown): void;\n}\n\n/** A point in device-pixel coordinates on the screen. */\nexport interface PointerPoint {\n x: number;\n y: number;\n}\n\nexport interface PointerInputPrimitives {\n tap(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n doubleClick?(p: PointerPoint): Promise<void>;\n rightClick?(p: PointerPoint): Promise<void>;\n hover?(p: PointerPoint): Promise<void>;\n longPress?(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n dragAndDrop?(from: PointerPoint, to: PointerPoint): Promise<void>;\n}\n\nexport interface TouchInputPrimitives {\n swipe(\n start: PointerPoint,\n end: PointerPoint,\n opts?: { duration?: number; repeat?: number },\n ): Promise<void>;\n pinch?(\n center: PointerPoint,\n opts: { startDistance: number; endDistance: number; duration: number },\n ): Promise<void>;\n}\n\nexport interface KeyboardInputPrimitives {\n keyboardPress(keyName: string, opts?: { target?: unknown }): Promise<void>;\n cursorMove?(direction: 'left' | 'right', times?: number): Promise<void>;\n typeText(\n value: string,\n opts?: {\n autoDismissKeyboard?: boolean;\n keyboardDismissStrategy?: 'esc-first' | 'back-first';\n target?: unknown;\n replace?: boolean;\n focusOnly?: boolean;\n },\n ): Promise<void>;\n clearInput(target?: unknown): Promise<void>;\n}\n\nexport interface ScrollInputPrimitives {\n scroll(param: ActionScrollParam): Promise<void>;\n}\n\nexport interface SystemInputPrimitives {\n backButton?(): Promise<void>;\n homeButton?(): Promise<void>;\n recentAppsButton?(): Promise<void>;\n}\n\nexport interface InputPrimitives {\n pointer?: PointerInputPrimitives;\n keyboard?: KeyboardInputPrimitives;\n touch?: TouchInputPrimitives;\n scroll?: ScrollInputPrimitives;\n system?: SystemInputPrimitives;\n}\n\nexport interface MobileInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n longPress(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n touch: TouchInputPrimitives;\n}\n\nexport interface BrowserInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n rightClick(p: PointerPoint): Promise<void>;\n hover(p: PointerPoint): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n longPress(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n scroll: ScrollInputPrimitives;\n touch: TouchInputPrimitives;\n}\n\nexport interface ComputerInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n rightClick(p: PointerPoint): Promise<void>;\n hover(p: PointerPoint): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n scroll: ScrollInputPrimitives;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForPoint?(\n center: [number, number],\n options?: {\n targetDescription?: string;\n modelRuntime?: ModelRuntime;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<FileChooserRegistration>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n /**\n * Get the current device-local time as a formatted string.\n * Prefer this for user-visible time because timestamps alone do not preserve\n * the target device's timezone when formatted on the host machine.\n */\n getDeviceLocalTimeString?(format?: string): Promise<string>;\n\n /** URL of native MJPEG stream for real-time screen preview (e.g. WDA MJPEG server) */\n mjpegStreamUrl?: string;\n\n /**\n * Optional in-process MJPEG frame producer. Implementations can push raw\n * base64 frames here when there is no standalone native MJPEG URL, e.g.\n * Chromium CDP Page.startScreencast for web previews.\n */\n startMjpegStream?(\n options: MjpegStreamOptions,\n ): MjpegStreamHandle | undefined | Promise<MjpegStreamHandle | undefined>;\n\n /**\n * Optional hook used after keyboard-only actions to force a fresh frame on\n * the active MJPEG stream. Implementations should be a no-op when no stream\n * is active.\n */\n flushPendingVisualUpdate?(): Promise<void>;\n\n /**\n * Optional navigation state probe for browser-like interfaces, used to drive\n * loading indicators in playground UIs. Returning `undefined` means the\n * interface does not expose this concept.\n */\n navigationState?(): Promise<{ isLoading: boolean }>;\n\n /**\n * Low-level device input surface. Platform implementations expose transport\n * primitives here; higher-level AI actions and manual pointer dispatch should\n * adapt to this instead of duplicating platform gesture logic.\n */\n inputPrimitives?: InputPrimitives;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\nfunction pointFromLocate(\n locate: LocateResultElement | undefined,\n missingMessage: string,\n): PointerPoint {\n if (!locate) {\n throw new Error(missingMessage);\n }\n return { x: locate.center[0], y: locate.center[1] };\n}\n\nfunction defineLocatedPointAction<\n TSchema extends z.ZodType,\n TParam extends { locate: LocateResultElement },\n>(config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema: TSchema;\n sample: DeviceAction<TParam>['sample'];\n missingLocateMessage: string;\n call: (point: PointerPoint, param: TParam) => Promise<void>;\n}): DeviceAction<TParam> {\n return defineAction<TSchema, TParam>({\n name: config.name,\n description: config.description,\n interfaceAlias: config.interfaceAlias,\n paramSchema: config.paramSchema,\n sample: config.sample,\n call: async (param) => {\n await config.call(\n pointFromLocate(param.locate, config.missingLocateMessage),\n param,\n );\n },\n });\n}\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n tap: PointerInputPrimitives['tap'],\n): DeviceAction<ActionTapParam> => {\n return defineLocatedPointAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n sample: {\n locate: { prompt: 'the \"Submit\" button' },\n },\n missingLocateMessage: 'Element not found, cannot tap',\n call: async (point) => {\n await tap(point);\n },\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n rightClick: NonNullable<PointerInputPrimitives['rightClick']>,\n): DeviceAction<ActionRightClickParam> => {\n return defineLocatedPointAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n sample: {\n locate: { prompt: 'the file icon on the desktop' },\n },\n missingLocateMessage: 'Element not found, cannot right click',\n call: async (point) => {\n await rightClick(point);\n },\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n doubleClick: NonNullable<PointerInputPrimitives['doubleClick']>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineLocatedPointAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n sample: {\n locate: { prompt: 'the folder icon' },\n },\n missingLocateMessage: 'Element not found, cannot double click',\n call: async (point) => {\n await doubleClick(point);\n },\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n hover: NonNullable<PointerInputPrimitives['hover']>,\n): DeviceAction<ActionHoverParam> => {\n return defineLocatedPointAction<\n typeof actionHoverParamSchema,\n ActionHoverParam\n >({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n sample: {\n locate: { prompt: 'the navigation menu item \"Products\"' },\n },\n missingLocateMessage: 'Element not found, cannot hover',\n call: async (point) => {\n await hover(point);\n },\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace mode, only the inserted characters for typeOnly mode, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'typeOnly'])\n .default('replace')\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"typeOnly\" - type the value directly without clearing the field first, and should be set explicitly for incremental edits after moving the cursor; \"clear\" - clear the field without inputting new text.',\n ),\n autoDismissKeyboard: z\n .boolean()\n .optional()\n .describe(\n 'If true, the keyboard will be dismissed after the input is completed. Do not set it unless the user asks you to do so.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'typeOnly' | 'append';\n autoDismissKeyboard?: boolean;\n};\n\nexport const defineActionInput = (\n keyboard: KeyboardInputPrimitives,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n sample: {\n value: 'test@example.com',\n locate: { prompt: 'the email input field' },\n },\n call: async (param) => {\n // backward compat: convert deprecated 'append' to 'typeOnly'\n if ((param.mode as string) === 'append') {\n param.mode = 'typeOnly';\n }\n\n if (param.mode === 'clear') {\n await keyboard.clearInput(param.locate);\n return;\n }\n\n if (!param || !param.value) {\n return;\n }\n\n await keyboard.typeText(param.value, {\n target: param.locate,\n replace: param.mode !== 'typeOnly',\n autoDismissKeyboard: param.autoDismissKeyboard,\n });\n },\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n keyboardPress: KeyboardInputPrimitives['keyboardPress'],\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n sample: {\n keyName: 'Enter',\n },\n call: async (param) => {\n await keyboardPress(param.keyName, {\n target: param.locate,\n });\n },\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling all the way to the bottom by rapidly scrolling 5-10 times (skipping intermediate content until reaching the bottom), \"scrollToTop\" for scrolling all the way to the top by rapidly scrolling 5-10 times (skipping intermediate content until reaching the top), \"scrollToRight\" for scrolling all the way to the right by rapidly scrolling multiple times, \"scrollToLeft\" for scrolling all the way to the left by rapidly scrolling multiple times',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n scroll: ScrollInputPrimitives['scroll'],\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or a scrollable element to browse content. This is the preferred way to scroll on all platforms, including mobile. Supports scrollToBottom/scrollToTop for boundary navigation. Default: direction `down`, scrollType `singleAction`, distance `null`.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n sample: {\n direction: 'down',\n scrollType: 'singleAction',\n locate: { prompt: 'the center of the product list area' },\n },\n call: async (param) => {\n await scroll(param);\n },\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n dragAndDrop: NonNullable<PointerInputPrimitives['dragAndDrop']>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Pick up a specific UI element and move it to a new position (e.g., reorder a card, move a file into a folder, sort list items). The element itself moves with your finger/mouse.',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n sample: {\n from: { prompt: 'the \"report.pdf\" file icon' },\n to: { prompt: 'the upload drop zone' },\n },\n call: async (param) => {\n const from = param.from;\n const to = param.to;\n if (!from) {\n throw new Error('missing \"from\" param for drag and drop');\n }\n if (!to) {\n throw new Error('missing \"to\" param for drag and drop');\n }\n await dragAndDrop(\n { x: from.center[0], y: from.center[1] },\n { x: to.center[0], y: to.center[1] },\n );\n },\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n longPress: NonNullable<PointerInputPrimitives['longPress']>,\n): DeviceAction<ActionLongPressParam> => {\n return defineLocatedPointAction<\n typeof ActionLongPressParamSchema,\n ActionLongPressParam\n >({\n name: 'LongPress',\n description: 'Long press the element',\n interfaceAlias: 'aiLongPress',\n paramSchema: ActionLongPressParamSchema,\n sample: {\n locate: { prompt: 'the message bubble' },\n },\n missingLocateMessage: 'LongPress requires an element to be located',\n call: async (point, param) => {\n await longPress(point, { duration: param.duration });\n },\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport function normalizeMobileSwipeParam(\n param: ActionSwipeParam,\n screenSize: { width: number; height: number },\n): {\n startPoint: { x: number; y: number };\n endPoint: { x: number; y: number };\n duration: number;\n repeatCount: number;\n} {\n const { width, height } = screenSize;\n const { start, end } = param;\n\n const startPoint = start\n ? { x: start.center[0], y: start.center[1] }\n : { x: width / 2, y: height / 2 };\n\n let endPoint: { x: number; y: number };\n\n if (end) {\n endPoint = { x: end.center[0], y: end.center[1] };\n } else if (param.distance) {\n const direction = param.direction;\n if (!direction) {\n throw new Error('direction is required for swipe gesture');\n }\n endPoint = {\n x:\n startPoint.x +\n (direction === 'right'\n ? param.distance\n : direction === 'left'\n ? -param.distance\n : 0),\n y:\n startPoint.y +\n (direction === 'down'\n ? param.distance\n : direction === 'up'\n ? -param.distance\n : 0),\n };\n } else {\n throw new Error(\n 'Either end or distance must be specified for swipe gesture',\n );\n }\n\n endPoint.x = Math.max(0, Math.min(endPoint.x, width));\n endPoint.y = Math.max(0, Math.min(endPoint.y, height));\n\n const duration = param.duration ?? 300;\n\n let repeatCount = typeof param.repeat === 'number' ? param.repeat : 1;\n if (repeatCount === 0) {\n repeatCount = 10;\n }\n\n return { startPoint, endPoint, duration, repeatCount };\n}\n\nexport const defineActionSwipe = (config: {\n swipe: TouchInputPrimitives['swipe'];\n size(): Promise<Size>;\n}): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a touch gesture for interactions beyond regular scrolling (e.g., flip pages in a carousel, dismiss a notification, swipe-to-delete a list item). For regular content scrolling, use Scroll instead. Use \"distance\" + \"direction\" for relative movement, or \"end\" for precise endpoint.',\n paramSchema: ActionSwipeParamSchema,\n sample: {\n start: { prompt: 'center of the notification' },\n end: { prompt: 'upper edge of the screen' },\n },\n call: async (param) => {\n const { startPoint, endPoint, duration, repeatCount } =\n normalizeMobileSwipeParam(param, await config.size());\n for (let i = 0; i < repeatCount; i++) {\n await config.swipe(startPoint, endPoint, { duration });\n }\n },\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The input field to be cleared')\n .optional(),\n});\nexport type ActionClearInputParam = {\n locate?: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n clearInput: KeyboardInputPrimitives['clearInput'],\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n sample: {\n locate: { prompt: 'the search input field' },\n },\n call: async (param) => {\n await clearInput(param.locate);\n },\n });\n};\n\n// CursorMove\nexport const actionCursorMoveParamSchema = z.object({\n direction: z\n .enum(['left', 'right'])\n .describe('The direction to move the cursor'),\n times: z\n .number()\n .int()\n .min(1)\n .default(1)\n .describe(\n 'The number of times to move the cursor in the specified direction',\n ),\n});\nexport type ActionCursorMoveParam = {\n direction: 'left' | 'right';\n times?: number;\n};\n\nexport const defineActionCursorMove = (config: {\n keyboard: Pick<KeyboardInputPrimitives, 'keyboardPress' | 'cursorMove'>;\n sleep?(timeMs: number): Promise<void>;\n}): DeviceAction<ActionCursorMoveParam> => {\n return defineAction<\n typeof actionCursorMoveParamSchema,\n ActionCursorMoveParam\n >({\n name: 'CursorMove',\n description:\n 'Move the text cursor (caret) left or right within an input field or text area. Use this to reposition the cursor without selecting text.',\n paramSchema: actionCursorMoveParamSchema,\n sample: {\n direction: 'left',\n times: 3,\n },\n call: async (param) => {\n const times = param.times ?? 1;\n if (config.keyboard.cursorMove) {\n await config.keyboard.cursorMove(param.direction, times);\n return;\n }\n\n const wait =\n config.sleep ??\n ((timeMs: number) =>\n new Promise<void>((resolve) => setTimeout(resolve, timeMs)));\n const arrowKey = param.direction === 'left' ? 'ArrowLeft' : 'ArrowRight';\n for (let i = 0; i < times; i++) {\n await config.keyboard.keyboardPress(arrowKey);\n await wait(100);\n }\n },\n });\n};\n\n// Pinch\nexport const ActionPinchParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'The element to pinch on. If not specified, the center of the screen will be used',\n ),\n direction: z\n .enum(['in', 'out'])\n .describe(\n 'Pinch direction. \"in\" = pinch fingers together (zoom out / shrink), \"out\" = spread fingers apart (zoom in / enlarge).',\n ),\n distance: z\n .number()\n .positive()\n .optional()\n .describe(\n 'How far each finger moves in pixels. Defaults to a quarter of the shorter screen dimension.',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Duration of the pinch gesture in milliseconds'),\n});\n\nexport type ActionPinchParam = {\n locate?: LocateResultElement;\n direction: 'in' | 'out';\n distance?: number;\n duration?: number;\n};\n\nexport const defineActionPinch = (config: {\n pinch: TouchInputPrimitives['pinch'];\n size(): Promise<Size>;\n}): DeviceAction<ActionPinchParam> | undefined => {\n if (!config.pinch) {\n return undefined;\n }\n\n return defineAction<typeof ActionPinchParamSchema, ActionPinchParam>({\n name: 'Pinch',\n description:\n 'Perform a two-finger pinch gesture. Use direction \"in\" to pinch fingers together (zoom out), or \"out\" to spread fingers apart (zoom in). Optionally specify distance for how far each finger moves.',\n interfaceAlias: 'aiPinch',\n paramSchema: ActionPinchParamSchema,\n sample: {\n locate: { prompt: 'the map area' },\n direction: 'out',\n distance: 200,\n },\n call: async (param) => {\n const { centerX, centerY, startDistance, endDistance, duration } =\n normalizePinchParam(param, await config.size());\n await config.pinch?.(\n { x: centerX, y: centerY },\n { startDistance, endDistance, duration },\n );\n },\n });\n};\n\nexport function normalizePinchParam(\n param: ActionPinchParam,\n screenSize: { width: number; height: number },\n): {\n centerX: number;\n centerY: number;\n startDistance: number;\n endDistance: number;\n duration: number;\n} {\n const { width, height } = screenSize;\n const element = param.locate;\n const centerX = element\n ? Math.round(element.center[0])\n : Math.round(width / 2);\n const centerY = element\n ? Math.round(element.center[1])\n : Math.round(height / 2);\n const duration = param.duration ?? 500;\n\n const baseDistance = Math.round(Math.min(width, height) / 4);\n const fingerDistance = param.distance ?? baseDistance;\n\n const startDistance = baseDistance;\n const endDistance =\n param.direction === 'out'\n ? baseDistance + fingerDistance\n : Math.max(10, baseDistance - fingerDistance);\n\n return { centerX, centerY, startDistance, endDistance, duration };\n}\n\nexport interface MobileInputActionContext {\n input: MobileInputPrimitives;\n size(): Promise<Size>;\n sleep?(timeMs: number): Promise<void>;\n getDefaultAutoDismissKeyboard?(): boolean | undefined;\n systemActions?: SystemInputActionOptions;\n}\n\nexport interface SystemInputActionConfig {\n name: string;\n description: string;\n interfaceAlias?: string;\n delayBeforeRunner?: number;\n delayAfterRunner?: number;\n}\n\nexport interface SystemInputActionOptions {\n backButton?: SystemInputActionConfig;\n homeButton?: SystemInputActionConfig;\n recentAppsButton?: SystemInputActionConfig;\n}\n\nexport interface InputPrimitiveActionOptions {\n size?: () => Promise<Size>;\n sleep?: (timeMs: number) => Promise<void>;\n includeSwipe?: boolean;\n includePinch?: boolean;\n systemActions?: SystemInputActionOptions;\n}\n\nfunction defineSystemInputAction(\n config: SystemInputActionConfig,\n call: () => Promise<void>,\n): DeviceAction<undefined, void> {\n return defineAction<undefined, undefined, void>({\n name: config.name,\n description: config.description,\n interfaceAlias: config.interfaceAlias,\n delayBeforeRunner: config.delayBeforeRunner,\n delayAfterRunner: config.delayAfterRunner,\n call,\n });\n}\n\nexport function defineActionsFromInputPrimitives(\n input: InputPrimitives,\n options: InputPrimitiveActionOptions = {},\n): DeviceAction<any>[] {\n const actions: Array<DeviceAction<any> | undefined> = [];\n const { pointer, keyboard, scroll, touch, system } = input;\n\n if (pointer) {\n actions.push(defineActionTap(pointer.tap));\n if (pointer.doubleClick) {\n actions.push(defineActionDoubleClick(pointer.doubleClick));\n }\n if (pointer.rightClick) {\n actions.push(defineActionRightClick(pointer.rightClick));\n }\n if (pointer.hover) {\n actions.push(defineActionHover(pointer.hover));\n }\n if (pointer.dragAndDrop) {\n actions.push(defineActionDragAndDrop(pointer.dragAndDrop));\n }\n if (pointer.longPress) {\n actions.push(defineActionLongPress(pointer.longPress));\n }\n }\n\n if (keyboard) {\n actions.push(\n defineActionInput(keyboard),\n defineActionClearInput(keyboard.clearInput),\n defineActionKeyboardPress(keyboard.keyboardPress),\n defineActionCursorMove({ keyboard, sleep: options.sleep }),\n );\n }\n\n if (scroll) {\n actions.push(defineActionScroll(scroll.scroll));\n }\n\n if (touch?.swipe && options.size && options.includeSwipe !== false) {\n actions.push(defineActionSwipe({ swipe: touch.swipe, size: options.size }));\n }\n\n if (touch?.pinch && options.size && options.includePinch !== false) {\n actions.push(defineActionPinch({ pinch: touch.pinch, size: options.size }));\n }\n\n if (system && options.systemActions) {\n const { systemActions } = options;\n if (system.backButton && systemActions.backButton) {\n actions.push(\n defineSystemInputAction(systemActions.backButton, system.backButton),\n );\n }\n if (system.homeButton && systemActions.homeButton) {\n actions.push(\n defineSystemInputAction(systemActions.homeButton, system.homeButton),\n );\n }\n if (system.recentAppsButton && systemActions.recentAppsButton) {\n actions.push(\n defineSystemInputAction(\n systemActions.recentAppsButton,\n system.recentAppsButton,\n ),\n );\n }\n }\n\n return actions.filter((action): action is DeviceAction<any> =>\n Boolean(action),\n );\n}\n\nexport function createDefaultMobileActions(\n context: MobileInputActionContext,\n): DeviceAction<any>[] {\n return defineActionsFromInputPrimitives(context.input, {\n size: context.size,\n sleep: context.sleep,\n systemActions: context.systemActions,\n });\n}\n\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n timeMs: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n timeMs?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n sample: {\n timeMs: 2000,\n },\n call: async (param) => {\n const duration = param?.timeMs ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n HarmonyDeviceOpt,\n HarmonyDeviceInputOpt,\n} from './device-options';\n"],"names":["AbstractInterface","defineAction","config","pointFromLocate","locate","missingMessage","Error","defineLocatedPointAction","param","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","tap","point","actionRightClickParamSchema","defineActionRightClick","rightClick","actionDoubleClickParamSchema","defineActionDoubleClick","doubleClick","actionHoverParamSchema","defineActionHover","hover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","keyboard","actionKeyboardPressParamSchema","defineActionKeyboardPress","keyboardPress","actionScrollParamSchema","defineActionScroll","scroll","actionDragAndDropParamSchema","defineActionDragAndDrop","dragAndDrop","from","to","ActionLongPressParamSchema","defineActionLongPress","longPress","ActionSwipeParamSchema","normalizeMobileSwipeParam","screenSize","width","height","start","end","startPoint","endPoint","direction","Math","duration","repeatCount","defineActionSwipe","i","actionClearInputParamSchema","defineActionClearInput","clearInput","actionCursorMoveParamSchema","defineActionCursorMove","times","wait","timeMs","Promise","resolve","setTimeout","arrowKey","ActionPinchParamSchema","defineActionPinch","centerX","centerY","startDistance","endDistance","normalizePinchParam","element","baseDistance","fingerDistance","defineSystemInputAction","call","defineActionsFromInputPrimitives","input","options","actions","pointer","touch","system","systemActions","action","Boolean","createDefaultMobileActions","context","ActionSleepParamSchema","defineActionSleep","getDebug"],"mappings":";;;;;;;;;;;;;AAqIO,MAAeA;;QA8CpB;QA8BA;;AACF;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAGT,SAASC,gBACPC,MAAuC,EACvCC,cAAsB;IAEtB,IAAI,CAACD,QACH,MAAM,IAAIE,MAAMD;IAElB,OAAO;QAAE,GAAGD,OAAO,MAAM,CAAC,EAAE;QAAE,GAAGA,OAAO,MAAM,CAAC,EAAE;IAAC;AACpD;AAEA,SAASG,yBAGPL,MAQD;IACC,OAAOD,aAA8B;QACnC,MAAMC,OAAO,IAAI;QACjB,aAAaA,OAAO,WAAW;QAC/B,gBAAgBA,OAAO,cAAc;QACrC,aAAaA,OAAO,WAAW;QAC/B,QAAQA,OAAO,MAAM;QACrB,MAAM,OAAOM;YACX,MAAMN,OAAO,IAAI,CACfC,gBAAgBK,MAAM,MAAM,EAAEN,OAAO,oBAAoB,GACzDM;QAEJ;IACF;AACF;AAGO,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,MAEON,yBAAsE;QAC3E,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsB;QAC1C;QACA,sBAAsB;QACtB,MAAM,OAAOK;YACX,MAAMD,IAAIC;QACZ;IACF;AAIK,MAAMC,8BAA8BL,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMK,yBAAyB,CACpCC,aAEOV,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaQ;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAA+B;QACnD;QACA,sBAAsB;QACtB,MAAM,OAAOD;YACX,MAAMG,WAAWH;QACnB;IACF;AAIK,MAAMI,+BAA+BR,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMQ,0BAA0B,CACrCC,cAEOb,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAkB;QACtC;QACA,sBAAsB;QACtB,MAAM,OAAOJ;YACX,MAAMM,YAAYN;QACpB;IACF;AAIK,MAAMO,yBAAyBX,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMW,oBAAoB,CAC/BC,QAEOhB,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACA,sBAAsB;QACtB,MAAM,OAAOP;YACX,MAAMS,MAAMT;QACd;IACF;AAIF,MAAMU,yBACJ;AACK,MAAMC,yBAAyBf,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACgB,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQf,4BACL,QAAQ,CAACa,wBACT,QAAQ;IACX,MAAMd,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAW,EACrC,OAAO,CAAC,WACR,QAAQ,CACP;IAEJ,qBAAqBA,EAAAA,OACX,GACP,QAAQ,GACR,QAAQ,CACP;AAEN;AAQO,MAAMkB,oBAAoB,CAC/BC,WAEO5B,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAawB;QACb,QAAQ;YACN,OAAO;YACP,QAAQ;gBAAE,QAAQ;YAAwB;QAC5C;QACA,MAAM,OAAOjB;YAEX,IAAKA,AAA0B,aAA1BA,MAAM,IAAI,EACbA,MAAM,IAAI,GAAG;YAGf,IAAIA,AAAe,YAAfA,MAAM,IAAI,EAAc,YAC1B,MAAMqB,SAAS,UAAU,CAACrB,MAAM,MAAM;YAIxC,IAAI,CAACA,SAAS,CAACA,MAAM,KAAK,EACxB;YAGF,MAAMqB,SAAS,QAAQ,CAACrB,MAAM,KAAK,EAAE;gBACnC,QAAQA,MAAM,MAAM;gBACpB,SAASA,AAAe,eAAfA,MAAM,IAAI;gBACnB,qBAAqBA,MAAM,mBAAmB;YAChD;QACF;IACF;AAIK,MAAMsB,iCAAiCpB,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMqB,4BAA4B,CACvCC,gBAEO/B,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAa6B;QACb,QAAQ;YACN,SAAS;QACX;QACA,MAAM,OAAOtB;YACX,MAAMwB,cAAcxB,MAAM,OAAO,EAAE;gBACjC,QAAQA,MAAM,MAAM;YACtB;QACF;IACF;AAIK,MAAMyB,0BAA0BvB,EAAE,MAAM,CAAC;IAC9C,YAAYA,CAAC,CAADA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMuB,qBAAqB,CAChCC,SAEOlC,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAagC;QACb,QAAQ;YACN,WAAW;YACX,YAAY;YACZ,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACA,MAAM,OAAOzB;YACX,MAAM2B,OAAO3B;QACf;IACF;AAIK,MAAM4B,+BAA+B1B,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAM0B,0BAA0B,CACrCC,cAEOrC,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamC;QACb,QAAQ;YACN,MAAM;gBAAE,QAAQ;YAA6B;YAC7C,IAAI;gBAAE,QAAQ;YAAuB;QACvC;QACA,MAAM,OAAO5B;YACX,MAAM+B,OAAO/B,MAAM,IAAI;YACvB,MAAMgC,KAAKhC,MAAM,EAAE;YACnB,IAAI,CAAC+B,MACH,MAAM,IAAIjC,MAAM;YAElB,IAAI,CAACkC,IACH,MAAM,IAAIlC,MAAM;YAElB,MAAMgC,YACJ;gBAAE,GAAGC,KAAK,MAAM,CAAC,EAAE;gBAAE,GAAGA,KAAK,MAAM,CAAC,EAAE;YAAC,GACvC;gBAAE,GAAGC,GAAG,MAAM,CAAC,EAAE;gBAAE,GAAGA,GAAG,MAAM,CAAC,EAAE;YAAC;QAEvC;IACF;AAGK,MAAMC,6BAA6B/B,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMgC,wBAAwB,CACnCC,YAEOpC,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAakC;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAqB;QACzC;QACA,sBAAsB;QACtB,MAAM,OAAO3B,OAAON;YAClB,MAAMmC,UAAU7B,OAAO;gBAAE,UAAUN,MAAM,QAAQ;YAAC;QACpD;IACF;AAGK,MAAMoC,yBAAyBlC,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,SAASmC,0BACdrC,KAAuB,EACvBsC,UAA6C;IAO7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAM,EAAEG,KAAK,EAAEC,GAAG,EAAE,GAAG1C;IAEvB,MAAM2C,aAAaF,QACf;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;IAAC,IACzC;QAAE,GAAGF,QAAQ;QAAG,GAAGC,SAAS;IAAE;IAElC,IAAII;IAEJ,IAAIF,KACFE,WAAW;QAAE,GAAGF,IAAI,MAAM,CAAC,EAAE;QAAE,GAAGA,IAAI,MAAM,CAAC,EAAE;IAAC;SAC3C,IAAI1C,MAAM,QAAQ,EAAE;QACzB,MAAM6C,YAAY7C,MAAM,SAAS;QACjC,IAAI,CAAC6C,WACH,MAAM,IAAI/C,MAAM;QAElB8C,WAAW;YACT,GACED,WAAW,CAAC,GACXE,CAAAA,AAAc,YAAdA,YACG7C,MAAM,QAAQ,GACd6C,AAAc,WAAdA,YACE,CAAC7C,MAAM,QAAQ,GACf;YACR,GACE2C,WAAW,CAAC,GACXE,CAAAA,AAAc,WAAdA,YACG7C,MAAM,QAAQ,GACd6C,AAAc,SAAdA,YACE,CAAC7C,MAAM,QAAQ,GACf;QACV;IACF,OACE,MAAM,IAAIF,MACR;IAIJ8C,SAAS,CAAC,GAAGE,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACF,SAAS,CAAC,EAAEL;IAC9CK,SAAS,CAAC,GAAGE,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACF,SAAS,CAAC,EAAEJ;IAE9C,MAAMO,WAAW/C,MAAM,QAAQ,IAAI;IAEnC,IAAIgD,cAAc,AAAwB,YAAxB,OAAOhD,MAAM,MAAM,GAAgBA,MAAM,MAAM,GAAG;IACpE,IAAIgD,AAAgB,MAAhBA,aACFA,cAAc;IAGhB,OAAO;QAAEL;QAAYC;QAAUG;QAAUC;IAAY;AACvD;AAEO,MAAMC,oBAAoB,CAACvD,SAIzBD,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa2C;QACb,QAAQ;YACN,OAAO;gBAAE,QAAQ;YAA6B;YAC9C,KAAK;gBAAE,QAAQ;YAA2B;QAC5C;QACA,MAAM,OAAOpC;YACX,MAAM,EAAE2C,UAAU,EAAEC,QAAQ,EAAEG,QAAQ,EAAEC,WAAW,EAAE,GACnDX,0BAA0BrC,OAAO,MAAMN,OAAO,IAAI;YACpD,IAAK,IAAIwD,IAAI,GAAGA,IAAIF,aAAaE,IAC/B,MAAMxD,OAAO,KAAK,CAACiD,YAAYC,UAAU;gBAAEG;YAAS;QAExD;IACF;AAIK,MAAMI,8BAA8BjD,EAAE,MAAM,CAAC;IAClD,QAAQC,4BACL,QAAQ,CAAC,iCACT,QAAQ;AACb;AAKO,MAAMiD,yBAAyB,CACpCC,aAEO5D,aAGL;QACA,MAAM;QACN,aAAauB;QACb,gBAAgB;QAChB,aAAamC;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAyB;QAC7C;QACA,MAAM,OAAOnD;YACX,MAAMqD,WAAWrD,MAAM,MAAM;QAC/B;IACF;AAIK,MAAMsD,8BAA8BpD,EAAE,MAAM,CAAC;IAClD,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;KAAQ,EACtB,QAAQ,CAAC;IACZ,OAAOA,EAAAA,MACE,GACN,GAAG,GACH,GAAG,CAAC,GACJ,OAAO,CAAC,GACR,QAAQ,CACP;AAEN;AAMO,MAAMqD,yBAAyB,CAAC7D,SAI9BD,aAGL;QACA,MAAM;QACN,aACE;QACF,aAAa6D;QACb,QAAQ;YACN,WAAW;YACX,OAAO;QACT;QACA,MAAM,OAAOtD;YACX,MAAMwD,QAAQxD,MAAM,KAAK,IAAI;YAC7B,IAAIN,OAAO,QAAQ,CAAC,UAAU,EAAE,YAC9B,MAAMA,OAAO,QAAQ,CAAC,UAAU,CAACM,MAAM,SAAS,EAAEwD;YAIpD,MAAMC,OACJ/D,OAAO,KAAK,IACV,EAAAgE,SACA,IAAIC,QAAc,CAACC,UAAYC,WAAWD,SAASF,QAAO;YAC9D,MAAMI,WAAW9D,AAAoB,WAApBA,MAAM,SAAS,GAAc,cAAc;YAC5D,IAAK,IAAIkD,IAAI,GAAGA,IAAIM,OAAON,IAAK;gBAC9B,MAAMxD,OAAO,QAAQ,CAAC,aAAa,CAACoE;gBACpC,MAAML,KAAK;YACb;QACF;IACF;AAIK,MAAMM,yBAAyB7D,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;KAAM,EAClB,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AASO,MAAM8D,oBAAoB,CAACtE;IAIhC,IAAI,CAACA,OAAO,KAAK,EACf;IAGF,OAAOD,aAA8D;QACnE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAasE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAe;YACjC,WAAW;YACX,UAAU;QACZ;QACA,MAAM,OAAO/D;YACX,MAAM,EAAEiE,OAAO,EAAEC,OAAO,EAAEC,aAAa,EAAEC,WAAW,EAAErB,QAAQ,EAAE,GAC9DsB,oBAAoBrE,OAAO,MAAMN,OAAO,IAAI;YAC9C,MAAMA,OAAO,KAAK,GAChB;gBAAE,GAAGuE;gBAAS,GAAGC;YAAQ,GACzB;gBAAEC;gBAAeC;gBAAarB;YAAS;QAE3C;IACF;AACF;AAEO,SAASsB,oBACdrE,KAAuB,EACvBsC,UAA6C;IAQ7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAMgC,UAAUtE,MAAM,MAAM;IAC5B,MAAMiE,UAAUK,UACZxB,KAAK,KAAK,CAACwB,QAAQ,MAAM,CAAC,EAAE,IAC5BxB,KAAK,KAAK,CAACP,QAAQ;IACvB,MAAM2B,UAAUI,UACZxB,KAAK,KAAK,CAACwB,QAAQ,MAAM,CAAC,EAAE,IAC5BxB,KAAK,KAAK,CAACN,SAAS;IACxB,MAAMO,WAAW/C,MAAM,QAAQ,IAAI;IAEnC,MAAMuE,eAAezB,KAAK,KAAK,CAACA,KAAK,GAAG,CAACP,OAAOC,UAAU;IAC1D,MAAMgC,iBAAiBxE,MAAM,QAAQ,IAAIuE;IAEzC,MAAMJ,gBAAgBI;IACtB,MAAMH,cACJpE,AAAoB,UAApBA,MAAM,SAAS,GACXuE,eAAeC,iBACf1B,KAAK,GAAG,CAAC,IAAIyB,eAAeC;IAElC,OAAO;QAAEP;QAASC;QAASC;QAAeC;QAAarB;IAAS;AAClE;AAgCA,SAAS0B,wBACP/E,MAA+B,EAC/BgF,IAAyB;IAEzB,OAAOjF,aAAyC;QAC9C,MAAMC,OAAO,IAAI;QACjB,aAAaA,OAAO,WAAW;QAC/B,gBAAgBA,OAAO,cAAc;QACrC,mBAAmBA,OAAO,iBAAiB;QAC3C,kBAAkBA,OAAO,gBAAgB;QACzCgF;IACF;AACF;AAEO,SAASC,iCACdC,KAAsB,EACtBC,UAAuC,CAAC,CAAC;IAEzC,MAAMC,UAAgD,EAAE;IACxD,MAAM,EAAEC,OAAO,EAAE1D,QAAQ,EAAEM,MAAM,EAAEqD,KAAK,EAAEC,MAAM,EAAE,GAAGL;IAErD,IAAIG,SAAS;QACXD,QAAQ,IAAI,CAAC1E,gBAAgB2E,QAAQ,GAAG;QACxC,IAAIA,QAAQ,WAAW,EACrBD,QAAQ,IAAI,CAACnE,wBAAwBoE,QAAQ,WAAW;QAE1D,IAAIA,QAAQ,UAAU,EACpBD,QAAQ,IAAI,CAACtE,uBAAuBuE,QAAQ,UAAU;QAExD,IAAIA,QAAQ,KAAK,EACfD,QAAQ,IAAI,CAAChE,kBAAkBiE,QAAQ,KAAK;QAE9C,IAAIA,QAAQ,WAAW,EACrBD,QAAQ,IAAI,CAACjD,wBAAwBkD,QAAQ,WAAW;QAE1D,IAAIA,QAAQ,SAAS,EACnBD,QAAQ,IAAI,CAAC5C,sBAAsB6C,QAAQ,SAAS;IAExD;IAEA,IAAI1D,UACFyD,QAAQ,IAAI,CACV1D,kBAAkBC,WAClB+B,uBAAuB/B,SAAS,UAAU,GAC1CE,0BAA0BF,SAAS,aAAa,GAChDkC,uBAAuB;QAAElC;QAAU,OAAOwD,QAAQ,KAAK;IAAC;IAI5D,IAAIlD,QACFmD,QAAQ,IAAI,CAACpD,mBAAmBC,OAAO,MAAM;IAG/C,IAAIqD,OAAO,SAASH,QAAQ,IAAI,IAAIA,AAAyB,UAAzBA,QAAQ,YAAY,EACtDC,QAAQ,IAAI,CAAC7B,kBAAkB;QAAE,OAAO+B,MAAM,KAAK;QAAE,MAAMH,QAAQ,IAAI;IAAC;IAG1E,IAAIG,OAAO,SAASH,QAAQ,IAAI,IAAIA,AAAyB,UAAzBA,QAAQ,YAAY,EACtDC,QAAQ,IAAI,CAACd,kBAAkB;QAAE,OAAOgB,MAAM,KAAK;QAAE,MAAMH,QAAQ,IAAI;IAAC;IAG1E,IAAII,UAAUJ,QAAQ,aAAa,EAAE;QACnC,MAAM,EAAEK,aAAa,EAAE,GAAGL;QAC1B,IAAII,OAAO,UAAU,IAAIC,cAAc,UAAU,EAC/CJ,QAAQ,IAAI,CACVL,wBAAwBS,cAAc,UAAU,EAAED,OAAO,UAAU;QAGvE,IAAIA,OAAO,UAAU,IAAIC,cAAc,UAAU,EAC/CJ,QAAQ,IAAI,CACVL,wBAAwBS,cAAc,UAAU,EAAED,OAAO,UAAU;QAGvE,IAAIA,OAAO,gBAAgB,IAAIC,cAAc,gBAAgB,EAC3DJ,QAAQ,IAAI,CACVL,wBACES,cAAc,gBAAgB,EAC9BD,OAAO,gBAAgB;IAI/B;IAEA,OAAOH,QAAQ,MAAM,CAAC,CAACK,SACrBC,QAAQD;AAEZ;AAEO,SAASE,2BACdC,OAAiC;IAEjC,OAAOX,iCAAiCW,QAAQ,KAAK,EAAE;QACrD,MAAMA,QAAQ,IAAI;QAClB,OAAOA,QAAQ,KAAK;QACpB,eAAeA,QAAQ,aAAa;IACtC;AACF;AAGO,MAAMC,yBAAyBrF,EAAE,MAAM,CAAC;IAC7C,QAAQA,EAAAA,MACC,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMsF,oBAAoB,IACxB/F,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa8F;QACb,QAAQ;YACN,QAAQ;QACV;QACA,MAAM,OAAOvF;YACX,MAAM+C,WAAW/C,OAAO,UAAU;YAClCyF,SAAS,wBAAwB,CAAC,aAAa,EAAE1C,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIY,QAAQ,CAACC,UAAYC,WAAWD,SAASb;QACrD;IACF"}
1
+ {"version":3,"file":"device/index.mjs","sources":["../../../src/device/index.ts"],"sourcesContent":["import type { ModelRuntime } from '@/ai-model/models';\nimport { getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n ExecutorContext,\n LocateResultElement,\n} from '@/types';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport interface FileChooserRegistration {\n dispose: () => void;\n getError: () => Error | undefined | Promise<Error | undefined>;\n}\n\nexport interface MjpegStreamFrame {\n /** Raw base64-encoded image bytes WITHOUT a `data:image/...;base64,` prefix. */\n data: string;\n contentType?: string;\n}\n\nexport interface MjpegStreamHandle {\n stop(): void | Promise<void>;\n}\n\nexport interface MjpegStreamOptions {\n signal?: AbortSignal;\n onFrame(frame: MjpegStreamFrame): void;\n onError?(error: unknown): void;\n}\n\n/** A point in device-pixel coordinates on the screen. */\nexport interface PointerPoint {\n x: number;\n y: number;\n}\n\nexport interface PointerInputPrimitives {\n tap(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n doubleClick?(p: PointerPoint): Promise<void>;\n rightClick?(p: PointerPoint): Promise<void>;\n hover?(p: PointerPoint): Promise<void>;\n longPress?(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n dragAndDrop?(from: PointerPoint, to: PointerPoint): Promise<void>;\n}\n\nexport interface TouchInputPrimitives {\n swipe(\n start: PointerPoint,\n end: PointerPoint,\n opts?: { duration?: number; repeat?: number },\n ): Promise<void>;\n pinch?(\n center: PointerPoint,\n opts: { startDistance: number; endDistance: number; duration: number },\n ): Promise<void>;\n}\n\nexport interface KeyboardInputPrimitives {\n keyboardPress(keyName: string, opts?: { target?: unknown }): Promise<void>;\n cursorMove?(direction: 'left' | 'right', times?: number): Promise<void>;\n typeText(\n value: string,\n opts?: {\n autoDismissKeyboard?: boolean;\n keyboardDismissStrategy?: 'esc-first' | 'back-first';\n target?: unknown;\n replace?: boolean;\n focusOnly?: boolean;\n },\n ): Promise<void>;\n clearInput(target?: unknown): Promise<void>;\n}\n\nexport interface ScrollInputPrimitives {\n scroll(param: ActionScrollParam): Promise<void>;\n}\n\nexport interface SystemInputPrimitives {\n backButton?(): Promise<void>;\n homeButton?(): Promise<void>;\n recentAppsButton?(): Promise<void>;\n}\n\nexport interface InputPrimitives {\n pointer?: PointerInputPrimitives;\n keyboard?: KeyboardInputPrimitives;\n touch?: TouchInputPrimitives;\n scroll?: ScrollInputPrimitives;\n system?: SystemInputPrimitives;\n}\n\nexport interface MobileInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n longPress(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n touch: TouchInputPrimitives;\n}\n\nexport interface BrowserInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n rightClick(p: PointerPoint): Promise<void>;\n hover(p: PointerPoint): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n longPress(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n scroll: ScrollInputPrimitives;\n touch: TouchInputPrimitives;\n}\n\nexport interface ComputerInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n rightClick(p: PointerPoint): Promise<void>;\n hover(p: PointerPoint): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n scroll: ScrollInputPrimitives;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForPoint?(\n center: [number, number],\n options?: {\n targetDescription?: string;\n modelRuntime?: ModelRuntime;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<FileChooserRegistration>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n /**\n * Get the current device-local time as a formatted string.\n * Prefer this for user-visible time because timestamps alone do not preserve\n * the target device's timezone when formatted on the host machine.\n */\n getDeviceLocalTimeString?(format?: string): Promise<string>;\n\n /** URL of native MJPEG stream for real-time screen preview (e.g. WDA MJPEG server) */\n mjpegStreamUrl?: string;\n\n /**\n * Optional in-process MJPEG frame producer. Implementations can push raw\n * base64 frames here when there is no standalone native MJPEG URL, e.g.\n * Chromium CDP Page.startScreencast for web previews.\n */\n startMjpegStream?(\n options: MjpegStreamOptions,\n ): MjpegStreamHandle | undefined | Promise<MjpegStreamHandle | undefined>;\n\n /**\n * Optional hook used after keyboard-only actions to force a fresh frame on\n * the active MJPEG stream. Implementations should be a no-op when no stream\n * is active.\n */\n flushPendingVisualUpdate?(): Promise<void>;\n\n /**\n * Optional navigation state probe for browser-like interfaces, used to drive\n * loading indicators in playground UIs. Returning `undefined` means the\n * interface does not expose this concept.\n */\n navigationState?(): Promise<{ isLoading: boolean }>;\n\n /**\n * Low-level device input surface. Platform implementations expose transport\n * primitives here; higher-level AI actions and manual pointer dispatch should\n * adapt to this instead of duplicating platform gesture logic.\n */\n inputPrimitives?: InputPrimitives;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (\n param: TRuntime,\n context?: ExecutorContext,\n ) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\nfunction pointFromLocate(\n locate: LocateResultElement | undefined,\n missingMessage: string,\n): PointerPoint {\n if (!locate) {\n throw new Error(missingMessage);\n }\n return { x: locate.center[0], y: locate.center[1] };\n}\n\nfunction defineLocatedPointAction<\n TSchema extends z.ZodType,\n TParam extends { locate: LocateResultElement },\n>(config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema: TSchema;\n sample: DeviceAction<TParam>['sample'];\n missingLocateMessage: string;\n call: (point: PointerPoint, param: TParam) => Promise<void>;\n}): DeviceAction<TParam> {\n return defineAction<TSchema, TParam>({\n name: config.name,\n description: config.description,\n interfaceAlias: config.interfaceAlias,\n paramSchema: config.paramSchema,\n sample: config.sample,\n call: async (param) => {\n await config.call(\n pointFromLocate(param.locate, config.missingLocateMessage),\n param,\n );\n },\n });\n}\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n tap: PointerInputPrimitives['tap'],\n): DeviceAction<ActionTapParam> => {\n return defineLocatedPointAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n sample: {\n locate: { prompt: 'the \"Submit\" button' },\n },\n missingLocateMessage: 'Element not found, cannot tap',\n call: async (point) => {\n await tap(point);\n },\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n rightClick: NonNullable<PointerInputPrimitives['rightClick']>,\n): DeviceAction<ActionRightClickParam> => {\n return defineLocatedPointAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n sample: {\n locate: { prompt: 'the file icon on the desktop' },\n },\n missingLocateMessage: 'Element not found, cannot right click',\n call: async (point) => {\n await rightClick(point);\n },\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n doubleClick: NonNullable<PointerInputPrimitives['doubleClick']>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineLocatedPointAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n sample: {\n locate: { prompt: 'the folder icon' },\n },\n missingLocateMessage: 'Element not found, cannot double click',\n call: async (point) => {\n await doubleClick(point);\n },\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n hover: NonNullable<PointerInputPrimitives['hover']>,\n): DeviceAction<ActionHoverParam> => {\n return defineLocatedPointAction<\n typeof actionHoverParamSchema,\n ActionHoverParam\n >({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n sample: {\n locate: { prompt: 'the navigation menu item \"Products\"' },\n },\n missingLocateMessage: 'Element not found, cannot hover',\n call: async (point) => {\n await hover(point);\n },\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace mode, only the inserted characters for typeOnly mode, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'typeOnly'])\n .default('replace')\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"typeOnly\" - type the value directly without clearing the field first, and should be set explicitly for incremental edits after moving the cursor; \"clear\" - clear the field without inputting new text.',\n ),\n autoDismissKeyboard: z\n .boolean()\n .optional()\n .describe(\n 'If true, the keyboard will be dismissed after the input is completed. Do not set it unless the user asks you to do so.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'typeOnly' | 'append';\n autoDismissKeyboard?: boolean;\n};\n\nexport const defineActionInput = (\n keyboard: KeyboardInputPrimitives,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n sample: {\n value: 'test@example.com',\n locate: { prompt: 'the email input field' },\n },\n call: async (param) => {\n // backward compat: convert deprecated 'append' to 'typeOnly'\n if ((param.mode as string) === 'append') {\n param.mode = 'typeOnly';\n }\n\n if (param.mode === 'clear') {\n await keyboard.clearInput(param.locate);\n return;\n }\n\n if (!param || !param.value) {\n return;\n }\n\n await keyboard.typeText(param.value, {\n target: param.locate,\n replace: param.mode !== 'typeOnly',\n autoDismissKeyboard: param.autoDismissKeyboard,\n });\n },\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n keyboardPress: KeyboardInputPrimitives['keyboardPress'],\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n sample: {\n keyName: 'Enter',\n },\n call: async (param) => {\n await keyboardPress(param.keyName, {\n target: param.locate,\n });\n },\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling all the way to the bottom by rapidly scrolling 5-10 times (skipping intermediate content until reaching the bottom), \"scrollToTop\" for scrolling all the way to the top by rapidly scrolling 5-10 times (skipping intermediate content until reaching the top), \"scrollToRight\" for scrolling all the way to the right by rapidly scrolling multiple times, \"scrollToLeft\" for scrolling all the way to the left by rapidly scrolling multiple times',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n scroll: ScrollInputPrimitives['scroll'],\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or a scrollable element to browse content. This is the preferred way to scroll on all platforms, including mobile. Supports scrollToBottom/scrollToTop for boundary navigation. Default: direction `down`, scrollType `singleAction`, distance `null`.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n sample: {\n direction: 'down',\n scrollType: 'singleAction',\n locate: { prompt: 'the center of the product list area' },\n },\n call: async (param) => {\n await scroll(param);\n },\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n dragAndDrop: NonNullable<PointerInputPrimitives['dragAndDrop']>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Pick up a specific UI element and move it to a new position (e.g., reorder a card, move a file into a folder, sort list items). The element itself moves with your finger/mouse.',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n sample: {\n from: { prompt: 'the \"report.pdf\" file icon' },\n to: { prompt: 'the upload drop zone' },\n },\n call: async (param) => {\n const from = param.from;\n const to = param.to;\n if (!from) {\n throw new Error('missing \"from\" param for drag and drop');\n }\n if (!to) {\n throw new Error('missing \"to\" param for drag and drop');\n }\n await dragAndDrop(\n { x: from.center[0], y: from.center[1] },\n { x: to.center[0], y: to.center[1] },\n );\n },\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n longPress: NonNullable<PointerInputPrimitives['longPress']>,\n): DeviceAction<ActionLongPressParam> => {\n return defineLocatedPointAction<\n typeof ActionLongPressParamSchema,\n ActionLongPressParam\n >({\n name: 'LongPress',\n description: 'Long press the element',\n interfaceAlias: 'aiLongPress',\n paramSchema: ActionLongPressParamSchema,\n sample: {\n locate: { prompt: 'the message bubble' },\n },\n missingLocateMessage: 'LongPress requires an element to be located',\n call: async (point, param) => {\n await longPress(point, { duration: param.duration });\n },\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport function normalizeMobileSwipeParam(\n param: ActionSwipeParam,\n screenSize: { width: number; height: number },\n): {\n startPoint: { x: number; y: number };\n endPoint: { x: number; y: number };\n duration: number;\n repeatCount: number;\n} {\n const { width, height } = screenSize;\n const { start, end } = param;\n\n const startPoint = start\n ? { x: start.center[0], y: start.center[1] }\n : { x: width / 2, y: height / 2 };\n\n let endPoint: { x: number; y: number };\n\n if (end) {\n endPoint = { x: end.center[0], y: end.center[1] };\n } else if (param.distance) {\n const direction = param.direction;\n if (!direction) {\n throw new Error('direction is required for swipe gesture');\n }\n endPoint = {\n x:\n startPoint.x +\n (direction === 'right'\n ? param.distance\n : direction === 'left'\n ? -param.distance\n : 0),\n y:\n startPoint.y +\n (direction === 'down'\n ? param.distance\n : direction === 'up'\n ? -param.distance\n : 0),\n };\n } else {\n throw new Error(\n 'Either end or distance must be specified for swipe gesture',\n );\n }\n\n endPoint.x = Math.max(0, Math.min(endPoint.x, width));\n endPoint.y = Math.max(0, Math.min(endPoint.y, height));\n\n const duration = param.duration ?? 300;\n\n let repeatCount = typeof param.repeat === 'number' ? param.repeat : 1;\n if (repeatCount === 0) {\n repeatCount = 10;\n }\n\n return { startPoint, endPoint, duration, repeatCount };\n}\n\nexport const defineActionSwipe = (config: {\n swipe: TouchInputPrimitives['swipe'];\n size(): Promise<Size>;\n}): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a touch gesture for interactions beyond regular scrolling (e.g., adjust a continuous control such as a slider, flip pages in a carousel, dismiss a notification, swipe-to-delete a list item). For regular content scrolling, use Scroll instead. Use \"distance\" + \"direction\" for relative movement, or \"start\" + \"end\" for precise endpoint movement.',\n paramSchema: ActionSwipeParamSchema,\n sample: {\n start: { prompt: 'center of the notification' },\n end: { prompt: 'upper edge of the screen' },\n },\n call: async (param) => {\n const { startPoint, endPoint, duration, repeatCount } =\n normalizeMobileSwipeParam(param, await config.size());\n for (let i = 0; i < repeatCount; i++) {\n await config.swipe(startPoint, endPoint, { duration });\n }\n },\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The input field to be cleared')\n .optional(),\n});\nexport type ActionClearInputParam = {\n locate?: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n clearInput: KeyboardInputPrimitives['clearInput'],\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n sample: {\n locate: { prompt: 'the search input field' },\n },\n call: async (param) => {\n await clearInput(param.locate);\n },\n });\n};\n\n// CursorMove\nexport const actionCursorMoveParamSchema = z.object({\n direction: z\n .enum(['left', 'right'])\n .describe('The direction to move the cursor'),\n times: z\n .number()\n .int()\n .min(1)\n .default(1)\n .describe(\n 'The number of times to move the cursor in the specified direction',\n ),\n});\nexport type ActionCursorMoveParam = {\n direction: 'left' | 'right';\n times?: number;\n};\n\nexport const defineActionCursorMove = (config: {\n keyboard: Pick<KeyboardInputPrimitives, 'keyboardPress' | 'cursorMove'>;\n sleep?(timeMs: number): Promise<void>;\n}): DeviceAction<ActionCursorMoveParam> => {\n return defineAction<\n typeof actionCursorMoveParamSchema,\n ActionCursorMoveParam\n >({\n name: 'CursorMove',\n description:\n 'Move the text cursor (caret) left or right within an input field or text area. Use this to reposition the cursor without selecting text.',\n paramSchema: actionCursorMoveParamSchema,\n sample: {\n direction: 'left',\n times: 3,\n },\n call: async (param) => {\n const times = param.times ?? 1;\n if (config.keyboard.cursorMove) {\n await config.keyboard.cursorMove(param.direction, times);\n return;\n }\n\n const wait =\n config.sleep ??\n ((timeMs: number) =>\n new Promise<void>((resolve) => setTimeout(resolve, timeMs)));\n const arrowKey = param.direction === 'left' ? 'ArrowLeft' : 'ArrowRight';\n for (let i = 0; i < times; i++) {\n await config.keyboard.keyboardPress(arrowKey);\n await wait(100);\n }\n },\n });\n};\n\n// Pinch\nexport const ActionPinchParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'The element to pinch on. If not specified, the center of the screen will be used',\n ),\n direction: z\n .enum(['in', 'out'])\n .describe(\n 'Pinch direction. \"in\" = pinch fingers together (zoom out / shrink), \"out\" = spread fingers apart (zoom in / enlarge).',\n ),\n distance: z\n .number()\n .positive()\n .optional()\n .describe(\n 'How far each finger moves in pixels. Defaults to a quarter of the shorter screen dimension.',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Duration of the pinch gesture in milliseconds'),\n});\n\nexport type ActionPinchParam = {\n locate?: LocateResultElement;\n direction: 'in' | 'out';\n distance?: number;\n duration?: number;\n};\n\nexport const defineActionPinch = (config: {\n pinch: TouchInputPrimitives['pinch'];\n size(): Promise<Size>;\n}): DeviceAction<ActionPinchParam> | undefined => {\n if (!config.pinch) {\n return undefined;\n }\n\n return defineAction<typeof ActionPinchParamSchema, ActionPinchParam>({\n name: 'Pinch',\n description:\n 'Perform a two-finger pinch gesture. Use direction \"in\" to pinch fingers together (zoom out), or \"out\" to spread fingers apart (zoom in). Optionally specify distance for how far each finger moves.',\n interfaceAlias: 'aiPinch',\n paramSchema: ActionPinchParamSchema,\n sample: {\n locate: { prompt: 'the map area' },\n direction: 'out',\n distance: 200,\n },\n call: async (param) => {\n const { centerX, centerY, startDistance, endDistance, duration } =\n normalizePinchParam(param, await config.size());\n await config.pinch?.(\n { x: centerX, y: centerY },\n { startDistance, endDistance, duration },\n );\n },\n });\n};\n\nexport function normalizePinchParam(\n param: ActionPinchParam,\n screenSize: { width: number; height: number },\n): {\n centerX: number;\n centerY: number;\n startDistance: number;\n endDistance: number;\n duration: number;\n} {\n const { width, height } = screenSize;\n const element = param.locate;\n const centerX = element\n ? Math.round(element.center[0])\n : Math.round(width / 2);\n const centerY = element\n ? Math.round(element.center[1])\n : Math.round(height / 2);\n const duration = param.duration ?? 500;\n\n const baseDistance = Math.round(Math.min(width, height) / 4);\n const fingerDistance = param.distance ?? baseDistance;\n\n const startDistance = baseDistance;\n const endDistance =\n param.direction === 'out'\n ? baseDistance + fingerDistance\n : Math.max(10, baseDistance - fingerDistance);\n\n return { centerX, centerY, startDistance, endDistance, duration };\n}\n\nexport interface MobileInputActionContext {\n input: MobileInputPrimitives;\n size(): Promise<Size>;\n sleep?(timeMs: number): Promise<void>;\n getDefaultAutoDismissKeyboard?(): boolean | undefined;\n systemActions?: SystemInputActionOptions;\n}\n\nexport interface SystemInputActionConfig {\n name: string;\n description: string;\n interfaceAlias?: string;\n delayBeforeRunner?: number;\n delayAfterRunner?: number;\n}\n\nexport interface SystemInputActionOptions {\n backButton?: SystemInputActionConfig;\n homeButton?: SystemInputActionConfig;\n recentAppsButton?: SystemInputActionConfig;\n}\n\nexport interface InputPrimitiveActionOptions {\n size?: () => Promise<Size>;\n sleep?: (timeMs: number) => Promise<void>;\n includeSwipe?: boolean;\n includePinch?: boolean;\n systemActions?: SystemInputActionOptions;\n}\n\nfunction defineSystemInputAction(\n config: SystemInputActionConfig,\n call: () => Promise<void>,\n): DeviceAction<undefined, void> {\n return defineAction<undefined, undefined, void>({\n name: config.name,\n description: config.description,\n interfaceAlias: config.interfaceAlias,\n delayBeforeRunner: config.delayBeforeRunner,\n delayAfterRunner: config.delayAfterRunner,\n call,\n });\n}\n\nexport function defineActionsFromInputPrimitives(\n input: InputPrimitives,\n options: InputPrimitiveActionOptions = {},\n): DeviceAction<any>[] {\n const actions: Array<DeviceAction<any> | undefined> = [];\n const { pointer, keyboard, scroll, touch, system } = input;\n\n if (pointer) {\n actions.push(defineActionTap(pointer.tap));\n if (pointer.doubleClick) {\n actions.push(defineActionDoubleClick(pointer.doubleClick));\n }\n if (pointer.rightClick) {\n actions.push(defineActionRightClick(pointer.rightClick));\n }\n if (pointer.hover) {\n actions.push(defineActionHover(pointer.hover));\n }\n if (pointer.dragAndDrop) {\n actions.push(defineActionDragAndDrop(pointer.dragAndDrop));\n }\n if (pointer.longPress) {\n actions.push(defineActionLongPress(pointer.longPress));\n }\n }\n\n if (keyboard) {\n actions.push(\n defineActionInput(keyboard),\n defineActionClearInput(keyboard.clearInput),\n defineActionKeyboardPress(keyboard.keyboardPress),\n defineActionCursorMove({ keyboard, sleep: options.sleep }),\n );\n }\n\n if (scroll) {\n actions.push(defineActionScroll(scroll.scroll));\n }\n\n if (touch?.swipe && options.size && options.includeSwipe !== false) {\n actions.push(defineActionSwipe({ swipe: touch.swipe, size: options.size }));\n }\n\n if (touch?.pinch && options.size && options.includePinch !== false) {\n actions.push(defineActionPinch({ pinch: touch.pinch, size: options.size }));\n }\n\n if (system && options.systemActions) {\n const { systemActions } = options;\n if (system.backButton && systemActions.backButton) {\n actions.push(\n defineSystemInputAction(systemActions.backButton, system.backButton),\n );\n }\n if (system.homeButton && systemActions.homeButton) {\n actions.push(\n defineSystemInputAction(systemActions.homeButton, system.homeButton),\n );\n }\n if (system.recentAppsButton && systemActions.recentAppsButton) {\n actions.push(\n defineSystemInputAction(\n systemActions.recentAppsButton,\n system.recentAppsButton,\n ),\n );\n }\n }\n\n return actions.filter((action): action is DeviceAction<any> =>\n Boolean(action),\n );\n}\n\nexport function createDefaultMobileActions(\n context: MobileInputActionContext,\n): DeviceAction<any>[] {\n return defineActionsFromInputPrimitives(context.input, {\n size: context.size,\n sleep: context.sleep,\n systemActions: context.systemActions,\n });\n}\n\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n timeMs: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n timeMs?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n sample: {\n timeMs: 2000,\n },\n call: async (param) => {\n const duration = param?.timeMs ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n HarmonyDeviceOpt,\n HarmonyDeviceInputOpt,\n} from './device-options';\n"],"names":["AbstractInterface","defineAction","config","pointFromLocate","locate","missingMessage","Error","defineLocatedPointAction","param","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","tap","point","actionRightClickParamSchema","defineActionRightClick","rightClick","actionDoubleClickParamSchema","defineActionDoubleClick","doubleClick","actionHoverParamSchema","defineActionHover","hover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","keyboard","actionKeyboardPressParamSchema","defineActionKeyboardPress","keyboardPress","actionScrollParamSchema","defineActionScroll","scroll","actionDragAndDropParamSchema","defineActionDragAndDrop","dragAndDrop","from","to","ActionLongPressParamSchema","defineActionLongPress","longPress","ActionSwipeParamSchema","normalizeMobileSwipeParam","screenSize","width","height","start","end","startPoint","endPoint","direction","Math","duration","repeatCount","defineActionSwipe","i","actionClearInputParamSchema","defineActionClearInput","clearInput","actionCursorMoveParamSchema","defineActionCursorMove","times","wait","timeMs","Promise","resolve","setTimeout","arrowKey","ActionPinchParamSchema","defineActionPinch","centerX","centerY","startDistance","endDistance","normalizePinchParam","element","baseDistance","fingerDistance","defineSystemInputAction","call","defineActionsFromInputPrimitives","input","options","actions","pointer","touch","system","systemActions","action","Boolean","createDefaultMobileActions","context","ActionSleepParamSchema","defineActionSleep","getDebug"],"mappings":";;;;;;;;;;;;;AAsIO,MAAeA;;QA8CpB;QA8BA;;AACF;AAKO,MAAMC,eAAe,CAK1BC,SAgBOA;AAGT,SAASC,gBACPC,MAAuC,EACvCC,cAAsB;IAEtB,IAAI,CAACD,QACH,MAAM,IAAIE,MAAMD;IAElB,OAAO;QAAE,GAAGD,OAAO,MAAM,CAAC,EAAE;QAAE,GAAGA,OAAO,MAAM,CAAC,EAAE;IAAC;AACpD;AAEA,SAASG,yBAGPL,MAQD;IACC,OAAOD,aAA8B;QACnC,MAAMC,OAAO,IAAI;QACjB,aAAaA,OAAO,WAAW;QAC/B,gBAAgBA,OAAO,cAAc;QACrC,aAAaA,OAAO,WAAW;QAC/B,QAAQA,OAAO,MAAM;QACrB,MAAM,OAAOM;YACX,MAAMN,OAAO,IAAI,CACfC,gBAAgBK,MAAM,MAAM,EAAEN,OAAO,oBAAoB,GACzDM;QAEJ;IACF;AACF;AAGO,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,MAEON,yBAAsE;QAC3E,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsB;QAC1C;QACA,sBAAsB;QACtB,MAAM,OAAOK;YACX,MAAMD,IAAIC;QACZ;IACF;AAIK,MAAMC,8BAA8BL,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMK,yBAAyB,CACpCC,aAEOV,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaQ;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAA+B;QACnD;QACA,sBAAsB;QACtB,MAAM,OAAOD;YACX,MAAMG,WAAWH;QACnB;IACF;AAIK,MAAMI,+BAA+BR,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMQ,0BAA0B,CACrCC,cAEOb,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAkB;QACtC;QACA,sBAAsB;QACtB,MAAM,OAAOJ;YACX,MAAMM,YAAYN;QACpB;IACF;AAIK,MAAMO,yBAAyBX,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMW,oBAAoB,CAC/BC,QAEOhB,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACA,sBAAsB;QACtB,MAAM,OAAOP;YACX,MAAMS,MAAMT;QACd;IACF;AAIF,MAAMU,yBACJ;AACK,MAAMC,yBAAyBf,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACgB,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQf,4BACL,QAAQ,CAACa,wBACT,QAAQ;IACX,MAAMd,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAW,EACrC,OAAO,CAAC,WACR,QAAQ,CACP;IAEJ,qBAAqBA,EAAAA,OACX,GACP,QAAQ,GACR,QAAQ,CACP;AAEN;AAQO,MAAMkB,oBAAoB,CAC/BC,WAEO5B,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAawB;QACb,QAAQ;YACN,OAAO;YACP,QAAQ;gBAAE,QAAQ;YAAwB;QAC5C;QACA,MAAM,OAAOjB;YAEX,IAAKA,AAA0B,aAA1BA,MAAM,IAAI,EACbA,MAAM,IAAI,GAAG;YAGf,IAAIA,AAAe,YAAfA,MAAM,IAAI,EAAc,YAC1B,MAAMqB,SAAS,UAAU,CAACrB,MAAM,MAAM;YAIxC,IAAI,CAACA,SAAS,CAACA,MAAM,KAAK,EACxB;YAGF,MAAMqB,SAAS,QAAQ,CAACrB,MAAM,KAAK,EAAE;gBACnC,QAAQA,MAAM,MAAM;gBACpB,SAASA,AAAe,eAAfA,MAAM,IAAI;gBACnB,qBAAqBA,MAAM,mBAAmB;YAChD;QACF;IACF;AAIK,MAAMsB,iCAAiCpB,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMqB,4BAA4B,CACvCC,gBAEO/B,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAa6B;QACb,QAAQ;YACN,SAAS;QACX;QACA,MAAM,OAAOtB;YACX,MAAMwB,cAAcxB,MAAM,OAAO,EAAE;gBACjC,QAAQA,MAAM,MAAM;YACtB;QACF;IACF;AAIK,MAAMyB,0BAA0BvB,EAAE,MAAM,CAAC;IAC9C,YAAYA,CAAC,CAADA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMuB,qBAAqB,CAChCC,SAEOlC,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAagC;QACb,QAAQ;YACN,WAAW;YACX,YAAY;YACZ,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACA,MAAM,OAAOzB;YACX,MAAM2B,OAAO3B;QACf;IACF;AAIK,MAAM4B,+BAA+B1B,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAM0B,0BAA0B,CACrCC,cAEOrC,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamC;QACb,QAAQ;YACN,MAAM;gBAAE,QAAQ;YAA6B;YAC7C,IAAI;gBAAE,QAAQ;YAAuB;QACvC;QACA,MAAM,OAAO5B;YACX,MAAM+B,OAAO/B,MAAM,IAAI;YACvB,MAAMgC,KAAKhC,MAAM,EAAE;YACnB,IAAI,CAAC+B,MACH,MAAM,IAAIjC,MAAM;YAElB,IAAI,CAACkC,IACH,MAAM,IAAIlC,MAAM;YAElB,MAAMgC,YACJ;gBAAE,GAAGC,KAAK,MAAM,CAAC,EAAE;gBAAE,GAAGA,KAAK,MAAM,CAAC,EAAE;YAAC,GACvC;gBAAE,GAAGC,GAAG,MAAM,CAAC,EAAE;gBAAE,GAAGA,GAAG,MAAM,CAAC,EAAE;YAAC;QAEvC;IACF;AAGK,MAAMC,6BAA6B/B,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMgC,wBAAwB,CACnCC,YAEOpC,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAakC;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAqB;QACzC;QACA,sBAAsB;QACtB,MAAM,OAAO3B,OAAON;YAClB,MAAMmC,UAAU7B,OAAO;gBAAE,UAAUN,MAAM,QAAQ;YAAC;QACpD;IACF;AAGK,MAAMoC,yBAAyBlC,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,SAASmC,0BACdrC,KAAuB,EACvBsC,UAA6C;IAO7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAM,EAAEG,KAAK,EAAEC,GAAG,EAAE,GAAG1C;IAEvB,MAAM2C,aAAaF,QACf;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;IAAC,IACzC;QAAE,GAAGF,QAAQ;QAAG,GAAGC,SAAS;IAAE;IAElC,IAAII;IAEJ,IAAIF,KACFE,WAAW;QAAE,GAAGF,IAAI,MAAM,CAAC,EAAE;QAAE,GAAGA,IAAI,MAAM,CAAC,EAAE;IAAC;SAC3C,IAAI1C,MAAM,QAAQ,EAAE;QACzB,MAAM6C,YAAY7C,MAAM,SAAS;QACjC,IAAI,CAAC6C,WACH,MAAM,IAAI/C,MAAM;QAElB8C,WAAW;YACT,GACED,WAAW,CAAC,GACXE,CAAAA,AAAc,YAAdA,YACG7C,MAAM,QAAQ,GACd6C,AAAc,WAAdA,YACE,CAAC7C,MAAM,QAAQ,GACf;YACR,GACE2C,WAAW,CAAC,GACXE,CAAAA,AAAc,WAAdA,YACG7C,MAAM,QAAQ,GACd6C,AAAc,SAAdA,YACE,CAAC7C,MAAM,QAAQ,GACf;QACV;IACF,OACE,MAAM,IAAIF,MACR;IAIJ8C,SAAS,CAAC,GAAGE,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACF,SAAS,CAAC,EAAEL;IAC9CK,SAAS,CAAC,GAAGE,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACF,SAAS,CAAC,EAAEJ;IAE9C,MAAMO,WAAW/C,MAAM,QAAQ,IAAI;IAEnC,IAAIgD,cAAc,AAAwB,YAAxB,OAAOhD,MAAM,MAAM,GAAgBA,MAAM,MAAM,GAAG;IACpE,IAAIgD,AAAgB,MAAhBA,aACFA,cAAc;IAGhB,OAAO;QAAEL;QAAYC;QAAUG;QAAUC;IAAY;AACvD;AAEO,MAAMC,oBAAoB,CAACvD,SAIzBD,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa2C;QACb,QAAQ;YACN,OAAO;gBAAE,QAAQ;YAA6B;YAC9C,KAAK;gBAAE,QAAQ;YAA2B;QAC5C;QACA,MAAM,OAAOpC;YACX,MAAM,EAAE2C,UAAU,EAAEC,QAAQ,EAAEG,QAAQ,EAAEC,WAAW,EAAE,GACnDX,0BAA0BrC,OAAO,MAAMN,OAAO,IAAI;YACpD,IAAK,IAAIwD,IAAI,GAAGA,IAAIF,aAAaE,IAC/B,MAAMxD,OAAO,KAAK,CAACiD,YAAYC,UAAU;gBAAEG;YAAS;QAExD;IACF;AAIK,MAAMI,8BAA8BjD,EAAE,MAAM,CAAC;IAClD,QAAQC,4BACL,QAAQ,CAAC,iCACT,QAAQ;AACb;AAKO,MAAMiD,yBAAyB,CACpCC,aAEO5D,aAGL;QACA,MAAM;QACN,aAAauB;QACb,gBAAgB;QAChB,aAAamC;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAyB;QAC7C;QACA,MAAM,OAAOnD;YACX,MAAMqD,WAAWrD,MAAM,MAAM;QAC/B;IACF;AAIK,MAAMsD,8BAA8BpD,EAAE,MAAM,CAAC;IAClD,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;KAAQ,EACtB,QAAQ,CAAC;IACZ,OAAOA,EAAAA,MACE,GACN,GAAG,GACH,GAAG,CAAC,GACJ,OAAO,CAAC,GACR,QAAQ,CACP;AAEN;AAMO,MAAMqD,yBAAyB,CAAC7D,SAI9BD,aAGL;QACA,MAAM;QACN,aACE;QACF,aAAa6D;QACb,QAAQ;YACN,WAAW;YACX,OAAO;QACT;QACA,MAAM,OAAOtD;YACX,MAAMwD,QAAQxD,MAAM,KAAK,IAAI;YAC7B,IAAIN,OAAO,QAAQ,CAAC,UAAU,EAAE,YAC9B,MAAMA,OAAO,QAAQ,CAAC,UAAU,CAACM,MAAM,SAAS,EAAEwD;YAIpD,MAAMC,OACJ/D,OAAO,KAAK,IACV,EAAAgE,SACA,IAAIC,QAAc,CAACC,UAAYC,WAAWD,SAASF,QAAO;YAC9D,MAAMI,WAAW9D,AAAoB,WAApBA,MAAM,SAAS,GAAc,cAAc;YAC5D,IAAK,IAAIkD,IAAI,GAAGA,IAAIM,OAAON,IAAK;gBAC9B,MAAMxD,OAAO,QAAQ,CAAC,aAAa,CAACoE;gBACpC,MAAML,KAAK;YACb;QACF;IACF;AAIK,MAAMM,yBAAyB7D,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;KAAM,EAClB,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AASO,MAAM8D,oBAAoB,CAACtE;IAIhC,IAAI,CAACA,OAAO,KAAK,EACf;IAGF,OAAOD,aAA8D;QACnE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAasE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAe;YACjC,WAAW;YACX,UAAU;QACZ;QACA,MAAM,OAAO/D;YACX,MAAM,EAAEiE,OAAO,EAAEC,OAAO,EAAEC,aAAa,EAAEC,WAAW,EAAErB,QAAQ,EAAE,GAC9DsB,oBAAoBrE,OAAO,MAAMN,OAAO,IAAI;YAC9C,MAAMA,OAAO,KAAK,GAChB;gBAAE,GAAGuE;gBAAS,GAAGC;YAAQ,GACzB;gBAAEC;gBAAeC;gBAAarB;YAAS;QAE3C;IACF;AACF;AAEO,SAASsB,oBACdrE,KAAuB,EACvBsC,UAA6C;IAQ7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAMgC,UAAUtE,MAAM,MAAM;IAC5B,MAAMiE,UAAUK,UACZxB,KAAK,KAAK,CAACwB,QAAQ,MAAM,CAAC,EAAE,IAC5BxB,KAAK,KAAK,CAACP,QAAQ;IACvB,MAAM2B,UAAUI,UACZxB,KAAK,KAAK,CAACwB,QAAQ,MAAM,CAAC,EAAE,IAC5BxB,KAAK,KAAK,CAACN,SAAS;IACxB,MAAMO,WAAW/C,MAAM,QAAQ,IAAI;IAEnC,MAAMuE,eAAezB,KAAK,KAAK,CAACA,KAAK,GAAG,CAACP,OAAOC,UAAU;IAC1D,MAAMgC,iBAAiBxE,MAAM,QAAQ,IAAIuE;IAEzC,MAAMJ,gBAAgBI;IACtB,MAAMH,cACJpE,AAAoB,UAApBA,MAAM,SAAS,GACXuE,eAAeC,iBACf1B,KAAK,GAAG,CAAC,IAAIyB,eAAeC;IAElC,OAAO;QAAEP;QAASC;QAASC;QAAeC;QAAarB;IAAS;AAClE;AAgCA,SAAS0B,wBACP/E,MAA+B,EAC/BgF,IAAyB;IAEzB,OAAOjF,aAAyC;QAC9C,MAAMC,OAAO,IAAI;QACjB,aAAaA,OAAO,WAAW;QAC/B,gBAAgBA,OAAO,cAAc;QACrC,mBAAmBA,OAAO,iBAAiB;QAC3C,kBAAkBA,OAAO,gBAAgB;QACzCgF;IACF;AACF;AAEO,SAASC,iCACdC,KAAsB,EACtBC,UAAuC,CAAC,CAAC;IAEzC,MAAMC,UAAgD,EAAE;IACxD,MAAM,EAAEC,OAAO,EAAE1D,QAAQ,EAAEM,MAAM,EAAEqD,KAAK,EAAEC,MAAM,EAAE,GAAGL;IAErD,IAAIG,SAAS;QACXD,QAAQ,IAAI,CAAC1E,gBAAgB2E,QAAQ,GAAG;QACxC,IAAIA,QAAQ,WAAW,EACrBD,QAAQ,IAAI,CAACnE,wBAAwBoE,QAAQ,WAAW;QAE1D,IAAIA,QAAQ,UAAU,EACpBD,QAAQ,IAAI,CAACtE,uBAAuBuE,QAAQ,UAAU;QAExD,IAAIA,QAAQ,KAAK,EACfD,QAAQ,IAAI,CAAChE,kBAAkBiE,QAAQ,KAAK;QAE9C,IAAIA,QAAQ,WAAW,EACrBD,QAAQ,IAAI,CAACjD,wBAAwBkD,QAAQ,WAAW;QAE1D,IAAIA,QAAQ,SAAS,EACnBD,QAAQ,IAAI,CAAC5C,sBAAsB6C,QAAQ,SAAS;IAExD;IAEA,IAAI1D,UACFyD,QAAQ,IAAI,CACV1D,kBAAkBC,WAClB+B,uBAAuB/B,SAAS,UAAU,GAC1CE,0BAA0BF,SAAS,aAAa,GAChDkC,uBAAuB;QAAElC;QAAU,OAAOwD,QAAQ,KAAK;IAAC;IAI5D,IAAIlD,QACFmD,QAAQ,IAAI,CAACpD,mBAAmBC,OAAO,MAAM;IAG/C,IAAIqD,OAAO,SAASH,QAAQ,IAAI,IAAIA,AAAyB,UAAzBA,QAAQ,YAAY,EACtDC,QAAQ,IAAI,CAAC7B,kBAAkB;QAAE,OAAO+B,MAAM,KAAK;QAAE,MAAMH,QAAQ,IAAI;IAAC;IAG1E,IAAIG,OAAO,SAASH,QAAQ,IAAI,IAAIA,AAAyB,UAAzBA,QAAQ,YAAY,EACtDC,QAAQ,IAAI,CAACd,kBAAkB;QAAE,OAAOgB,MAAM,KAAK;QAAE,MAAMH,QAAQ,IAAI;IAAC;IAG1E,IAAII,UAAUJ,QAAQ,aAAa,EAAE;QACnC,MAAM,EAAEK,aAAa,EAAE,GAAGL;QAC1B,IAAII,OAAO,UAAU,IAAIC,cAAc,UAAU,EAC/CJ,QAAQ,IAAI,CACVL,wBAAwBS,cAAc,UAAU,EAAED,OAAO,UAAU;QAGvE,IAAIA,OAAO,UAAU,IAAIC,cAAc,UAAU,EAC/CJ,QAAQ,IAAI,CACVL,wBAAwBS,cAAc,UAAU,EAAED,OAAO,UAAU;QAGvE,IAAIA,OAAO,gBAAgB,IAAIC,cAAc,gBAAgB,EAC3DJ,QAAQ,IAAI,CACVL,wBACES,cAAc,gBAAgB,EAC9BD,OAAO,gBAAgB;IAI/B;IAEA,OAAOH,QAAQ,MAAM,CAAC,CAACK,SACrBC,QAAQD;AAEZ;AAEO,SAASE,2BACdC,OAAiC;IAEjC,OAAOX,iCAAiCW,QAAQ,KAAK,EAAE;QACrD,MAAMA,QAAQ,IAAI;QAClB,OAAOA,QAAQ,KAAK;QACpB,eAAeA,QAAQ,aAAa;IACtC;AACF;AAGO,MAAMC,yBAAyBrF,EAAE,MAAM,CAAC;IAC7C,QAAQA,EAAAA,MACC,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMsF,oBAAoB,IACxB/F,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa8F;QACb,QAAQ;YACN,QAAQ;QACV;QACA,MAAM,OAAOvF;YACX,MAAM+C,WAAW/C,OAAO,UAAU;YAClCyF,SAAS,wBAAwB,CAAC,aAAa,EAAE1C,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIY,QAAQ,CAACC,UAAYC,WAAWD,SAASb;QACrD;IACF"}
@@ -1 +1 @@
1
- {"version":3,"file":"types.mjs","sources":["../../src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';\nimport type {\n BaseElement,\n LocateResultElement,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { z } from 'zod';\nimport type { TUserPrompt } from './common';\nimport type { ScreenshotItem } from './screenshot-item';\nimport type {\n DetailedLocateParam,\n MidsceneYamlFlowItem,\n ServiceExtractOption,\n} from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport { ServiceError } from './errors';\nexport {\n ExecutionDump,\n ReportActionDump,\n GroupedActionDump,\n} from './dump/report-action-dump';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n cached_input: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n model_description: string | undefined;\n /**\n * Raw top-level `.model` value returned by the model service response.\n */\n response_model_name: string | undefined;\n /**\n * Semantic intent of the model call, such as default, planning, or insight.\n */\n intent: string | undefined;\n /**\n * Config slot where the model config was resolved from. For example, a\n * planning call may use the default slot when no planning model is configured.\n */\n slot: string | undefined;\n request_id: string | undefined;\n};\n\nexport type { LocateResultElement };\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type LocateResultPoint = [number, number];\nexport type Bbox = [number, number, number, number];\nexport type LocateResultBbox = Bbox;\nexport type PixelBbox = Bbox;\n\nexport interface AIElementLocateResponse {\n bbox?: LocateResultBbox;\n point?: LocateResultPoint;\n errors?: string[];\n}\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox?: LocateResultBbox;\n point?: LocateResultPoint;\n references_bbox?: LocateResultBbox[];\n references_point?: LocateResultPoint[];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepLocate: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext {\n /**\n * screenshot of the current UI state. which size is shotSize(be shrunk by screenshotShrinkFactor),\n */\n abstract screenshot: ScreenshotItem;\n\n /**\n * screenshot size after shrinking\n */\n abstract shotSize: Size;\n\n /**\n * The ratio for converting shrunk screenshot coordinates to logical coordinates.\n *\n * Example:\n * - Physical screen width: 3000px, dpr=6\n * - Logical width: 500px\n * - User-defined screenshotShrinkFactor: 2\n * - Actual shrunk screenshot width: 3000 / 2 = 1500px\n * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3\n * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px\n */\n abstract shrunkShotToLogicalRatio: number;\n\n abstract _isFrozen?: boolean;\n\n // @deprecated - backward compatibility for aiLocate\n abstract deprecatedDpr?: number;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type ServiceExtractParam = string | Record<string, string>;\n\nexport type ElementCacheFeature = Record<string, unknown>;\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport type ThinkingLevel = 'off' | 'medium' | 'high';\n\nexport type DeepThinkOption = 'unset' | true | false;\n\nexport interface ServiceTaskInfo {\n durationMs: number;\n formatResponse?: string;\n /**\n * Adapter-extracted content used by Midscene for parsing. This is not the\n * full provider response or choices[0].message.\n */\n rawResponse?: string;\n rawChoiceMessage?: unknown;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n /**\n * Adapter-extracted content from the search-area model call. This is not the\n * full provider response or choices[0].message.\n */\n searchAreaRawResponse?: string;\n searchAreaRawChoiceMessage?: unknown;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport interface DumpMeta {\n logTime: number;\n}\n\nexport type ReportAttributes = Record<\n string,\n string | number | boolean | null | undefined\n>;\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: ReportAttributes;\n}\n\nexport interface ServiceDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: ServiceExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement?: LocateResultElement[];\n matchedRect?: Rect;\n deepLocate?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: ServiceTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialServiceDumpFromSDK = Omit<\n ServiceDump,\n 'logTime' | 'logId' | 'model_name'\n>;\n\nexport interface ServiceResultBase {\n dump: ServiceDump;\n}\n\nexport type LocateResultWithDump = LocateResult & ServiceResultBase;\n\nexport interface ServiceExtractResult<T> extends ServiceResultBase {\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type ServiceAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt extends ServiceExtractOption {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n bbox?: LocateResultBbox;\n point?: LocateResultPoint;\n}\n\nexport type PlanningLocateParamWithLocatedPixelBbox = PlanningLocateParam & {\n /** Pixel bbox of the located element in screenshot coordinates. */\n locatedPixelBbox: PixelBbox;\n};\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n log?: string; // a brief preamble to the user explaining what you’re about to do\n type: string;\n param: ParamType;\n}\n\nexport type SubGoalStatus = 'pending' | 'running' | 'finished';\n\nexport interface SubGoal {\n index: number;\n status: SubGoalStatus;\n description: string;\n logs?: string[];\n}\n\nexport interface RawResponsePlanningAIResponse {\n action: PlanningAction;\n thought?: string;\n log: string;\n memory?: string;\n error?: string;\n finalizeMessage?: string;\n finalizeSuccess?: boolean;\n updateSubGoals?: SubGoal[];\n markFinishedIndexes?: number[];\n}\n\nexport interface PlanningAIResponse\n extends Omit<RawResponsePlanningAIResponse, 'action'> {\n actions?: PlanningAction[];\n usage?: AIUsageInfo;\n /**\n * Adapter-extracted content used by Midscene for parsing. This is not the\n * full provider response or choices[0].message.\n */\n rawResponse?: string;\n rawChoiceMessage?: unknown;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n error?: string;\n reasoning_content?: string;\n shouldContinuePlanning: boolean;\n output?: string; // Output message from <complete> tag (same as finalizeMessage)\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {};\n\nexport interface LongPressParam {\n duration?: number;\n}\n\nexport interface PullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: ScreenshotItem;\n timing?: string;\n}\n\nexport type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n uiContext?: UIContext;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n uiContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: void is intentionally allowed as some executors may not return a value\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n taskId: string;\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n getUiContextStart?: number;\n getUiContextEnd?: number;\n callAiStart?: number;\n callAiEnd?: number;\n beforeInvokeActionHookStart?: number;\n beforeInvokeActionHookEnd?: number;\n callActionStart?: number;\n callActionEnd?: number;\n afterInvokeActionHookStart?: number;\n afterInvokeActionHookEnd?: number;\n captureAfterCallingSnapshotStart?: number;\n captureAfterCallingSnapshotEnd?: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n };\n\nexport interface IExecutionDump extends DumpMeta {\n /** Stable unique identifier for this execution run */\n id?: string;\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n aiActContext?: string;\n}\n\n/*\ntask - service-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskInsightDump = ServiceDump;\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - service-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: ServiceExtractParam;\n domIncluded?: boolean | 'visible-only';\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n ServiceAssertionResponse,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action Space',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport interface ExecutionTaskPlanningParam {\n userInstruction: TUserPrompt;\n userInstructionDisplay?: string;\n aiActContext?: string;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n subGoalStatus?: string;\n memoriesStatus?: string;\n}\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n ExecutionTaskPlanningParam,\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\ntask - planning-locate\n*/\nexport type ExecutionTaskPlanningLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskPlanningLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskPlanningDump = ServiceDump;\n\nexport type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<\n 'Planning',\n ExecutionTaskPlanningLocateParam,\n ExecutionTaskPlanningLocateOutput,\n ExecutionTaskPlanningDump\n>;\n\nexport type ExecutionTaskPlanningLocate =\n ExecutionTask<ExecutionTaskPlanningLocateApply>;\n\n/*\nReport metadata - extracted from ReportActionDump for per-execution writes\n*/\nexport interface ReportMeta {\n groupName: string;\n groupDescription?: string;\n sdkVersion: string;\n modelBriefs: ModelBrief[];\n deviceType?: string;\n}\n\n// Backward-compatible aliases for existing external consumers.\nexport type GroupMeta = ReportMeta;\n\n/*\nReport dump\n*/\nexport interface IReportActionDump {\n sdkVersion: string;\n groupName: string;\n groupDescription?: string;\n modelBriefs: ModelBrief[];\n executions: IExecutionDump[];\n deviceType?: string;\n}\n\n// Backward-compatible aliases for existing external consumers.\nexport type IGroupedActionDump = IReportActionDump;\n\nexport interface ModelBrief {\n /**\n * The intent/category of the model call, for example \"planning\" or \"insight\".\n */\n intent?: string;\n\n /**\n * The model name returned by usage metadata, for example \"gpt-4o\".\n */\n name?: string;\n\n /**\n * Optional human-readable model description, for example \"qwen2.5-vl mode\".\n */\n modelDescription?: string;\n}\n\nexport type InterfaceType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android'\n | string;\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport interface DeviceAction<TParam = any, TReturn = any> {\n name: string;\n description?: string;\n interfaceAlias?: string;\n paramSchema?: z.ZodType<TParam>;\n call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn;\n delayBeforeRunner?: number;\n delayAfterRunner?: number;\n /**\n * An example param object for this action.\n * Locate fields with { prompt } may be resolved to internal pixel bboxes when needed.\n */\n sample?: { [K in keyof TParam]?: any };\n}\n\n/**\n * Type utilities for extracting types from DeviceAction definitions\n */\n\n/**\n * Extract parameter type from a DeviceAction\n */\nexport type ActionParam<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<infer P, any> ? P : never;\n\n/**\n * Extract return type from a DeviceAction\n */\nexport type ActionReturn<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<any, infer R> ? R : never;\n\n/**\n * Web-specific types\n */\nexport interface WebElementInfo extends BaseElement {\n id: string;\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n}\n\n/**\n * Agent\n */\n\nexport type CacheConfig = {\n strategy?: 'read-only' | 'read-write' | 'write-only';\n id: string;\n /**\n * Optional cache directory path.\n * When set, cache files are written to this directory instead of\n * `<MIDSCENE_RUN_DIR>/cache`.\n */\n cacheDir?: string;\n};\n\nexport type Cache =\n | false // No read, no write\n | true // Will throw error at runtime - deprecated\n | CacheConfig; // Object configuration (requires explicit id)\n\nexport interface AgentOpt {\n // @deprecated Use `reportFileName` and `cache.id` instead.\n testId?: string;\n // @deprecated\n cacheId?: string; // Keep backward compatibility, but marked as deprecated\n groupName?: string;\n groupDescription?: string;\n /* if auto generate report, default true */\n generateReport?: boolean;\n /* if persist per-execution dump files next to the report, default false */\n persistExecutionDump?: boolean;\n /* if auto print report msg, default true */\n autoPrintReportMsg?: boolean;\n\n /**\n * Use directory-based report format with separate image files.\n *\n * When enabled:\n * - Screenshots are saved as PNG files in a `screenshots/` subdirectory\n * - Report is generated as `index.html` with relative image paths\n * - Reduces memory usage and report file size\n *\n * IMPORTANT: 'html-and-external-assets' reports must be served via HTTP server\n * (e.g., `npx serve ./report-dir`). The file:// protocol will not\n * work due to browser CORS restrictions.\n *\n * @default 'single-html'\n */\n outputFormat?: 'single-html' | 'html-and-external-assets';\n\n onTaskStartTip?: OnTaskStartTip;\n aiActContext?: string;\n aiActionContext?: string;\n /* custom report file name */\n reportFileName?: string;\n reportAttributes?: ReportAttributes;\n modelConfig?: TModelConfig;\n cache?: Cache;\n /**\n * Maximum number of replanning cycles for aiAct.\n * Defaults are resolved by the active model adapter: 20 for standard planning,\n * 40 for UI-TARS, and 100 for Auto-GLM.\n * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.\n */\n replanningCycleLimit?: number;\n\n /**\n * Wait time in milliseconds after each action execution.\n * This allows the UI to settle and stabilize before the next action.\n * Defaults to 300ms when not provided.\n */\n waitAfterAction?: number;\n\n /**\n * When set to true, Midscene will use the target device's formatted local\n * time instead of the runtime system time. The target interface must implement\n * getDeviceLocalTimeString to provide device-local wall-clock time.\n * Default: false\n */\n useDeviceTime?: boolean;\n\n /**\n * Custom screenshot shrink factor to reduce AI token usage.\n * When set, the screenshot will be scaled down by this factor from the physical resolution.\n *\n * Example:\n * - Physical screen width: 3000px, dpr=6\n * - Logical width: 500px\n * - screenshotShrinkFactor: 2\n * - Actual shrunk screenshot width: 3000 / 2 = 1500px\n * - AI analyzes the 1500px screenshot\n * - Coordinates are transformed back to logical (500px) before actions execute\n *\n * Benefits:\n * - Reduces token usage for high-resolution screenshots\n * - Maintains accuracy by scaling coordinates appropriately\n *\n * Must be >= 1 (shrinking only, enlarging is not supported).\n *\n * @default 1 (no shrinking, uses original physical screenshot)\n */\n screenshotShrinkFactor?: number;\n\n /**\n * Custom OpenAI client factory function\n *\n * If provided, this function will be called to create OpenAI client instances\n * for each AI call, allowing you to:\n * - Wrap clients with observability tools (langsmith, langfuse)\n * - Use custom OpenAI-compatible clients\n * - Apply different configurations based on intent\n *\n * @param config - Resolved model configuration\n * @returns OpenAI client instance (original or wrapped)\n *\n * @example\n * ```typescript\n * createOpenAIClient: async (openai, opts) => {\n * // Wrap with langsmith for planning tasks\n * if (opts.baseURL?.includes('planning')) {\n * return wrapOpenAI(openai, { metadata: { task: 'planning' } });\n * }\n *\n * return openai;\n * }\n * ```\n */\n createOpenAIClient?: CreateOpenAIClientFn;\n}\n\nexport type TestStatus =\n | 'passed'\n | 'failed'\n | 'timedOut'\n | 'skipped'\n | 'interrupted';\n\nexport interface ReportFileAttributes {\n testDuration: number;\n testStatus: TestStatus;\n testTitle: string;\n testId: string;\n testDescription: string;\n}\n\nexport type ReportFileWithAttributes =\n | {\n reportFilePath: string;\n reportAttributes: ReportFileAttributes;\n }\n | {\n reportFilePath?: string;\n reportAttributes: ReportFileAttributes & { testStatus: 'skipped' };\n };\n"],"names":["UIContext"],"mappings":";;;AA+HO,MAAeA;AA4BtB"}
1
+ {"version":3,"file":"types.mjs","sources":["../../src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';\nimport type {\n BaseElement,\n LocateResultElement,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { z } from 'zod';\nimport type { TUserPrompt } from './common';\nimport type { ScreenshotItem } from './screenshot-item';\nimport type {\n DetailedLocateParam,\n MidsceneYamlFlowItem,\n ServiceExtractOption,\n} from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport { ServiceError } from './errors';\nexport {\n ExecutionDump,\n ReportActionDump,\n GroupedActionDump,\n} from './dump/report-action-dump';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n cached_input: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n model_description: string | undefined;\n /**\n * Raw top-level `.model` value returned by the model service response.\n */\n response_model_name: string | undefined;\n /**\n * Semantic intent of the model call, such as default, planning, or insight.\n */\n intent: string | undefined;\n /**\n * Config slot where the model config was resolved from. For example, a\n * planning call may use the default slot when no planning model is configured.\n */\n slot: string | undefined;\n request_id: string | undefined;\n};\n\nexport type { LocateResultElement };\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type LocateResultPoint = [number, number];\nexport type Bbox = [number, number, number, number];\nexport type LocateResultBbox = Bbox;\nexport type PixelBbox = Bbox;\n\nexport interface AIElementLocateResponse {\n bbox?: LocateResultBbox;\n point?: LocateResultPoint;\n errors?: string[];\n}\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox?: LocateResultBbox;\n point?: LocateResultPoint;\n references_bbox?: LocateResultBbox[];\n references_point?: LocateResultPoint[];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepLocate: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext {\n /**\n * screenshot of the current UI state. which size is shotSize(be shrunk by screenshotShrinkFactor),\n */\n abstract screenshot: ScreenshotItem;\n\n /**\n * screenshot size after shrinking\n */\n abstract shotSize: Size;\n\n /**\n * The ratio for converting shrunk screenshot coordinates to logical coordinates.\n *\n * Example:\n * - Physical screen width: 3000px, dpr=6\n * - Logical width: 500px\n * - User-defined screenshotShrinkFactor: 2\n * - Actual shrunk screenshot width: 3000 / 2 = 1500px\n * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3\n * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px\n */\n abstract shrunkShotToLogicalRatio: number;\n\n abstract _isFrozen?: boolean;\n\n // @deprecated - backward compatibility for aiLocate\n abstract deprecatedDpr?: number;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type ServiceExtractParam = string | Record<string, string>;\n\nexport type ElementCacheFeature = Record<string, unknown>;\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport type ThinkingLevel = 'off' | 'medium' | 'high';\n\nexport type DeepThinkOption = 'unset' | true | false;\n\nexport interface ServiceTaskInfo {\n durationMs: number;\n formatResponse?: string;\n /**\n * Adapter-extracted content used by Midscene for parsing. This is not the\n * full provider response or choices[0].message.\n */\n rawResponse?: string;\n rawChoiceMessage?: unknown;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n /**\n * Adapter-extracted content from the search-area model call. This is not the\n * full provider response or choices[0].message.\n */\n searchAreaRawResponse?: string;\n searchAreaRawChoiceMessage?: unknown;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport interface DumpMeta {\n logTime: number;\n}\n\nexport type ReportAttributes = Record<\n string,\n string | number | boolean | null | undefined\n>;\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: ReportAttributes;\n}\n\nexport interface ServiceDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: ServiceExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement?: LocateResultElement[];\n matchedRect?: Rect;\n deepLocate?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: ServiceTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialServiceDumpFromSDK = Omit<\n ServiceDump,\n 'logTime' | 'logId' | 'model_name'\n>;\n\nexport interface ServiceResultBase {\n dump: ServiceDump;\n}\n\nexport type LocateResultWithDump = LocateResult & ServiceResultBase;\n\nexport interface ServiceExtractResult<T> extends ServiceResultBase {\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type ServiceAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt extends ServiceExtractOption {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n bbox?: LocateResultBbox;\n point?: LocateResultPoint;\n}\n\nexport type PlanningLocateParamWithLocatedPixelBbox = PlanningLocateParam & {\n /** Pixel bbox of the located element in screenshot coordinates. */\n locatedPixelBbox: PixelBbox;\n};\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n log?: string; // a brief preamble to the user explaining what you’re about to do\n type: string;\n param: ParamType;\n}\n\nexport type SubGoalStatus = 'pending' | 'running' | 'finished';\n\nexport interface SubGoal {\n index: number;\n status: SubGoalStatus;\n description: string;\n logs?: string[];\n}\n\nexport interface RawResponsePlanningAIResponse {\n action: PlanningAction;\n thought?: string;\n log: string;\n memory?: string;\n error?: string;\n finalizeMessage?: string;\n finalizeSuccess?: boolean;\n updateSubGoals?: SubGoal[];\n markFinishedIndexes?: number[];\n}\n\nexport interface PlanningAIResponse\n extends Omit<RawResponsePlanningAIResponse, 'action'> {\n actions?: PlanningAction[];\n usage?: AIUsageInfo;\n /**\n * Adapter-extracted content used by Midscene for parsing. This is not the\n * full provider response or choices[0].message.\n */\n rawResponse?: string;\n rawChoiceMessage?: unknown;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n error?: string;\n reasoning_content?: string;\n shouldContinuePlanning: boolean;\n output?: string; // Output message from <complete> tag (same as finalizeMessage)\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {};\n\nexport interface LongPressParam {\n duration?: number;\n}\n\nexport interface PullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: ScreenshotItem;\n timing?: string;\n}\n\nexport type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n uiContext?: UIContext;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n uiContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: void is intentionally allowed as some executors may not return a value\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n taskId: string;\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n /**\n * Optional feedback produced by a task for the next planning round.\n * This is execution metadata, not part of the action return value.\n */\n planningFeedback?: string;\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n getUiContextStart?: number;\n getUiContextEnd?: number;\n callAiStart?: number;\n callAiEnd?: number;\n beforeInvokeActionHookStart?: number;\n beforeInvokeActionHookEnd?: number;\n callActionStart?: number;\n callActionEnd?: number;\n afterInvokeActionHookStart?: number;\n afterInvokeActionHookEnd?: number;\n captureAfterCallingSnapshotStart?: number;\n captureAfterCallingSnapshotEnd?: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n };\n\nexport interface IExecutionDump extends DumpMeta {\n /** Stable unique identifier for this execution run */\n id?: string;\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n aiActContext?: string;\n}\n\n/*\ntask - service-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskInsightDump = ServiceDump;\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - service-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: ServiceExtractParam;\n domIncluded?: boolean | 'visible-only';\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n ServiceAssertionResponse,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action Space',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport interface ExecutionTaskPlanningParam {\n userInstruction: TUserPrompt;\n userInstructionDisplay?: string;\n aiActContext?: string;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n subGoalStatus?: string;\n memoriesStatus?: string;\n}\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n ExecutionTaskPlanningParam,\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\ntask - planning-locate\n*/\nexport type ExecutionTaskPlanningLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskPlanningLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskPlanningDump = ServiceDump;\n\nexport type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<\n 'Planning',\n ExecutionTaskPlanningLocateParam,\n ExecutionTaskPlanningLocateOutput,\n ExecutionTaskPlanningDump\n>;\n\nexport type ExecutionTaskPlanningLocate =\n ExecutionTask<ExecutionTaskPlanningLocateApply>;\n\n/*\nReport metadata - extracted from ReportActionDump for per-execution writes\n*/\nexport interface ReportMeta {\n groupName: string;\n groupDescription?: string;\n sdkVersion: string;\n modelBriefs: ModelBrief[];\n deviceType?: string;\n}\n\n// Backward-compatible aliases for existing external consumers.\nexport type GroupMeta = ReportMeta;\n\n/*\nReport dump\n*/\nexport interface IReportActionDump {\n sdkVersion: string;\n groupName: string;\n groupDescription?: string;\n modelBriefs: ModelBrief[];\n executions: IExecutionDump[];\n deviceType?: string;\n}\n\n// Backward-compatible aliases for existing external consumers.\nexport type IGroupedActionDump = IReportActionDump;\n\nexport interface ModelBrief {\n /**\n * The intent/category of the model call, for example \"planning\" or \"insight\".\n */\n intent?: string;\n\n /**\n * The model name returned by usage metadata, for example \"gpt-4o\".\n */\n name?: string;\n\n /**\n * Optional human-readable model description, for example \"qwen2.5-vl mode\".\n */\n modelDescription?: string;\n}\n\nexport type InterfaceType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android'\n | string;\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport interface DeviceAction<TParam = any, TReturn = any> {\n name: string;\n description?: string;\n interfaceAlias?: string;\n paramSchema?: z.ZodType<TParam>;\n call: (\n param: TParam,\n context?: ExecutorContext,\n ) => Promise<TReturn> | TReturn;\n delayBeforeRunner?: number;\n delayAfterRunner?: number;\n /**\n * An example param object for this action.\n * Locate fields with { prompt } may be resolved to internal pixel bboxes when needed.\n */\n sample?: { [K in keyof TParam]?: any };\n}\n\n/**\n * Type utilities for extracting types from DeviceAction definitions\n */\n\n/**\n * Extract parameter type from a DeviceAction\n */\nexport type ActionParam<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<infer P, any> ? P : never;\n\n/**\n * Extract return type from a DeviceAction\n */\nexport type ActionReturn<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<any, infer R> ? R : never;\n\n/**\n * Web-specific types\n */\nexport interface WebElementInfo extends BaseElement {\n id: string;\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n}\n\n/**\n * Agent\n */\n\nexport type CacheConfig = {\n strategy?: 'read-only' | 'read-write' | 'write-only';\n id: string;\n /**\n * Optional cache directory path.\n * When set, cache files are written to this directory instead of\n * `<MIDSCENE_RUN_DIR>/cache`.\n */\n cacheDir?: string;\n};\n\nexport type Cache =\n | false // No read, no write\n | true // Will throw error at runtime - deprecated\n | CacheConfig; // Object configuration (requires explicit id)\n\nexport interface AgentOpt {\n // @deprecated Use `reportFileName` and `cache.id` instead.\n testId?: string;\n // @deprecated\n cacheId?: string; // Keep backward compatibility, but marked as deprecated\n groupName?: string;\n groupDescription?: string;\n /* if auto generate report, default true */\n generateReport?: boolean;\n /* if persist per-execution dump files next to the report, default false */\n persistExecutionDump?: boolean;\n /* if auto print report msg, default true */\n autoPrintReportMsg?: boolean;\n\n /**\n * Use directory-based report format with separate image files.\n *\n * When enabled:\n * - Screenshots are saved as PNG files in a `screenshots/` subdirectory\n * - Report is generated as `index.html` with relative image paths\n * - Reduces memory usage and report file size\n *\n * IMPORTANT: 'html-and-external-assets' reports must be served via HTTP server\n * (e.g., `npx serve ./report-dir`). The file:// protocol will not\n * work due to browser CORS restrictions.\n *\n * @default 'single-html'\n */\n outputFormat?: 'single-html' | 'html-and-external-assets';\n\n onTaskStartTip?: OnTaskStartTip;\n aiActContext?: string;\n aiActionContext?: string;\n /* custom report file name */\n reportFileName?: string;\n reportAttributes?: ReportAttributes;\n modelConfig?: TModelConfig;\n cache?: Cache;\n /**\n * Maximum number of replanning cycles for aiAct.\n * Defaults are resolved by the active model adapter: 20 for standard planning,\n * 40 for UI-TARS, and 100 for Auto-GLM.\n * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.\n */\n replanningCycleLimit?: number;\n\n /**\n * Wait time in milliseconds after each action execution.\n * This allows the UI to settle and stabilize before the next action.\n * Defaults to 300ms when not provided.\n */\n waitAfterAction?: number;\n\n /**\n * When set to true, Midscene will use the target device's formatted local\n * time instead of the runtime system time. The target interface must implement\n * getDeviceLocalTimeString to provide device-local wall-clock time.\n * Default: false\n */\n useDeviceTime?: boolean;\n\n /**\n * Custom screenshot shrink factor to reduce AI token usage.\n * When set, the screenshot will be scaled down by this factor from the physical resolution.\n *\n * Example:\n * - Physical screen width: 3000px, dpr=6\n * - Logical width: 500px\n * - screenshotShrinkFactor: 2\n * - Actual shrunk screenshot width: 3000 / 2 = 1500px\n * - AI analyzes the 1500px screenshot\n * - Coordinates are transformed back to logical (500px) before actions execute\n *\n * Benefits:\n * - Reduces token usage for high-resolution screenshots\n * - Maintains accuracy by scaling coordinates appropriately\n *\n * Must be >= 1 (shrinking only, enlarging is not supported).\n *\n * @default 1 (no shrinking, uses original physical screenshot)\n */\n screenshotShrinkFactor?: number;\n\n /**\n * Custom OpenAI client factory function\n *\n * If provided, this function will be called to create OpenAI client instances\n * for each AI call, allowing you to:\n * - Wrap clients with observability tools (langsmith, langfuse)\n * - Use custom OpenAI-compatible clients\n * - Apply different configurations based on intent\n *\n * @param config - Resolved model configuration\n * @returns OpenAI client instance (original or wrapped)\n *\n * @example\n * ```typescript\n * createOpenAIClient: async (openai, opts) => {\n * // Wrap with langsmith for planning tasks\n * if (opts.baseURL?.includes('planning')) {\n * return wrapOpenAI(openai, { metadata: { task: 'planning' } });\n * }\n *\n * return openai;\n * }\n * ```\n */\n createOpenAIClient?: CreateOpenAIClientFn;\n}\n\nexport type TestStatus =\n | 'passed'\n | 'failed'\n | 'timedOut'\n | 'skipped'\n | 'interrupted';\n\nexport interface ReportFileAttributes {\n testDuration: number;\n testStatus: TestStatus;\n testTitle: string;\n testId: string;\n testDescription: string;\n}\n\nexport type ReportFileWithAttributes =\n | {\n reportFilePath: string;\n reportAttributes: ReportFileAttributes;\n }\n | {\n reportFilePath?: string;\n reportAttributes: ReportFileAttributes & { testStatus: 'skipped' };\n };\n"],"names":["UIContext"],"mappings":";;;AA+HO,MAAeA;AA4BtB"}