@midscene/core 1.2.1-beta-20260108154312.0 → 1.2.1-beta-20260109060244.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/dist/es/agent/agent.mjs +14 -13
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +21 -14
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/llm-planning.mjs +3 -12
  7. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  8. package/dist/es/ai-model/prompt/llm-planning.mjs +7 -2
  9. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  10. package/dist/es/ai-model/ui-tars-planning.mjs +1 -1
  11. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  12. package/dist/es/common.mjs +5 -14
  13. package/dist/es/common.mjs.map +1 -1
  14. package/dist/es/device/index.mjs +3 -28
  15. package/dist/es/device/index.mjs.map +1 -1
  16. package/dist/es/types.mjs.map +1 -1
  17. package/dist/es/utils.mjs +2 -2
  18. package/dist/lib/agent/agent.js +13 -12
  19. package/dist/lib/agent/agent.js.map +1 -1
  20. package/dist/lib/agent/tasks.js +21 -14
  21. package/dist/lib/agent/tasks.js.map +1 -1
  22. package/dist/lib/agent/utils.js +1 -1
  23. package/dist/lib/ai-model/llm-planning.js +2 -11
  24. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  25. package/dist/lib/ai-model/prompt/llm-planning.js +7 -2
  26. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  27. package/dist/lib/ai-model/ui-tars-planning.js +1 -1
  28. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  29. package/dist/lib/common.js +5 -20
  30. package/dist/lib/common.js.map +1 -1
  31. package/dist/lib/device/index.js +15 -52
  32. package/dist/lib/device/index.js.map +1 -1
  33. package/dist/lib/types.js.map +1 -1
  34. package/dist/lib/utils.js +2 -2
  35. package/dist/types/agent/agent.d.ts +15 -4
  36. package/dist/types/agent/tasks.d.ts +1 -2
  37. package/dist/types/common.d.ts +1 -8
  38. package/dist/types/device/index.d.ts +0 -22
  39. package/dist/types/types.d.ts +2 -1
  40. package/package.json +2 -2
  41. package/dist/es/ai-model/prompt/ui-tars-locator.mjs +0 -34
  42. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +0 -1
  43. package/dist/lib/ai-model/prompt/ui-tars-locator.js +0 -68
  44. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +0 -1
  45. package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +0 -1
@@ -1 +1 @@
1
- {"version":3,"file":"common.mjs","sources":["../../src/common.ts"],"sourcesContent":["import type {\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = ChatCompletionMessageParam[];\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\ntype AdaptBboxInput = number[] | string[] | string | (number[] | string[])[];\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(\n locate.bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n\n return locate;\n}\n\nexport function adaptQwen2_5Bbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nfunction normalizeBboxInput(\n bbox: AdaptBboxInput,\n): number[] | string[] | string {\n if (Array.isArray(bbox)) {\n if (Array.isArray(bbox[0])) {\n return bbox[0] as number[] | string[];\n }\n return bbox as number[] | string[];\n }\n return bbox as string;\n}\n\nexport function adaptBbox(\n bbox: AdaptBboxInput,\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n): [number, number, number, number] {\n const normalizedBbox = normalizeBboxInput(bbox);\n\n let result: [number, number, number, number] = [0, 0, 0, 0];\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n result = adaptDoubaoBbox(normalizedBbox, width, height);\n } else if (vlMode === 'gemini') {\n result = adaptGeminiBbox(normalizedBbox as number[], width, height);\n } else if (vlMode === 'qwen3-vl') {\n result = normalized01000(normalizedBbox as number[], width, height);\n } else {\n result = adaptQwen2_5Bbox(normalizedBbox as number[]);\n }\n\n result[2] = Math.min(result[2], rightLimit);\n result[3] = Math.min(result[3], bottomLimit);\n\n return result;\n}\n\n// x1, y1, x2, y2 -> 0-1000\nexport function normalized01000(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n return [\n Math.round((bbox[0] * width) / 1000),\n Math.round((bbox[1] * height) / 1000),\n Math.round((bbox[2] * width) / 1000),\n Math.round((bbox[3] * height) / 1000),\n ];\n}\n\n// y1, x1, y2, x2 -> 0-1000\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n rightLimit = width,\n bottomLimit = height,\n vlMode?: TVlModeTypes | undefined,\n): Rect {\n debugInspectUtils(\n 'adaptBboxToRect',\n bbox,\n width,\n height,\n 'offset',\n offsetX,\n offsetY,\n 'limit',\n rightLimit,\n bottomLimit,\n 'vlMode',\n vlMode,\n );\n const [left, top, right, bottom] = adaptBbox(\n bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n\n // Calculate initial rect dimensions\n const rectLeft = left;\n const rectTop = top;\n let rectWidth = right - left;\n let rectHeight = bottom - top;\n\n // Ensure the rect doesn't exceed image boundaries\n // If right edge exceeds width, adjust the width\n if (rectLeft + rectWidth > width) {\n rectWidth = width - rectLeft;\n }\n\n // If bottom edge exceeds height, adjust the height\n if (rectTop + rectHeight > height) {\n rectHeight = height - rectTop;\n }\n\n // Ensure minimum dimensions (width and height should be at least 1)\n rectWidth = Math.max(1, rectWidth);\n rectHeight = Math.max(1, rectHeight);\n\n const rect = {\n left: rectLeft + offsetX,\n top: rectTop + offsetY,\n width: rectWidth,\n height: rectHeight,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n\n return rect;\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n vlMode: TVlModeTypes | undefined,\n) {\n let minEdgeSize = 500;\n if (vlMode === 'qwen3-vl') {\n minEdgeSize = 1200;\n }\n const defaultPadding = 160;\n\n // Calculate padding needed to reach minimum edge size\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n\n // Calculate new dimensions (ensure minimum edge size)\n let newWidth = Math.max(minEdgeSize, rect.width + paddingSizeHorizontal * 2);\n let newHeight = Math.max(minEdgeSize, rect.height + paddingSizeVertical * 2);\n\n // Calculate initial position with padding\n let newLeft = rect.left - paddingSizeHorizontal;\n let newTop = rect.top - paddingSizeVertical;\n\n // Ensure the rect doesn't exceed screen boundaries by adjusting position\n // If the rect goes beyond the right edge, shift it left\n if (newLeft + newWidth > screenSize.width) {\n newLeft = screenSize.width - newWidth;\n }\n\n // If the rect goes beyond the bottom edge, shift it up\n if (newTop + newHeight > screenSize.height) {\n newTop = screenSize.height - newHeight;\n }\n\n // Ensure the rect doesn't go beyond the left/top edges\n newLeft = Math.max(0, newLeft);\n newTop = Math.max(0, newTop);\n\n // If after position adjustment, the rect still exceeds screen boundaries,\n // clamp the dimensions to fit within screen\n if (newLeft + newWidth > screenSize.width) {\n newWidth = screenSize.width - newLeft;\n }\n if (newTop + newHeight > screenSize.height) {\n newHeight = screenSize.height - newTop;\n }\n\n rect.left = newLeft;\n rect.top = newTop;\n rect.width = newWidth;\n rect.height = newHeight;\n\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: '',\n ...flowParam,\n };\n\n flow.push(flowItem);\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\n// Schema for locator field input (when users provide locate parameters)\nconst MidsceneLocationInput = z\n .object({\n prompt: TUserPromptSchema,\n deepThink: z.boolean().optional(),\n cacheable: z.boolean().optional(),\n xpath: z.union([z.string(), z.boolean()]).optional(),\n })\n .passthrough();\n\n// Schema for locator field result (when AI returns locate results)\nconst MidsceneLocationResult = z\n .object({\n [locateFieldFlagName]: z.literal(true),\n prompt: TUserPromptSchema,\n\n // optional fields\n deepThink: z.boolean().optional(), // only available in vl model\n cacheable: z.boolean().optional(),\n xpath: z.boolean().optional(), // preset result for xpath\n\n // these two fields will only appear in the result\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\n// Export the result type - this is used for runtime results that include center and rect\nexport type MidsceneLocationResultType = z.infer<typeof MidsceneLocationResult>;\n\n// Export the input type - this is the inferred type from getMidsceneLocationSchema()\nexport type MidsceneLocationInputType = z.infer<typeof MidsceneLocationInput>;\n\n/**\n * Returns the schema for locator fields.\n * This now returns the input schema which is more permissive and suitable for validation.\n */\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationInput;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodObject\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n\n // Method 1: Check for the location field flag (for result schema)\n if (locateFieldFlagName in shape) {\n return true;\n }\n\n // Method 2: Check if it's the input schema by checking for 'prompt' field\n // Input schema has 'prompt' as a required field\n if ('prompt' in shape && shape.prompt) {\n return true;\n }\n }\n\n return false;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return field.prompt.prompt; // TODO: dump images if necessary\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = fieldValue.prompt.prompt;\n }\n }\n }\n }\n }\n\n return result;\n};\n\nexport const loadActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue && typeof fieldValue === 'string') {\n result[fieldName] = {\n [locateFieldFlagName]: true,\n prompt: fieldValue,\n };\n }\n }\n\n return result;\n};\n\n/**\n * Parse and validate action parameters using Zod schema.\n * All fields are validated through Zod, EXCEPT locator fields which are skipped.\n * Default values defined in the schema are automatically applied.\n *\n * Locator fields are special business logic fields with complex validation requirements,\n * so they are intentionally excluded from Zod parsing and use existing validation logic.\n */\nexport const parseActionParam = (\n rawParam: Record<string, any> | undefined,\n zodSchema?: z.ZodType<any>,\n): Record<string, any> | undefined => {\n // If no schema is provided, return undefined (action takes no parameters)\n if (!zodSchema) {\n return undefined;\n }\n\n // Handle undefined or null rawParam by providing an empty object\n const param = rawParam ?? {};\n\n // Find all locate fields in the schema\n const locateFields = findAllMidsceneLocatorField(zodSchema);\n\n // If there are no locate fields, just do normal validation\n if (locateFields.length === 0) {\n return zodSchema.parse(param);\n }\n\n // Extract locate field values to restore later\n const locateFieldValues: Record<string, any> = {};\n for (const fieldName of locateFields) {\n if (fieldName in param) {\n locateFieldValues[fieldName] = param[fieldName];\n }\n }\n\n // Build params for validation - skip locate fields and use dummy values\n const paramsForValidation: Record<string, any> = {};\n for (const key in param) {\n if (locateFields.includes(key)) {\n // Use dummy value to satisfy schema validation\n paramsForValidation[key] = { prompt: '_dummy_' };\n } else {\n paramsForValidation[key] = param[key];\n }\n }\n\n // Validate with dummy locate values\n const validated = zodSchema.parse(paramsForValidation);\n\n // Restore the actual locate field values (unvalidated, as per business requirement)\n for (const fieldName in locateFieldValues) {\n validated[fieldName] = locateFieldValues[fieldName];\n }\n\n return validated;\n};\n\nexport const finalizeActionName = 'Finalize';\n\n/**\n * Get a readable time string for the current time\n * @param format - Optional format string. Supports: YYYY, MM, DD, HH, mm, ss. Default: 'YYYY-MM-DD HH:mm:ss'\n * @returns A formatted time string with format label\n */\nexport const getReadableTimeString = (\n format = 'YYYY-MM-DD HH:mm:ss',\n): string => {\n const now = new Date();\n const year = now.getFullYear();\n const month = String(now.getMonth() + 1).padStart(2, '0');\n const day = String(now.getDate()).padStart(2, '0');\n const hours = String(now.getHours()).padStart(2, '0');\n const minutes = String(now.getMinutes()).padStart(2, '0');\n const seconds = String(now.getSeconds()).padStart(2, '0');\n\n const timeString = format\n .replace('YYYY', String(year))\n .replace('MM', month)\n .replace('DD', day)\n .replace('HH', hours)\n .replace('mm', minutes)\n .replace('ss', seconds);\n\n return `${timeString} (${format})`;\n};\n"],"names":["defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","rightLimit","bottomLimit","vlMode","adaptBbox","adaptQwen2_5Bbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","bboxList","Array","item","x","y","normalizeBboxInput","normalizedBbox","adaptGeminiBbox","normalized01000","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rectLeft","rectTop","rectWidth","rectHeight","rect","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","newWidth","newHeight","newLeft","newTop","markupImageForLLM","screenshotBase64","tree","size","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","flow","plan","verb","action","console","flowKey","flowParam","dumpActionParam","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","MidsceneLocationInput","getMidsceneLocationSchema","ifMidsceneLocatorField","field","actualField","shape","dumpMidsceneLocatorField","String","findAllMidsceneLocatorField","zodType","requiredOnly","zodObject","keys","Object","key","jsonObject","zodSchema","locatorFields","fieldName","fieldValue","loadActionParam","parseActionParam","rawParam","param","locateFields","locateFieldValues","paramsForValidation","validated","finalizeActionName","getReadableTimeString","format","now","Date","year","month","day","hours","minutes","seconds","timeString"],"mappings":";;;;;;AAuBA,MAAMA,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAI5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAGhC,IAAKL,OAAe,OAAO,IAAI,CAACA,QAAQ,MAAM;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAQ,MACVA,OAAO,IAAI,GAAGM,UACZN,OAAO,IAAI,EACXC,OACAC,QACAC,YACAC,aACAC;IAIJ,OAAOL;AACT;AAEO,SAASO,iBACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;QACN,YAAnB,OAAOY,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;KAC1B;IACD,OAAOgB;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCP,KAAa,EACbC,MAAc;IAEda,OACEd,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOM,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;YAC5CW,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;SAC7C;QAEH,MAAM,IAAIS,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,WAAqB,EAAE;IAC3B,IAAIC,MAAM,OAAO,CAACX,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BF,SAAS,IAAI,CAACD,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BF,SAAS,IAAI,CAACD,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEJ,SAAS,IAAI,CAACD,OAAOG;IAEzB;SAEAF,WAAWV;IAGb,IAAIU,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLL,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;QACpCW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;KACrC;IAIH,IACEgB,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLL,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU,QAAQN,kBAAkB;QAEhEiB,KAAK,GAAG,CACNZ,OACAY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACNX,QACAW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIY,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;QACpCW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;KACrC;IAGH,MAAMO,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEA,SAASc,mBACPf,IAAoB;IAEpB,IAAIW,MAAM,OAAO,CAACX,OAChB;QAAA,IAAIW,MAAM,OAAO,CAACX,IAAI,CAAC,EAAE,GACvB,OAAOA,IAAI,CAAC,EAAE;IAChB;IAGF,OAAOA;AACT;AAEO,SAASF,UACdE,IAAoB,EACpBP,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAEhC,MAAMmB,iBAAiBD,mBAAmBf;IAE1C,IAAII,SAA2C;QAAC;QAAG;QAAG;QAAG;KAAE;IAEzDA,SADEP,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,SACvBS,gBAAgBU,gBAAgBvB,OAAOC,UACvCG,AAAW,aAAXA,SACAoB,gBAAgBD,gBAA4BvB,OAAOC,UACnDG,AAAW,eAAXA,SACAqB,gBAAgBF,gBAA4BvB,OAAOC,UAEnDK,iBAAiBiB;IAG5BZ,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAET;IAChCS,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAER;IAEhC,OAAOQ;AACT;AAGO,SAASc,gBACdlB,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,OAAO;QACLW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;QAChCW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;KACjC;AACH;AAGO,SAASuB,gBACdjB,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,MAAMyB,OAAOd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC5C,MAAM2B,MAAMf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC5C,MAAM2B,QAAQhB,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC7C,MAAM6B,SAASjB,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC/C,OAAO;QAACyB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdvB,IAAc,EACdP,KAAa,EACbC,MAAc,EACd8B,UAAU,CAAC,EACXC,UAAU,CAAC,EACX9B,aAAaF,KAAK,EAClBG,cAAcF,MAAM,EACpBG,MAAiC;IAEjCR,kBACE,mBACAW,MACAP,OACAC,QACA,UACA8B,SACAC,SACA,SACA9B,YACAC,aACA,UACAC;IAEF,MAAM,CAACsB,MAAMC,KAAKC,OAAOC,OAAO,GAAGxB,UACjCE,MACAP,OACAC,QACAC,YACAC,aACAC;IAIF,MAAM6B,WAAWP;IACjB,MAAMQ,UAAUP;IAChB,IAAIQ,YAAYP,QAAQF;IACxB,IAAIU,aAAaP,SAASF;IAI1B,IAAIM,WAAWE,YAAYnC,OACzBmC,YAAYnC,QAAQiC;IAItB,IAAIC,UAAUE,aAAanC,QACzBmC,aAAanC,SAASiC;IAIxBC,YAAYvB,KAAK,GAAG,CAAC,GAAGuB;IACxBC,aAAaxB,KAAK,GAAG,CAAC,GAAGwB;IAEzB,MAAMC,OAAO;QACX,MAAMJ,WAAWF;QACjB,KAAKG,UAAUF;QACf,OAAOG;QACP,QAAQC;IACV;IACAxC,kBAAkB,4BAA4ByC;IAE9C,OAAOA;AACT;AAEO,SAASC,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdR,IAAU,EACVS,UAAgB,EAChB1C,MAAgC;IAEhC,IAAI2C,cAAc;IAClB,IAAI3C,AAAW,eAAXA,QACF2C,cAAc;IAEhB,MAAMC,iBAAiB;IAGvB,MAAMC,wBACJZ,KAAK,KAAK,GAAGU,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAcV,KAAK,KAAI,IAAK,KACvCW;IACN,MAAME,sBACJb,KAAK,MAAM,GAAGU,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAcV,KAAK,MAAK,IAAK,KACxCW;IAGN,IAAIG,WAAWvC,KAAK,GAAG,CAACmC,aAAaV,KAAK,KAAK,GAAGY,AAAwB,IAAxBA;IAClD,IAAIG,YAAYxC,KAAK,GAAG,CAACmC,aAAaV,KAAK,MAAM,GAAGa,AAAsB,IAAtBA;IAGpD,IAAIG,UAAUhB,KAAK,IAAI,GAAGY;IAC1B,IAAIK,SAASjB,KAAK,GAAG,GAAGa;IAIxB,IAAIG,UAAUF,WAAWL,WAAW,KAAK,EACvCO,UAAUP,WAAW,KAAK,GAAGK;IAI/B,IAAIG,SAASF,YAAYN,WAAW,MAAM,EACxCQ,SAASR,WAAW,MAAM,GAAGM;IAI/BC,UAAUzC,KAAK,GAAG,CAAC,GAAGyC;IACtBC,SAAS1C,KAAK,GAAG,CAAC,GAAG0C;IAIrB,IAAID,UAAUF,WAAWL,WAAW,KAAK,EACvCK,WAAWL,WAAW,KAAK,GAAGO;IAEhC,IAAIC,SAASF,YAAYN,WAAW,MAAM,EACxCM,YAAYN,WAAW,MAAM,GAAGQ;IAGlCjB,KAAK,IAAI,GAAGgB;IACZhB,KAAK,GAAG,GAAGiB;IACXjB,KAAK,KAAK,GAAGc;IACbd,KAAK,MAAM,GAAGe;IAEd,OAAOf;AACT;AAEO,eAAekB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCC,IAAU;IAEV,MAAMC,eAAeC,WAAWH;IAChC,MAAMI,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBT;QAChB,sBAAsBK;QACtBH;IACF;IACA,OAAOM;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC;IAEhC,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQH,MAAO;QACxB,MAAMI,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASJ,YAAY,IAAI,CAAC,CAACI,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAEF,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAMG,UAAUF,OAAO,cAAc,IAAID;QACzC,MAAMI,YAAYH,OAAO,WAAW,GAChCI,gBAAgBN,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAEL,MAAMK,WAAiC;YACrC,CAACH,QAAQ,EAAE;YACX,GAAGC,SAAS;QACd;QAEAN,KAAK,IAAI,CAACQ;IACZ;IAEA,OAAOR;AACT;AAGO,MAAMS,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;IAChB,KAAKA,EAAE,MAAM,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,EAAE,MAAM,CAAC;IAC9C,QAAQA,EAAAA,KACA,CACJA,EAAE,MAAM,CAAC;QACP,MAAMA,EAAE,MAAM;QACd,KAAKA,EAAE,MAAM;IACf,IAED,QAAQ;IACX,yBAAyBA,EAAE,OAAO,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,EAAE,KAAK,CAAC;IACvCA,EAAE,MAAM;IACRA,EAAAA,MACS,CAAC;QACN,QAAQA,EAAE,MAAM;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAG5B,MAAMC,wBAAwBN,EAAAA,MACrB,CAAC;IACN,QAAQI;IACR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,OAAO;KAAG,EAAE,QAAQ;AACpD,GACC,WAAW;AAGiBA,EAAAA,MACtB,CAAC;IACN,CAACK,oBAAoB,EAAEL,EAAE,OAAO,CAAC;IACjC,QAAQI;IAGR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,OAAO,GAAG,QAAQ;IAG3B,QAAQA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAYP,MAAMK,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;IAErC,IAAIC,cAAcD;IAClB,IAAIC,YAAY,IAAI,EAAE,aAAa,eACjCA,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAIA,YAAY,IAAI,EAAE,aAAa,aAAa;QAC9C,MAAMC,QAAQD,YAAY,IAAI,CAAC,KAAK;QAGpC,IAAIL,uBAAuBM,OACzB,OAAO;QAKT,IAAI,YAAYA,SAASA,MAAM,MAAM,EACnC,OAAO;IAEX;IAEA,OAAO;AACT;AAEO,MAAMC,2BAA2B,CAACH;IACvC1E,OACEyE,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOA,MAAM,MAAM,CAAC,MAAM;IAE9B;IAGA,OAAOI,OAAOJ;AAChB;AAEO,MAAMK,8BAA8B,CACzCC,SACAC;IAEA,IAAI,CAACD,SACH,OAAO,EAAE;IAIX,MAAME,YAAYF;IAClB,IAAIE,UAAU,IAAI,EAAE,aAAa,eAAeA,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOC,OAAO,IAAI,CAACF,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACE;YAClB,MAAMX,QAAQQ,UAAU,KAAK,CAACG,IAAI;YAClC,IAAI,CAACZ,uBAAuBC,QAC1B,OAAO;YAIT,IAAIO,cACF,OAAOP,MAAM,IAAI,EAAE,aAAa;YAGlC,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAMZ,kBAAkB,CAC7BwB,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM1F,SAAS;QAAE,GAAGyF,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAa7F,MAAM,CAAC4F,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACT7F,MAAM,CAAC4F,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1B7F,MAAM,CAAC4F,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxB7F,MAAM,CAAC4F,UAAU,GAAGC,WAAW,MAAM,CAAC,MAAM;gBAC9C;YACF;QACF;IAEJ;IAEA,OAAO7F;AACT;AAEO,MAAM8F,kBAAkB,CAC7BL,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM1F,SAAS;QAAE,GAAGyF,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAa7F,MAAM,CAAC4F,UAAU;QACpC,IAAIC,cAAc,AAAsB,YAAtB,OAAOA,YACvB7F,MAAM,CAAC4F,UAAU,GAAG;YAClB,CAACnB,oBAAoB,EAAE;YACvB,QAAQoB;QACV;IAEJ;IAEA,OAAO7F;AACT;AAUO,MAAM+F,mBAAmB,CAC9BC,UACAN;IAGA,IAAI,CAACA,WACH;IAIF,MAAMO,QAAQD,YAAY,CAAC;IAG3B,MAAME,eAAehB,4BAA4BQ;IAGjD,IAAIQ,AAAwB,MAAxBA,aAAa,MAAM,EACrB,OAAOR,UAAU,KAAK,CAACO;IAIzB,MAAME,oBAAyC,CAAC;IAChD,KAAK,MAAMP,aAAaM,aACtB,IAAIN,aAAaK,OACfE,iBAAiB,CAACP,UAAU,GAAGK,KAAK,CAACL,UAAU;IAKnD,MAAMQ,sBAA2C,CAAC;IAClD,IAAK,MAAMZ,OAAOS,MAChB,IAAIC,aAAa,QAAQ,CAACV,MAExBY,mBAAmB,CAACZ,IAAI,GAAG;QAAE,QAAQ;IAAU;SAE/CY,mBAAmB,CAACZ,IAAI,GAAGS,KAAK,CAACT,IAAI;IAKzC,MAAMa,YAAYX,UAAU,KAAK,CAACU;IAGlC,IAAK,MAAMR,aAAaO,kBACtBE,SAAS,CAACT,UAAU,GAAGO,iBAAiB,CAACP,UAAU;IAGrD,OAAOS;AACT;AAEO,MAAMC,qBAAqB;AAO3B,MAAMC,wBAAwB,CACnCC,SAAS,qBAAqB;IAE9B,MAAMC,MAAM,IAAIC;IAChB,MAAMC,OAAOF,IAAI,WAAW;IAC5B,MAAMG,QAAQ3B,OAAOwB,IAAI,QAAQ,KAAK,GAAG,QAAQ,CAAC,GAAG;IACrD,MAAMI,MAAM5B,OAAOwB,IAAI,OAAO,IAAI,QAAQ,CAAC,GAAG;IAC9C,MAAMK,QAAQ7B,OAAOwB,IAAI,QAAQ,IAAI,QAAQ,CAAC,GAAG;IACjD,MAAMM,UAAU9B,OAAOwB,IAAI,UAAU,IAAI,QAAQ,CAAC,GAAG;IACrD,MAAMO,UAAU/B,OAAOwB,IAAI,UAAU,IAAI,QAAQ,CAAC,GAAG;IAErD,MAAMQ,aAAaT,OAChB,OAAO,CAAC,QAAQvB,OAAO0B,OACvB,OAAO,CAAC,MAAMC,OACd,OAAO,CAAC,MAAMC,KACd,OAAO,CAAC,MAAMC,OACd,OAAO,CAAC,MAAMC,SACd,OAAO,CAAC,MAAMC;IAEjB,OAAO,GAAGC,WAAW,EAAE,EAAET,OAAO,CAAC,CAAC;AACpC"}
1
+ {"version":3,"file":"common.mjs","sources":["../../src/common.ts"],"sourcesContent":["import type {\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = ChatCompletionMessageParam[];\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\ntype AdaptBboxInput = number[] | string[] | string | (number[] | string[])[];\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(\n locate.bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n\n return locate;\n}\n\nexport function adaptQwen2_5Bbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nfunction normalizeBboxInput(\n bbox: AdaptBboxInput,\n): number[] | string[] | string {\n if (Array.isArray(bbox)) {\n if (Array.isArray(bbox[0])) {\n return bbox[0] as number[] | string[];\n }\n return bbox as number[] | string[];\n }\n return bbox as string;\n}\n\nexport function adaptBbox(\n bbox: AdaptBboxInput,\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n): [number, number, number, number] {\n const normalizedBbox = normalizeBboxInput(bbox);\n\n let result: [number, number, number, number] = [0, 0, 0, 0];\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n result = adaptDoubaoBbox(normalizedBbox, width, height);\n } else if (vlMode === 'gemini') {\n result = adaptGeminiBbox(normalizedBbox as number[], width, height);\n } else if (vlMode === 'qwen3-vl') {\n result = normalized01000(normalizedBbox as number[], width, height);\n } else {\n result = adaptQwen2_5Bbox(normalizedBbox as number[]);\n }\n\n result[2] = Math.min(result[2], rightLimit);\n result[3] = Math.min(result[3], bottomLimit);\n\n return result;\n}\n\n// x1, y1, x2, y2 -> 0-1000\nexport function normalized01000(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n return [\n Math.round((bbox[0] * width) / 1000),\n Math.round((bbox[1] * height) / 1000),\n Math.round((bbox[2] * width) / 1000),\n Math.round((bbox[3] * height) / 1000),\n ];\n}\n\n// y1, x1, y2, x2 -> 0-1000\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n rightLimit = width,\n bottomLimit = height,\n vlMode?: TVlModeTypes | undefined,\n): Rect {\n debugInspectUtils(\n 'adaptBboxToRect',\n bbox,\n width,\n height,\n 'offset',\n offsetX,\n offsetY,\n 'limit',\n rightLimit,\n bottomLimit,\n 'vlMode',\n vlMode,\n );\n const [left, top, right, bottom] = adaptBbox(\n bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n\n // Calculate initial rect dimensions\n const rectLeft = left;\n const rectTop = top;\n let rectWidth = right - left;\n let rectHeight = bottom - top;\n\n // Ensure the rect doesn't exceed image boundaries\n // If right edge exceeds width, adjust the width\n if (rectLeft + rectWidth > width) {\n rectWidth = width - rectLeft;\n }\n\n // If bottom edge exceeds height, adjust the height\n if (rectTop + rectHeight > height) {\n rectHeight = height - rectTop;\n }\n\n // Ensure minimum dimensions (width and height should be at least 1)\n rectWidth = Math.max(1, rectWidth);\n rectHeight = Math.max(1, rectHeight);\n\n const rect = {\n left: rectLeft + offsetX,\n top: rectTop + offsetY,\n width: rectWidth,\n height: rectHeight,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n\n return rect;\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n vlMode: TVlModeTypes | undefined,\n) {\n let minEdgeSize = 500;\n if (vlMode === 'qwen3-vl') {\n minEdgeSize = 1200;\n }\n const defaultPadding = 160;\n\n // Calculate padding needed to reach minimum edge size\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n\n // Calculate new dimensions (ensure minimum edge size)\n let newWidth = Math.max(minEdgeSize, rect.width + paddingSizeHorizontal * 2);\n let newHeight = Math.max(minEdgeSize, rect.height + paddingSizeVertical * 2);\n\n // Calculate initial position with padding\n let newLeft = rect.left - paddingSizeHorizontal;\n let newTop = rect.top - paddingSizeVertical;\n\n // Ensure the rect doesn't exceed screen boundaries by adjusting position\n // If the rect goes beyond the right edge, shift it left\n if (newLeft + newWidth > screenSize.width) {\n newLeft = screenSize.width - newWidth;\n }\n\n // If the rect goes beyond the bottom edge, shift it up\n if (newTop + newHeight > screenSize.height) {\n newTop = screenSize.height - newHeight;\n }\n\n // Ensure the rect doesn't go beyond the left/top edges\n newLeft = Math.max(0, newLeft);\n newTop = Math.max(0, newTop);\n\n // If after position adjustment, the rect still exceeds screen boundaries,\n // clamp the dimensions to fit within screen\n if (newLeft + newWidth > screenSize.width) {\n newWidth = screenSize.width - newLeft;\n }\n if (newTop + newHeight > screenSize.height) {\n newHeight = screenSize.height - newTop;\n }\n\n rect.left = newLeft;\n rect.top = newTop;\n rect.width = newWidth;\n rect.height = newHeight;\n\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: '',\n ...flowParam,\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\n// Schema for locator field input (when users provide locate parameters)\nconst MidsceneLocationInput = z\n .object({\n prompt: TUserPromptSchema,\n deepThink: z.boolean().optional(),\n cacheable: z.boolean().optional(),\n xpath: z.union([z.string(), z.boolean()]).optional(),\n })\n .passthrough();\n\n// Schema for locator field result (when AI returns locate results)\nconst MidsceneLocationResult = z\n .object({\n [locateFieldFlagName]: z.literal(true),\n prompt: TUserPromptSchema,\n\n // optional fields\n deepThink: z.boolean().optional(), // only available in vl model\n cacheable: z.boolean().optional(),\n xpath: z.boolean().optional(), // preset result for xpath\n\n // these two fields will only appear in the result\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\n// Export the result type - this is used for runtime results that include center and rect\nexport type MidsceneLocationResultType = z.infer<typeof MidsceneLocationResult>;\n\n// Export the input type - this is the inferred type from getMidsceneLocationSchema()\nexport type MidsceneLocationInputType = z.infer<typeof MidsceneLocationInput>;\n\n/**\n * Returns the schema for locator fields.\n * This now returns the input schema which is more permissive and suitable for validation.\n */\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationInput;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodObject\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n\n // Method 1: Check for the location field flag (for result schema)\n if (locateFieldFlagName in shape) {\n return true;\n }\n\n // Method 2: Check if it's the input schema by checking for 'prompt' field\n // Input schema has 'prompt' as a required field\n if ('prompt' in shape && shape.prompt) {\n return true;\n }\n }\n\n return false;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return field.prompt.prompt; // TODO: dump images if necessary\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = fieldValue.prompt.prompt;\n }\n }\n }\n }\n }\n\n return result;\n};\n\nexport const loadActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue && typeof fieldValue === 'string') {\n result[fieldName] = {\n [locateFieldFlagName]: true,\n prompt: fieldValue,\n };\n }\n }\n\n return result;\n};\n\n/**\n * Parse and validate action parameters using Zod schema.\n * All fields are validated through Zod, EXCEPT locator fields which are skipped.\n * Default values defined in the schema are automatically applied.\n *\n * Locator fields are special business logic fields with complex validation requirements,\n * so they are intentionally excluded from Zod parsing and use existing validation logic.\n */\nexport const parseActionParam = (\n rawParam: Record<string, any> | undefined,\n zodSchema?: z.ZodType<any>,\n): Record<string, any> | undefined => {\n // If no schema is provided, return undefined (action takes no parameters)\n if (!zodSchema) {\n return undefined;\n }\n\n // Handle undefined or null rawParam by providing an empty object\n const param = rawParam ?? {};\n\n // Find all locate fields in the schema\n const locateFields = findAllMidsceneLocatorField(zodSchema);\n\n // If there are no locate fields, just do normal validation\n if (locateFields.length === 0) {\n return zodSchema.parse(param);\n }\n\n // Extract locate field values to restore later\n const locateFieldValues: Record<string, any> = {};\n for (const fieldName of locateFields) {\n if (fieldName in param) {\n locateFieldValues[fieldName] = param[fieldName];\n }\n }\n\n // Build params for validation - skip locate fields and use dummy values\n const paramsForValidation: Record<string, any> = {};\n for (const key in param) {\n if (locateFields.includes(key)) {\n // Use dummy value to satisfy schema validation\n paramsForValidation[key] = { prompt: '_dummy_' };\n } else {\n paramsForValidation[key] = param[key];\n }\n }\n\n // Validate with dummy locate values\n const validated = zodSchema.parse(paramsForValidation);\n\n // Restore the actual locate field values (unvalidated, as per business requirement)\n for (const fieldName in locateFieldValues) {\n validated[fieldName] = locateFieldValues[fieldName];\n }\n\n return validated;\n};\n"],"names":["defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","rightLimit","bottomLimit","vlMode","adaptBbox","adaptQwen2_5Bbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","bboxList","Array","item","x","y","normalizeBboxInput","normalizedBbox","adaptGeminiBbox","normalized01000","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rectLeft","rectTop","rectWidth","rectHeight","rect","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","newWidth","newHeight","newLeft","newTop","markupImageForLLM","screenshotBase64","tree","size","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","verb","action","console","flowKey","flowParam","dumpActionParam","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","MidsceneLocationInput","getMidsceneLocationSchema","ifMidsceneLocatorField","field","actualField","shape","dumpMidsceneLocatorField","String","findAllMidsceneLocatorField","zodType","requiredOnly","zodObject","keys","Object","key","jsonObject","zodSchema","locatorFields","fieldName","fieldValue","loadActionParam","parseActionParam","rawParam","param","locateFields","locateFieldValues","paramsForValidation","validated"],"mappings":";;;;;;AAuBA,MAAMA,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAI5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAGhC,IAAKL,OAAe,OAAO,IAAI,CAACA,QAAQ,MAAM;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAQ,MACVA,OAAO,IAAI,GAAGM,UACZN,OAAO,IAAI,EACXC,OACAC,QACAC,YACAC,aACAC;IAIJ,OAAOL;AACT;AAEO,SAASO,iBACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;QACN,YAAnB,OAAOY,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;KAC1B;IACD,OAAOgB;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCP,KAAa,EACbC,MAAc;IAEda,OACEd,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOM,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;YAC5CW,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;SAC7C;QAEH,MAAM,IAAIS,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,WAAqB,EAAE;IAC3B,IAAIC,MAAM,OAAO,CAACX,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BF,SAAS,IAAI,CAACD,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BF,SAAS,IAAI,CAACD,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEJ,SAAS,IAAI,CAACD,OAAOG;IAEzB;SAEAF,WAAWV;IAGb,IAAIU,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLL,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;QACpCW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;KACrC;IAIH,IACEgB,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLL,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU,QAAQN,kBAAkB;QAEhEiB,KAAK,GAAG,CACNZ,OACAY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACNX,QACAW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIY,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;QACpCW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;KACrC;IAGH,MAAMO,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEA,SAASc,mBACPf,IAAoB;IAEpB,IAAIW,MAAM,OAAO,CAACX,OAChB;QAAA,IAAIW,MAAM,OAAO,CAACX,IAAI,CAAC,EAAE,GACvB,OAAOA,IAAI,CAAC,EAAE;IAChB;IAGF,OAAOA;AACT;AAEO,SAASF,UACdE,IAAoB,EACpBP,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAEhC,MAAMmB,iBAAiBD,mBAAmBf;IAE1C,IAAII,SAA2C;QAAC;QAAG;QAAG;QAAG;KAAE;IAEzDA,SADEP,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,SACvBS,gBAAgBU,gBAAgBvB,OAAOC,UACvCG,AAAW,aAAXA,SACAoB,gBAAgBD,gBAA4BvB,OAAOC,UACnDG,AAAW,eAAXA,SACAqB,gBAAgBF,gBAA4BvB,OAAOC,UAEnDK,iBAAiBiB;IAG5BZ,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAET;IAChCS,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAER;IAEhC,OAAOQ;AACT;AAGO,SAASc,gBACdlB,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,OAAO;QACLW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;QAChCW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;KACjC;AACH;AAGO,SAASuB,gBACdjB,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,MAAMyB,OAAOd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC5C,MAAM2B,MAAMf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC5C,MAAM2B,QAAQhB,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC7C,MAAM6B,SAASjB,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC/C,OAAO;QAACyB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdvB,IAAc,EACdP,KAAa,EACbC,MAAc,EACd8B,UAAU,CAAC,EACXC,UAAU,CAAC,EACX9B,aAAaF,KAAK,EAClBG,cAAcF,MAAM,EACpBG,MAAiC;IAEjCR,kBACE,mBACAW,MACAP,OACAC,QACA,UACA8B,SACAC,SACA,SACA9B,YACAC,aACA,UACAC;IAEF,MAAM,CAACsB,MAAMC,KAAKC,OAAOC,OAAO,GAAGxB,UACjCE,MACAP,OACAC,QACAC,YACAC,aACAC;IAIF,MAAM6B,WAAWP;IACjB,MAAMQ,UAAUP;IAChB,IAAIQ,YAAYP,QAAQF;IACxB,IAAIU,aAAaP,SAASF;IAI1B,IAAIM,WAAWE,YAAYnC,OACzBmC,YAAYnC,QAAQiC;IAItB,IAAIC,UAAUE,aAAanC,QACzBmC,aAAanC,SAASiC;IAIxBC,YAAYvB,KAAK,GAAG,CAAC,GAAGuB;IACxBC,aAAaxB,KAAK,GAAG,CAAC,GAAGwB;IAEzB,MAAMC,OAAO;QACX,MAAMJ,WAAWF;QACjB,KAAKG,UAAUF;QACf,OAAOG;QACP,QAAQC;IACV;IACAxC,kBAAkB,4BAA4ByC;IAE9C,OAAOA;AACT;AAEO,SAASC,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdR,IAAU,EACVS,UAAgB,EAChB1C,MAAgC;IAEhC,IAAI2C,cAAc;IAClB,IAAI3C,AAAW,eAAXA,QACF2C,cAAc;IAEhB,MAAMC,iBAAiB;IAGvB,MAAMC,wBACJZ,KAAK,KAAK,GAAGU,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAcV,KAAK,KAAI,IAAK,KACvCW;IACN,MAAME,sBACJb,KAAK,MAAM,GAAGU,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAcV,KAAK,MAAK,IAAK,KACxCW;IAGN,IAAIG,WAAWvC,KAAK,GAAG,CAACmC,aAAaV,KAAK,KAAK,GAAGY,AAAwB,IAAxBA;IAClD,IAAIG,YAAYxC,KAAK,GAAG,CAACmC,aAAaV,KAAK,MAAM,GAAGa,AAAsB,IAAtBA;IAGpD,IAAIG,UAAUhB,KAAK,IAAI,GAAGY;IAC1B,IAAIK,SAASjB,KAAK,GAAG,GAAGa;IAIxB,IAAIG,UAAUF,WAAWL,WAAW,KAAK,EACvCO,UAAUP,WAAW,KAAK,GAAGK;IAI/B,IAAIG,SAASF,YAAYN,WAAW,MAAM,EACxCQ,SAASR,WAAW,MAAM,GAAGM;IAI/BC,UAAUzC,KAAK,GAAG,CAAC,GAAGyC;IACtBC,SAAS1C,KAAK,GAAG,CAAC,GAAG0C;IAIrB,IAAID,UAAUF,WAAWL,WAAW,KAAK,EACvCK,WAAWL,WAAW,KAAK,GAAGO;IAEhC,IAAIC,SAASF,YAAYN,WAAW,MAAM,EACxCM,YAAYN,WAAW,MAAM,GAAGQ;IAGlCjB,KAAK,IAAI,GAAGgB;IACZhB,KAAK,GAAG,GAAGiB;IACXjB,KAAK,KAAK,GAAGc;IACbd,KAAK,MAAM,GAAGe;IAEd,OAAOf;AACT;AAEO,eAAekB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCC,IAAU;IAEV,MAAMC,eAAeC,WAAWH;IAChC,MAAMI,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBT;QAChB,sBAAsBK;QACtBH;IACF;IACA,OAAOM;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC,EAChCC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;QACxB,MAAMK,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASL,YAAY,IAAI,CAAC,CAACK,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAEF,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAMG,UAAUF,OAAO,cAAc,IAAID;QACzC,MAAMI,YAAYH,OAAO,WAAW,GAChCI,gBAAgBN,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAEL,MAAMK,WAAiC;YACrC,CAACH,QAAQ,EAAE;YACX,GAAGC,SAAS;QACd;QAEAN,KAAK,IAAI,CAACQ;IACZ;IAEA,IAAIT,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT;AAGO,MAAMS,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;IAChB,KAAKA,EAAE,MAAM,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,EAAE,MAAM,CAAC;IAC9C,QAAQA,EAAAA,KACA,CACJA,EAAE,MAAM,CAAC;QACP,MAAMA,EAAE,MAAM;QACd,KAAKA,EAAE,MAAM;IACf,IAED,QAAQ;IACX,yBAAyBA,EAAE,OAAO,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,EAAE,KAAK,CAAC;IACvCA,EAAE,MAAM;IACRA,EAAAA,MACS,CAAC;QACN,QAAQA,EAAE,MAAM;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAG5B,MAAMC,wBAAwBN,EAAAA,MACrB,CAAC;IACN,QAAQI;IACR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,OAAO;KAAG,EAAE,QAAQ;AACpD,GACC,WAAW;AAGiBA,EAAAA,MACtB,CAAC;IACN,CAACK,oBAAoB,EAAEL,EAAE,OAAO,CAAC;IACjC,QAAQI;IAGR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,OAAO,GAAG,QAAQ;IAG3B,QAAQA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAYP,MAAMK,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;IAErC,IAAIC,cAAcD;IAClB,IAAIC,YAAY,IAAI,EAAE,aAAa,eACjCA,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAIA,YAAY,IAAI,EAAE,aAAa,aAAa;QAC9C,MAAMC,QAAQD,YAAY,IAAI,CAAC,KAAK;QAGpC,IAAIL,uBAAuBM,OACzB,OAAO;QAKT,IAAI,YAAYA,SAASA,MAAM,MAAM,EACnC,OAAO;IAEX;IAEA,OAAO;AACT;AAEO,MAAMC,2BAA2B,CAACH;IACvC3E,OACE0E,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOA,MAAM,MAAM,CAAC,MAAM;IAE9B;IAGA,OAAOI,OAAOJ;AAChB;AAEO,MAAMK,8BAA8B,CACzCC,SACAC;IAEA,IAAI,CAACD,SACH,OAAO,EAAE;IAIX,MAAME,YAAYF;IAClB,IAAIE,UAAU,IAAI,EAAE,aAAa,eAAeA,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOC,OAAO,IAAI,CAACF,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACE;YAClB,MAAMX,QAAQQ,UAAU,KAAK,CAACG,IAAI;YAClC,IAAI,CAACZ,uBAAuBC,QAC1B,OAAO;YAIT,IAAIO,cACF,OAAOP,MAAM,IAAI,EAAE,aAAa;YAGlC,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAMZ,kBAAkB,CAC7BwB,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM3F,SAAS;QAAE,GAAG0F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAa9F,MAAM,CAAC6F,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACT9F,MAAM,CAAC6F,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1B9F,MAAM,CAAC6F,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxB9F,MAAM,CAAC6F,UAAU,GAAGC,WAAW,MAAM,CAAC,MAAM;gBAC9C;YACF;QACF;IAEJ;IAEA,OAAO9F;AACT;AAEO,MAAM+F,kBAAkB,CAC7BL,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM3F,SAAS;QAAE,GAAG0F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAa9F,MAAM,CAAC6F,UAAU;QACpC,IAAIC,cAAc,AAAsB,YAAtB,OAAOA,YACvB9F,MAAM,CAAC6F,UAAU,GAAG;YAClB,CAACnB,oBAAoB,EAAE;YACvB,QAAQoB;QACV;IAEJ;IAEA,OAAO9F;AACT;AAUO,MAAMgG,mBAAmB,CAC9BC,UACAN;IAGA,IAAI,CAACA,WACH;IAIF,MAAMO,QAAQD,YAAY,CAAC;IAG3B,MAAME,eAAehB,4BAA4BQ;IAGjD,IAAIQ,AAAwB,MAAxBA,aAAa,MAAM,EACrB,OAAOR,UAAU,KAAK,CAACO;IAIzB,MAAME,oBAAyC,CAAC;IAChD,KAAK,MAAMP,aAAaM,aACtB,IAAIN,aAAaK,OACfE,iBAAiB,CAACP,UAAU,GAAGK,KAAK,CAACL,UAAU;IAKnD,MAAMQ,sBAA2C,CAAC;IAClD,IAAK,MAAMZ,OAAOS,MAChB,IAAIC,aAAa,QAAQ,CAACV,MAExBY,mBAAmB,CAACZ,IAAI,GAAG;QAAE,QAAQ;IAAU;SAE/CY,mBAAmB,CAACZ,IAAI,GAAGS,KAAK,CAACT,IAAI;IAKzC,MAAMa,YAAYX,UAAU,KAAK,CAACU;IAGlC,IAAK,MAAMR,aAAaO,kBACtBE,SAAS,CAACT,UAAU,GAAGO,iBAAiB,CAACP,UAAU;IAGrD,OAAOS;AACT"}
@@ -1,4 +1,4 @@
1
- import { finalizeActionName, getMidsceneLocationSchema } from "../common.mjs";
1
+ import { getMidsceneLocationSchema } from "../common.mjs";
2
2
  import { getDebug } from "@midscene/shared/logger";
3
3
  import { z } from "zod";
4
4
  class AbstractInterface {
@@ -156,7 +156,7 @@ const actionAssertParamSchema = z.object({
156
156
  });
157
157
  const defineActionAssert = ()=>defineAction({
158
158
  name: 'Print_Assert_Result',
159
- description: 'Print the result of the assertion. Use this only when the user asks for an assertion',
159
+ description: 'Print the result of the assertion',
160
160
  paramSchema: actionAssertParamSchema,
161
161
  call: async (param)=>{
162
162
  if ('boolean' != typeof param?.result) throw new Error(`The result of the assertion must be a boolean, but got: ${typeof param?.result}. ${param.thought || '(no thought)'}`);
@@ -164,31 +164,6 @@ const defineActionAssert = ()=>defineAction({
164
164
  if (!param.result) throw new Error(`Assertion failed: ${param.thought || '(no thought)'} (Assertion = ${param.condition})`);
165
165
  }
166
166
  });
167
- const ActionSleepParamSchema = z.object({
168
- millisecond: z.number().default(1000).optional().describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)')
169
- });
170
- const defineActionSleep = ()=>defineAction({
171
- name: 'Sleep',
172
- description: 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',
173
- paramSchema: ActionSleepParamSchema,
174
- call: async (param)=>{
175
- const duration = param?.millisecond ?? 1000;
176
- getDebug('device:common-action')(`Sleeping for ${duration}ms`);
177
- await new Promise((resolve)=>setTimeout(resolve, duration));
178
- }
179
- });
180
- const actionFinalizeParamSchema = z.object({
181
- message: z.string().optional().describe('The conclusion, data, or return value that the user needs. This message will be provided to the user when the task is finalized.')
182
- });
183
- const defineActionFinalize = ()=>defineAction({
184
- name: finalizeActionName,
185
- description: 'Finalize the task. You can provide the conclusion, data, or return value that the user needs in the message.',
186
- paramSchema: actionFinalizeParamSchema,
187
- call: async (param)=>{
188
- getDebug('device:common-action')(`Task finalized${param?.message ? `: ${param.message}` : ''}`);
189
- return param.message;
190
- }
191
- });
192
- export { AbstractInterface, ActionLongPressParamSchema, ActionSleepParamSchema, ActionSwipeParamSchema, actionAssertParamSchema, actionClearInputParamSchema, actionDoubleClickParamSchema, actionDragAndDropParamSchema, actionFinalizeParamSchema, actionHoverParamSchema, actionInputParamSchema, actionKeyboardPressParamSchema, actionRightClickParamSchema, actionScrollParamSchema, actionTapParamSchema, defineAction, defineActionAssert, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionFinalize, defineActionHover, defineActionInput, defineActionKeyboardPress, defineActionLongPress, defineActionRightClick, defineActionScroll, defineActionSleep, defineActionSwipe, defineActionTap };
167
+ export { AbstractInterface, ActionLongPressParamSchema, ActionSwipeParamSchema, actionAssertParamSchema, actionClearInputParamSchema, actionDoubleClickParamSchema, actionDragAndDropParamSchema, actionHoverParamSchema, actionInputParamSchema, actionKeyboardPressParamSchema, actionRightClickParamSchema, actionScrollParamSchema, actionTapParamSchema, defineAction, defineActionAssert, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionInput, defineActionKeyboardPress, defineActionLongPress, defineActionRightClick, defineActionScroll, defineActionSwipe, defineActionTap };
193
168
 
194
169
  //# sourceMappingURL=index.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"device/index.mjs","sources":["../../../src/device/index.ts"],"sourcesContent":["import { finalizeActionName, getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForRect?(\n rect: Rect,\n options?: {\n targetDescription?: string;\n modelConfig?: IModelConfig;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n // @deprecated do NOT extend this method\n abstract getContext?(): Promise<UIContext>;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n call,\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'append'])\n .default('replace')\n .optional()\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"append\" - append the value to existing content; \"clear\" - clear the field without inputting new text.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n call,\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling to the bottom, \"scrollToTop\" for scrolling to the top, \"scrollToRight\" for scrolling to the right, \"scrollToLeft\" for scrolling to the left',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or an element. The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Drag and drop (hold the mouse or finger down and move the mouse) ',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a swipe gesture. You must specify either \"end\" (target location) or \"distance\" + \"direction\" - they are mutually exclusive. Use \"end\" for precise location-based swipes, or \"distance\" + \"direction\" for relative movement.',\n paramSchema: ActionSwipeParamSchema,\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The input field to be cleared'),\n});\nexport type ActionClearInputParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n call,\n });\n};\n\n// Assert\nexport const actionAssertParamSchema = z.object({\n condition: z.string().describe('The condition of the assertion'),\n thought: z\n .string()\n .describe(\n 'The thought of the assertion, like \"I can see there are A, B, C elements on the page, which means ... , so the assertion is true\"',\n ),\n result: z.boolean().describe('The result of the assertion, true or false'),\n});\nexport type ActionAssertParam = {\n condition: string;\n thought: string;\n result: boolean;\n};\n\nexport const defineActionAssert = (): DeviceAction<ActionAssertParam> => {\n return defineAction<typeof actionAssertParamSchema, ActionAssertParam>({\n name: 'Print_Assert_Result',\n description:\n 'Print the result of the assertion. Use this only when the user asks for an assertion',\n paramSchema: actionAssertParamSchema,\n call: async (param) => {\n if (typeof param?.result !== 'boolean') {\n throw new Error(\n `The result of the assertion must be a boolean, but got: ${typeof param?.result}. ${param.thought || '(no thought)'}`,\n );\n }\n\n getDebug('device:common-action')(\n `Assert: ${param.condition}, Thought: ${param.thought}, Result: ${param.result}`,\n );\n\n if (!param.result) {\n throw new Error(\n `Assertion failed: ${param.thought || '(no thought)'} (Assertion = ${param.condition})`,\n );\n }\n },\n });\n};\n\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n millisecond: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n millisecond?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n call: async (param) => {\n const duration = param?.millisecond ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\n// Finalize\nexport const actionFinalizeParamSchema = z.object({\n message: z\n .string()\n .optional()\n .describe(\n 'The conclusion, data, or return value that the user needs. This message will be provided to the user when the task is finalized.',\n ),\n});\nexport type ActionFinalizeParam = {\n message?: string;\n};\n\nexport const defineActionFinalize = (): DeviceAction<ActionFinalizeParam> => {\n return defineAction<typeof actionFinalizeParamSchema, ActionFinalizeParam>({\n name: finalizeActionName,\n description:\n 'Finalize the task. You can provide the conclusion, data, or return value that the user needs in the message.',\n paramSchema: actionFinalizeParamSchema,\n call: async (param) => {\n getDebug('device:common-action')(\n `Task finalized${param?.message ? `: ${param.message}` : ''}`,\n );\n return param.message;\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n} from './device-options';\n"],"names":["AbstractInterface","defineAction","config","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput","actionAssertParamSchema","defineActionAssert","param","Error","getDebug","ActionSleepParamSchema","defineActionSleep","duration","Promise","resolve","setTimeout","actionFinalizeParamSchema","defineActionFinalize","finalizeActionName"],"mappings":";;;AAiBO,MAAeA;AAwCtB;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAIF,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACbI;IACF;AAIK,MAAMC,8BAA8BJ,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACbD;IACF;AAIK,MAAMG,+BAA+BN,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CACrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACbH;IACF;AAIK,MAAMK,yBAAyBR,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACbL;IACF;AAIF,MAAMO,yBACJ;AACK,MAAMC,yBAAyBX,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACY,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQX,4BACL,QAAQ,CAACS,wBACT,QAAQ;IACX,MAAMV,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAS,EACnC,OAAO,CAAC,WACR,QAAQ,GACR,QAAQ,CACP;AAEN;AAOO,MAAMc,oBAAoB,CAC/BX,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACbR;IACF;AAIK,MAAMY,iCAAiCf,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMgB,4BAA4B,CACvCb,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAakB;QACbZ;IACF;AAIK,MAAMc,0BAA0BjB,EAAE,MAAM,CAAC;IAC9C,YAAYA,CAAC,CAADA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMiB,qBAAqB,CAChCf,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaoB;QACbd;IACF;AAIK,MAAMgB,+BAA+BnB,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMmB,0BAA0B,CACrCjB,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAasB;QACbhB;IACF;AAGK,MAAMkB,6BAA6BrB,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMsB,wBAAwB,CACnCnB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAawB;QACblB;IACF;AAGK,MAAMoB,yBAAyBvB,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,MAAMwB,oBAAoB,CAC/BrB,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa0B;QACbpB;IACF;AAIK,MAAMsB,8BAA8BzB,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMyB,yBAAyB,CACpCvB,OAEON,aAGL;QACA,MAAM;QACN,aAAaa;QACb,gBAAgB;QAChB,aAAae;QACbtB;IACF;AAIK,MAAMwB,0BAA0B3B,EAAE,MAAM,CAAC;IAC9C,WAAWA,EAAE,MAAM,GAAG,QAAQ,CAAC;IAC/B,SAASA,EAAAA,MACA,GACN,QAAQ,CACP;IAEJ,QAAQA,EAAE,OAAO,GAAG,QAAQ,CAAC;AAC/B;AAOO,MAAM4B,qBAAqB,IACzB/B,aAAgE;QACrE,MAAM;QACN,aACE;QACF,aAAa8B;QACb,MAAM,OAAOE;YACX,IAAI,AAAyB,aAAzB,OAAOA,OAAO,QAChB,MAAM,IAAIC,MACR,CAAC,wDAAwD,EAAE,OAAOD,OAAO,OAAO,EAAE,EAAEA,MAAM,OAAO,IAAI,gBAAgB;YAIzHE,SAAS,wBACP,CAAC,QAAQ,EAAEF,MAAM,SAAS,CAAC,WAAW,EAAEA,MAAM,OAAO,CAAC,UAAU,EAAEA,MAAM,MAAM,EAAE;YAGlF,IAAI,CAACA,MAAM,MAAM,EACf,MAAM,IAAIC,MACR,CAAC,kBAAkB,EAAED,MAAM,OAAO,IAAI,eAAe,cAAc,EAAEA,MAAM,SAAS,CAAC,CAAC,CAAC;QAG7F;IACF;AAIK,MAAMG,yBAAyBhC,EAAE,MAAM,CAAC;IAC7C,aAAaA,EAAAA,MACJ,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMiC,oBAAoB,IACxBpC,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAamC;QACb,MAAM,OAAOH;YACX,MAAMK,WAAWL,OAAO,eAAe;YACvCE,SAAS,wBAAwB,CAAC,aAAa,EAAEG,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIC,QAAQ,CAACC,UAAYC,WAAWD,SAASF;QACrD;IACF;AAIK,MAAMI,4BAA4BtC,EAAE,MAAM,CAAC;IAChD,SAASA,EAAAA,MACA,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAKO,MAAMuC,uBAAuB,IAC3B1C,aAAoE;QACzE,MAAM2C;QACN,aACE;QACF,aAAaF;QACb,MAAM,OAAOT;YACXE,SAAS,wBACP,CAAC,cAAc,EAAEF,OAAO,UAAU,CAAC,EAAE,EAAEA,MAAM,OAAO,EAAE,GAAG,IAAI;YAE/D,OAAOA,MAAM,OAAO;QACtB;IACF"}
1
+ {"version":3,"file":"device/index.mjs","sources":["../../../src/device/index.ts"],"sourcesContent":["import { getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForRect?(\n rect: Rect,\n options?: {\n targetDescription?: string;\n modelConfig?: IModelConfig;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n // @deprecated do NOT extend this method\n abstract getContext?(): Promise<UIContext>;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n call,\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'append'])\n .default('replace')\n .optional()\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"append\" - append the value to existing content; \"clear\" - clear the field without inputting new text.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n call,\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling to the bottom, \"scrollToTop\" for scrolling to the top, \"scrollToRight\" for scrolling to the right, \"scrollToLeft\" for scrolling to the left',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or an element. The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Drag and drop (hold the mouse or finger down and move the mouse) ',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a swipe gesture. You must specify either \"end\" (target location) or \"distance\" + \"direction\" - they are mutually exclusive. Use \"end\" for precise location-based swipes, or \"distance\" + \"direction\" for relative movement.',\n paramSchema: ActionSwipeParamSchema,\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The input field to be cleared'),\n});\nexport type ActionClearInputParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n call,\n });\n};\n\n// Assert\nexport const actionAssertParamSchema = z.object({\n condition: z.string().describe('The condition of the assertion'),\n thought: z\n .string()\n .describe(\n 'The thought of the assertion, like \"I can see there are A, B, C elements on the page, which means ... , so the assertion is true\"',\n ),\n result: z.boolean().describe('The result of the assertion, true or false'),\n});\nexport type ActionAssertParam = {\n condition: string;\n thought: string;\n result: boolean;\n};\n\nexport const defineActionAssert = (): DeviceAction<ActionAssertParam> => {\n return defineAction<typeof actionAssertParamSchema, ActionAssertParam>({\n name: 'Print_Assert_Result',\n description: 'Print the result of the assertion',\n paramSchema: actionAssertParamSchema,\n call: async (param) => {\n if (typeof param?.result !== 'boolean') {\n throw new Error(\n `The result of the assertion must be a boolean, but got: ${typeof param?.result}. ${param.thought || '(no thought)'}`,\n );\n }\n\n getDebug('device:common-action')(\n `Assert: ${param.condition}, Thought: ${param.thought}, Result: ${param.result}`,\n );\n\n if (!param.result) {\n throw new Error(\n `Assertion failed: ${param.thought || '(no thought)'} (Assertion = ${param.condition})`,\n );\n }\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n} from './device-options';\n"],"names":["AbstractInterface","defineAction","config","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput","actionAssertParamSchema","defineActionAssert","param","Error","getDebug"],"mappings":";;;AAiBO,MAAeA;AAwCtB;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAIF,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACbI;IACF;AAIK,MAAMC,8BAA8BJ,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACbD;IACF;AAIK,MAAMG,+BAA+BN,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CACrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACbH;IACF;AAIK,MAAMK,yBAAyBR,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACbL;IACF;AAIF,MAAMO,yBACJ;AACK,MAAMC,yBAAyBX,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACY,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQX,4BACL,QAAQ,CAACS,wBACT,QAAQ;IACX,MAAMV,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAS,EACnC,OAAO,CAAC,WACR,QAAQ,GACR,QAAQ,CACP;AAEN;AAOO,MAAMc,oBAAoB,CAC/BX,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACbR;IACF;AAIK,MAAMY,iCAAiCf,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMgB,4BAA4B,CACvCb,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAakB;QACbZ;IACF;AAIK,MAAMc,0BAA0BjB,EAAE,MAAM,CAAC;IAC9C,YAAYA,CAAC,CAADA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMiB,qBAAqB,CAChCf,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaoB;QACbd;IACF;AAIK,MAAMgB,+BAA+BnB,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMmB,0BAA0B,CACrCjB,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAasB;QACbhB;IACF;AAGK,MAAMkB,6BAA6BrB,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMsB,wBAAwB,CACnCnB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAawB;QACblB;IACF;AAGK,MAAMoB,yBAAyBvB,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,MAAMwB,oBAAoB,CAC/BrB,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa0B;QACbpB;IACF;AAIK,MAAMsB,8BAA8BzB,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMyB,yBAAyB,CACpCvB,OAEON,aAGL;QACA,MAAM;QACN,aAAaa;QACb,gBAAgB;QAChB,aAAae;QACbtB;IACF;AAIK,MAAMwB,0BAA0B3B,EAAE,MAAM,CAAC;IAC9C,WAAWA,EAAE,MAAM,GAAG,QAAQ,CAAC;IAC/B,SAASA,EAAAA,MACA,GACN,QAAQ,CACP;IAEJ,QAAQA,EAAE,OAAO,GAAG,QAAQ,CAAC;AAC/B;AAOO,MAAM4B,qBAAqB,IACzB/B,aAAgE;QACrE,MAAM;QACN,aAAa;QACb,aAAa8B;QACb,MAAM,OAAOE;YACX,IAAI,AAAyB,aAAzB,OAAOA,OAAO,QAChB,MAAM,IAAIC,MACR,CAAC,wDAAwD,EAAE,OAAOD,OAAO,OAAO,EAAE,EAAEA,MAAM,OAAO,IAAI,gBAAgB;YAIzHE,SAAS,wBACP,CAAC,QAAQ,EAAEF,MAAM,SAAS,CAAC,WAAW,EAAEA,MAAM,OAAO,CAAC,UAAU,EAAEA,MAAM,MAAM,EAAE;YAGlF,IAAI,CAACA,MAAM,MAAM,EACf,MAAM,IAAIC,MACR,CAAC,kBAAkB,EAAED,MAAM,OAAO,IAAI,eAAe,cAAc,EAAEA,MAAM,SAAS,CAAC,CAAC,CAAC;QAG7F;IACF"}
@@ -1 +1 @@
1
- {"version":3,"file":"types.mjs","sources":["../../src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';\nimport type {\n BaseElement,\n LocateResultElement,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { z } from 'zod';\nimport type { TUserPrompt } from './common';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n cached_input: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n model_description: string | undefined;\n intent: string | undefined;\n};\n\nexport type { LocateResultElement };\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n errors?: string[];\n}\n\nexport type AIElementResponse = AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext {\n abstract screenshotBase64: string;\n\n abstract size: Size;\n\n abstract _isFrozen?: boolean;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type ServiceExtractParam = string | Record<string, string>;\n\nexport type ElementCacheFeature = Record<string, unknown>;\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport type ThinkingLevel = 'off' | 'medium' | 'high';\n\nexport type DeepThinkOption = 'unset' | true | false;\n\nexport interface ServiceTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport interface DumpMeta {\n logTime: number;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface ServiceDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: ServiceExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: LocateResultElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: ServiceTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialServiceDumpFromSDK = Omit<\n ServiceDump,\n 'logTime' | 'logId' | 'model_name'\n>;\n\nexport interface ServiceResultBase {\n dump: ServiceDump;\n}\n\nexport type LocateResultWithDump = LocateResult & ServiceResultBase;\n\nexport interface ServiceExtractResult<T> extends ServiceResultBase {\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport class ServiceError extends Error {\n dump: ServiceDump;\n\n constructor(message: string, dump: ServiceDump) {\n super(message);\n this.name = 'ServiceError';\n this.dump = dump;\n }\n}\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type ServiceAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n [key: string]: unknown;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type: string;\n param: ParamType;\n}\n\nexport interface RawResponsePlanningAIResponse {\n action: PlanningAction;\n log: string;\n error?: string;\n}\n\nexport interface PlanningAIResponse\n extends Omit<RawResponsePlanningAIResponse, 'action'> {\n actions?: PlanningAction[];\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n error?: string;\n reasoning_content?: string;\n shouldContinuePlanning: boolean;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {};\n\nexport interface LongPressParam {\n duration?: number;\n}\n\nexport interface PullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n uiContext?: UIContext;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n subTask?: boolean;\n param?: TaskParam;\n thought?: string;\n uiContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n aiActContext?: string;\n}\n\n/*\ntask - service-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskInsightDump = ServiceDump;\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - service-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: ServiceExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n ServiceAssertionResponse,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action Space',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n aiActContext?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\ntask - planning-locate\n*/\nexport type ExecutionTaskPlanningLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskPlanningLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskPlanningDump = ServiceDump;\n\nexport type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<\n 'Planning',\n ExecutionTaskPlanningLocateParam,\n ExecutionTaskPlanningLocateOutput,\n ExecutionTaskPlanningDump\n>;\n\nexport type ExecutionTaskPlanningLocate =\n ExecutionTask<ExecutionTaskPlanningLocateApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n sdkVersion: string;\n groupName: string;\n groupDescription?: string;\n modelBriefs: string[];\n executions: ExecutionDump[];\n}\n\nexport type InterfaceType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android'\n | string;\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport interface DeviceAction<TParam = any, TReturn = any> {\n name: string;\n description?: string;\n interfaceAlias?: string;\n paramSchema?: z.ZodType<TParam>;\n call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn;\n delayAfterRunner?: number;\n}\n\n/**\n * Type utilities for extracting types from DeviceAction definitions\n */\n\n/**\n * Extract parameter type from a DeviceAction\n */\nexport type ActionParam<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<infer P, any> ? P : never;\n\n/**\n * Extract return type from a DeviceAction\n */\nexport type ActionReturn<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<any, infer R> ? R : never;\n\n/**\n * Web-specific types\n */\nexport interface WebElementInfo extends BaseElement {\n id: string;\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n}\n\nexport type WebUIContext = UIContext;\n\n/**\n * Agent\n */\n\nexport type CacheConfig = {\n strategy?: 'read-only' | 'read-write' | 'write-only';\n id: string;\n};\n\nexport type Cache =\n | false // No read, no write\n | true // Will throw error at runtime - deprecated\n | CacheConfig; // Object configuration (requires explicit id)\n\nexport interface AgentOpt {\n testId?: string;\n // @deprecated\n cacheId?: string; // Keep backward compatibility, but marked as deprecated\n groupName?: string;\n groupDescription?: string;\n /* if auto generate report, default true */\n generateReport?: boolean;\n /* if auto print report msg, default true */\n autoPrintReportMsg?: boolean;\n onTaskStartTip?: OnTaskStartTip;\n aiActContext?: string;\n aiActionContext?: string;\n /* custom report file name */\n reportFileName?: string;\n modelConfig?: TModelConfig;\n cache?: Cache;\n /**\n * Maximum number of replanning cycles for aiAct.\n * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.\n * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.\n */\n replanningCycleLimit?: number;\n\n /**\n * Custom OpenAI client factory function\n *\n * If provided, this function will be called to create OpenAI client instances\n * for each AI call, allowing you to:\n * - Wrap clients with observability tools (langsmith, langfuse)\n * - Use custom OpenAI-compatible clients\n * - Apply different configurations based on intent\n *\n * @param config - Resolved model configuration\n * @returns OpenAI client instance (original or wrapped)\n *\n * @example\n * ```typescript\n * createOpenAIClient: async (openai, opts) => {\n * // Wrap with langsmith for planning tasks\n * if (opts.baseURL?.includes('planning')) {\n * return wrapOpenAI(openai, { metadata: { task: 'planning' } });\n * }\n *\n * return openai;\n * }\n * ```\n */\n createOpenAIClient?: CreateOpenAIClientFn;\n}\n\nexport type TestStatus =\n | 'passed'\n | 'failed'\n | 'timedOut'\n | 'skipped'\n | 'interrupted';\n\nexport interface ReportFileWithAttributes {\n reportFilePath: string;\n reportAttributes: {\n testDuration: number;\n testStatus: TestStatus;\n testTitle: string;\n testId: string;\n testDescription: string;\n };\n}\n"],"names":["AIResponseFormat","UIContext","ServiceError","Error","message","dump"],"mappings":";AAAqD;;;;;;;;;;AAwC9C,IAAKA,yBAAgBA,WAAAA,GAAAA,SAAhBA,gBAAgB;;;WAAhBA;;AAiEL,MAAeC;AAMtB;AA4EO,MAAMC,qBAAqBC;IAGhC,YAAYC,OAAe,EAAEC,IAAiB,CAAE;QAC9C,KAAK,CAACD,UAHR;QAIE,IAAI,CAAC,IAAI,GAAG;QACZ,IAAI,CAAC,IAAI,GAAGC;IACd;AACF"}
1
+ {"version":3,"file":"types.mjs","sources":["../../src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';\nimport type {\n BaseElement,\n LocateResultElement,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { z } from 'zod';\nimport type { TUserPrompt } from './common';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n cached_input: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n model_description: string | undefined;\n intent: string | undefined;\n};\n\nexport type { LocateResultElement };\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n errors?: string[];\n}\n\nexport type AIElementResponse = AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext {\n abstract screenshotBase64: string;\n\n abstract size: Size;\n\n abstract _isFrozen?: boolean;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type ServiceExtractParam = string | Record<string, string>;\n\nexport type ElementCacheFeature = Record<string, unknown>;\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport type ThinkingLevel = 'off' | 'medium' | 'high';\n\nexport type DeepThinkOption = 'unset' | true | false;\n\nexport interface ServiceTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport interface DumpMeta {\n logTime: number;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface ServiceDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: ServiceExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: LocateResultElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: ServiceTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialServiceDumpFromSDK = Omit<\n ServiceDump,\n 'logTime' | 'logId' | 'model_name'\n>;\n\nexport interface ServiceResultBase {\n dump: ServiceDump;\n}\n\nexport type LocateResultWithDump = LocateResult & ServiceResultBase;\n\nexport interface ServiceExtractResult<T> extends ServiceResultBase {\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport class ServiceError extends Error {\n dump: ServiceDump;\n\n constructor(message: string, dump: ServiceDump) {\n super(message);\n this.name = 'ServiceError';\n this.dump = dump;\n }\n}\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type ServiceAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n [key: string]: unknown;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type: string;\n param: ParamType;\n}\n\nexport interface RawResponsePlanningAIResponse {\n action: PlanningAction;\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n}\n\nexport interface PlanningAIResponse\n extends Omit<RawResponsePlanningAIResponse, 'action'> {\n actions?: PlanningAction[];\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n error?: string;\n reasoning_content?: string;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {};\n\nexport interface LongPressParam {\n duration?: number;\n}\n\nexport interface PullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n uiContext?: UIContext;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n subTask?: boolean;\n param?: TaskParam;\n thought?: string;\n uiContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n aiActContext?: string;\n}\n\n/*\ntask - service-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskInsightDump = ServiceDump;\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - service-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: ServiceExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n ServiceAssertionResponse,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action Space',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n aiActContext?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\ntask - planning-locate\n*/\nexport type ExecutionTaskPlanningLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskPlanningLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskPlanningDump = ServiceDump;\n\nexport type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<\n 'Planning',\n ExecutionTaskPlanningLocateParam,\n ExecutionTaskPlanningLocateOutput,\n ExecutionTaskPlanningDump\n>;\n\nexport type ExecutionTaskPlanningLocate =\n ExecutionTask<ExecutionTaskPlanningLocateApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n sdkVersion: string;\n groupName: string;\n groupDescription?: string;\n modelBriefs: string[];\n executions: ExecutionDump[];\n}\n\nexport type InterfaceType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android'\n | string;\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport interface DeviceAction<TParam = any, TReturn = any> {\n name: string;\n description?: string;\n interfaceAlias?: string;\n paramSchema?: z.ZodType<TParam>;\n call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn;\n delayAfterRunner?: number;\n}\n\n/**\n * Type utilities for extracting types from DeviceAction definitions\n */\n\n/**\n * Extract parameter type from a DeviceAction\n */\nexport type ActionParam<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<infer P, any> ? P : never;\n\n/**\n * Extract return type from a DeviceAction\n */\nexport type ActionReturn<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<any, infer R> ? R : never;\n\n/**\n * Web-specific types\n */\nexport interface WebElementInfo extends BaseElement {\n id: string;\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n}\n\nexport type WebUIContext = UIContext;\n\n/**\n * Agent\n */\n\nexport type CacheConfig = {\n strategy?: 'read-only' | 'read-write' | 'write-only';\n id: string;\n};\n\nexport type Cache =\n | false // No read, no write\n | true // Will throw error at runtime - deprecated\n | CacheConfig; // Object configuration (requires explicit id)\n\nexport interface AgentOpt {\n testId?: string;\n // @deprecated\n cacheId?: string; // Keep backward compatibility, but marked as deprecated\n groupName?: string;\n groupDescription?: string;\n /* if auto generate report, default true */\n generateReport?: boolean;\n /* if auto print report msg, default true */\n autoPrintReportMsg?: boolean;\n onTaskStartTip?: OnTaskStartTip;\n aiActContext?: string;\n aiActionContext?: string;\n /* custom report file name */\n reportFileName?: string;\n modelConfig?: TModelConfig;\n cache?: Cache;\n /**\n * Maximum number of replanning cycles for aiAct.\n * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.\n * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.\n */\n replanningCycleLimit?: number;\n\n /**\n * Custom OpenAI client factory function\n *\n * If provided, this function will be called to create OpenAI client instances\n * for each AI call, allowing you to:\n * - Wrap clients with observability tools (langsmith, langfuse)\n * - Use custom OpenAI-compatible clients\n * - Apply different configurations based on intent\n *\n * @param config - Resolved model configuration\n * @returns OpenAI client instance (original or wrapped)\n *\n * @example\n * ```typescript\n * createOpenAIClient: async (openai, opts) => {\n * // Wrap with langsmith for planning tasks\n * if (opts.baseURL?.includes('planning')) {\n * return wrapOpenAI(openai, { metadata: { task: 'planning' } });\n * }\n *\n * return openai;\n * }\n * ```\n */\n createOpenAIClient?: CreateOpenAIClientFn;\n}\n\nexport type TestStatus =\n | 'passed'\n | 'failed'\n | 'timedOut'\n | 'skipped'\n | 'interrupted';\n\nexport interface ReportFileWithAttributes {\n reportFilePath: string;\n reportAttributes: {\n testDuration: number;\n testStatus: TestStatus;\n testTitle: string;\n testId: string;\n testDescription: string;\n };\n}\n"],"names":["AIResponseFormat","UIContext","ServiceError","Error","message","dump"],"mappings":";AAAqD;;;;;;;;;;AAwC9C,IAAKA,yBAAgBA,WAAAA,GAAAA,SAAhBA,gBAAgB;;;WAAhBA;;AAiEL,MAAeC;AAMtB;AA4EO,MAAMC,qBAAqBC;IAGhC,YAAYC,OAAe,EAAEC,IAAiB,CAAE;QAC9C,KAAK,CAACD,UAHR;QAIE,IAAI,CAAC,IAAI,GAAG;QACZ,IAAI,CAAC,IAAI,GAAGC;IACd;AACF"}