@midscene/core 0.29.0 → 0.29.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -186,7 +186,7 @@ function trimContextByViewport(execution) {
186
186
  }) : execution.tasks
187
187
  };
188
188
  }
189
- const getMidsceneVersion = ()=>"0.29.0";
189
+ const getMidsceneVersion = ()=>"0.29.1";
190
190
  const parsePrompt = (prompt)=>{
191
191
  if ('string' == typeof prompt) return {
192
192
  textPrompt: prompt,
@@ -39,6 +39,7 @@ __webpack_require__.d(__webpack_exports__, {
39
39
  PointSchema: ()=>PointSchema,
40
40
  adaptBboxToRect: ()=>adaptBboxToRect,
41
41
  markupImageForLLM: ()=>markupImageForLLM,
42
+ normalized01000: ()=>normalized01000,
42
43
  adaptDoubaoBbox: ()=>adaptDoubaoBbox,
43
44
  dumpMidsceneLocatorField: ()=>dumpMidsceneLocatorField,
44
45
  getMidsceneLocationSchema: ()=>getMidsceneLocationSchema,
@@ -65,12 +66,12 @@ var common_AIActionType = /*#__PURE__*/ function(AIActionType) {
65
66
  }({});
66
67
  const defaultBboxSize = 20;
67
68
  const debugInspectUtils = (0, logger_namespaceObject.getDebug)('ai:common');
68
- function fillBboxParam(locate, width, height, vlMode) {
69
+ function fillBboxParam(locate, width, height, rightLimit, bottomLimit, vlMode) {
69
70
  if (locate.bbox_2d && !(null == locate ? void 0 : locate.bbox)) {
70
71
  locate.bbox = locate.bbox_2d;
71
72
  delete locate.bbox_2d;
72
73
  }
73
- if (null == locate ? void 0 : locate.bbox) locate.bbox = adaptBbox(locate.bbox, width, height, vlMode);
74
+ if (null == locate ? void 0 : locate.bbox) locate.bbox = adaptBbox(locate.bbox, width, height, rightLimit, bottomLimit, vlMode);
74
75
  return locate;
75
76
  }
76
77
  function adaptQwenBbox(bbox) {
@@ -132,10 +133,25 @@ function adaptDoubaoBbox(bbox, width, height) {
132
133
  const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;
133
134
  throw new Error(msg);
134
135
  }
135
- function adaptBbox(bbox, width, height, vlMode) {
136
- if ('doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode) return adaptDoubaoBbox(bbox, width, height);
137
- if ('gemini' === vlMode) return adaptGeminiBbox(bbox, width, height);
138
- return adaptQwenBbox(bbox);
136
+ function adaptBbox(bbox, width, height, rightLimit, bottomLimit, vlMode) {
137
+ let result = [
138
+ 0,
139
+ 0,
140
+ 0,
141
+ 0
142
+ ];
143
+ result = 'doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode ? adaptDoubaoBbox(bbox, width, height) : 'gemini' === vlMode ? adaptGeminiBbox(bbox, width, height) : 'qwen3-vl' === vlMode ? normalized01000(bbox, width, height) : adaptQwenBbox(bbox);
144
+ result[2] = Math.min(result[2], rightLimit);
145
+ result[3] = Math.min(result[3], bottomLimit);
146
+ return result;
147
+ }
148
+ function normalized01000(bbox, width, height) {
149
+ return [
150
+ Math.round(bbox[0] * width / 1000),
151
+ Math.round(bbox[1] * height / 1000),
152
+ Math.round(bbox[2] * width / 1000),
153
+ Math.round(bbox[3] * height / 1000)
154
+ ];
139
155
  }
140
156
  function adaptGeminiBbox(bbox, width, height) {
141
157
  const left = Math.round(bbox[1] * width / 1000);
@@ -149,9 +165,9 @@ function adaptGeminiBbox(bbox, width, height) {
149
165
  bottom
150
166
  ];
151
167
  }
152
- function adaptBboxToRect(bbox, width, height, offsetX = 0, offsetY = 0, vlMode) {
153
- debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY, vlMode);
154
- const [left, top, right, bottom] = adaptBbox(bbox, width, height, vlMode);
168
+ function adaptBboxToRect(bbox, width, height, offsetX = 0, offsetY = 0, rightLimit = width, bottomLimit = height, vlMode) {
169
+ debugInspectUtils('adaptBboxToRect', bbox, width, height, 'offset', offsetX, offsetY, 'limit', rightLimit, bottomLimit, 'vlMode', vlMode);
170
+ const [left, top, right, bottom] = adaptBbox(bbox, width, height, rightLimit, bottomLimit, vlMode);
155
171
  const rectLeft = left;
156
172
  const rectTop = top;
157
173
  let rectWidth = right - left;
@@ -382,6 +398,7 @@ exports.ifMidsceneLocatorField = __webpack_exports__.ifMidsceneLocatorField;
382
398
  exports.loadActionParam = __webpack_exports__.loadActionParam;
383
399
  exports.markupImageForLLM = __webpack_exports__.markupImageForLLM;
384
400
  exports.mergeRects = __webpack_exports__.mergeRects;
401
+ exports.normalized01000 = __webpack_exports__.normalized01000;
385
402
  exports.warnGPT4oSizeLimit = __webpack_exports__.warnGPT4oSizeLimit;
386
403
  for(var __webpack_i__ in __webpack_exports__)if (-1 === [
387
404
  "AIActionType",
@@ -406,6 +423,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
406
423
  "loadActionParam",
407
424
  "markupImageForLLM",
408
425
  "mergeRects",
426
+ "normalized01000",
409
427
  "warnGPT4oSizeLimit"
410
428
  ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
411
429
  Object.defineProperty(exports, '__esModule', {
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/common.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIUsageInfo,\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = ChatCompletionMessageParam[];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n TEXT = 5,\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n vlMode: TVlModeTypes | undefined,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height, vlMode);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n vlMode: TVlModeTypes | undefined,\n): [number, number, number, number] {\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlMode === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n vlMode?: TVlModeTypes | undefined,\n): Rect {\n debugInspectUtils(\n 'adaptBboxToRect',\n bbox,\n width,\n height,\n offsetX,\n offsetY,\n vlMode,\n );\n const [left, top, right, bottom] = adaptBbox(bbox, width, height, vlMode);\n\n // Calculate initial rect dimensions\n const rectLeft = left;\n const rectTop = top;\n let rectWidth = right - left;\n let rectHeight = bottom - top;\n\n // Ensure the rect doesn't exceed image boundaries\n // If right edge exceeds width, adjust the width\n if (rectLeft + rectWidth > width) {\n rectWidth = width - rectLeft;\n }\n\n // If bottom edge exceeds height, adjust the height\n if (rectTop + rectHeight > height) {\n rectHeight = height - rectTop;\n }\n\n // Ensure minimum dimensions (width and height should be at least 1)\n rectWidth = Math.max(1, rectWidth);\n rectHeight = Math.max(1, rectHeight);\n\n const rect = {\n left: rectLeft + offsetX,\n top: rectTop + offsetY,\n width: rectWidth,\n height: rectHeight,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size, modelName: string) {\n if (warned) return;\n if (modelName.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your interface to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n vlMode: TVlModeTypes | undefined,\n) {\n const minEdgeSize = vlMode === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n // Calculate padding needed to reach minimum edge size\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n\n // Calculate new dimensions (ensure minimum edge size)\n let newWidth = Math.max(minEdgeSize, rect.width + paddingSizeHorizontal * 2);\n let newHeight = Math.max(minEdgeSize, rect.height + paddingSizeVertical * 2);\n\n // Calculate initial position with padding\n let newLeft = rect.left - paddingSizeHorizontal;\n let newTop = rect.top - paddingSizeVertical;\n\n // Ensure the rect doesn't exceed screen boundaries by adjusting position\n // If the rect goes beyond the right edge, shift it left\n if (newLeft + newWidth > screenSize.width) {\n newLeft = screenSize.width - newWidth;\n }\n\n // If the rect goes beyond the bottom edge, shift it up\n if (newTop + newHeight > screenSize.height) {\n newTop = screenSize.height - newHeight;\n }\n\n // Ensure the rect doesn't go beyond the left/top edges\n newLeft = Math.max(0, newLeft);\n newTop = Math.max(0, newTop);\n\n // If after position adjustment, the rect still exceeds screen boundaries,\n // clamp the dimensions to fit within screen\n if (newLeft + newWidth > screenSize.width) {\n newWidth = screenSize.width - newLeft;\n }\n if (newTop + newHeight > screenSize.height) {\n newHeight = screenSize.height - newTop;\n }\n\n rect.left = newLeft;\n rect.top = newTop;\n rect.width = newWidth;\n rect.height = newHeight;\n\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: '',\n ...flowParam,\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\nconst MidsceneLocationResult = z\n .object({\n [locateFieldFlagName]: z.literal(true),\n prompt: TUserPromptSchema,\n\n // optional fields\n deepThink: z.boolean().optional(), // only available in vl model\n cacheable: z.boolean().optional(),\n xpath: z.boolean().optional(), // preset result for xpath\n\n // these two fields will only appear in the result\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\nexport type MidsceneLocationResultType = z.infer<typeof MidsceneLocationResult>;\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationResult;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodUnion (the new MidsceneLocation structure)\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n return locateFieldFlagName in shape;\n }\n\n return false;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return field.prompt.prompt; // TODO: dump images if necessary\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = fieldValue.prompt.prompt;\n }\n }\n }\n }\n }\n\n return result;\n};\n\nexport const loadActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue && typeof fieldValue === 'string') {\n result[fieldName] = {\n [locateFieldFlagName]: true,\n prompt: fieldValue,\n };\n }\n }\n\n return result;\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","AIActionType","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","vlMode","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rectLeft","rectTop","rectWidth","rectHeight","rect","warned","warnGPT4oSizeLimit","size","modelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","newWidth","newHeight","newLeft","newTop","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","verb","action","flowKey","flowParam","dumpActionParam","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","MidsceneLocationResult","getMidsceneLocationSchema","ifMidsceneLocatorField","field","_actualField__def","_actualField__def1","actualField","shape","dumpMidsceneLocatorField","String","findAllMidsceneLocatorField","zodType","requiredOnly","_zodObject__def","zodObject","keys","_field__def","jsonObject","zodSchema","locatorFields","fieldName","fieldValue","loadActionParam"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACkBO,IAAKI,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;;WAAZA;;AASZ,MAAMC,kBAAkB;AACxB,MAAMC,oBAAoBC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdC,MAAgC;IAGhC,IAAKH,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGI,UAAUJ,OAAO,IAAI,EAAEC,OAAOC,QAAQC;IAGtD,OAAOH;AACT;AAEO,SAASK,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGV;QACN,YAAnB,OAAOU,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGV;KAC1B;IACD,OAAOc;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCL,KAAa,EACbC,MAAc;IAEdW,IAAAA,sBAAAA,MAAAA,AAAAA,EACEZ,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOI,MAAmB;QAC5BO,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIb,QAAS;YAC3CU,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,SAAU;YAC5CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIb,QAAS;YAC3CU,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,SAAU;SAC7C;QAEH,MAAM,IAAIO,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS;QACnCU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU;QACpCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS;QACnCU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU;KACrC;IAIH,IACEe,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS,QAAQL,kBAAkB;QAE/De,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU,QAAQN,kBAAkB;QAEhEe,KAAK,GAAG,CACNV,OACAU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS,QAAQL,kBAAkB;QAE/De,KAAK,GAAG,CACNT,QACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIU,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS;QACnCU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU;QACpCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS;QACnCU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU;KACrC;IAGH,MAAMK,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdL,KAAa,EACbC,MAAc,EACdC,MAAgC;IAEhC,IAAIA,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAChC,OAAOS,gBAAgBN,MAAML,OAAOC;IAGtC,IAAIC,AAAW,aAAXA,QACF,OAAOkB,gBAAgBf,MAAML,OAAOC;IAGtC,OAAOG,cAAcC;AACvB;AAEO,SAASe,gBACdf,IAAc,EACdL,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOX,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGL,QAAS;IAC5C,MAAMsB,MAAMZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,SAAU;IAC5C,MAAMsB,QAAQb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGL,QAAS;IAC7C,MAAMwB,SAASd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdpB,IAAc,EACdL,KAAa,EACbC,MAAc,EACdyB,UAAU,CAAC,EACXC,UAAU,CAAC,EACXzB,MAAiC;IAEjCN,kBACE,mBACAS,MACAL,OACAC,QACAyB,SACAC,SACAzB;IAEF,MAAM,CAACmB,MAAMC,KAAKC,OAAOC,OAAO,GAAGrB,UAAUE,MAAML,OAAOC,QAAQC;IAGlE,MAAM0B,WAAWP;IACjB,MAAMQ,UAAUP;IAChB,IAAIQ,YAAYP,QAAQF;IACxB,IAAIU,aAAaP,SAASF;IAI1B,IAAIM,WAAWE,YAAY9B,OACzB8B,YAAY9B,QAAQ4B;IAItB,IAAIC,UAAUE,aAAa9B,QACzB8B,aAAa9B,SAAS4B;IAIxBC,YAAYpB,KAAK,GAAG,CAAC,GAAGoB;IACxBC,aAAarB,KAAK,GAAG,CAAC,GAAGqB;IAEzB,MAAMC,OAAO;QACX,MAAMJ,WAAWF;QACjB,KAAKG,UAAUF;QACf,OAAOG;QACP,QAAQC;IACV;IACAnC,kBAAkB,4BAA4BoC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU,EAAEC,SAAiB;IAC9D,IAAIH,QAAQ;IACZ,IAAIG,UAAU,WAAW,GAAG,QAAQ,CAAC,WAAW;QAC9C,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,6FAA6F,CAAC;QAErN,IACEzB,KAAK,GAAG,CAACyB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpCzB,KAAK,GAAG,CAACyB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAU/B,KAAK,GAAG,IAAI8B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAASjC,KAAK,GAAG,IAAI8B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAWlC,KAAK,GAAG,IAAI8B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYnC,KAAK,GAAG,IAAI8B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdd,IAAU,EACVe,UAAgB,EAChB7C,MAAgC;IAEhC,MAAM8C,cAAc9C,AAAW,oBAAXA,SAA6B,MAAM;IACvD,MAAM+C,iBAAiB;IAGvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTtC,KAAK,IAAI,CAAEsC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVtC,KAAK,IAAI,CAAEsC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IAGN,IAAIG,WAAW1C,KAAK,GAAG,CAACsC,aAAahB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA;IAClD,IAAIG,YAAY3C,KAAK,GAAG,CAACsC,aAAahB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA;IAGpD,IAAIG,UAAUtB,KAAK,IAAI,GAAGkB;IAC1B,IAAIK,SAASvB,KAAK,GAAG,GAAGmB;IAIxB,IAAIG,UAAUF,WAAWL,WAAW,KAAK,EACvCO,UAAUP,WAAW,KAAK,GAAGK;IAI/B,IAAIG,SAASF,YAAYN,WAAW,MAAM,EACxCQ,SAASR,WAAW,MAAM,GAAGM;IAI/BC,UAAU5C,KAAK,GAAG,CAAC,GAAG4C;IACtBC,SAAS7C,KAAK,GAAG,CAAC,GAAG6C;IAIrB,IAAID,UAAUF,WAAWL,WAAW,KAAK,EACvCK,WAAWL,WAAW,KAAK,GAAGO;IAEhC,IAAIC,SAASF,YAAYN,WAAW,MAAM,EACxCM,YAAYN,WAAW,MAAM,GAAGQ;IAGlCvB,KAAK,IAAI,GAAGsB;IACZtB,KAAK,GAAG,GAAGuB;IACXvB,KAAK,KAAK,GAAGoB;IACbpB,KAAK,MAAM,GAAGqB;IAEd,OAAOrB;AACT;AAEO,eAAewB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCvB,IAAU;IAEV,MAAMwB,eAAeC,AAAAA,IAAAA,0BAAAA,UAAAA,AAAAA,EAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,0BAAAA,QAAAA,CAAAA,IAAa,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,uBAAAA,AAAAA,EAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtB1B;IACF;IACA,OAAO6B;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC,EAChCC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;QACxB,MAAMK,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASL,YAAY,IAAI,CAAC,CAACK,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXnC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAEkC,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAME,UAAUD,OAAO,cAAc,IAAID;QACzC,MAAMG,YAAYF,OAAO,WAAW,GAChCG,gBAAgBL,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAEL,MAAMI,WAAiC;YACrC,CAACH,QAAQ,EAAE;YACX,GAAGC,SAAS;QACd;QAEAL,KAAK,IAAI,CAACO;IACZ;IAEA,IAAIR,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT;AAGO,MAAMQ,cAAcC,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAClC,MAAMA,6BAAAA,CAAAA,CAAAA,MAAQ;IACd,KAAKA,6BAAAA,CAAAA,CAAAA,MAAQ;AACf;AAEO,MAAMC,aAAaD,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACjC,OAAOA,6BAAAA,CAAAA,CAAAA,MAAQ;IACf,QAAQA,6BAAAA,CAAAA,CAAAA,MAAQ;IAChB,KAAKA,6BAAAA,CAAAA,CAAAA,MAAQ,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACP,MAAMA,6BAAAA,CAAAA,CAAAA,MAAQ,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC9C,QAAQA,6BAAAA,CAAAA,CAAAA,KACA,CACJA,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;QACP,MAAMA,6BAAAA,CAAAA,CAAAA,MAAQ;QACd,KAAKA,6BAAAA,CAAAA,CAAAA,MAAQ;IACf,IAED,QAAQ;IACX,yBAAyBA,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,6BAAAA,CAAAA,CAAAA,KAAO,CAAC;IACvCA,6BAAAA,CAAAA,CAAAA,MAAQ;IACRA,6BAAAA,CAAAA,CAAAA,MACS,CAAC;QACN,QAAQA,6BAAAA,CAAAA,CAAAA,MAAQ;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAE5B,MAAMC,yBAAyBN,6BAAAA,CAAAA,CAAAA,MACtB,CAAC;IACN,CAACK,oBAAoB,EAAEL,6BAAAA,CAAAA,CAAAA,OAAS,CAAC;IACjC,QAAQI;IAGR,WAAWJ,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ;IAC/B,WAAWA,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ;IAC/B,OAAOA,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ;IAG3B,QAAQA,6BAAAA,CAAAA,CAAAA,KAAO,CAAC;QAACA,6BAAAA,CAAAA,CAAAA,MAAQ;QAAIA,6BAAAA,CAAAA,CAAAA,MAAQ;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAGP,MAAMK,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;QAGjCC,mBAKAC;IANJ,IAAIC,cAAcH;IAClB,IAAIC,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,eACjCE,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAID,AAAAA,SAAAA,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,mBAAkB,QAAQ,AAAD,MAAM,aAAa;QAC9C,MAAME,QAAQD,YAAY,IAAI,CAAC,KAAK;QACpC,OAAOP,uBAAuBQ;IAChC;IAEA,OAAO;AACT;AAEO,MAAMC,2BAA2B,CAACL;IACvC5E,IAAAA,sBAAAA,MAAAA,AAAAA,EACE2E,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOA,MAAM,MAAM,CAAC,MAAM;IAE9B;IAGA,OAAOM,OAAON;AAChB;AAEO,MAAMO,8BAA8B,CACzCC,SACAC;QAQIC;IANJ,IAAI,CAACF,SACH,OAAO,EAAE;IAIX,MAAMG,YAAYH;IAClB,IAAIE,AAAAA,SAAAA,CAAAA,kBAAAA,UAAU,IAAI,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,QAAQ,AAAD,MAAM,eAAeC,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAO9G,OAAO,IAAI,CAAC6G,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAAC/G;YAClB,MAAMmG,QAAQW,UAAU,KAAK,CAAC9G,IAAI;YAClC,IAAI,CAACkG,uBAAuBC,QAC1B,OAAO;YAIT,IAAIS,cAAc;oBACTI;gBAAP,OAAOA,AAAAA,SAAAA,CAAAA,cAAAA,MAAM,IAAI,AAAD,IAATA,KAAAA,IAAAA,YAAY,QAAQ,AAAD,MAAM;YAClC;YAEA,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAMzB,kBAAkB,CAC7B0B,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM9F,SAAS;QAAE,GAAG6F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAajG,MAAM,CAACgG,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACTjG,MAAM,CAACgG,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1BjG,MAAM,CAACgG,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxBjG,MAAM,CAACgG,UAAU,GAAGC,WAAW,MAAM,CAAC,MAAM;gBAC9C;YACF;QACF;IAEJ;IAEA,OAAOjG;AACT;AAEO,MAAMkG,kBAAkB,CAC7BL,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM9F,SAAS;QAAE,GAAG6F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAajG,MAAM,CAACgG,UAAU;QACpC,IAAIC,cAAc,AAAsB,YAAtB,OAAOA,YACvBjG,MAAM,CAACgG,UAAU,GAAG;YAClB,CAACrB,oBAAoB,EAAE;YACvB,QAAQsB;QACV;IAEJ;IAEA,OAAOjG;AACT"}
1
+ {"version":3,"file":"ai-model/common.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIUsageInfo,\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = ChatCompletionMessageParam[];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n TEXT = 5,\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(\n locate.bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n): [number, number, number, number] {\n let result: [number, number, number, number] = [0, 0, 0, 0];\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n result = adaptDoubaoBbox(bbox, width, height);\n } else if (vlMode === 'gemini') {\n result = adaptGeminiBbox(bbox, width, height);\n } else if (vlMode === 'qwen3-vl') {\n result = normalized01000(bbox, width, height);\n } else {\n result = adaptQwenBbox(bbox);\n }\n\n result[2] = Math.min(result[2], rightLimit);\n result[3] = Math.min(result[3], bottomLimit);\n\n return result;\n}\n\n// x1, y1, x2, y2 -> 0-1000\nexport function normalized01000(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n return [\n Math.round((bbox[0] * width) / 1000),\n Math.round((bbox[1] * height) / 1000),\n Math.round((bbox[2] * width) / 1000),\n Math.round((bbox[3] * height) / 1000),\n ];\n}\n\n// y1, x1, y2, x2 -> 0-1000\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n rightLimit = width,\n bottomLimit = height,\n vlMode?: TVlModeTypes | undefined,\n): Rect {\n debugInspectUtils(\n 'adaptBboxToRect',\n bbox,\n width,\n height,\n 'offset',\n offsetX,\n offsetY,\n 'limit',\n rightLimit,\n bottomLimit,\n 'vlMode',\n vlMode,\n );\n const [left, top, right, bottom] = adaptBbox(\n bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n\n // Calculate initial rect dimensions\n const rectLeft = left;\n const rectTop = top;\n let rectWidth = right - left;\n let rectHeight = bottom - top;\n\n // Ensure the rect doesn't exceed image boundaries\n // If right edge exceeds width, adjust the width\n if (rectLeft + rectWidth > width) {\n rectWidth = width - rectLeft;\n }\n\n // If bottom edge exceeds height, adjust the height\n if (rectTop + rectHeight > height) {\n rectHeight = height - rectTop;\n }\n\n // Ensure minimum dimensions (width and height should be at least 1)\n rectWidth = Math.max(1, rectWidth);\n rectHeight = Math.max(1, rectHeight);\n\n const rect = {\n left: rectLeft + offsetX,\n top: rectTop + offsetY,\n width: rectWidth,\n height: rectHeight,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size, modelName: string) {\n if (warned) return;\n if (modelName.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your interface to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n vlMode: TVlModeTypes | undefined,\n) {\n const minEdgeSize = vlMode === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n // Calculate padding needed to reach minimum edge size\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n\n // Calculate new dimensions (ensure minimum edge size)\n let newWidth = Math.max(minEdgeSize, rect.width + paddingSizeHorizontal * 2);\n let newHeight = Math.max(minEdgeSize, rect.height + paddingSizeVertical * 2);\n\n // Calculate initial position with padding\n let newLeft = rect.left - paddingSizeHorizontal;\n let newTop = rect.top - paddingSizeVertical;\n\n // Ensure the rect doesn't exceed screen boundaries by adjusting position\n // If the rect goes beyond the right edge, shift it left\n if (newLeft + newWidth > screenSize.width) {\n newLeft = screenSize.width - newWidth;\n }\n\n // If the rect goes beyond the bottom edge, shift it up\n if (newTop + newHeight > screenSize.height) {\n newTop = screenSize.height - newHeight;\n }\n\n // Ensure the rect doesn't go beyond the left/top edges\n newLeft = Math.max(0, newLeft);\n newTop = Math.max(0, newTop);\n\n // If after position adjustment, the rect still exceeds screen boundaries,\n // clamp the dimensions to fit within screen\n if (newLeft + newWidth > screenSize.width) {\n newWidth = screenSize.width - newLeft;\n }\n if (newTop + newHeight > screenSize.height) {\n newHeight = screenSize.height - newTop;\n }\n\n rect.left = newLeft;\n rect.top = newTop;\n rect.width = newWidth;\n rect.height = newHeight;\n\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: '',\n ...flowParam,\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\nconst MidsceneLocationResult = z\n .object({\n [locateFieldFlagName]: z.literal(true),\n prompt: TUserPromptSchema,\n\n // optional fields\n deepThink: z.boolean().optional(), // only available in vl model\n cacheable: z.boolean().optional(),\n xpath: z.boolean().optional(), // preset result for xpath\n\n // these two fields will only appear in the result\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\nexport type MidsceneLocationResultType = z.infer<typeof MidsceneLocationResult>;\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationResult;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodUnion (the new MidsceneLocation structure)\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n return locateFieldFlagName in shape;\n }\n\n return false;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return field.prompt.prompt; // TODO: dump images if necessary\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = fieldValue.prompt.prompt;\n }\n }\n }\n }\n }\n\n return result;\n};\n\nexport const loadActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue && typeof fieldValue === 'string') {\n result[fieldName] = {\n [locateFieldFlagName]: true,\n prompt: fieldValue,\n };\n }\n }\n\n return result;\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","AIActionType","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","rightLimit","bottomLimit","vlMode","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","adaptGeminiBbox","normalized01000","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rectLeft","rectTop","rectWidth","rectHeight","rect","warned","warnGPT4oSizeLimit","size","modelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","newWidth","newHeight","newLeft","newTop","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","verb","action","flowKey","flowParam","dumpActionParam","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","MidsceneLocationResult","getMidsceneLocationSchema","ifMidsceneLocatorField","field","_actualField__def","_actualField__def1","actualField","shape","dumpMidsceneLocatorField","String","findAllMidsceneLocatorField","zodType","requiredOnly","_zodObject__def","zodObject","keys","_field__def","jsonObject","zodSchema","locatorFields","fieldName","fieldValue","loadActionParam"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACkBO,IAAKI,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;;WAAZA;;AASZ,MAAMC,kBAAkB;AACxB,MAAMC,oBAAoBC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAGhC,IAAKL,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGM,UACZN,OAAO,IAAI,EACXC,OACAC,QACAC,YACAC,aACAC;IAIJ,OAAOL;AACT;AAEO,SAASO,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;QACN,YAAnB,OAAOY,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;KAC1B;IACD,OAAOgB;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCP,KAAa,EACbC,MAAc;IAEda,IAAAA,sBAAAA,MAAAA,AAAAA,EACEd,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOM,MAAmB;QAC5BO,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;YAC5CW,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;SAC7C;QAEH,MAAM,IAAIS,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS;QACnCY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU;QACpCW,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS;QACnCY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU;KACrC;IAIH,IACEiB,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU,QAAQN,kBAAkB;QAEhEiB,KAAK,GAAG,CACNZ,OACAY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACNX,QACAW,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIY,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS;QACnCY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU;QACpCW,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS;QACnCY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU;KACrC;IAGH,MAAMO,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdP,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAEhC,IAAIO,SAA2C;QAAC;QAAG;QAAG;QAAG;KAAE;IAEzDA,SADEP,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,SACvBS,gBAAgBN,MAAMP,OAAOC,UAC7BG,AAAW,aAAXA,SACAkB,gBAAgBf,MAAMP,OAAOC,UAC7BG,AAAW,eAAXA,SACAmB,gBAAgBhB,MAAMP,OAAOC,UAE7BK,cAAcC;IAGzBI,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAET;IAChCS,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAER;IAEhC,OAAOQ;AACT;AAGO,SAASY,gBACdhB,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,OAAO;QACLW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;QAChCW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;KACjC;AACH;AAGO,SAASqB,gBACdf,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,MAAMuB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC5C,MAAMyB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC5C,MAAMyB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC7C,MAAM2B,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC/C,OAAO;QAACuB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdP,KAAa,EACbC,MAAc,EACd4B,UAAU,CAAC,EACXC,UAAU,CAAC,EACX5B,aAAaF,KAAK,EAClBG,cAAcF,MAAM,EACpBG,MAAiC;IAEjCR,kBACE,mBACAW,MACAP,OACAC,QACA,UACA4B,SACAC,SACA,SACA5B,YACAC,aACA,UACAC;IAEF,MAAM,CAACoB,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UACjCE,MACAP,OACAC,QACAC,YACAC,aACAC;IAIF,MAAM2B,WAAWP;IACjB,MAAMQ,UAAUP;IAChB,IAAIQ,YAAYP,QAAQF;IACxB,IAAIU,aAAaP,SAASF;IAI1B,IAAIM,WAAWE,YAAYjC,OACzBiC,YAAYjC,QAAQ+B;IAItB,IAAIC,UAAUE,aAAajC,QACzBiC,aAAajC,SAAS+B;IAIxBC,YAAYrB,KAAK,GAAG,CAAC,GAAGqB;IACxBC,aAAatB,KAAK,GAAG,CAAC,GAAGsB;IAEzB,MAAMC,OAAO;QACX,MAAMJ,WAAWF;QACjB,KAAKG,UAAUF;QACf,OAAOG;QACP,QAAQC;IACV;IACAtC,kBAAkB,4BAA4BuC;IAE9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU,EAAEC,SAAiB;IAC9D,IAAIH,QAAQ;IACZ,IAAIG,UAAU,WAAW,GAAG,QAAQ,CAAC,WAAW;QAC9C,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,6FAA6F,CAAC;QAErN,IACE1B,KAAK,GAAG,CAAC0B,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpC1B,KAAK,GAAG,CAAC0B,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAUhC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAASlC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAWnC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYpC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdd,IAAU,EACVe,UAAgB,EAChB9C,MAAgC;IAEhC,MAAM+C,cAAc/C,AAAW,oBAAXA,SAA6B,MAAM;IACvD,MAAMgD,iBAAiB;IAGvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTvC,KAAK,IAAI,CAAEuC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVvC,KAAK,IAAI,CAAEuC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IAGN,IAAIG,WAAW3C,KAAK,GAAG,CAACuC,aAAahB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA;IAClD,IAAIG,YAAY5C,KAAK,GAAG,CAACuC,aAAahB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA;IAGpD,IAAIG,UAAUtB,KAAK,IAAI,GAAGkB;IAC1B,IAAIK,SAASvB,KAAK,GAAG,GAAGmB;IAIxB,IAAIG,UAAUF,WAAWL,WAAW,KAAK,EACvCO,UAAUP,WAAW,KAAK,GAAGK;IAI/B,IAAIG,SAASF,YAAYN,WAAW,MAAM,EACxCQ,SAASR,WAAW,MAAM,GAAGM;IAI/BC,UAAU7C,KAAK,GAAG,CAAC,GAAG6C;IACtBC,SAAS9C,KAAK,GAAG,CAAC,GAAG8C;IAIrB,IAAID,UAAUF,WAAWL,WAAW,KAAK,EACvCK,WAAWL,WAAW,KAAK,GAAGO;IAEhC,IAAIC,SAASF,YAAYN,WAAW,MAAM,EACxCM,YAAYN,WAAW,MAAM,GAAGQ;IAGlCvB,KAAK,IAAI,GAAGsB;IACZtB,KAAK,GAAG,GAAGuB;IACXvB,KAAK,KAAK,GAAGoB;IACbpB,KAAK,MAAM,GAAGqB;IAEd,OAAOrB;AACT;AAEO,eAAewB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCvB,IAAU;IAEV,MAAMwB,eAAeC,AAAAA,IAAAA,0BAAAA,UAAAA,AAAAA,EAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,0BAAAA,QAAAA,CAAAA,IAAa,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,uBAAAA,AAAAA,EAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtB1B;IACF;IACA,OAAO6B;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC,EAChCC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;QACxB,MAAMK,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASL,YAAY,IAAI,CAAC,CAACK,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXnC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAEkC,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAME,UAAUD,OAAO,cAAc,IAAID;QACzC,MAAMG,YAAYF,OAAO,WAAW,GAChCG,gBAAgBL,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAEL,MAAMI,WAAiC;YACrC,CAACH,QAAQ,EAAE;YACX,GAAGC,SAAS;QACd;QAEAL,KAAK,IAAI,CAACO;IACZ;IAEA,IAAIR,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT;AAGO,MAAMQ,cAAcC,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAClC,MAAMA,6BAAAA,CAAAA,CAAAA,MAAQ;IACd,KAAKA,6BAAAA,CAAAA,CAAAA,MAAQ;AACf;AAEO,MAAMC,aAAaD,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACjC,OAAOA,6BAAAA,CAAAA,CAAAA,MAAQ;IACf,QAAQA,6BAAAA,CAAAA,CAAAA,MAAQ;IAChB,KAAKA,6BAAAA,CAAAA,CAAAA,MAAQ,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACP,MAAMA,6BAAAA,CAAAA,CAAAA,MAAQ,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC9C,QAAQA,6BAAAA,CAAAA,CAAAA,KACA,CACJA,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;QACP,MAAMA,6BAAAA,CAAAA,CAAAA,MAAQ;QACd,KAAKA,6BAAAA,CAAAA,CAAAA,MAAQ;IACf,IAED,QAAQ;IACX,yBAAyBA,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,6BAAAA,CAAAA,CAAAA,KAAO,CAAC;IACvCA,6BAAAA,CAAAA,CAAAA,MAAQ;IACRA,6BAAAA,CAAAA,CAAAA,MACS,CAAC;QACN,QAAQA,6BAAAA,CAAAA,CAAAA,MAAQ;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAE5B,MAAMC,yBAAyBN,6BAAAA,CAAAA,CAAAA,MACtB,CAAC;IACN,CAACK,oBAAoB,EAAEL,6BAAAA,CAAAA,CAAAA,OAAS,CAAC;IACjC,QAAQI;IAGR,WAAWJ,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ;IAC/B,WAAWA,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ;IAC/B,OAAOA,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ;IAG3B,QAAQA,6BAAAA,CAAAA,CAAAA,KAAO,CAAC;QAACA,6BAAAA,CAAAA,CAAAA,MAAQ;QAAIA,6BAAAA,CAAAA,CAAAA,MAAQ;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAGP,MAAMK,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;QAGjCC,mBAKAC;IANJ,IAAIC,cAAcH;IAClB,IAAIC,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,eACjCE,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAID,AAAAA,SAAAA,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,mBAAkB,QAAQ,AAAD,MAAM,aAAa;QAC9C,MAAME,QAAQD,YAAY,IAAI,CAAC,KAAK;QACpC,OAAOP,uBAAuBQ;IAChC;IAEA,OAAO;AACT;AAEO,MAAMC,2BAA2B,CAACL;IACvC7E,IAAAA,sBAAAA,MAAAA,AAAAA,EACE4E,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOA,MAAM,MAAM,CAAC,MAAM;IAE9B;IAGA,OAAOM,OAAON;AAChB;AAEO,MAAMO,8BAA8B,CACzCC,SACAC;QAQIC;IANJ,IAAI,CAACF,SACH,OAAO,EAAE;IAIX,MAAMG,YAAYH;IAClB,IAAIE,AAAAA,SAAAA,CAAAA,kBAAAA,UAAU,IAAI,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,QAAQ,AAAD,MAAM,eAAeC,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOjH,OAAO,IAAI,CAACgH,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAAClH;YAClB,MAAMsG,QAAQW,UAAU,KAAK,CAACjH,IAAI;YAClC,IAAI,CAACqG,uBAAuBC,QAC1B,OAAO;YAIT,IAAIS,cAAc;oBACTI;gBAAP,OAAOA,AAAAA,SAAAA,CAAAA,cAAAA,MAAM,IAAI,AAAD,IAATA,KAAAA,IAAAA,YAAY,QAAQ,AAAD,MAAM;YAClC;YAEA,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAMzB,kBAAkB,CAC7B0B,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM/F,SAAS;QAAE,GAAG8F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAalG,MAAM,CAACiG,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACTlG,MAAM,CAACiG,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1BlG,MAAM,CAACiG,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxBlG,MAAM,CAACiG,UAAU,GAAGC,WAAW,MAAM,CAAC,MAAM;gBAC9C;YACF;QACF;IAEJ;IAEA,OAAOlG;AACT;AAEO,MAAMmG,kBAAkB,CAC7BL,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM/F,SAAS;QAAE,GAAG8F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAalG,MAAM,CAACiG,UAAU;QACpC,IAAIC,cAAc,AAAsB,YAAtB,OAAOA,YACvBlG,MAAM,CAACiG,UAAU,GAAG;YAClB,CAACrB,oBAAoB,EAAE;YACvB,QAAQsB;QACV;IAEJ;IAEA,OAAOlG;AACT"}
@@ -97,12 +97,30 @@ async function AiLocateElement(options) {
97
97
  });
98
98
  const systemPrompt = (0, llm_locator_js_namespaceObject.systemPromptToLocateElement)(vlMode);
99
99
  let imagePayload = screenshotBase64;
100
+ let imageWidth = context.size.width;
101
+ let imageHeight = context.size.height;
102
+ let originalImageWidth = imageWidth;
103
+ let originalImageHeight = imageHeight;
100
104
  if (options.searchConfig) {
105
+ var _options_searchConfig_rect, _options_searchConfig_rect1;
101
106
  (0, utils_namespaceObject.assert)(options.searchConfig.rect, 'searchArea is provided but its rect cannot be found. Failed to locate element');
102
107
  (0, utils_namespaceObject.assert)(options.searchConfig.imageBase64, 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element');
103
108
  imagePayload = options.searchConfig.imageBase64;
104
- } else if ('qwen-vl' === vlMode) imagePayload = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
105
- else if (!vlMode) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
109
+ imageWidth = null == (_options_searchConfig_rect = options.searchConfig.rect) ? void 0 : _options_searchConfig_rect.width;
110
+ imageHeight = null == (_options_searchConfig_rect1 = options.searchConfig.rect) ? void 0 : _options_searchConfig_rect1.height;
111
+ originalImageWidth = imageWidth;
112
+ originalImageHeight = imageHeight;
113
+ } else if ('qwen-vl' === vlMode) {
114
+ const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
115
+ imageWidth = paddedResult.width;
116
+ imageHeight = paddedResult.height;
117
+ imagePayload = paddedResult.imageBase64;
118
+ } else if ('qwen3-vl' === vlMode) {
119
+ const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload, 32);
120
+ imageWidth = paddedResult.width;
121
+ imageHeight = paddedResult.height;
122
+ imagePayload = paddedResult.imageBase64;
123
+ } else if (!vlMode) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
106
124
  const msgs = [
107
125
  {
108
126
  role: 'system',
@@ -139,8 +157,8 @@ async function AiLocateElement(options) {
139
157
  let errors = 'errors' in res.content ? res.content.errors : [];
140
158
  try {
141
159
  if ('bbox' in res.content && Array.isArray(res.content.bbox)) {
142
- var _options_searchConfig_rect, _options_searchConfig, _options_searchConfig_rect1, _options_searchConfig1, _options_searchConfig_rect2, _options_searchConfig2, _options_searchConfig_rect3, _options_searchConfig3;
143
- resRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(res.content.bbox, (null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect.width) || context.size.width, (null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect1 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect1.height) || context.size.height, null == (_options_searchConfig2 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig2.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig3 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig3.rect) ? void 0 : _options_searchConfig_rect3.top, vlMode);
160
+ var _options_searchConfig_rect2, _options_searchConfig, _options_searchConfig_rect3, _options_searchConfig1;
161
+ resRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(res.content.bbox, imageWidth, imageHeight, null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect3.top, originalImageWidth, originalImageHeight, vlMode);
144
162
  debugInspect('resRect', resRect);
145
163
  const rectCenter = {
146
164
  x: resRect.left + resRect.width / 2,
@@ -219,11 +237,11 @@ async function AiLocateSection(options) {
219
237
  let sectionRect;
220
238
  const sectionBbox = result.content.bbox;
221
239
  if (sectionBbox) {
222
- const targetRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(sectionBbox, context.size.width, context.size.height, 0, 0, vlMode);
240
+ const targetRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(sectionBbox, context.size.width, context.size.height, 0, 0, context.size.width, context.size.height, vlMode);
223
241
  debugSection('original targetRect %j', targetRect);
224
242
  const referenceBboxList = result.content.references_bbox || [];
225
243
  debugSection('referenceBboxList %j', referenceBboxList);
226
- const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>(0, external_common_js_namespaceObject.adaptBboxToRect)(bbox, context.size.width, context.size.height, 0, 0, vlMode));
244
+ const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>(0, external_common_js_namespaceObject.adaptBboxToRect)(bbox, context.size.width, context.size.height, 0, 0, context.size.width, context.size.height, vlMode));
227
245
  debugSection('referenceRects %j', referenceRects);
228
246
  const mergedRect = (0, external_common_js_namespaceObject.mergeRects)([
229
247
  targetRect,
@@ -234,7 +252,12 @@ async function AiLocateSection(options) {
234
252
  debugSection('expanded sectionRect %j', sectionRect);
235
253
  }
236
254
  let imageBase64 = screenshotBase64;
237
- if (sectionRect) imageBase64 = await (0, img_namespaceObject.cropByRect)(screenshotBase64, sectionRect, 'qwen-vl' === vlMode);
255
+ if (sectionRect) {
256
+ const croppedResult = await (0, img_namespaceObject.cropByRect)(screenshotBase64, sectionRect, 'qwen-vl' === vlMode);
257
+ imageBase64 = croppedResult.imageBase64;
258
+ sectionRect.width = croppedResult.width;
259
+ sectionRect.height = croppedResult.height;
260
+ }
238
261
  return {
239
262
  rect: sectionRect,
240
263
  imageBase64,
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context, { vlMode });\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n } else if (vlMode === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n options.searchConfig?.rect?.width || context.size.width,\n options.searchConfig?.rect?.height || context.size.height,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n vlMode,\n );\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n imageBase64 = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const { vlMode } = modelConfig;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n\n const { description, elementById } = await describeUserPage(context, {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n vlMode,\n });\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect","_options_searchConfig_rect1","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","cropByRect","AiExtractElementInfo","_options_extractOption","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;ACoDA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OASD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBT,SAAS;QAAEI;IAAO;IAE3CM,IAAAA,sBAAAA,MAAAA,AAAAA,EACET,0BACA;IAEF,MAAMU,wBAAwB,MAAMC,+BAAAA,iBAAAA,CAAAA,MAAwB,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0BjB,wBAAwBY;IACpD;IACA,MAAMY,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BV;IAEjD,IAAIW,eAAeV;IAEnB,IAAIN,QAAQ,YAAY,EAAE;QACxBW,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFW,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFgB,eAAehB,QAAQ,YAAY,CAAC,WAAW;IACjD,OAAO,IAAIK,AAAW,cAAXA,QACTW,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACX,QACVW,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBZ,kBACAL,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOV,0BAAuC;QAChD,MAAMiB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAMC,MAAM,MAAMjB,SAASR,MAAM0B,mCAAAA,YAAAA,CAAAA,eAA4B,EAAEjB;IAE/D,MAAMkB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAG1DQ,4BAAAA,uBACAC,6BAAAA,wBACAC,6BAAAA,wBACAC,6BAAAA;YALFP,UAAUQ,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRZ,IAAI,OAAO,CAAC,IAAI,EAChBQ,AAAAA,SAAAA,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,6BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,2BAA4B,KAAK,AAAD,KAAK3B,QAAQ,IAAI,CAAC,KAAK,EACvD4B,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,MAAM,AAAD,KAAK5B,QAAQ,IAAI,CAAC,MAAM,UACzD6B,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/B1B;YAEFlB,aAAa,WAAWqC;YAExB,MAAMS,aAAa;gBACjB,GAAGT,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIU,UAAUC,AAAAA,IAAAA,wBAAAA,gCAAAA,AAAAA,EAAiClC,QAAQ,IAAI,EAAEgC;YAE7D,MAAMG,mBAAmBF,UACrBG,AAAAA,IAAAA,wBAAAA,QAAAA,AAAAA,EAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,wBAAAA,iBAAiBA,EAClDJ,UAAUzB,wBAAwBwB;YAGpC,IAAIC,SAAS;gBACXT,kBAAkB;oBAACS;iBAAQ;gBAC3BR,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOa,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACb,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEc,IAAI,CAAC,CAAC;aAFtBd,SAAS;YAACc;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMhB;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACAd;QACA,OAAOY,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCsB;IACR;AACF;AAEO,eAAeC,gBAAgB3C,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAE2C,kBAAkB,EAAExC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMa,eAAe+B,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BxC;IACjD,MAAMyC,gCAAgC,MAAMC,uCAAAA,yBAAAA,CAAAA,MAAgC,CAAC;QAC3E,oBAAoBzD,wBAAwBsD;IAC9C;IACA,MAAMjD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMwC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMzB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQoD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAjD,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM6B,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnBtD,MACA0B,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBjB;IAGF,IAAI8C;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBmB,aACAlD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAI;QAEFhB,aAAa,0BAA0B+D;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D3D,aAAa,wBAAwBgE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS5B,MAAM,OAAO,CAAC4B,OAC/B,GAAG,CAAC,CAACA,OACGvB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLuB,MACAtD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAI;QAGNhB,aAAa,qBAAqBiE;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DjE,aAAa,iBAAiBmE;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYvD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2B6D;IAC1C;IAEA,IAAIS,cAAcrD;IAClB,IAAI4C,aACFS,cAAc,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAClBtD,kBACA4C,aACA7C,AAAW,cAAXA;IAIJ,OAAO;QACL,MAAM6C;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAazB,KAAK,SAAS,CAACyB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAea,qBAGpB7D,OAMD;QA8CK8D;IA7CJ,MAAM,EAAEC,SAAS,EAAE9D,OAAO,EAAE+D,aAAa,EAAEvE,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAMU,eAAemD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IAErB,MAAM,EAAE3D,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBT,SAAS;QACnE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAa+D,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;QACvC3D;IACF;IAEA,MAAM6D,wBAAwB,MAAMC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAClC5D,aACAwD;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK9D;YACL,QAAQ;QACV;IACF;IAGF8D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMvE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAASsD;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCnE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM6B,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnBtD,MACA0B,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBjB;IAEF,OAAO;QACL,aAAa4C,OAAO,OAAO;QAC3BxC;QACA,OAAOwC,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context, { vlMode });\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n } else if (vlMode === 'qwen3-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload, 32);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const { vlMode } = modelConfig;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n\n const { description, elementById } = await describeUserPage(context, {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n vlMode,\n });\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","_options_searchConfig_rect","_options_searchConfig_rect1","paddedResult","paddingToMatchBlockByBase64","markupImageForLLM","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","_options_extractOption","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;ACoDA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OASD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBT,SAAS;QAAEI;IAAO;IAE3CM,IAAAA,sBAAAA,MAAAA,AAAAA,EACET,0BACA;IAEF,MAAMU,wBAAwB,MAAMC,+BAAAA,iBAAAA,CAAAA,MAAwB,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0BjB,wBAAwBY;IACpD;IACA,MAAMY,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BV;IAEjD,IAAIW,eAAeV;IACnB,IAAIW,aAAahB,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIiB,cAAcjB,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIkB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIlB,QAAQ,YAAY,EAAE;YAWXqB,4BACCC;QAXdX,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFW,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFgB,eAAehB,QAAQ,YAAY,CAAC,WAAW;QAC/CiB,aAAa,QAAAI,CAAAA,6BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,2BAA2B,KAAK;QAC7CH,cAAc,QAAAI,CAAAA,8BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,4BAA2B,MAAM;QAC/CH,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIb,AAAW,cAAXA,QAAsB;QAC/B,MAAMkB,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BR;QACvDC,aAAaM,aAAa,KAAK;QAC/BL,cAAcK,aAAa,MAAM;QACjCP,eAAeO,aAAa,WAAW;IACzC,OAAO,IAAIlB,AAAW,eAAXA,QAAuB;QAChC,MAAMkB,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BR,cAAc;QACrEC,aAAaM,aAAa,KAAK;QAC/BL,cAAcK,aAAa,MAAM;QACjCP,eAAeO,aAAa,WAAW;IACzC,OAAO,IAAI,CAAClB,QACVW,eAAe,MAAMS,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBnB,kBACAL,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOV,0BAAuC;QAChD,MAAMwB,SAAS,MAAMlC,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI+B;IACf;IAEA,MAAMC,MAAM,MAAMxB,SAASR,MAAMiC,mCAAAA,YAAAA,CAAAA,eAA4B,EAAExB;IAE/D,MAAMyB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAK1DQ,6BAAAA,uBACAC,6BAAAA;YALFL,UAAUM,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRV,IAAI,OAAO,CAAC,IAAI,EAChBV,YACAC,aAAAA,QACAiB,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/BjB,oBACAC,qBACAf;YAGFlB,aAAa,WAAW4C;YAExB,MAAMO,aAAa;gBACjB,GAAGP,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIQ,UAAUC,AAAAA,IAAAA,wBAAAA,gCAAAA,AAAAA,EAAiCvC,QAAQ,IAAI,EAAEqC;YAE7D,MAAMG,mBAAmBF,UACrBG,AAAAA,IAAAA,wBAAAA,QAAAA,AAAAA,EAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,wBAAAA,iBAAiBA,EAClDJ,UAAU9B,wBAAwB6B;YAGpC,IAAIC,SAAS;gBACXP,kBAAkB;oBAACO;iBAAQ;gBAC3BN,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOW,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACX,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEY,IAAI,CAAC,CAAC;aAFtBZ,SAAS;YAACY;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMd;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACArB;QACA,OAAOmB,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCoB;IACR;AACF;AAEO,eAAeC,gBAAgBhD,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEgD,kBAAkB,EAAE7C,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMa,eAAeoC,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4B7C;IACjD,MAAM8C,gCAAgC,MAAMC,uCAAAA,yBAAAA,CAAAA,MAAgC,CAAC;QAC3E,oBAAoB9D,wBAAwB2D;IAC9C;IACA,MAAMtD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM6C;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMvB,SAAS,MAAMlC,mBAAmB;YACtC,QAAQyD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAtD,KAAK,IAAI,IAAI+B;IACf;IAEA,MAAM2B,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB3D,MACAiC,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBxB;IAGF,IAAImD;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBmB,aACAvD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFhB,aAAa,0BAA0BoE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DhE,aAAa,wBAAwBqE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS1B,MAAM,OAAO,CAAC0B,OAC/B,GAAG,CAAC,CAACA,OACGvB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLuB,MACA3D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNhB,aAAa,qBAAqBsE;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DtE,aAAa,iBAAiBwE;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAY5D,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2BkE;IAC1C;IAEA,IAAIS,cAAc1D;IAClB,IAAIiD,aAAa;QACf,MAAMU,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1B5D,kBACAiD,aACAlD,AAAW,cAAXA;QAEF2D,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAavB,KAAK,SAAS,CAACuB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAGpBnE,OAMD;QA8CKoE;IA7CJ,MAAM,EAAEC,SAAS,EAAEpE,OAAO,EAAEqE,aAAa,EAAE7E,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAMU,eAAeyD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IAErB,MAAM,EAAEjE,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBT,SAAS;QACnE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAaqE,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;QACvCjE;IACF;IAEA,MAAMmE,wBAAwB,MAAMC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAClClE,aACA8D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKpE;YACL,QAAQ;QACV;IACF;IAGFoE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM7E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS4D;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCzE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAMiC,SAAS,MAAMlC,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAI+B;IACf;IAEA,MAAM2B,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB3D,MACAiC,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBxB;IAEF,OAAO;QACL,aAAaiD,OAAO,OAAO;QAC3B7C;QACA,OAAO6C,OAAO,KAAK;IACrB;AACF"}
@@ -47,10 +47,26 @@ async function plan(userInstruction, opts) {
47
47
  vlMode: vlMode
48
48
  });
49
49
  let imagePayload = screenshotBase64;
50
- if ('qwen-vl' === vlMode) imagePayload = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
51
- else if (!vlMode) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
50
+ let imageWidth = size.width;
51
+ let imageHeight = size.height;
52
+ const rightLimit = imageWidth;
53
+ const bottomLimit = imageHeight;
54
+ if ('qwen-vl' === vlMode) {
55
+ const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
56
+ imageWidth = paddedResult.width;
57
+ imageHeight = paddedResult.height;
58
+ imagePayload = paddedResult.imageBase64;
59
+ } else if ('qwen3-vl' === vlMode) {
60
+ const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload, 32);
61
+ imageWidth = paddedResult.width;
62
+ imageHeight = paddedResult.height;
63
+ imagePayload = paddedResult.imageBase64;
64
+ } else if (!vlMode) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, {
65
+ width: imageWidth,
66
+ height: imageHeight
67
+ });
52
68
  (0, external_common_js_namespaceObject.warnGPT4oSizeLimit)(size, modelName);
53
- const historyLog = (null == (_opts_conversationHistory = opts.conversationHistory) ? void 0 : _opts_conversationHistory.snapshot().filter((item)=>'assistant' === item.role)) || [];
69
+ const historyLog = (null == (_opts_conversationHistory = opts.conversationHistory) ? void 0 : _opts_conversationHistory.snapshot()) || [];
54
70
  const knowledgeContext = opts.actionContext ? [
55
71
  {
56
72
  role: 'user',
@@ -122,7 +138,7 @@ async function plan(userInstruction, opts) {
122
138
  debug('locateFields', locateFields);
123
139
  locateFields.forEach((field)=>{
124
140
  const locateResult = action.param[field];
125
- if (locateResult) if (vlMode) action.param[field] = (0, external_common_js_namespaceObject.fillBboxParam)(locateResult, size.width, size.height, vlMode);
141
+ if (locateResult) if (vlMode) action.param[field] = (0, external_common_js_namespaceObject.fillBboxParam)(locateResult, imageWidth, imageHeight, rightLimit, bottomLimit, vlMode);
126
142
  else {
127
143
  const element = elementById(locateResult);
128
144
  if (element) action.param[field].id = element.id;
@@ -140,6 +156,15 @@ async function plan(userInstruction, opts) {
140
156
  }
141
157
  ]
142
158
  });
159
+ null == conversationHistory || conversationHistory.append({
160
+ role: 'user',
161
+ content: [
162
+ {
163
+ type: 'text',
164
+ text: 'I have finished the action previously planned'
165
+ }
166
+ ]
167
+ });
143
168
  return returnValue;
144
169
  }
145
170
  exports.plan = __webpack_exports__.plan;
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionContentPart,\n ChatCompletionMessageParam,\n} from 'openai/resources/index';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory?: ConversationHistory;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { modelName, vlMode } = modelConfig;\n\n const { description: pageDescription, elementById } = await describeUserPage(\n context,\n { vlMode },\n );\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlMode,\n });\n\n let imagePayload = screenshotBase64;\n if (vlMode === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n warnGPT4oSizeLimit(size, modelName);\n\n const historyLog =\n opts.conversationHistory\n ?.snapshot()\n .filter((item) => item.role === 'assistant') || [];\n\n const knowledgeContext: ChatCompletionMessageParam[] = opts.actionContext\n ? [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>`,\n },\n ],\n },\n ]\n : [];\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...knowledgeContext,\n ...instruction,\n ...historyLog,\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ...(vlMode\n ? []\n : ([\n {\n type: 'text',\n text: pageDescription,\n },\n ] as ChatCompletionContentPart[])),\n ],\n },\n ];\n\n const { content, usage } = await callAIWithObjectResponse<PlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n );\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n // TODO: use zod.parse to parse the action.param, and then fill the bbox param.\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (vlMode) {\n action.param[field] = fillBboxParam(\n locateResult,\n size.width,\n size.height,\n vlMode,\n );\n } else {\n const element = elementById(locateResult);\n if (element) {\n action.param[field].id = element.id;\n }\n }\n }\n });\n });\n // in Qwen-VL, error means error. In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory?.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","_opts_conversationHistory","_planFromAI_action","context","modelConfig","conversationHistory","screenshotBase64","size","modelName","vlMode","pageDescription","elementById","describeUserPage","systemPrompt","systemPromptToTaskPlanning","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","warnGPT4oSizeLimit","historyLog","item","knowledgeContext","instruction","msgs","content","usage","callAIWithObjectResponse","AIActionType","rawResponse","JSON","undefined","planFromAI","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","element","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACsBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAOC;QA8BCC,2BAkECC;IA9FH,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGL;IACtD,MAAM,EAAEM,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,SAAS,EAAEC,MAAM,EAAE,GAAGL;IAE9B,MAAM,EAAE,aAAaM,eAAe,EAAEC,WAAW,EAAE,GAAG,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAC1DT,SACA;QAAEM;IAAO;IAEX,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAad,KAAK,WAAW;QAC7B,QAAQS;IACV;IAEA,IAAIM,eAAeT;IACnB,IAAIG,AAAW,cAAXA,QACFM,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACN,QACVM,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBX,kBACAH,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhBe,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBX,MAAMC;IAEzB,MAAMW,aACJlB,AAAAA,SAAAA,CAAAA,4BAAAA,KAAK,mBAAmB,AAAD,IAAvBA,KAAAA,IAAAA,0BACI,QAAQ,GACT,MAAM,CAAC,CAACmB,OAASA,AAAc,gBAAdA,KAAK,IAAI,CAAgB,KAAK,EAAE;IAEtD,MAAMC,mBAAiDrB,KAAK,aAAa,GACrE;QACE;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,0BAA0B,CAAC;gBAClF;aACD;QACH;KACD,GACD,EAAE;IAEN,MAAMsB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,CAAC,kBAAkB,EAAEvB,gBAAgB,mBAAmB,CAAC;gBACjE;aACD;QACH;KACD;IAED,MAAMwB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASV;QAAa;WACrCQ;WACAC;WACAH;QACH;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKJ;wBACL,QAAQ;oBACV;gBACF;mBACIN,SACA,EAAE,GACD;oBACC;wBACE,MAAM;wBACN,MAAMC;oBACR;iBACD;aACN;QACH;KACD;IAED,MAAM,EAAEc,OAAO,EAAEC,KAAK,EAAE,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAC/BH,MACAI,mCAAAA,YAAAA,CAAAA,IAAiB,EACjBvB;IAEF,MAAMwB,cAAcC,KAAK,SAAS,CAACL,SAASM,QAAW;IACvD,MAAMC,aAAaP;IAEnB,MAAMQ,UACH9B,AAAAA,CAAAA,SAAAA,CAAAA,qBAAAA,WAAW,MAAM,AAAD,IAAhBA,KAAAA,IAAAA,mBAAmB,IAAI,AAAD,IAAI;QAAC6B,WAAW,MAAM;KAAC,GAAGA,WAAW,OAAM,KAAM,EAAE;IAC5E,MAAME,cAAkC;QACtC,GAAGF,UAAU;QACbC;QACAJ;QACAH;QACA,UAAUS,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EACRF,SACAhC,KAAK,WAAW,EAChB+B,WAAW,KAAK;IAEpB;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOJ,YAAY;IAGnBC,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBtC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACoC,SAAWA,OAAO,IAAI,KAAKC;QAG9BzC,MAAM,+BAA+B0C;QACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAEN1C,MAAM,gBAAgB2C;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,cACF,IAAIjC,QACF2B,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACAnC,KAAK,KAAK,EACVA,KAAK,MAAM,EACXE;iBAEG;gBACL,MAAMmC,UAAUjC,YAAY+B;gBAC5B,IAAIE,SACFR,OAAO,KAAK,CAACK,MAAM,CAAC,EAAE,GAAGG,QAAQ,EAAE;YAEvC;QAEJ;IACF;IAEAT,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAACJ,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IAEvE,IACEC,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACA9C;IAIJM,QAAAA,uBAAAA,oBAAqB,MAAM,CAAC;QAC1B,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMuB;YACR;SACD;IACH;IAEA,OAAOK;AACT"}
1
+ {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionContentPart,\n ChatCompletionMessageParam,\n} from 'openai/resources/index';\nimport {\n AIActionType,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory?: ConversationHistory;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { modelName, vlMode } = modelConfig;\n\n const { description: pageDescription, elementById } = await describeUserPage(\n context,\n { vlMode },\n );\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlMode,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n if (vlMode === 'qwen-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n } else if (vlMode === 'qwen3-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload, 32);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(screenshotBase64, context.tree, {\n width: imageWidth,\n height: imageHeight,\n });\n }\n\n warnGPT4oSizeLimit(size, modelName);\n\n const historyLog = opts.conversationHistory?.snapshot() || [];\n // .filter((item) => item.role === 'assistant') || [];\n\n const knowledgeContext: ChatCompletionMessageParam[] = opts.actionContext\n ? [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>`,\n },\n ],\n },\n ]\n : [];\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...knowledgeContext,\n ...instruction,\n ...historyLog,\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ...(vlMode\n ? []\n : ([\n {\n type: 'text',\n text: pageDescription,\n },\n ] as ChatCompletionContentPart[])),\n ],\n },\n ];\n\n const { content, usage } = await callAIWithObjectResponse<PlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n );\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n // TODO: use zod.parse to parse the action.param, and then fill the bbox param.\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (vlMode) {\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n } else {\n const element = elementById(locateResult);\n if (element) {\n action.param[field].id = element.id;\n }\n }\n }\n });\n });\n // in Qwen-VL, error means error. In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory?.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n conversationHistory?.append({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'I have finished the action previously planned',\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","_opts_conversationHistory","_planFromAI_action","context","modelConfig","conversationHistory","screenshotBase64","size","modelName","vlMode","pageDescription","elementById","describeUserPage","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","markupImageForLLM","warnGPT4oSizeLimit","historyLog","knowledgeContext","instruction","msgs","content","usage","callAIWithObjectResponse","AIActionType","rawResponse","JSON","undefined","planFromAI","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","element","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACqBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAOC;QAwCkBC,2BAiEhBC;IAvGH,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGL;IACtD,MAAM,EAAEM,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,SAAS,EAAEC,MAAM,EAAE,GAAGL;IAE9B,MAAM,EAAE,aAAaM,eAAe,EAAEC,WAAW,EAAE,GAAG,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAC1DT,SACA;QAAEM;IAAO;IAEX,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAad,KAAK,WAAW;QAC7B,QAAQS;IACV;IAEA,IAAIM,eAAeT;IACnB,IAAIU,aAAaT,KAAK,KAAK;IAC3B,IAAIU,cAAcV,KAAK,MAAM;IAC7B,MAAMW,aAAaF;IACnB,MAAMG,cAAcF;IACpB,IAAIR,AAAW,cAAXA,QAAsB;QACxB,MAAMW,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC,OAAO,IAAIX,AAAW,eAAXA,QAAuB;QAChC,MAAMW,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN,cAAc;QACrEC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC,OAAO,IAAI,CAACX,QACVM,eAAe,MAAMO,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EAAkBhB,kBAAkBH,QAAQ,IAAI,EAAE;QACrE,OAAOa;QACP,QAAQC;IACV;IAGFM,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBhB,MAAMC;IAEzB,MAAMgB,aAAavB,AAAAA,SAAAA,CAAAA,4BAAAA,KAAK,mBAAmB,AAAD,IAAvBA,KAAAA,IAAAA,0BAA0B,QAAQ,EAAC,KAAK,EAAE;IAG7D,MAAMwB,mBAAiDzB,KAAK,aAAa,GACrE;QACE;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,0BAA0B,CAAC;gBAClF;aACD;QACH;KACD,GACD,EAAE;IAEN,MAAM0B,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,CAAC,kBAAkB,EAAE3B,gBAAgB,mBAAmB,CAAC;gBACjE;aACD;QACH;KACD;IAED,MAAM4B,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASd;QAAa;WACrCY;WACAC;WACAF;QACH;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;mBACIN,SACA,EAAE,GACD;oBACC;wBACE,MAAM;wBACN,MAAMC;oBACR;iBACD;aACN;QACH;KACD;IAED,MAAM,EAAEkB,OAAO,EAAEC,KAAK,EAAE,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAC/BH,MACAI,mCAAAA,YAAAA,CAAAA,IAAiB,EACjB3B;IAEF,MAAM4B,cAAcC,KAAK,SAAS,CAACL,SAASM,QAAW;IACvD,MAAMC,aAAaP;IAEnB,MAAMQ,UACHlC,AAAAA,CAAAA,SAAAA,CAAAA,qBAAAA,WAAW,MAAM,AAAD,IAAhBA,KAAAA,IAAAA,mBAAmB,IAAI,AAAD,IAAI;QAACiC,WAAW,MAAM;KAAC,GAAGA,WAAW,OAAM,KAAM,EAAE;IAC5E,MAAME,cAAkC;QACtC,GAAGF,UAAU;QACbC;QACAJ;QACAH;QACA,UAAUS,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EACRF,SACApC,KAAK,WAAW,EAChBmC,WAAW,KAAK;IAEpB;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOJ,YAAY;IAGnBC,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsB1C,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACwC,SAAWA,OAAO,IAAI,KAAKC;QAG9B7C,MAAM,+BAA+B8C;QACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAEN9C,MAAM,gBAAgB+C;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,cACF,IAAIrC,QACF+B,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA9B,YACAC,aACAC,YACAC,aACAV;iBAEG;gBACL,MAAMuC,UAAUrC,YAAYmC;gBAC5B,IAAIE,SACFR,OAAO,KAAK,CAACK,MAAM,CAAC,EAAE,GAAGG,QAAQ,EAAE;YAEvC;QAEJ;IACF;IAEAT,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAACJ,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IAEvE,IACEC,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACAlD;IAIJM,QAAAA,uBAAAA,oBAAqB,MAAM,CAAC;QAC1B,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM2B;YACR;SACD;IACH;IACA3B,QAAAA,uBAAAA,oBAAqB,MAAM,CAAC;QAC1B,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;SACD;IACH;IAEA,OAAOgC;AACT"}
@@ -30,7 +30,7 @@ __webpack_require__.d(__webpack_exports__, {
30
30
  });
31
31
  const external_common_js_namespaceObject = require("../common.js");
32
32
  const external_common_js_namespaceObject_1 = require("./common.js");
33
- const vlCurrentLog = '"log": string, // Log your thoughts and what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The typical log looks like "The user wants to do ... . According to the instruction and the previous logs, i think the next action should be ....". If no action should be done, log the reason. Use the same language as the user\'s instruction.';
33
+ const vlCurrentLog = '"log": string, // Log your thoughts and what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The typical log looks like "The user wants to do ... . According to the instruction and the previous logs, now i should use action \'{ action-type }\' to do ....". If no action should be done, log the reason. Use the same language as the user\'s instruction.';
34
34
  const llmCurrentLog = '"log": string, // Log what the next actions you can do according to the screenshot and the instruction. The typical log looks like "Now i want to use action \'{ action-type }\' to do ..". If no action should be done, log the reason. ". Use the same language as the user\'s instruction.';
35
35
  const commonOutputFields = `"error"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.
36
36
  "more_actions_needed_by_instruction": boolean, // Consider if there is still more action(s) to do after the action in "Log" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;
@@ -140,7 +140,7 @@ For example, when the instruction is "click 'Confirm' button, and click 'Yes' in
140
140
  this and output the JSON:
141
141
 
142
142
  {
143
- "log": "The user wants to do click 'Confirm' button, and click 'Yes' in popup. According to the instruction and the previous logs, i think the next action should be click 'Yes' in popup.",
143
+ "log": "The user wants to do click 'Confirm' button, and click 'Yes' in popup. According to the instruction and the previous logs, now i should use action 'Tap' to click 'Yes' in popup.",
144
144
  "action": {
145
145
  "type": "Tap",
146
146
  "param": {