@midscene/core 1.4.5 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/es/agent/agent.mjs +4 -56
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +4 -1
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +91 -18
  6. package/dist/es/agent/utils.mjs.map +1 -1
  7. package/dist/es/ai-model/auto-glm/planning.mjs +1 -1
  8. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -1
  9. package/dist/es/ai-model/inspect.mjs +5 -5
  10. package/dist/es/ai-model/inspect.mjs.map +1 -1
  11. package/dist/es/ai-model/llm-planning.mjs +3 -3
  12. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  13. package/dist/es/ai-model/service-caller/index.mjs +73 -47
  14. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  15. package/dist/es/ai-model/ui-tars-planning.mjs +14 -14
  16. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  17. package/dist/es/common.mjs +2 -15
  18. package/dist/es/common.mjs.map +1 -1
  19. package/dist/es/device/index.mjs.map +1 -1
  20. package/dist/es/dump/html-utils.mjs +2 -1
  21. package/dist/es/dump/html-utils.mjs.map +1 -1
  22. package/dist/es/index.mjs.map +1 -1
  23. package/dist/es/report-generator.mjs +2 -2
  24. package/dist/es/report-generator.mjs.map +1 -1
  25. package/dist/es/report.mjs +39 -7
  26. package/dist/es/report.mjs.map +1 -1
  27. package/dist/es/service/index.mjs +6 -6
  28. package/dist/es/service/index.mjs.map +1 -1
  29. package/dist/es/types.mjs.map +1 -1
  30. package/dist/es/utils.mjs +2 -2
  31. package/dist/lib/agent/agent.js +4 -56
  32. package/dist/lib/agent/agent.js.map +1 -1
  33. package/dist/lib/agent/task-builder.js +4 -1
  34. package/dist/lib/agent/task-builder.js.map +1 -1
  35. package/dist/lib/agent/utils.js +96 -14
  36. package/dist/lib/agent/utils.js.map +1 -1
  37. package/dist/lib/ai-model/auto-glm/planning.js +1 -1
  38. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -1
  39. package/dist/lib/ai-model/inspect.js +5 -5
  40. package/dist/lib/ai-model/inspect.js.map +1 -1
  41. package/dist/lib/ai-model/llm-planning.js +3 -3
  42. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  43. package/dist/lib/ai-model/service-caller/index.js +75 -49
  44. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  45. package/dist/lib/ai-model/ui-tars-planning.js +14 -14
  46. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  47. package/dist/lib/common.js +2 -15
  48. package/dist/lib/common.js.map +1 -1
  49. package/dist/lib/device/index.js.map +1 -1
  50. package/dist/lib/dump/html-utils.js +4 -0
  51. package/dist/lib/dump/html-utils.js.map +1 -1
  52. package/dist/lib/index.js.map +1 -1
  53. package/dist/lib/report-generator.js +1 -1
  54. package/dist/lib/report-generator.js.map +1 -1
  55. package/dist/lib/report.js +36 -4
  56. package/dist/lib/report.js.map +1 -1
  57. package/dist/lib/service/index.js +6 -6
  58. package/dist/lib/service/index.js.map +1 -1
  59. package/dist/lib/types.js.map +1 -1
  60. package/dist/lib/utils.js +2 -2
  61. package/dist/types/agent/agent.d.ts +1 -15
  62. package/dist/types/agent/utils.d.ts +13 -1
  63. package/dist/types/ai-model/index.d.ts +1 -1
  64. package/dist/types/ai-model/service-caller/index.d.ts +4 -2
  65. package/dist/types/common.d.ts +0 -310
  66. package/dist/types/device/index.d.ts +1 -2
  67. package/dist/types/dump/html-utils.d.ts +11 -0
  68. package/dist/types/index.d.ts +1 -1
  69. package/dist/types/report.d.ts +5 -0
  70. package/dist/types/types.d.ts +2 -2
  71. package/package.json +2 -2
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n generateElementByPoint,\n generateElementByRect,\n} from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig);\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Apply offset if searching in a cropped area\n let finalX = pixelX;\n let finalY = pixelY;\n if (options.searchConfig?.rect) {\n finalX += options.searchConfig.rect.left;\n finalY += options.searchConfig.rect.top;\n }\n\n const element: LocateResultElement = generateElementByPoint(\n [finalX, finalY],\n targetElementDescriptionText as string,\n );\n\n resRect = element.rect;\n debugInspect('auto-glm resRect:', resRect);\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n let res: Awaited<\n ReturnType<\n typeof callAIWithObjectResponse<AIElementResponse | [number, number]>\n >\n >;\n try {\n res = await callAIWithObjectResponse<AIElementResponse | [number, number]>(\n msgs,\n modelConfig,\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n elements: [],\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n options.searchConfig?.scale,\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n scale?: number;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n imageBase64: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n sectionRect = expandSearchArea(mergedRect, context.size);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n let scale: number | undefined;\n\n if (sectionRect) {\n const originalWidth = sectionRect.width;\n const originalHeight = sectionRect.height;\n\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n\n const scaleRatio = 2;\n const scaledResult = await scaleImage(\n croppedResult.imageBase64,\n scaleRatio,\n );\n\n imageBase64 = scaledResult.imageBase64;\n scale = scaleRatio;\n sectionRect.width = scaledResult.width;\n sectionRect.height = scaledResult.height;\n\n debugSection(\n 'scaled sectionRect from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n sectionRect.width,\n sectionRect.height,\n scale,\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n scale,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig);\n\n // Parse XML response to JSON object\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","modelConfig","modelFamily","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","finalX","finalY","element","generateElementByPoint","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","undefined","JSON","Array","adaptBboxToRect","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","scale","originalWidth","originalHeight","croppedResult","cropByRect","scaleRatio","scaledResult","scaleImage","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","callAIFn","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;ACuDA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OAKrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,WAAW,EAAE,GAAGH;IAC3D,MAAM,EAAEI,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElDK,IAAAA,sBAAAA,MAAAA,AAAAA,EACEJ,0BACA;IAEF,MAAMK,+BAA+BhB,wBACnCW;IAEF,MAAMM,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBF;IAChD,MAAMG,eAAeC,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,eAC3BQ,AAAAA,IAAAA,0BAAAA,sBAAAA,AAAAA,EAAuBR,eACvBS,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BT;IAEhC,IAAIU,eAAeT;IACnB,IAAIU,aAAad,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIe,cAAcf,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIgB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIhB,QAAQ,YAAY,EAAE;QACxBM,IAAAA,sBAAAA,MAAAA,AAAAA,EACEN,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFM,IAAAA,sBAAAA,MAAAA,AAAAA,EACEN,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFc,eAAed,QAAQ,YAAY,CAAC,WAAW;QAC/Ce,aAAaf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCgB,cAAchB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCiB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIZ,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMe,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMxB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,eACZ,CAAC,KAAK,EAAEI,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAON,0BAAuC;QAChD,MAAMmB,SAAS,MAAM5B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI0B;IACf;IAEA,IAAIV,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,cAAc;QAC1B,MAAM,EAAE,SAASkB,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAAyB7B,MAAMQ;QAEvCf,aAAa,yBAAyBkC;QAEtC,MAAMG,SAASC,AAAAA,IAAAA,0BAAAA,0BAAAA,AAAAA,EAA2BJ;QAE1ClC,aAAa,sBAAsBqC,OAAO,KAAK;QAC/CrC,aAAa,yBAAyBqC,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9DrC,aAAa,yBAAyByC,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnCrC,aAAa,iCAAiC;gBAAE0C;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9C5B,aAAa,+BAA+B;gBAAE4C;gBAAQE;YAAO;YAG7D,IAAIC,SAASH;YACb,IAAII,SAASF;YACb,IAAIlC,QAAQ,YAAY,EAAE,MAAM;gBAC9BmC,UAAUnC,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBACxCoC,UAAUpC,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YACzC;YAEA,MAAMqC,UAA+BC,AAAAA,IAAAA,yBAAAA,sBAAAA,AAAAA,EACnC;gBAACH;gBAAQC;aAAO,EAChB7B;YAGFoB,UAAUU,QAAQ,IAAI;YACtBjD,aAAa,qBAAqBuC;YAElC,IAAIU,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMV;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,IAAIc;IAKJ,IAAI;QACFA,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACV7C,MACAQ;IAEJ,EAAE,OAAOsC,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,UAAU,EAAE;gBACZ,QAAQ;oBAAC,CAAC,eAAe,EAAEL,cAAc;iBAAC;YAC5C;YACAG;YACAtB;YACA,mBAAmBwB;QACrB;IACF;IAEA,MAAMF,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIZ;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYU,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBU,MAAM,OAAO,CAACV,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAZ,UAAUuB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRX,IAAI,OAAO,CAAC,IAAI,EAChBxB,YACAC,aACAhB,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BiB,oBACAC,qBACAd,aACAJ,QAAQ,YAAY,EAAE;YAGxBZ,aAAa,WAAWuC;YAExB,MAAMU,UAA+Bc,AAAAA,IAAAA,yBAAAA,qBAAAA,AAAAA,EACnCxB,SACApB;YAEFsB,SAAS,EAAE;YAEX,IAAIQ,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;IACF,EAAE,OAAOe,GAAG;QACV,MAAMC,MACJD,aAAaT,QACT,CAAC,sBAAsB,EAAES,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACvB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEwB,IAAI,CAAC,CAAC;aAFtBxB,SAAS;YAACwB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAM1B;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAgB;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBtD,OAIrC;IAQC,MAAM,EAAEC,OAAO,EAAEsD,kBAAkB,EAAEpD,WAAW,EAAE,GAAGH;IACrD,MAAM,EAAEI,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMS,eAAe8C,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BpD;IACjD,MAAMqD,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpCnE,wBAAwBgE;IAE1B,MAAM5D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMlC,SAAS,MAAM5B,mBAAmB;YACtC,QAAQ8D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA5D,KAAK,IAAI,IAAI0B;IACf;IAEA,IAAIsC;IAGJ,IAAI;QACFA,SAAS,MAAMnB,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACb7C,MACAQ;IAEJ,EAAE,OAAOsC,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAaA;YACb,OAAO,CAAC,eAAe,EAAEL,cAAc;YACvCG;YACAtB;QACF;IACF;IAEA,IAAIqC;IACJ,MAAMC,cAAcF,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIE,aAAa;QACf,MAAMC,aAAaZ,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBW,aACA5D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBG;QAEFd,aAAa,0BAA0BwE;QAEvC,MAAMC,oBAAoBJ,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DrE,aAAa,wBAAwByE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAShB,MAAM,OAAO,CAACgB,OAC/B,GAAG,CAAC,CAACA,OACGf,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLe,MACAhE,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBG;QAGNd,aAAa,qBAAqB0E;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7D1E,aAAa,iBAAiB4E;QAE9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYjE,QAAQ,IAAI;QACvDX,aAAa,2BAA2BsE;IAC1C;IAEA,IAAIS,cAAchE;IAClB,IAAIiE;IAEJ,IAAIV,aAAa;QACf,MAAMW,gBAAgBX,YAAY,KAAK;QACvC,MAAMY,iBAAiBZ,YAAY,MAAM;QAEzC,MAAMa,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BrE,kBACAuD,aACAxD,AAAgB,iBAAhBA;QAGF,MAAMuE,aAAa;QACnB,MAAMC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EACzBJ,cAAc,WAAW,EACzBE;QAGFN,cAAcO,aAAa,WAAW;QACtCN,QAAQK;QACRf,YAAY,KAAK,GAAGgB,aAAa,KAAK;QACtChB,YAAY,MAAM,GAAGgB,aAAa,MAAM;QAExCtF,aACE,qDACAiF,eACAC,gBACAZ,YAAY,KAAK,EACjBA,YAAY,MAAM,EAClBU;IAEJ;IAEA,OAAO;QACL,MAAMV;QACNS;QACAC;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAemB,qBAAwB9E,OAO7C;IACC,MAAM,EAAE+E,SAAS,EAAE9E,OAAO,EAAE+E,aAAa,EAAEtF,gBAAgB,EAAES,WAAW,EAAE,GACxEH;IACF,MAAMU,eAAeuE,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IACrB,MAAM5E,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMiF,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5BnF,QAAQ,eAAe,IAAI,IAC3B+E;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK/E;YACL,QAAQ;QACV;IACF;IAGF+E,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMvF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS0E;QACX;KACD;IAED,IAAI1F,kBAAkB;QACpB,MAAM2B,SAAS,MAAM5B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAM,EACJ,SAASwB,WAAW,EACpBtB,KAAK,EACL8D,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAO3F,MAAMQ;IAGvB,IAAIoF;IACJ,IAAI;QACFA,cAAcC,AAAAA,IAAAA,8BAAAA,0BAAAA,AAAAA,EAA8B3C;IAC9C,EAAE,OAAO4C,YAAY;QAEnB,MAAM/C,eACJ+C,sBAAsB9C,QAAQ8C,WAAW,OAAO,GAAG7C,OAAO6C;QAC5D,MAAM,IAAI3C,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAtB;IAEJ;IAEA,OAAO;QACLgE;QACA1C;QACAtB;QACA8D;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBC,QAAwE,EACxEzF,WAAyB;IAKzB,MAAMO,eAAemF,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BJ;IAE7C,MAAMhG,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAASoF;QACX;KACD;IAED,MAAMnC,SAAS,MAAMiC,SAASjG,MAAMQ;IAEpC,OAAO;QACL,kBAAkBwD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n generateElementByPoint,\n generateElementByRect,\n} from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.shotSize.width;\n let imageHeight = context.shotSize.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig);\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Apply offset if searching in a cropped area\n let finalX = pixelX;\n let finalY = pixelY;\n if (options.searchConfig?.rect) {\n finalX += options.searchConfig.rect.left;\n finalY += options.searchConfig.rect.top;\n }\n\n const element: LocateResultElement = generateElementByPoint(\n [finalX, finalY],\n targetElementDescriptionText as string,\n );\n\n resRect = element.rect;\n debugInspect('auto-glm resRect:', resRect);\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n let res: Awaited<\n ReturnType<\n typeof callAIWithObjectResponse<AIElementResponse | [number, number]>\n >\n >;\n try {\n res = await callAIWithObjectResponse<AIElementResponse | [number, number]>(\n msgs,\n modelConfig,\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n elements: [],\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n options.searchConfig?.scale,\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n scale?: number;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n imageBase64: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n sectionRect = expandSearchArea(mergedRect, context.shotSize);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n let scale: number | undefined;\n\n if (sectionRect) {\n const originalWidth = sectionRect.width;\n const originalHeight = sectionRect.height;\n\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n\n const scaleRatio = 2;\n const scaledResult = await scaleImage(\n croppedResult.imageBase64,\n scaleRatio,\n );\n\n imageBase64 = scaledResult.imageBase64;\n scale = scaleRatio;\n sectionRect.width = scaledResult.width;\n sectionRect.height = scaledResult.height;\n\n debugSection(\n 'scaled sectionRect from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n sectionRect.width,\n sectionRect.height,\n scale,\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n scale,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig);\n\n // Parse XML response to JSON object\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","modelConfig","modelFamily","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","finalX","finalY","element","generateElementByPoint","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","undefined","JSON","Array","adaptBboxToRect","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","scale","originalWidth","originalHeight","croppedResult","cropByRect","scaleRatio","scaledResult","scaleImage","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","callAIFn","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;ACuDA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OAKrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,WAAW,EAAE,GAAGH;IAC3D,MAAM,EAAEI,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElDK,IAAAA,sBAAAA,MAAAA,AAAAA,EACEJ,0BACA;IAEF,MAAMK,+BAA+BhB,wBACnCW;IAEF,MAAMM,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBF;IAChD,MAAMG,eAAeC,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,eAC3BQ,AAAAA,IAAAA,0BAAAA,sBAAAA,AAAAA,EAAuBR,eACvBS,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BT;IAEhC,IAAIU,eAAeT;IACnB,IAAIU,aAAad,QAAQ,QAAQ,CAAC,KAAK;IACvC,IAAIe,cAAcf,QAAQ,QAAQ,CAAC,MAAM;IACzC,IAAIgB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIhB,QAAQ,YAAY,EAAE;QACxBM,IAAAA,sBAAAA,MAAAA,AAAAA,EACEN,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFM,IAAAA,sBAAAA,MAAAA,AAAAA,EACEN,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFc,eAAed,QAAQ,YAAY,CAAC,WAAW;QAC/Ce,aAAaf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCgB,cAAchB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCiB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIZ,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMe,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMxB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,eACZ,CAAC,KAAK,EAAEI,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAON,0BAAuC;QAChD,MAAMmB,SAAS,MAAM5B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI0B;IACf;IAEA,IAAIV,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,cAAc;QAC1B,MAAM,EAAE,SAASkB,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAAyB7B,MAAMQ;QAEvCf,aAAa,yBAAyBkC;QAEtC,MAAMG,SAASC,AAAAA,IAAAA,0BAAAA,0BAAAA,AAAAA,EAA2BJ;QAE1ClC,aAAa,sBAAsBqC,OAAO,KAAK;QAC/CrC,aAAa,yBAAyBqC,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9DrC,aAAa,yBAAyByC,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnCrC,aAAa,iCAAiC;gBAAE0C;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9C5B,aAAa,+BAA+B;gBAAE4C;gBAAQE;YAAO;YAG7D,IAAIC,SAASH;YACb,IAAII,SAASF;YACb,IAAIlC,QAAQ,YAAY,EAAE,MAAM;gBAC9BmC,UAAUnC,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBACxCoC,UAAUpC,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YACzC;YAEA,MAAMqC,UAA+BC,AAAAA,IAAAA,yBAAAA,sBAAAA,AAAAA,EACnC;gBAACH;gBAAQC;aAAO,EAChB7B;YAGFoB,UAAUU,QAAQ,IAAI;YACtBjD,aAAa,qBAAqBuC;YAElC,IAAIU,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMV;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,IAAIc;IAKJ,IAAI;QACFA,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACV7C,MACAQ;IAEJ,EAAE,OAAOsC,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,UAAU,EAAE;gBACZ,QAAQ;oBAAC,CAAC,eAAe,EAAEL,cAAc;iBAAC;YAC5C;YACAG;YACAtB;YACA,mBAAmBwB;QACrB;IACF;IAEA,MAAMF,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIZ;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYU,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBU,MAAM,OAAO,CAACV,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAZ,UAAUuB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRX,IAAI,OAAO,CAAC,IAAI,EAChBxB,YACAC,aACAhB,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BiB,oBACAC,qBACAd,aACAJ,QAAQ,YAAY,EAAE;YAGxBZ,aAAa,WAAWuC;YAExB,MAAMU,UAA+Bc,AAAAA,IAAAA,yBAAAA,qBAAAA,AAAAA,EACnCxB,SACApB;YAEFsB,SAAS,EAAE;YAEX,IAAIQ,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;IACF,EAAE,OAAOe,GAAG;QACV,MAAMC,MACJD,aAAaT,QACT,CAAC,sBAAsB,EAAES,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACvB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEwB,IAAI,CAAC,CAAC;aAFtBxB,SAAS;YAACwB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAM1B;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAgB;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBtD,OAIrC;IAQC,MAAM,EAAEC,OAAO,EAAEsD,kBAAkB,EAAEpD,WAAW,EAAE,GAAGH;IACrD,MAAM,EAAEI,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMS,eAAe8C,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BpD;IACjD,MAAMqD,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpCnE,wBAAwBgE;IAE1B,MAAM5D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMlC,SAAS,MAAM5B,mBAAmB;YACtC,QAAQ8D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA5D,KAAK,IAAI,IAAI0B;IACf;IAEA,IAAIsC;IAGJ,IAAI;QACFA,SAAS,MAAMnB,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACb7C,MACAQ;IAEJ,EAAE,OAAOsC,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAaA;YACb,OAAO,CAAC,eAAe,EAAEL,cAAc;YACvCG;YACAtB;QACF;IACF;IAEA,IAAIqC;IACJ,MAAMC,cAAcF,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIE,aAAa;QACf,MAAMC,aAAaZ,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBW,aACA5D,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBG;QAEFd,aAAa,0BAA0BwE;QAEvC,MAAMC,oBAAoBJ,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DrE,aAAa,wBAAwByE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAShB,MAAM,OAAO,CAACgB,OAC/B,GAAG,CAAC,CAACA,OACGf,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLe,MACAhE,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBG;QAGNd,aAAa,qBAAqB0E;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7D1E,aAAa,iBAAiB4E;QAE9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYjE,QAAQ,QAAQ;QAC3DX,aAAa,2BAA2BsE;IAC1C;IAEA,IAAIS,cAAchE;IAClB,IAAIiE;IAEJ,IAAIV,aAAa;QACf,MAAMW,gBAAgBX,YAAY,KAAK;QACvC,MAAMY,iBAAiBZ,YAAY,MAAM;QAEzC,MAAMa,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BrE,kBACAuD,aACAxD,AAAgB,iBAAhBA;QAGF,MAAMuE,aAAa;QACnB,MAAMC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EACzBJ,cAAc,WAAW,EACzBE;QAGFN,cAAcO,aAAa,WAAW;QACtCN,QAAQK;QACRf,YAAY,KAAK,GAAGgB,aAAa,KAAK;QACtChB,YAAY,MAAM,GAAGgB,aAAa,MAAM;QAExCtF,aACE,qDACAiF,eACAC,gBACAZ,YAAY,KAAK,EACjBA,YAAY,MAAM,EAClBU;IAEJ;IAEA,OAAO;QACL,MAAMV;QACNS;QACAC;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAemB,qBAAwB9E,OAO7C;IACC,MAAM,EAAE+E,SAAS,EAAE9E,OAAO,EAAE+E,aAAa,EAAEtF,gBAAgB,EAAES,WAAW,EAAE,GACxEH;IACF,MAAMU,eAAeuE,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IACrB,MAAM5E,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMiF,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5BnF,QAAQ,eAAe,IAAI,IAC3B+E;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK/E;YACL,QAAQ;QACV;IACF;IAGF+E,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMvF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS0E;QACX;KACD;IAED,IAAI1F,kBAAkB;QACpB,MAAM2B,SAAS,MAAM5B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAM,EACJ,SAASwB,WAAW,EACpBtB,KAAK,EACL8D,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAO3F,MAAMQ;IAGvB,IAAIoF;IACJ,IAAI;QACFA,cAAcC,AAAAA,IAAAA,8BAAAA,0BAAAA,AAAAA,EAA8B3C;IAC9C,EAAE,OAAO4C,YAAY;QAEnB,MAAM/C,eACJ+C,sBAAsB9C,QAAQ8C,WAAW,OAAO,GAAG7C,OAAO6C;QAC5D,MAAM,IAAI3C,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAtB;IAEJ;IAEA,OAAO;QACLgE;QACA1C;QACAtB;QACA8D;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBC,QAAwE,EACxEzF,WAAyB;IAKzB,MAAMO,eAAemF,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BJ;IAE7C,MAAMhG,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAASoF;QACX;KACD;IAED,MAAMnC,SAAS,MAAMiC,SAASjG,MAAMQ;IAEpC,OAAO;QACL,kBAAkBwD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
@@ -91,7 +91,7 @@ function parseXMLPlanningResponse(xmlString, modelFamily) {
91
91
  }
92
92
  async function plan(userInstruction, opts) {
93
93
  const { context, modelConfig, conversationHistory } = opts;
94
- const { size } = context;
94
+ const { shotSize } = context;
95
95
  const screenshotBase64 = context.screenshot.base64;
96
96
  const { modelFamily } = modelConfig;
97
97
  const systemPrompt = await (0, llm_planning_js_namespaceObject.systemPromptToTaskPlanning)({
@@ -102,8 +102,8 @@ async function plan(userInstruction, opts) {
102
102
  deepThink: true === opts.deepThink
103
103
  });
104
104
  let imagePayload = screenshotBase64;
105
- let imageWidth = size.width;
106
- let imageHeight = size.height;
105
+ let imageWidth = shotSize.width;
106
+ let imageHeight = shotSize.height;
107
107
  if ('qwen2.5-vl' === modelFamily) {
108
108
  const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
109
109
  imageWidth = paddedResult.width;
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { extractXMLTag } from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n safeParseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n const type = actionType.trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = safeParseJson(actionParamStr, modelFamily);\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n deepThink: opts.deepThink === true,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build historical execution logs text to include in the message\n const historicalLogsText = conversationHistory.historicalLogsToText();\n const historicalLogsSection = historicalLogsText\n ? `\\n\\n${historicalLogsText}`\n : '';\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${historicalLogsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the latest screenshot${memoriesSection}${historicalLogsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n });\n\n // Parse XML response to JSON object, capture parsing errors\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Accumulate logs as historical execution steps\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","action","type","param","safeParseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","size","screenshotBase64","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historicalLogsText","historicalLogsSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;ACqBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAK9C,SAASE,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,IAAIC,SAAc;IAClB,IAAIP,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QACrD,MAAMQ,OAAOR,WAAW,IAAI;QAC5B,IAAIS;QAEJ,IAAIR,gBACF,IAAI;YAEFQ,QAAQC,AAAAA,IAAAA,yBAAAA,aAAAA,AAAAA,EAAcT,gBAAgBP;QACxC,EAAE,OAAOiB,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUH,WAAVG,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAId,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1BQ;QACA,GAAIH,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;IAC9D;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,IAAI,EAAE,GAAGH;IACjB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAEtB,WAAW,EAAE,GAAGuB;IAExB,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaP,KAAK,WAAW;QAC7BrB;QACA,aAAaqB,KAAK,WAAW;QAC7B,gBAAgB;QAChB,WAAWA,AAAmB,SAAnBA,KAAK,SAAS;IAC3B;IAEA,IAAIQ,eAAeH;IACnB,IAAII,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAK7B,IAAIzB,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMgC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBb,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMc,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEd,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIgB;IAGJ,MAAMC,qBAAqBb,oBAAoB,oBAAoB;IACnE,MAAMc,wBAAwBD,qBAC1B,CAAC,IAAI,EAAEA,oBAAoB,GAC3B;IAGJ,MAAME,eAAef,oBAAoB,cAAc;IACvD,MAAMgB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9CY,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGZ,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEgB,kBAAkBF,uBAAuB;gBACtN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAL,oBAAoB,mCAAmC;IACzD,OACEY,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,6BAA6B,EAAEI,kBAAkBF,uBAAuB;YACjF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFL,oBAAoB,MAAM,CAACY;IAG3BZ,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMiB,aAAajB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMqB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,MAAM,EACJ,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMnB,aAAa;QAClC,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAeT,SAAYS,KAAK,SAAS;IACpE;IAGA,IAAI0B;IACJ,IAAI;QACFA,aAAajD,yBAAyB6C,aAAa3C;QAEnD,IAAI+C,WAAW,MAAM,IAAIA,AAA+BnC,WAA/BmC,WAAW,eAAe,EAAgB;YACjElD,QACE;YAEFkD,WAAW,eAAe,GAAGnC;YAC7BmC,WAAW,eAAe,GAAGnC;QAC/B;QAEA,MAAMoC,UAAUD,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIE,yBAAyB;QAG7B,IAAIF,AAA+BnC,WAA/BmC,WAAW,eAAe,EAAgB;YAC5CpD,MAAM;YACNsD,yBAAyB;QAC3B;QAEA,MAAMC,cAAkC;YACtC,GAAGH,UAAU;YACbC;YACAL;YACAC;YACAC;YACA,UAAUM,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAAS3B,KAAK,WAAW;YAC1D4B;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOL,YAAY;QAEnBC,QAAQ,OAAO,CAAC,CAACnC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAMwC,sBAAsBhC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9BnB,MAAM,+BAA+B0D;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAEN1D,MAAM,gBAAgB2D;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe5C,OAAO,KAAK,CAAC2C,MAAM;gBACxC,IAAIC,gBAAgBzD,AAAgBY,WAAhBZ,aAElBa,OAAO,KAAK,CAAC2C,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA3B,YACAC,aACA/B;YAGN;QACF;QAGA,IAAI+C,WAAW,GAAG,EAChBvB,oBAAoB,mBAAmB,CAACuB,WAAW,GAAG;QAIxD,IAAIA,WAAW,MAAM,EACnBvB,oBAAoB,YAAY,CAACuB,WAAW,MAAM;QAGpDvB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMmB;gBACR;aACD;QACH;QAEA,OAAOO;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsBzC,QAAQyC,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClCjB,aACAC;IAEJ;AACF"}
1
+ {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { extractXMLTag } from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n safeParseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n const type = actionType.trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = safeParseJson(actionParamStr, modelFamily);\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n deepThink: opts.deepThink === true,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = shotSize.width;\n let imageHeight = shotSize.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build historical execution logs text to include in the message\n const historicalLogsText = conversationHistory.historicalLogsToText();\n const historicalLogsSection = historicalLogsText\n ? `\\n\\n${historicalLogsText}`\n : '';\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${historicalLogsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the latest screenshot${memoriesSection}${historicalLogsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n });\n\n // Parse XML response to JSON object, capture parsing errors\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Accumulate logs as historical execution steps\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","action","type","param","safeParseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","shotSize","screenshotBase64","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historicalLogsText","historicalLogsSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;ACqBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAK9C,SAASE,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,IAAIC,SAAc;IAClB,IAAIP,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QACrD,MAAMQ,OAAOR,WAAW,IAAI;QAC5B,IAAIS;QAEJ,IAAIR,gBACF,IAAI;YAEFQ,QAAQC,AAAAA,IAAAA,yBAAAA,aAAAA,AAAAA,EAAcT,gBAAgBP;QACxC,EAAE,OAAOiB,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUH,WAAVG,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAId,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1BQ;QACA,GAAIH,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;IAC9D;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,QAAQ,EAAE,GAAGH;IACrB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAEtB,WAAW,EAAE,GAAGuB;IAExB,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaP,KAAK,WAAW;QAC7BrB;QACA,aAAaqB,KAAK,WAAW;QAC7B,gBAAgB;QAChB,WAAWA,AAAmB,SAAnBA,KAAK,SAAS;IAC3B;IAEA,IAAIQ,eAAeH;IACnB,IAAII,aAAaL,SAAS,KAAK;IAC/B,IAAIM,cAAcN,SAAS,MAAM;IAKjC,IAAIzB,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMgC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBb,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMc,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEd,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIgB;IAGJ,MAAMC,qBAAqBb,oBAAoB,oBAAoB;IACnE,MAAMc,wBAAwBD,qBAC1B,CAAC,IAAI,EAAEA,oBAAoB,GAC3B;IAGJ,MAAME,eAAef,oBAAoB,cAAc;IACvD,MAAMgB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9CY,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGZ,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEgB,kBAAkBF,uBAAuB;gBACtN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAL,oBAAoB,mCAAmC;IACzD,OACEY,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,6BAA6B,EAAEI,kBAAkBF,uBAAuB;YACjF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFL,oBAAoB,MAAM,CAACY;IAG3BZ,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMiB,aAAajB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMqB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,MAAM,EACJ,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMnB,aAAa;QAClC,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAeT,SAAYS,KAAK,SAAS;IACpE;IAGA,IAAI0B;IACJ,IAAI;QACFA,aAAajD,yBAAyB6C,aAAa3C;QAEnD,IAAI+C,WAAW,MAAM,IAAIA,AAA+BnC,WAA/BmC,WAAW,eAAe,EAAgB;YACjElD,QACE;YAEFkD,WAAW,eAAe,GAAGnC;YAC7BmC,WAAW,eAAe,GAAGnC;QAC/B;QAEA,MAAMoC,UAAUD,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIE,yBAAyB;QAG7B,IAAIF,AAA+BnC,WAA/BmC,WAAW,eAAe,EAAgB;YAC5CpD,MAAM;YACNsD,yBAAyB;QAC3B;QAEA,MAAMC,cAAkC;YACtC,GAAGH,UAAU;YACbC;YACAL;YACAC;YACAC;YACA,UAAUM,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAAS3B,KAAK,WAAW;YAC1D4B;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOL,YAAY;QAEnBC,QAAQ,OAAO,CAAC,CAACnC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAMwC,sBAAsBhC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9BnB,MAAM,+BAA+B0D;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAEN1D,MAAM,gBAAgB2D;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe5C,OAAO,KAAK,CAAC2C,MAAM;gBACxC,IAAIC,gBAAgBzD,AAAgBY,WAAhBZ,aAElBa,OAAO,KAAK,CAAC2C,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA3B,YACAC,aACA/B;YAGN;QACF;QAGA,IAAI+C,WAAW,GAAG,EAChBvB,oBAAoB,mBAAmB,CAACuB,WAAW,GAAG;QAIxD,IAAIA,WAAW,MAAM,EACnBvB,oBAAoB,YAAY,CAACuB,WAAW,MAAM;QAGpDvB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMmB;gBACR;aACD;QACH;QAEA,OAAOO;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsBzC,QAAQyC,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClCjB,aACAC;IAEJ;AACF"}
@@ -36,9 +36,9 @@ __webpack_require__.d(__webpack_exports__, {
36
36
  callAIWithObjectResponse: ()=>callAIWithObjectResponse,
37
37
  extractJSONFromCodeBlock: ()=>extractJSONFromCodeBlock,
38
38
  preprocessDoubaoBboxJson: ()=>preprocessDoubaoBboxJson,
39
+ resolveReasoningConfig: ()=>resolveReasoningConfig,
39
40
  safeParseJson: ()=>safeParseJson,
40
41
  callAI: ()=>callAI,
41
- resolveDeepThinkConfig: ()=>resolveDeepThinkConfig,
42
42
  AIResponseParseError: ()=>AIResponseParseError,
43
43
  callAIWithStringResponse: ()=>callAIWithStringResponse
44
44
  });
@@ -219,11 +219,19 @@ async function callAI(messages, modelConfig, options) {
219
219
  commonConfig.top_p = 0.85;
220
220
  commonConfig.frequency_penalty = 0.2;
221
221
  }
222
- const { config: deepThinkConfig, debugMessage, warningMessage } = resolveDeepThinkConfig({
223
- deepThink: options?.deepThink,
222
+ const mergedEnableReasoning = (()=>{
223
+ const normalizedDeepThink = options?.deepThink === 'unset' ? void 0 : options?.deepThink;
224
+ if (true === normalizedDeepThink) return true;
225
+ if (false === normalizedDeepThink) return false;
226
+ return modelConfig.reasoningEnabled;
227
+ })();
228
+ const { config: reasoningEffortConfig, debugMessage: reasoningEffortDebugMessage, warningMessage } = resolveReasoningConfig({
229
+ reasoningEnabled: mergedEnableReasoning,
230
+ reasoningEffort: modelConfig.reasoningEffort,
231
+ reasoningBudget: modelConfig.reasoningBudget,
224
232
  modelFamily
225
233
  });
226
- if (debugMessage) debugCall(debugMessage);
234
+ if (reasoningEffortDebugMessage) debugCall(reasoningEffortDebugMessage);
227
235
  if (warningMessage) warnCall(warningMessage);
228
236
  try {
229
237
  debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
@@ -232,7 +240,7 @@ async function callAI(messages, modelConfig, options) {
232
240
  model: modelName,
233
241
  messages,
234
242
  ...commonConfig,
235
- ...deepThinkConfig
243
+ ...reasoningEffortConfig
236
244
  }, {
237
245
  stream: true
238
246
  });
@@ -286,7 +294,7 @@ async function callAI(messages, modelConfig, options) {
286
294
  model: modelName,
287
295
  messages,
288
296
  ...commonConfig,
289
- ...deepThinkConfig
297
+ ...reasoningEffortConfig
290
298
  });
291
299
  timeCost = Date.now() - startTime;
292
300
  debugProfileStats(`model, ${modelName}, mode, ${modelFamily || 'default'}, ui-tars-version, ${uiTarsModelVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`);
@@ -296,7 +304,7 @@ async function callAI(messages, modelConfig, options) {
296
304
  accumulatedReasoning = result.choices[0].message?.reasoning_content || '';
297
305
  usage = result.usage;
298
306
  requestId = result._request_id;
299
- if (!content && accumulatedReasoning && 'doubao-vision' === modelFamily) {
307
+ if (!content && accumulatedReasoning && ('doubao-vision' === modelFamily || 'doubao-seed' === modelFamily)) {
300
308
  warnCall('empty content from AI model, using reasoning content');
301
309
  content = accumulatedReasoning;
302
310
  }
@@ -372,50 +380,68 @@ function preprocessDoubaoBboxJson(input) {
372
380
  if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
373
381
  return input;
374
382
  }
375
- function resolveDeepThinkConfig({ deepThink, modelFamily }) {
376
- const normalizedDeepThink = 'unset' === deepThink ? void 0 : deepThink;
377
- if (void 0 === normalizedDeepThink) return {
378
- config: {},
379
- debugMessage: void 0
380
- };
381
- if ('qwen3-vl' === modelFamily) return {
382
- config: {
383
- enable_thinking: normalizedDeepThink
384
- },
385
- debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`
386
- };
387
- if ('doubao-vision' === modelFamily) return {
388
- config: {
389
- thinking: {
390
- type: normalizedDeepThink ? 'enabled' : 'disabled'
391
- }
392
- },
393
- debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`
394
- };
395
- if ('glm-v' === modelFamily) return {
396
- config: {
397
- thinking: {
398
- type: normalizedDeepThink ? 'enabled' : 'disabled'
399
- }
400
- },
401
- debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for glm-v`
383
+ function resolveReasoningConfig({ reasoningEnabled, reasoningEffort, reasoningBudget, modelFamily }) {
384
+ if (void 0 === reasoningEnabled && !reasoningEffort && void 0 === reasoningBudget) return {
385
+ config: {}
402
386
  };
403
- if ('gpt-5' === modelFamily) return {
404
- config: normalizedDeepThink ? {
405
- reasoning: {
387
+ const debugMessages = [];
388
+ const config = {};
389
+ if ('qwen3-vl' === modelFamily || 'qwen3.5' === modelFamily) {
390
+ if (void 0 !== reasoningEnabled) {
391
+ config.enable_thinking = reasoningEnabled;
392
+ debugMessages.push(`enable_thinking=${reasoningEnabled}`);
393
+ }
394
+ if (void 0 !== reasoningBudget) {
395
+ config.thinking_budget = reasoningBudget;
396
+ debugMessages.push(`thinking_budget=${reasoningBudget}`);
397
+ }
398
+ } else if ('doubao-vision' === modelFamily || 'doubao-seed' === modelFamily) {
399
+ if (void 0 !== reasoningEnabled) {
400
+ config.thinking = {
401
+ type: reasoningEnabled ? 'enabled' : 'disabled'
402
+ };
403
+ debugMessages.push(`thinking.type=${reasoningEnabled ? 'enabled' : 'disabled'}`);
404
+ }
405
+ if (reasoningEffort) {
406
+ config.reasoning_effort = reasoningEffort;
407
+ debugMessages.push(`reasoning_effort="${reasoningEffort}"`);
408
+ }
409
+ } else if ('glm-v' === modelFamily) {
410
+ if (void 0 !== reasoningEnabled) {
411
+ config.thinking = {
412
+ type: reasoningEnabled ? 'enabled' : 'disabled'
413
+ };
414
+ debugMessages.push(`thinking.type=${reasoningEnabled ? 'enabled' : 'disabled'}`);
415
+ }
416
+ } else if ('gpt-5' === modelFamily) {
417
+ if (reasoningEffort) {
418
+ config.reasoning = {
419
+ effort: reasoningEffort
420
+ };
421
+ debugMessages.push(`reasoning.effort="${reasoningEffort}"`);
422
+ } else if (true === reasoningEnabled) {
423
+ config.reasoning = {
406
424
  effort: 'high'
407
- }
408
- } : {
409
- reasoning: {
425
+ };
426
+ debugMessages.push('reasoning.effort="high" (from reasoningEnabled)');
427
+ } else if (false === reasoningEnabled) {
428
+ config.reasoning = {
410
429
  effort: 'low'
411
- }
412
- },
413
- debugMessage: normalizedDeepThink ? 'deepThink mapped to reasoning.effort=high for gpt-5' : 'deepThink disabled for gpt-5'
430
+ };
431
+ debugMessages.push('reasoning.effort="low" (from reasoningEnabled)');
432
+ }
433
+ } else if (!modelFamily) return {
434
+ config: {},
435
+ debugMessage: 'reasoning config ignored: no model_family configured',
436
+ warningMessage: 'Reasoning config is set but no model_family is configured. Set MIDSCENE_MODEL_FAMILY to enable reasoning config pass-through.'
414
437
  };
438
+ else if (reasoningEffort) {
439
+ config.reasoning_effort = reasoningEffort;
440
+ debugMessages.push(`reasoning_effort="${reasoningEffort}"`);
441
+ }
415
442
  return {
416
- config: {},
417
- debugMessage: `deepThink ignored: unsupported model_family "${modelFamily ?? 'default'}"`,
418
- warningMessage: `The "deepThink" option is not supported for model_family "${modelFamily ?? 'default'}".`
443
+ config,
444
+ debugMessage: debugMessages.length ? `reasoning config for ${modelFamily}: ${debugMessages.join(', ')}` : void 0
419
445
  };
420
446
  }
421
447
  function normalizeJsonObject(obj) {
@@ -451,7 +477,7 @@ function safeParseJson(input, modelFamily) {
451
477
  } catch (error) {
452
478
  lastError = error;
453
479
  }
454
- if ('doubao-vision' === modelFamily || (0, util_js_namespaceObject.isUITars)(modelFamily)) {
480
+ if ('doubao-vision' === modelFamily || 'doubao-seed' === modelFamily || (0, util_js_namespaceObject.isUITars)(modelFamily)) {
455
481
  const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
456
482
  try {
457
483
  parsed = JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(jsonString));
@@ -468,7 +494,7 @@ exports.callAIWithObjectResponse = __webpack_exports__.callAIWithObjectResponse;
468
494
  exports.callAIWithStringResponse = __webpack_exports__.callAIWithStringResponse;
469
495
  exports.extractJSONFromCodeBlock = __webpack_exports__.extractJSONFromCodeBlock;
470
496
  exports.preprocessDoubaoBboxJson = __webpack_exports__.preprocessDoubaoBboxJson;
471
- exports.resolveDeepThinkConfig = __webpack_exports__.resolveDeepThinkConfig;
497
+ exports.resolveReasoningConfig = __webpack_exports__.resolveReasoningConfig;
472
498
  exports.safeParseJson = __webpack_exports__.safeParseJson;
473
499
  for(var __rspack_i in __webpack_exports__)if (-1 === [
474
500
  "AIResponseParseError",
@@ -477,7 +503,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
477
503
  "callAIWithStringResponse",
478
504
  "extractJSONFromCodeBlock",
479
505
  "preprocessDoubaoBboxJson",
480
- "resolveDeepThinkConfig",
506
+ "resolveReasoningConfig",
481
507
  "safeParseJson"
482
508
  ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
483
509
  Object.defineProperty(exports, '__esModule', {