@midscene/core 1.9.1 → 1.9.2-beta-20260605084246.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/es/agent/agent.mjs +2 -1
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-cache.mjs +43 -8
  4. package/dist/es/agent/task-cache.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +18 -3
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/ui-utils.mjs +2 -1
  8. package/dist/es/agent/ui-utils.mjs.map +1 -1
  9. package/dist/es/agent/utils.mjs +1 -1
  10. package/dist/es/ai-model/inspect.mjs +8 -60
  11. package/dist/es/ai-model/inspect.mjs.map +1 -1
  12. package/dist/es/ai-model/llm-planning.mjs +6 -3
  13. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  14. package/dist/es/ai-model/models/auto-glm/planning.mjs +8 -3
  15. package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -1
  16. package/dist/es/ai-model/models/ui-tars/planning.mjs +6 -2
  17. package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -1
  18. package/dist/es/common.mjs +50 -3
  19. package/dist/es/common.mjs.map +1 -1
  20. package/dist/es/types.mjs.map +1 -1
  21. package/dist/es/utils.mjs +2 -2
  22. package/dist/es/yaml/player.mjs +9 -4
  23. package/dist/es/yaml/player.mjs.map +1 -1
  24. package/dist/lib/agent/agent.js +2 -1
  25. package/dist/lib/agent/agent.js.map +1 -1
  26. package/dist/lib/agent/task-cache.js +43 -8
  27. package/dist/lib/agent/task-cache.js.map +1 -1
  28. package/dist/lib/agent/tasks.js +17 -2
  29. package/dist/lib/agent/tasks.js.map +1 -1
  30. package/dist/lib/agent/ui-utils.js +3 -2
  31. package/dist/lib/agent/ui-utils.js.map +1 -1
  32. package/dist/lib/agent/utils.js +1 -1
  33. package/dist/lib/ai-model/inspect.js +7 -59
  34. package/dist/lib/ai-model/inspect.js.map +1 -1
  35. package/dist/lib/ai-model/llm-planning.js +6 -3
  36. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  37. package/dist/lib/ai-model/models/auto-glm/planning.js +8 -3
  38. package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -1
  39. package/dist/lib/ai-model/models/ui-tars/planning.js +6 -2
  40. package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -1
  41. package/dist/lib/common.js +59 -3
  42. package/dist/lib/common.js.map +1 -1
  43. package/dist/lib/types.js.map +1 -1
  44. package/dist/lib/utils.js +2 -2
  45. package/dist/lib/yaml/player.js +9 -4
  46. package/dist/lib/yaml/player.js.map +1 -1
  47. package/dist/types/agent/agent.d.ts +2 -2
  48. package/dist/types/agent/task-cache.d.ts +18 -2
  49. package/dist/types/agent/tasks.d.ts +15 -2
  50. package/dist/types/ai-model/inspect.d.ts +1 -2
  51. package/dist/types/ai-model/llm-planning.d.ts +2 -1
  52. package/dist/types/ai-model/models/auto-glm/planning.d.ts +2 -1
  53. package/dist/types/ai-model/models/ui-tars/planning.d.ts +2 -1
  54. package/dist/types/ai-model/workflows/planning/types.d.ts +4 -1
  55. package/dist/types/common.d.ts +4 -0
  56. package/dist/types/types.d.ts +1 -1
  57. package/dist/types/yaml.d.ts +4 -3
  58. package/package.json +2 -2
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocateResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport {\n cropByRect,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { expandSearchArea } from '../common';\nimport type { ModelRuntime } from './models';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n} from './service-caller/index';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport {\n mergePixelBboxesToRect,\n pixelBboxToRect,\n} from './workflows/inspect/locate-result-rect';\nimport { mapSearchAreaPixelBboxToOriginalPixelBbox } from './workflows/inspect/search-area-mapping';\nimport type {\n LocateOptions,\n LocateResult,\n SearchAreaConfig,\n} from './workflows/inspect/types';\n\nexport type InspectAIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nfunction hasLocateResult(input: unknown, resultKey: string) {\n if (!input || typeof input !== 'object') {\n return false;\n }\n\n const record = input as Record<string, unknown>;\n const locateResult = record[resultKey];\n return Array.isArray(locateResult)\n ? locateResult.length > 0\n : locateResult !== undefined;\n}\n\nexport async function buildSearchAreaConfig(options: {\n context: UIContext;\n baseRect: Rect;\n}): Promise<SearchAreaConfig> {\n const { context, baseRect } = options;\n const scaleRatio = 2;\n const sectionRect = expandSearchArea(baseRect, context.shotSize);\n\n const croppedResult = await cropByRect(\n context.screenshot.base64,\n sectionRect,\n );\n\n const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);\n return {\n sourceRect: sectionRect,\n image: {\n imageBase64: scaledResult.imageBase64,\n width: scaledResult.width,\n height: scaledResult.height,\n },\n mapping: {\n offset: {\n x: sectionRect.left,\n y: sectionRect.top,\n },\n scale: scaleRatio,\n },\n };\n}\n\nexport const extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n }\n return prompt.prompt;\n};\n\nexport const promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images. These reference images are supporting context only, not the current screenshot being evaluated, unless the task explicitly asks for comparison or matching.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}'. It is a reference image, not the current screenshot:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(\n options: LocateOptions & { targetElementDescription: TUserPrompt },\n): Promise<LocateResult> {\n const { targetElementDescription, ...locateOptions } = options;\n const locateAdapter = options.modelRuntime.adapter.locate;\n if (locateAdapter.kind === 'custom') {\n return locateAdapter.locateFn(targetElementDescription, locateOptions);\n }\n return genericLocate(targetElementDescription, locateOptions);\n}\n\nexport async function genericLocate(\n elementDescription: TUserPrompt,\n options: LocateOptions,\n): Promise<LocateResult> {\n const { context } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'generic locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n\n assert(elementDescription, 'cannot find the target element description');\n const elementDescriptionText = extraTextFromUserPrompt(elementDescription);\n const userInstructionPrompt = findElementPrompt(elementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(\n adapter.locate.resultAdapter.promptSpec,\n );\n\n const modelImage = options.searchConfig?.image ?? {\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n };\n const preparedImage = await prepareModelImage({\n imageBase64: modelImage.imageBase64,\n width: modelImage.width,\n height: modelImage.height,\n policy: adapter.imagePreprocess,\n });\n\n const imagePayload = preparedImage.imageBase64;\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof elementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: elementDescription.images,\n convertHttpImage2Base64: elementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let res: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AIElementLocateResponse>>\n >;\n try {\n res = await callAIWithObjectResponse<AIElementLocateResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'locate',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElement: LocateResultElement | undefined;\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(res.content, resultAdapter.promptSpec.resultKey)) {\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n }\n\n try {\n const mapping = options.searchConfig?.mapping;\n const targetPixelBbox = resultAdapter.adaptElementLocateResultToPixelBbox(\n res.content,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n );\n resRect = pixelBboxToRect(\n mapSearchAreaPixelBboxToOriginalPixelBbox(targetPixelBbox, mapping),\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n elementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElement = element;\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse locate result: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n element: matchedElement,\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelRuntime: ModelRuntime;\n abortSignal?: AbortSignal;\n}): Promise<{\n searchAreaConfig?: SearchAreaConfig;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'section locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n policy: adapter.imagePreprocess,\n });\n\n const systemPrompt = systemPromptToLocateSection(\n adapter.locate.resultAdapter.promptSpec,\n );\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: preparedImage.imageBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'section-locator',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n searchAreaConfig: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let searchAreaConfig:\n | Awaited<ReturnType<typeof buildSearchAreaConfig>>\n | undefined;\n let sectionError = result.content.error;\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(result.content, resultAdapter.promptSpec.resultKey)) {\n return {\n searchAreaConfig: undefined,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n }\n\n try {\n const adaptedResult =\n resultAdapter.adaptSectionLocateResultToPixelBboxGroup(result.content, {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n });\n const mergedRect = mergePixelBboxesToRect([\n adaptedResult.target,\n ...(adaptedResult.references ?? []),\n ]);\n debugSection('mergedRect %j', mergedRect);\n\n const expandedRect = expandSearchArea(mergedRect, context.shotSize);\n const originalWidth = expandedRect.width;\n const originalHeight = expandedRect.height;\n debugSection('expanded sectionRect %j', expandedRect);\n\n searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: mergedRect,\n });\n\n debugSection(\n 'scaled section image from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n searchAreaConfig.image.width,\n searchAreaConfig.image.height,\n searchAreaConfig.mapping.scale,\n );\n } catch (error) {\n const parseErrorMessage =\n error instanceof Error\n ? `Failed to parse section locate result: ${error.message}`\n : 'unknown error in section locate';\n sectionError = sectionError\n ? `${sectionError} (${parseErrorMessage})`\n : parseErrorMessage;\n }\n\n return {\n searchAreaConfig,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelRuntime: ModelRuntime;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelRuntime } =\n options;\n const systemPrompt = systemPromptToExtract({\n screenshotIncluded: extractOption?.screenshotIncluded !== false,\n referenceImagesIncluded: !!multimodalPrompt?.images?.length,\n });\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'text',\n text: 'This is the current screenshot to evaluate. Unless <DATA_DEMAND> explicitly asks for comparison or matching against reference images, base your answer on this screenshot and its contents when provided.',\n });\n\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime);\n\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n modelRuntime: ModelRuntime,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n debugInspect('AiJudgeOrderSensitive: description=%s', description);\n\n const result = await callAIWithObjectResponse<{ isOrderSensitive: boolean }>(\n msgs,\n modelRuntime,\n {\n jsonParserSource: 'generic-object',\n },\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","hasLocateResult","input","resultKey","record","locateResult","Array","undefined","buildSearchAreaConfig","options","context","baseRect","scaleRatio","sectionRect","expandSearchArea","croppedResult","cropByRect","scaledResult","scaleImage","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","targetElementDescription","locateOptions","locateAdapter","genericLocate","elementDescription","modelRuntime","adapter","assert","screenshotBase64","elementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","modelImage","preparedImage","prepareModelImage","imagePayload","addOns","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","usage","JSON","resRect","matchedElement","errors","resultAdapter","mapping","targetPixelBbox","pixelBboxToRect","mapSearchAreaPixelBboxToOriginalPixelBbox","element","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","searchAreaConfig","sectionError","adaptedResult","mergedRect","mergePixelBboxesToRect","expandedRect","originalWidth","originalHeight","error","parseErrorMessage","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;AC0DA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,SAASE,gBAAgBC,KAAc,EAAEC,SAAiB;IACxD,IAAI,CAACD,SAAS,AAAiB,YAAjB,OAAOA,OACnB,OAAO;IAGT,MAAME,SAASF;IACf,MAAMG,eAAeD,MAAM,CAACD,UAAU;IACtC,OAAOG,MAAM,OAAO,CAACD,gBACjBA,aAAa,MAAM,GAAG,IACtBA,AAAiBE,WAAjBF;AACN;AAEO,eAAeG,sBAAsBC,OAG3C;IACC,MAAM,EAAEC,OAAO,EAAEC,QAAQ,EAAE,GAAGF;IAC9B,MAAMG,aAAa;IACnB,MAAMC,cAAcC,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBH,UAAUD,QAAQ,QAAQ;IAE/D,MAAMK,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BN,QAAQ,UAAU,CAAC,MAAM,EACzBG;IAGF,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAAWH,cAAc,WAAW,EAAEH;IACjE,OAAO;QACL,YAAYC;QACZ,OAAO;YACL,aAAaI,aAAa,WAAW;YACrC,OAAOA,aAAa,KAAK;YACzB,QAAQA,aAAa,MAAM;QAC7B;QACA,SAAS;YACP,QAAQ;gBACN,GAAGJ,YAAY,IAAI;gBACnB,GAAGA,YAAY,GAAG;YACpB;YACA,OAAOD;QACT;IACF;AACF;AAEO,MAAMO,0BAA0B,CAACC;IACtC,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAET,OAAOA,OAAO,MAAM;AACtB;AAEO,MAAMC,qBAAqB,OAChCC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,uDAAuD,CAAC;oBAChH;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBACpBlB,OAAkE;IAElE,MAAM,EAAEmB,wBAAwB,EAAE,GAAGC,eAAe,GAAGpB;IACvD,MAAMqB,gBAAgBrB,QAAQ,YAAY,CAAC,OAAO,CAAC,MAAM;IACzD,IAAIqB,AAAuB,aAAvBA,cAAc,IAAI,EACpB,OAAOA,cAAc,QAAQ,CAACF,0BAA0BC;IAE1D,OAAOE,cAAcH,0BAA0BC;AACjD;AAEO,eAAeE,cACpBC,kBAA+B,EAC/BvB,OAAsB;IAEtB,MAAM,EAAEC,OAAO,EAAE,GAAGD;IACpB,MAAMwB,eAAexB,QAAQ,YAAY;IACzC,MAAM,EAAEyB,OAAO,EAAE,GAAGD;IACpBE,IAAAA,sBAAAA,MAAAA,AAAAA,EACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmB1B,QAAQ,UAAU,CAAC,MAAM;IAElDyB,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOH,oBAAoB;IAC3B,MAAMK,yBAAyBlB,wBAAwBa;IACvD,MAAMM,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBF;IAChD,MAAMG,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EACnBP,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAGzC,MAAMQ,aAAajC,QAAQ,YAAY,EAAE,SAAS;QAChD,aAAa2B;QACb,OAAO1B,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;IACjC;IACA,MAAMiC,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaF,WAAW,WAAW;QACnC,OAAOA,WAAW,KAAK;QACvB,QAAQA,WAAW,MAAM;QACzB,QAAQR,QAAQ,eAAe;IACjC;IAEA,MAAMW,eAAeF,cAAc,WAAW;IAE9C,MAAMpB,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKK;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMP;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAON,oBAAiC;QAC1C,MAAMc,SAAS,MAAMzB,mBAAmB;YACtC,QAAQW,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAT,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAIC;IAGJ,IAAI;QACFA,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACVzB,MACAU,cACA;YACE,aAAaxB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOwC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAG1C;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,SAASA;gBACT,QAAQ;oBAAC,CAAC,eAAe,EAAE2C,cAAc;iBAAC;YAC5C;YACAG;YACAE;YACA,mBAAmBhD;QACrB;IACF;IAEA,MAAM8C,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIU;IACJ,IAAIC;IACJ,IAAIC,SACF,YAAYZ,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,MAAMa,gBAAgB1B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACjC,gBAAgB8C,IAAI,OAAO,EAAEa,cAAc,UAAU,CAAC,SAAS,GAClE,OAAO;QACL,MAAMrD;QACN,aAAa;YACX,SAASA;YACT,QAAQoD;QACV;QACAN;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;IAGF,IAAI;QACF,MAAMc,UAAUpD,QAAQ,YAAY,EAAE;QACtC,MAAMqD,kBAAkBF,cAAc,mCAAmC,CACvEb,IAAI,OAAO,EACX;YACE,cAAcJ,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QAEFc,UAAUM,AAAAA,IAAAA,sCAAAA,eAAAA,AAAAA,EACRC,AAAAA,IAAAA,uCAAAA,yCAAAA,AAAAA,EAA0CF,iBAAiBD;QAG7D/D,aAAa,WAAW2D;QAExB,MAAMQ,UAA+BC,AAAAA,IAAAA,0BAAAA,qBAAAA,AAAAA,EACnCT,SACApB;QAEFsB,SAAS,EAAE;QAEX,IAAIM,SACFP,iBAAiBO;IAErB,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAahB,QACT,CAAC,+BAA+B,EAAEgB,EAAE,OAAO,EAAE,GAC7C;QACN,IAAI,AAACR,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAES,IAAI,CAAC,CAAC;aAFtBT,SAAS;YAACS;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,SAASC;YACT,QAAQC;QACV;QACAN;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAesB,gBAAgB5D,OAKrC;IAMC,MAAM,EAAEC,OAAO,EAAE4D,kBAAkB,EAAE,GAAG7D;IACxC,MAAMwB,eAAexB,QAAQ,YAAY;IACzC,MAAM,EAAEyB,OAAO,EAAE,GAAGD;IACpBE,IAAAA,sBAAAA,MAAAA,AAAAA,EACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmB1B,QAAQ,UAAU,CAAC,MAAM;IAClD,MAAMiC,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaR;QACb,OAAO1B,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;QAC/B,QAAQwB,QAAQ,eAAe;IACjC;IAEA,MAAMM,eAAe+B,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EACnBrC,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAEzC,MAAMsC,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpCtD,wBAAwBmD;IAE1B,MAAM/C,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG,cAAc,WAAW;wBAC9B,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM6B;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMxB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQiD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA/C,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAI4B;IAGJ,IAAI;QACFA,SAAS,MAAM1B,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACbzB,MACAU,cACA;YACE,aAAaxB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOwC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAG1C;QAChE,OAAO;YACL,kBAAkBA;YAClB,OAAO,CAAC,eAAe,EAAE2C,cAAc;YACvCG;YACAE;QACF;IACF;IAEA,IAAIoB;IAGJ,IAAIC,eAAeF,OAAO,OAAO,CAAC,KAAK;IACvC,MAAMd,gBAAgB1B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACjC,gBAAgByE,OAAO,OAAO,EAAEd,cAAc,UAAU,CAAC,SAAS,GACrE,OAAO;QACL,kBAAkBrD;QAClB,OAAOqE;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;IAGF,IAAI;QACF,MAAMG,gBACJjB,cAAc,wCAAwC,CAACc,OAAO,OAAO,EAAE;YACrE,cAAc/B,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QACF,MAAMmC,aAAaC,AAAAA,IAAAA,sCAAAA,sBAAAA,AAAAA,EAAuB;YACxCF,cAAc,MAAM;eAChBA,cAAc,UAAU,IAAI,EAAE;SACnC;QACD7E,aAAa,iBAAiB8E;QAE9B,MAAME,eAAelE,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBgE,YAAYpE,QAAQ,QAAQ;QAClE,MAAMuE,gBAAgBD,aAAa,KAAK;QACxC,MAAME,iBAAiBF,aAAa,MAAM;QAC1ChF,aAAa,2BAA2BgF;QAExCL,mBAAmB,MAAMnE,sBAAsB;YAC7CE;YACA,UAAUoE;QACZ;QAEA9E,aACE,uDACAiF,eACAC,gBACAP,iBAAiB,KAAK,CAAC,KAAK,EAC5BA,iBAAiB,KAAK,CAAC,MAAM,EAC7BA,iBAAiB,OAAO,CAAC,KAAK;IAElC,EAAE,OAAOQ,OAAO;QACd,MAAMC,oBACJD,iBAAiBhC,QACb,CAAC,uCAAuC,EAAEgC,MAAM,OAAO,EAAE,GACzD;QACNP,eAAeA,eACX,GAAGA,aAAa,EAAE,EAAEQ,kBAAkB,CAAC,CAAC,GACxCA;IACN;IAEA,OAAO;QACLT;QACA,OAAOC;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeW,qBAAwB5E,OAO7C;IACC,MAAM,EAAE6E,SAAS,EAAE5E,OAAO,EAAE6E,aAAa,EAAEjE,gBAAgB,EAAEW,YAAY,EAAE,GACzExB;IACF,MAAM+B,eAAegD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA,EAAsB;QACzC,oBAAoBD,eAAe,uBAAuB;QAC1D,yBAAyB,CAAC,CAACjE,kBAAkB,QAAQ;IACvD;IACA,MAAMc,mBAAmB1B,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM+E,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5BjF,QAAQ,eAAe,IAAI,IAC3B6E;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OAAO;QAC/CI,YAAY,IAAI,CAAC;YACf,MAAM;YACN,MAAM;QACR;QAEAA,YAAY,IAAI,CAAC;YACf,MAAM;YACN,WAAW;gBACT,KAAKvD;gBACL,QAAQ;YACV;QACF;IACF;IAEAuD,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMlE,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAASmD;QACX;KACD;IAED,IAAIrE,kBAAkB;QACpB,MAAMwB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM,EACJ,SAASO,WAAW,EACpBE,KAAK,EACLqC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOtE,MAAMU;IAEvB,IAAI6D;IACJ,IAAI;QACFA,cAAcC,AAAAA,IAAAA,8BAAAA,0BAAAA,AAAAA,EAA8B1C;IAC9C,EAAE,OAAO2C,YAAY;QACnB,MAAM9C,eACJ8C,sBAAsB7C,QAAQ6C,WAAW,OAAO,GAAG5C,OAAO4C;QAC5D,MAAM,IAAI1C,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAE;IAEJ;IAEA,OAAO;QACLuC;QACAzC;QACAE;QACAqC;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBjE,YAA0B;IAK1B,MAAMO,eAAe2D,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BH;IAE7C,MAAM3E,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS4D;QACX;KACD;IAEDtG,aAAa,yCAAyCoG;IAEtD,MAAMxB,SAAS,MAAM1B,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnBzB,MACAU,cACA;QACE,kBAAkB;IACpB;IAGF,OAAO;QACL,kBAAkByC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocateResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { cropByRect, scaleImage } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport {\n expandSearchArea,\n multimodalPromptToChatMessages,\n userPromptToMultimodalPrompt,\n userPromptToString,\n} from '../common';\nimport type { ModelRuntime } from './models';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n} from './service-caller/index';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport {\n mergePixelBboxesToRect,\n pixelBboxToRect,\n} from './workflows/inspect/locate-result-rect';\nimport { mapSearchAreaPixelBboxToOriginalPixelBbox } from './workflows/inspect/search-area-mapping';\nimport type {\n LocateOptions,\n LocateResult,\n SearchAreaConfig,\n} from './workflows/inspect/types';\n\nexport type InspectAIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nexport {\n userPromptToString as extraTextFromUserPrompt,\n multimodalPromptToChatMessages as promptsToChatParam,\n} from '../common';\n\nfunction hasLocateResult(input: unknown, resultKey: string) {\n if (!input || typeof input !== 'object') {\n return false;\n }\n\n const record = input as Record<string, unknown>;\n const locateResult = record[resultKey];\n return Array.isArray(locateResult)\n ? locateResult.length > 0\n : locateResult !== undefined;\n}\n\nexport async function buildSearchAreaConfig(options: {\n context: UIContext;\n baseRect: Rect;\n}): Promise<SearchAreaConfig> {\n const { context, baseRect } = options;\n const scaleRatio = 2;\n const sectionRect = expandSearchArea(baseRect, context.shotSize);\n\n const croppedResult = await cropByRect(\n context.screenshot.base64,\n sectionRect,\n );\n\n const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);\n return {\n sourceRect: sectionRect,\n image: {\n imageBase64: scaledResult.imageBase64,\n width: scaledResult.width,\n height: scaledResult.height,\n },\n mapping: {\n offset: {\n x: sectionRect.left,\n y: sectionRect.top,\n },\n scale: scaleRatio,\n },\n };\n}\n\nexport async function AiLocateElement(\n options: LocateOptions & { targetElementDescription: TUserPrompt },\n): Promise<LocateResult> {\n const { targetElementDescription, ...locateOptions } = options;\n const locateAdapter = options.modelRuntime.adapter.locate;\n if (locateAdapter.kind === 'custom') {\n return locateAdapter.locateFn(targetElementDescription, locateOptions);\n }\n return genericLocate(targetElementDescription, locateOptions);\n}\n\nexport async function genericLocate(\n elementDescription: TUserPrompt,\n options: LocateOptions,\n): Promise<LocateResult> {\n const { context } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'generic locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n\n assert(elementDescription, 'cannot find the target element description');\n const elementDescriptionText = userPromptToString(elementDescription);\n const userInstructionPrompt = findElementPrompt(elementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(\n adapter.locate.resultAdapter.promptSpec,\n );\n\n const modelImage = options.searchConfig?.image ?? {\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n };\n const preparedImage = await prepareModelImage({\n imageBase64: modelImage.imageBase64,\n width: modelImage.width,\n height: modelImage.height,\n policy: adapter.imagePreprocess,\n });\n\n const imagePayload = preparedImage.imageBase64;\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof elementDescription !== 'string') {\n const addOns = await multimodalPromptToChatMessages(\n userPromptToMultimodalPrompt(elementDescription),\n );\n msgs.push(...addOns);\n }\n\n let res: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AIElementLocateResponse>>\n >;\n try {\n res = await callAIWithObjectResponse<AIElementLocateResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'locate',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElement: LocateResultElement | undefined;\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(res.content, resultAdapter.promptSpec.resultKey)) {\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n }\n\n try {\n const mapping = options.searchConfig?.mapping;\n const targetPixelBbox = resultAdapter.adaptElementLocateResultToPixelBbox(\n res.content,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n );\n resRect = pixelBboxToRect(\n mapSearchAreaPixelBboxToOriginalPixelBbox(targetPixelBbox, mapping),\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n elementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElement = element;\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse locate result: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n element: matchedElement,\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelRuntime: ModelRuntime;\n abortSignal?: AbortSignal;\n}): Promise<{\n searchAreaConfig?: SearchAreaConfig;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'section locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n policy: adapter.imagePreprocess,\n });\n\n const systemPrompt = systemPromptToLocateSection(\n adapter.locate.resultAdapter.promptSpec,\n );\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n userPromptToString(sectionDescription),\n );\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: preparedImage.imageBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await multimodalPromptToChatMessages(\n userPromptToMultimodalPrompt(sectionDescription),\n );\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'section-locator',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n searchAreaConfig: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let searchAreaConfig:\n | Awaited<ReturnType<typeof buildSearchAreaConfig>>\n | undefined;\n let sectionError = result.content.error;\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(result.content, resultAdapter.promptSpec.resultKey)) {\n return {\n searchAreaConfig: undefined,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n }\n\n try {\n const adaptedResult =\n resultAdapter.adaptSectionLocateResultToPixelBboxGroup(result.content, {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n });\n const mergedRect = mergePixelBboxesToRect([\n adaptedResult.target,\n ...(adaptedResult.references ?? []),\n ]);\n debugSection('mergedRect %j', mergedRect);\n\n const expandedRect = expandSearchArea(mergedRect, context.shotSize);\n const originalWidth = expandedRect.width;\n const originalHeight = expandedRect.height;\n debugSection('expanded sectionRect %j', expandedRect);\n\n searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: mergedRect,\n });\n\n debugSection(\n 'scaled section image from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n searchAreaConfig.image.width,\n searchAreaConfig.image.height,\n searchAreaConfig.mapping.scale,\n );\n } catch (error) {\n const parseErrorMessage =\n error instanceof Error\n ? `Failed to parse section locate result: ${error.message}`\n : 'unknown error in section locate';\n sectionError = sectionError\n ? `${sectionError} (${parseErrorMessage})`\n : parseErrorMessage;\n }\n\n return {\n searchAreaConfig,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelRuntime: ModelRuntime;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelRuntime } =\n options;\n const systemPrompt = systemPromptToExtract({\n screenshotIncluded: extractOption?.screenshotIncluded !== false,\n referenceImagesIncluded: !!multimodalPrompt?.images?.length,\n });\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'text',\n text: 'This is the current screenshot to evaluate. Unless <DATA_DEMAND> explicitly asks for comparison or matching against reference images, base your answer on this screenshot and its contents when provided.',\n });\n\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await multimodalPromptToChatMessages(multimodalPrompt);\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime);\n\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n modelRuntime: ModelRuntime,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n debugInspect('AiJudgeOrderSensitive: description=%s', description);\n\n const result = await callAIWithObjectResponse<{ isOrderSensitive: boolean }>(\n msgs,\n modelRuntime,\n {\n jsonParserSource: 'generic-object',\n },\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","hasLocateResult","input","resultKey","record","locateResult","Array","undefined","buildSearchAreaConfig","options","context","baseRect","scaleRatio","sectionRect","expandSearchArea","croppedResult","cropByRect","scaledResult","scaleImage","AiLocateElement","targetElementDescription","locateOptions","locateAdapter","genericLocate","elementDescription","modelRuntime","adapter","assert","screenshotBase64","elementDescriptionText","userPromptToString","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","modelImage","preparedImage","prepareModelImage","imagePayload","msgs","addOns","multimodalPromptToChatMessages","userPromptToMultimodalPrompt","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","usage","JSON","resRect","matchedElement","errors","resultAdapter","mapping","targetPixelBbox","pixelBboxToRect","mapSearchAreaPixelBboxToOriginalPixelBbox","element","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","searchAreaConfig","sectionError","adaptedResult","mergedRect","mergePixelBboxesToRect","expandedRect","originalWidth","originalHeight","error","parseErrorMessage","AiExtractElementInfo","dataQuery","extractOption","multimodalPrompt","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;AC2DA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAO9B,SAASE,gBAAgBC,KAAc,EAAEC,SAAiB;IACxD,IAAI,CAACD,SAAS,AAAiB,YAAjB,OAAOA,OACnB,OAAO;IAGT,MAAME,SAASF;IACf,MAAMG,eAAeD,MAAM,CAACD,UAAU;IACtC,OAAOG,MAAM,OAAO,CAACD,gBACjBA,aAAa,MAAM,GAAG,IACtBA,AAAiBE,WAAjBF;AACN;AAEO,eAAeG,sBAAsBC,OAG3C;IACC,MAAM,EAAEC,OAAO,EAAEC,QAAQ,EAAE,GAAGF;IAC9B,MAAMG,aAAa;IACnB,MAAMC,cAAcC,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBH,UAAUD,QAAQ,QAAQ;IAE/D,MAAMK,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BN,QAAQ,UAAU,CAAC,MAAM,EACzBG;IAGF,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAAWH,cAAc,WAAW,EAAEH;IACjE,OAAO;QACL,YAAYC;QACZ,OAAO;YACL,aAAaI,aAAa,WAAW;YACrC,OAAOA,aAAa,KAAK;YACzB,QAAQA,aAAa,MAAM;QAC7B;QACA,SAAS;YACP,QAAQ;gBACN,GAAGJ,YAAY,IAAI;gBACnB,GAAGA,YAAY,GAAG;YACpB;YACA,OAAOD;QACT;IACF;AACF;AAEO,eAAeO,gBACpBV,OAAkE;IAElE,MAAM,EAAEW,wBAAwB,EAAE,GAAGC,eAAe,GAAGZ;IACvD,MAAMa,gBAAgBb,QAAQ,YAAY,CAAC,OAAO,CAAC,MAAM;IACzD,IAAIa,AAAuB,aAAvBA,cAAc,IAAI,EACpB,OAAOA,cAAc,QAAQ,CAACF,0BAA0BC;IAE1D,OAAOE,cAAcH,0BAA0BC;AACjD;AAEO,eAAeE,cACpBC,kBAA+B,EAC/Bf,OAAsB;IAEtB,MAAM,EAAEC,OAAO,EAAE,GAAGD;IACpB,MAAMgB,eAAehB,QAAQ,YAAY;IACzC,MAAM,EAAEiB,OAAO,EAAE,GAAGD;IACpBE,IAAAA,sBAAAA,MAAAA,AAAAA,EACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAElDiB,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOH,oBAAoB;IAC3B,MAAMK,yBAAyBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBN;IAClD,MAAMO,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBH;IAChD,MAAMI,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EACnBR,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAGzC,MAAMS,aAAa1B,QAAQ,YAAY,EAAE,SAAS;QAChD,aAAamB;QACb,OAAOlB,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;IACjC;IACA,MAAM0B,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaF,WAAW,WAAW;QACnC,OAAOA,WAAW,KAAK;QACvB,QAAQA,WAAW,MAAM;QACzB,QAAQT,QAAQ,eAAe;IACjC;IAEA,MAAMY,eAAeF,cAAc,WAAW;IAE9C,MAAMG,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKK;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMP;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOP,oBAAiC;QAC1C,MAAMgB,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EACnBC,AAAAA,IAAAA,mCAAAA,4BAAAA,AAAAA,EAA6BlB;QAE/Be,KAAK,IAAI,IAAIC;IACf;IAEA,IAAIG;IAGJ,IAAI;QACFA,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACVL,MACAd,cACA;YACE,aAAahB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOoC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGtC;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,SAASA;gBACT,QAAQ;oBAAC,CAAC,eAAe,EAAEuC,cAAc;iBAAC;YAC5C;YACAG;YACAE;YACA,mBAAmB5C;QACrB;IACF;IAEA,MAAM0C,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIU;IACJ,IAAIC;IACJ,IAAIC,SACF,YAAYZ,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,MAAMa,gBAAgB9B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACzB,gBAAgB0C,IAAI,OAAO,EAAEa,cAAc,UAAU,CAAC,SAAS,GAClE,OAAO;QACL,MAAMjD;QACN,aAAa;YACX,SAASA;YACT,QAAQgD;QACV;QACAN;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;IAGF,IAAI;QACF,MAAMc,UAAUhD,QAAQ,YAAY,EAAE;QACtC,MAAMiD,kBAAkBF,cAAc,mCAAmC,CACvEb,IAAI,OAAO,EACX;YACE,cAAcP,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QAEFiB,UAAUM,AAAAA,IAAAA,sCAAAA,eAAAA,AAAAA,EACRC,AAAAA,IAAAA,uCAAAA,yCAAAA,AAAAA,EAA0CF,iBAAiBD;QAG7D3D,aAAa,WAAWuD;QAExB,MAAMQ,UAA+BC,AAAAA,IAAAA,0BAAAA,qBAAAA,AAAAA,EACnCT,SACAxB;QAEF0B,SAAS,EAAE;QAEX,IAAIM,SACFP,iBAAiBO;IAErB,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAahB,QACT,CAAC,+BAA+B,EAAEgB,EAAE,OAAO,EAAE,GAC7C;QACN,IAAI,AAACR,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAES,IAAI,CAAC,CAAC;aAFtBT,SAAS;YAACS;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,SAASC;YACT,QAAQC;QACV;QACAN;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAesB,gBAAgBxD,OAKrC;IAMC,MAAM,EAAEC,OAAO,EAAEwD,kBAAkB,EAAE,GAAGzD;IACxC,MAAMgB,eAAehB,QAAQ,YAAY;IACzC,MAAM,EAAEiB,OAAO,EAAE,GAAGD;IACpBE,IAAAA,sBAAAA,MAAAA,AAAAA,EACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAClD,MAAM0B,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaT;QACb,OAAOlB,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;QAC/B,QAAQgB,QAAQ,eAAe;IACjC;IAEA,MAAMO,eAAekC,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EACnBzC,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAEzC,MAAM0C,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpCvC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBoC;IAErB,MAAM3B,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG,cAAc,WAAW;wBAC9B,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMgC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM1B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EACnBC,AAAAA,IAAAA,mCAAAA,4BAAAA,AAAAA,EAA6BwB;QAE/B3B,KAAK,IAAI,IAAIC;IACf;IAEA,IAAI8B;IAGJ,IAAI;QACFA,SAAS,MAAM1B,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACbL,MACAd,cACA;YACE,aAAahB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOoC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGtC;QAChE,OAAO;YACL,kBAAkBA;YAClB,OAAO,CAAC,eAAe,EAAEuC,cAAc;YACvCG;YACAE;QACF;IACF;IAEA,IAAIoB;IAGJ,IAAIC,eAAeF,OAAO,OAAO,CAAC,KAAK;IACvC,MAAMd,gBAAgB9B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACzB,gBAAgBqE,OAAO,OAAO,EAAEd,cAAc,UAAU,CAAC,SAAS,GACrE,OAAO;QACL,kBAAkBjD;QAClB,OAAOiE;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;IAGF,IAAI;QACF,MAAMG,gBACJjB,cAAc,wCAAwC,CAACc,OAAO,OAAO,EAAE;YACrE,cAAclC,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QACF,MAAMsC,aAAaC,AAAAA,IAAAA,sCAAAA,sBAAAA,AAAAA,EAAuB;YACxCF,cAAc,MAAM;eAChBA,cAAc,UAAU,IAAI,EAAE;SACnC;QACDzE,aAAa,iBAAiB0E;QAE9B,MAAME,eAAe9D,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiB4D,YAAYhE,QAAQ,QAAQ;QAClE,MAAMmE,gBAAgBD,aAAa,KAAK;QACxC,MAAME,iBAAiBF,aAAa,MAAM;QAC1C5E,aAAa,2BAA2B4E;QAExCL,mBAAmB,MAAM/D,sBAAsB;YAC7CE;YACA,UAAUgE;QACZ;QAEA1E,aACE,uDACA6E,eACAC,gBACAP,iBAAiB,KAAK,CAAC,KAAK,EAC5BA,iBAAiB,KAAK,CAAC,MAAM,EAC7BA,iBAAiB,OAAO,CAAC,KAAK;IAElC,EAAE,OAAOQ,OAAO;QACd,MAAMC,oBACJD,iBAAiBhC,QACb,CAAC,uCAAuC,EAAEgC,MAAM,OAAO,EAAE,GACzD;QACNP,eAAeA,eACX,GAAGA,aAAa,EAAE,EAAEQ,kBAAkB,CAAC,CAAC,GACxCA;IACN;IAEA,OAAO;QACLT;QACA,OAAOC;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeW,qBAAwBxE,OAO7C;IACC,MAAM,EAAEyE,SAAS,EAAExE,OAAO,EAAEyE,aAAa,EAAEC,gBAAgB,EAAE3D,YAAY,EAAE,GACzEhB;IACF,MAAMwB,eAAeoD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA,EAAsB;QACzC,oBAAoBF,eAAe,uBAAuB;QAC1D,yBAAyB,CAAC,CAACC,kBAAkB,QAAQ;IACvD;IACA,MAAMxD,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM4E,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5B9E,QAAQ,eAAe,IAAI,IAC3ByE;IAGF,MAAMM,cAAyD,EAAE;IAEjE,IAAIL,eAAe,uBAAuB,OAAO;QAC/CK,YAAY,IAAI,CAAC;YACf,MAAM;YACN,MAAM;QACR;QAEAA,YAAY,IAAI,CAAC;YACf,MAAM;YACN,WAAW;gBACT,KAAK5D;gBACL,QAAQ;YACV;QACF;IACF;IAEA4D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM/C,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAASuD;QACX;KACD;IAED,IAAIJ,kBAAkB;QACpB,MAAM5C,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EAA+B2C;QACpD7C,KAAK,IAAI,IAAIC;IACf;IAEA,MAAM,EACJ,SAASS,WAAW,EACpBE,KAAK,EACLsC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOnD,MAAMd;IAEvB,IAAIkE;IACJ,IAAI;QACFA,cAAcC,AAAAA,IAAAA,8BAAAA,0BAAAA,AAAAA,EAA8B3C;IAC9C,EAAE,OAAO4C,YAAY;QACnB,MAAM/C,eACJ+C,sBAAsB9C,QAAQ8C,WAAW,OAAO,GAAG7C,OAAO6C;QAC5D,MAAM,IAAI3C,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAE;IAEJ;IAEA,OAAO;QACLwC;QACA1C;QACAE;QACAsC;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBtE,YAA0B;IAK1B,MAAMQ,eAAe+D,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BH;IAE7C,MAAMxD,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAASgE;QACX;KACD;IAEDnG,aAAa,yCAAyCiG;IAEtD,MAAMzB,SAAS,MAAM1B,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnBL,MACAd,cACA;QACE,kBAAkB;IACpB;IAGF,OAAO;QACL,kBAAkB6C,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
@@ -27,9 +27,9 @@ __webpack_require__.d(__webpack_exports__, {
27
27
  parseXMLPlanningResponse: ()=>parseXMLPlanningResponse,
28
28
  plan: ()=>plan
29
29
  });
30
+ const external_common_js_namespaceObject = require("../common.js");
30
31
  const logger_namespaceObject = require("@midscene/shared/logger");
31
32
  const utils_namespaceObject = require("@midscene/shared/utils");
32
- const external_common_js_namespaceObject = require("../common.js");
33
33
  const external_errors_js_namespaceObject = require("./errors.js");
34
34
  const llm_planning_js_namespaceObject = require("./prompt/llm-planning.js");
35
35
  const util_js_namespaceObject = require("./prompt/util.js");
@@ -129,17 +129,20 @@ async function plan(userInstruction, opts) {
129
129
  policy: adapter.imagePreprocess
130
130
  });
131
131
  const imagePayload = preparedImage.imageBase64;
132
+ const userInstructionText = (0, external_common_js_namespaceObject.userPromptToString)(userInstruction);
132
133
  const actionContext = opts.actionContext ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\n` : '';
134
+ const referenceImageMessages = opts.referenceImageMessages ?? [];
133
135
  const instruction = [
134
136
  {
135
137
  role: 'user',
136
138
  content: [
137
139
  {
138
140
  type: 'text',
139
- text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`
141
+ text: `${actionContext}<user_instruction>${userInstructionText}</user_instruction>`
140
142
  }
141
143
  ]
142
- }
144
+ },
145
+ ...referenceImageMessages
143
146
  ];
144
147
  let latestFeedbackMessage;
145
148
  const executionProgressText = includeSubGoals ? conversationHistory.subGoalsToText() : conversationHistory.historicalLogsToText();
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","actionContext","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;ACcA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,AAAAA,IAAAA,wBAAAA,oBAAAA,AAAAA,EAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAAuB,EACvBC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,AAAAA,IAAAA,mCAAAA,2CAAAA,AAAAA,EAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,gBAAgBf,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMgB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEhB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIkB;IAKJ,MAAMC,wBAAwBT,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMiB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BhB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMiD,eAAelB,oBAAoB,cAAc;IACvD,MAAMmB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIlB,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEmB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKL;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACe;IAG3Bf,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMoB,aAAapB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAMuB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCM;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAI4B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAMyB,QAAQ,MAAMF,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACAwB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE;QAEA,IAAIwB,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjE1D,QACE;YAEF0D,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C5D,MAAM;YACN+D,yBAAyB;YAEzB,IAAItB,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM8B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAAS9B,KAAK,WAAW;YAC1D+B;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BmE;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENnE,MAAM,gBAAgBoE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAACvC,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAOuC,cAET9C,OAAO,KAAK,CAAC6C,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,IAAAA,sBAAAA,MAAAA,AAAAA,EACE1B,qBACA;oBAEFf,OAAO,KAAK,CAAC6C,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkB/B,oBAAoB,6BAA6B,CACjE+B,cACA;4BACE,cAAc3B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAImB,WAAW,cAAc,EAAE,QAC7B1B,oBAAoB,aAAa,CAAC0B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD1B,oBAAoB,mBAAmB,CAACsC;YAI5C,IAAIZ,WAAW,GAAG,EAChB1B,oBAAoB,gBAAgB,CAAC0B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB1B,oBAAoB,mBAAmB,CAAC0B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB1B,oBAAoB,YAAY,CAAC0B,WAAW,MAAM;QAGpD1B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMsB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClClB,aACAC;IAEJ;AACF"}
1
+ {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { type TUserPrompt, userPromptToString } from '@/common';\nimport type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: TUserPrompt,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const userInstructionText = userPromptToString(userInstruction);\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const referenceImageMessages = opts.referenceImageMessages ?? [];\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstructionText}</user_instruction>`,\n },\n ],\n },\n ...referenceImageMessages,\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","userInstructionText","userPromptToString","actionContext","referenceImageMessages","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;ACeA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,AAAAA,IAAAA,wBAAAA,oBAAAA,AAAAA,EAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAA4B,EAC5BC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,AAAAA,IAAAA,mCAAAA,2CAAAA,AAAAA,EAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,sBAAsBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBjB;IAC/C,MAAMkB,gBAAgBjB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMkB,yBAAyBlB,KAAK,sBAAsB,IAAI,EAAE;IAChE,MAAMmB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGF,cAAc,kBAAkB,EAAEF,oBAAoB,mBAAmB,CAAC;gBACrF;aACD;QACH;WACGG;KACJ;IAED,IAAIE;IAKJ,MAAMC,wBAAwBZ,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMoB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BnB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMoD,eAAerB,oBAAoB,cAAc;IACvD,MAAMsB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIrB,oBAAoB,sBAAsB,EAAE;QAC9CkB,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGlB,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEsB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEkB,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKR;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACkB;IAG3BlB,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMuB,aAAavB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAM0B,OAAqC;QACzC;YAAE,MAAM;YAAU,SAAShB;QAAa;WACrCS;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMvB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAI+B;IACJ,IAAI;QACF,IAAI;YACFA,aAAa3D,yBAAyBuD,aAAavB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAM4B,QAAQ,MAAMF,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMvB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACA2B,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAa3D,yBAAyBuD,aAAavB,QAAQ,UAAU;QACvE;QAEA,IAAI2B,WAAW,MAAM,IAAIA,AAA+B7C,WAA/B6C,WAAW,eAAe,EAAgB;YACjE7D,QACE;YAEF6D,WAAW,eAAe,GAAG7C;YAC7B6C,WAAW,eAAe,GAAG7C;QAC/B;QAEA,MAAM+C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B7C,WAA/B6C,WAAW,eAAe,EAAgB;YAC5C/D,MAAM;YACNkE,yBAAyB;YAEzB,IAAIzB,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAMiC,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAASjC,KAAK,WAAW;YAC1DkC;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACxC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM6C,sBAAsBtC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BsE;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENtE,MAAM,gBAAgBuE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAejD,OAAO,KAAK,CAACgD,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAAC1C,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAO0C,cAETjD,OAAO,KAAK,CAACgD,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,IAAAA,sBAAAA,MAAAA,AAAAA,EACE7B,qBACA;oBAEFf,OAAO,KAAK,CAACgD,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkBlC,oBAAoB,6BAA6B,CACjEkC,cACA;4BACE,cAAc9B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAIsB,WAAW,cAAc,EAAE,QAC7B7B,oBAAoB,aAAa,CAAC6B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD7B,oBAAoB,mBAAmB,CAACyC;YAI5C,IAAIZ,WAAW,GAAG,EAChB7B,oBAAoB,gBAAgB,CAAC6B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB7B,oBAAoB,mBAAmB,CAAC6B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB7B,oBAAoB,YAAY,CAAC6B,WAAW,MAAM;QAGpD7B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMyB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsB/C,QAAQ+C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClClB,aACAC;IAEJ;AACF"}
@@ -26,6 +26,7 @@ __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
27
  autoGlmPlanning: ()=>autoGlmPlanning
28
28
  });
29
+ const external_common_js_namespaceObject = require("../../../common.js");
29
30
  const logger_namespaceObject = require("@midscene/shared/logger");
30
31
  const index_js_namespaceObject = require("../../service-caller/index.js");
31
32
  const external_actions_js_namespaceObject = require("./actions.js");
@@ -35,15 +36,17 @@ async function autoGlmPlanning(userInstruction, options, getSystemPrompt) {
35
36
  const { conversationHistory, context, actionContext } = options;
36
37
  const systemPrompt = getSystemPrompt() + (actionContext ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>` : '');
37
38
  const imagePayloadBase64 = context.screenshot.base64;
38
- conversationHistory.append({
39
+ const userInstructionText = (0, external_common_js_namespaceObject.userPromptToString)(userInstruction);
40
+ const referenceImageMessages = options.referenceImageMessages ?? [];
41
+ const userInstructionMessage = {
39
42
  role: 'user',
40
43
  content: [
41
44
  {
42
45
  type: 'text',
43
- text: userInstruction
46
+ text: userInstructionText
44
47
  }
45
48
  ]
46
- });
49
+ };
47
50
  conversationHistory.append({
48
51
  role: 'user',
49
52
  content: [
@@ -60,6 +63,8 @@ async function autoGlmPlanning(userInstruction, options, getSystemPrompt) {
60
63
  role: 'system',
61
64
  content: systemPrompt
62
65
  },
66
+ userInstructionMessage,
67
+ ...referenceImageMessages,
63
68
  ...conversationHistory.snapshot(1)
64
69
  ];
65
70
  const { content: rawResponse, usage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, options.modelRuntime, {
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/models/auto-glm/planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { PlanningAIResponse } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport type { PlanOptions } from '../../workflows/planning/types';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGlmPlanning(\n userInstruction: string,\n options: PlanOptions,\n getSystemPrompt: () => string,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, actionContext } = options;\n\n const systemPrompt =\n getSystemPrompt() +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'text', text: userInstruction }],\n });\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n options.modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(\n parsedAction,\n context.shotSize,\n options.actionSpace,\n );\n debug('Transformed actions:', transformedActions);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(rawResponse, undefined, 2),\n usage,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: `<think>${parsedResponse.think}</think><answer>${parsedResponse.content}</answer>`,\n });\n\n const shouldContinuePlanning = !parsedResponse.content.startsWith('finish(');\n\n return {\n actions: transformedActions,\n log: rawResponse,\n usage,\n shouldContinuePlanning,\n rawResponse: JSON.stringify(rawResponse, undefined, 2),\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","autoGlmPlanning","userInstruction","options","getSystemPrompt","conversationHistory","context","actionContext","systemPrompt","imagePayloadBase64","msgs","rawResponse","usage","callAIWithStringResponse","parsedResponse","transformedActions","parseAutoGLMResponse","parsedAction","parseAction","transformAutoGLMAction","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","shouldContinuePlanning"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;ACKA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,gBACpBC,eAAuB,EACvBC,OAAoB,EACpBC,eAA6B;IAE7B,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,aAAa,EAAE,GAAGJ;IAExD,MAAMK,eACJJ,oBACCG,CAAAA,gBACG,CAAC,yBAAyB,EAAEA,cAAc,0BAA0B,CAAC,GACrE,EAAC;IAEP,MAAME,qBAAqBH,QAAQ,UAAU,CAAC,MAAM;IAEpDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAQ,MAAMH;YAAgB;SAAE;IACpD;IACAG,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAa,WAAW;oBAAE,KAAKI;gBAAmB;YAAE;SAAE;IAC1E;IAEA,MAAMC,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASF;QAAa;WACrCH,oBAAoB,QAAQ,CAAC;KACjC;IAED,MAAM,EAAE,SAASM,WAAW,EAAEC,KAAK,EAAE,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAC5CH,MACAP,QAAQ,YAAY,EACpB;QACE,aAAaA,QAAQ,WAAW;IAClC;IAGFJ,MAAM,gCAAgCY;IAEtC,IAAIG;IACJ,IAAIC;IAEJ,IAAI;QACFD,iBAAiBE,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBL;QACtCZ,MAAM,yBAAyBe,eAAe,KAAK;QACnDf,MAAM,uBAAuBe,eAAe,OAAO;QAEnD,MAAMG,eAAeC,AAAAA,IAAAA,mCAAAA,WAAAA,AAAAA,EAAYJ;QACjCf,MAAM,yBAAyBkB;QAC/BF,qBAAqBI,AAAAA,IAAAA,oCAAAA,sBAAAA,AAAAA,EACnBF,cACAX,QAAQ,QAAQ,EAChBH,QAAQ,WAAW;QAErBJ,MAAM,wBAAwBgB;IAChC,EAAE,OAAOK,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,yBAAAA,oBAAoBA,CAC5B,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACd,aAAae,QAAW,IACvCd;IAEJ;IAEAP,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS,CAAC,OAAO,EAAES,eAAe,KAAK,CAAC,gBAAgB,EAAEA,eAAe,OAAO,CAAC,SAAS,CAAC;IAC7F;IAEA,MAAMa,yBAAyB,CAACb,eAAe,OAAO,CAAC,UAAU,CAAC;IAElE,OAAO;QACL,SAASC;QACT,KAAKJ;QACLC;QACAe;QACA,aAAaF,KAAK,SAAS,CAACd,aAAae,QAAW;IACtD;AACF"}
1
+ {"version":3,"file":"ai-model/models/auto-glm/planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { type TUserPrompt, userPromptToString } from '@/common';\nimport type { PlanningAIResponse } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport type { PlanOptions } from '../../workflows/planning/types';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGlmPlanning(\n userInstruction: TUserPrompt,\n options: PlanOptions,\n getSystemPrompt: () => string,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, actionContext } = options;\n\n const systemPrompt =\n getSystemPrompt() +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n const userInstructionText = userPromptToString(userInstruction);\n const referenceImageMessages = options.referenceImageMessages ?? [];\n\n const userInstructionMessage: ChatCompletionMessageParam = {\n role: 'user',\n content: [{ type: 'text', text: userInstructionText }],\n };\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n userInstructionMessage,\n ...referenceImageMessages,\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n options.modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(\n parsedAction,\n context.shotSize,\n options.actionSpace,\n );\n debug('Transformed actions:', transformedActions);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(rawResponse, undefined, 2),\n usage,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: `<think>${parsedResponse.think}</think><answer>${parsedResponse.content}</answer>`,\n });\n\n const shouldContinuePlanning = !parsedResponse.content.startsWith('finish(');\n\n return {\n actions: transformedActions,\n log: rawResponse,\n usage,\n shouldContinuePlanning,\n rawResponse: JSON.stringify(rawResponse, undefined, 2),\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","autoGlmPlanning","userInstruction","options","getSystemPrompt","conversationHistory","context","actionContext","systemPrompt","imagePayloadBase64","userInstructionText","userPromptToString","referenceImageMessages","userInstructionMessage","msgs","rawResponse","usage","callAIWithStringResponse","parsedResponse","transformedActions","parseAutoGLMResponse","parsedAction","parseAction","transformAutoGLMAction","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","shouldContinuePlanning"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;ACMA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,gBACpBC,eAA4B,EAC5BC,OAAoB,EACpBC,eAA6B;IAE7B,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,aAAa,EAAE,GAAGJ;IAExD,MAAMK,eACJJ,oBACCG,CAAAA,gBACG,CAAC,yBAAyB,EAAEA,cAAc,0BAA0B,CAAC,GACrE,EAAC;IAEP,MAAME,qBAAqBH,QAAQ,UAAU,CAAC,MAAM;IACpD,MAAMI,sBAAsBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBT;IAC/C,MAAMU,yBAAyBT,QAAQ,sBAAsB,IAAI,EAAE;IAEnE,MAAMU,yBAAqD;QACzD,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAQ,MAAMH;YAAoB;SAAE;IACxD;IACAL,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAa,WAAW;oBAAE,KAAKI;gBAAmB;YAAE;SAAE;IAC1E;IAEA,MAAMK,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASN;QAAa;QACxCK;WACGD;WACAP,oBAAoB,QAAQ,CAAC;KACjC;IAED,MAAM,EAAE,SAASU,WAAW,EAAEC,KAAK,EAAE,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAC5CH,MACAX,QAAQ,YAAY,EACpB;QACE,aAAaA,QAAQ,WAAW;IAClC;IAGFJ,MAAM,gCAAgCgB;IAEtC,IAAIG;IACJ,IAAIC;IAEJ,IAAI;QACFD,iBAAiBE,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBL;QACtChB,MAAM,yBAAyBmB,eAAe,KAAK;QACnDnB,MAAM,uBAAuBmB,eAAe,OAAO;QAEnD,MAAMG,eAAeC,AAAAA,IAAAA,mCAAAA,WAAAA,AAAAA,EAAYJ;QACjCnB,MAAM,yBAAyBsB;QAC/BF,qBAAqBI,AAAAA,IAAAA,oCAAAA,sBAAAA,AAAAA,EACnBF,cACAf,QAAQ,QAAQ,EAChBH,QAAQ,WAAW;QAErBJ,MAAM,wBAAwBoB;IAChC,EAAE,OAAOK,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,yBAAAA,oBAAoBA,CAC5B,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACd,aAAae,QAAW,IACvCd;IAEJ;IAEAX,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS,CAAC,OAAO,EAAEa,eAAe,KAAK,CAAC,gBAAgB,EAAEA,eAAe,OAAO,CAAC,SAAS,CAAC;IAC7F;IAEA,MAAMa,yBAAyB,CAACb,eAAe,OAAO,CAAC,UAAU,CAAC;IAElE,OAAO;QACL,SAASC;QACT,KAAKJ;QACLC;QACAe;QACA,aAAaF,KAAK,SAAS,CAACd,aAAae,QAAW;IACtD;AACF"}
@@ -26,6 +26,7 @@ __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
27
  uiTarsPlanning: ()=>uiTarsPlanning
28
28
  });
29
+ const external_common_js_namespaceObject = require("../../../common.js");
29
30
  const logger_namespaceObject = require("@midscene/shared/logger");
30
31
  const us_keyboard_layout_namespaceObject = require("@midscene/shared/us-keyboard-layout");
31
32
  const utils_namespaceObject = require("@midscene/shared/utils");
@@ -57,10 +58,12 @@ function pointToLocateParam(point, thought, size) {
57
58
  }
58
59
  async function uiTarsPlanning(userInstruction, options, uiTarsModelVersion) {
59
60
  const { conversationHistory, context, modelRuntime, actionContext } = options;
60
- let instruction = userInstruction;
61
- if (actionContext) instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\n<user_instruction>${userInstruction}</user_instruction>`;
61
+ const userInstructionText = (0, external_common_js_namespaceObject.userPromptToString)(userInstruction);
62
+ let instruction = userInstructionText;
63
+ if (actionContext) instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\n<user_instruction>${userInstructionText}</user_instruction>`;
62
64
  const systemPrompt = (0, ui_tars_planning_js_namespaceObject.getUiTarsPlanningPrompt)() + instruction;
63
65
  const screenshotBase64 = context.screenshot.base64;
66
+ const referenceImageMessages = options.referenceImageMessages ?? [];
64
67
  conversationHistory.append({
65
68
  role: 'user',
66
69
  content: [
@@ -77,6 +80,7 @@ async function uiTarsPlanning(userInstruction, options, uiTarsModelVersion) {
77
80
  role: 'user',
78
81
  content: systemPrompt
79
82
  },
83
+ ...referenceImageMessages,
80
84
  ...conversationHistory.snapshot()
81
85
  ], modelRuntime, {
82
86
  abortSignal: options.abortSignal
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/models/ui-tars/planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/ui-tars/planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n PlanningAction,\n PlanningLocateParamWithLocatedPixelBbox,\n Size,\n} from '@/types';\nimport type { UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport {\n getSummary,\n getUiTarsPlanningPrompt,\n} from '../../prompt/ui-tars-planning';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\nimport type { PlanOptions } from '../../workflows/planning/types';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst warnLog = getDebug('ui-tars-planning', { console: true });\n\nfunction pointToLocateParam(\n point: [number, number],\n thought: string | null,\n size: Size,\n): PlanningLocateParamWithLocatedPixelBbox {\n const ctx = { preparedSize: size };\n const pixelBbox = mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: point },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: 1 },\n );\n\n return {\n prompt: thought || '',\n locatedPixelBbox: finalizePixelBbox(pixelBbox, point, ctx),\n };\n}\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: PlanOptions,\n uiTarsModelVersion: UITarsModelVersion,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelRuntime, actionContext } = options;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n let convertedText: string;\n let parsed: ReturnType<typeof actionParser>['parsed'];\n\n try {\n convertedText = convertBboxToCoordinates(res.content);\n\n const { shotSize } = context;\n const parseResult = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: shotSize.width,\n height: shotSize.height,\n },\n modelVer: uiTarsModelVersion,\n });\n parsed = parseResult.parsed;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n const { shotSize } = context;\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box);\n const endPoint = getPoint(action.action_inputs.end_box);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: pointToLocateParam(startPoint, action.thought, shotSize),\n to: pointToLocateParam(endPoint, action.thought, shotSize),\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.action_inputs.content || action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n warnLog('No key found in action: hotkey. Will not perform action.');\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n ].join('\\n');\n\n // Throw AIResponseParseError with usage and rawResponse preserved\n throw new AIResponseParseError(\n errorMessage,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove common model wrappers before handing the response to UI-TARS parser.\n const cleanedText = text\n .replace(/\\[EOS\\]/g, '')\n .replace(/```(?:[a-zA-Z0-9_-]+)?/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string): [number, number] {\n const [x, y] = JSON.parse(startBox);\n assert(\n typeof x === 'number' &&\n Number.isFinite(x) &&\n typeof y === 'number' &&\n Number.isFinite(y),\n `invalid point data for ui-tars planning: ${startBox}`,\n );\n return [x, y];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: {\n content?: string;\n };\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","pointToLocateParam","point","thought","size","ctx","pixelBbox","mapLocateResultToPixelBboxByCoordinates","finalizePixelBbox","uiTarsPlanning","userInstruction","options","uiTarsModelVersion","conversationHistory","context","modelRuntime","actionContext","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","res","callAIWithStringResponse","convertedText","parsed","convertBboxToCoordinates","shotSize","parseResult","actionParser","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","errorDetails","types","a","log","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","Math","y","cleanedText","startBox"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;AC4BA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,oBAAoB;IAAE,SAAS;AAAK;AAE7D,SAASE,mBACPC,KAAuB,EACvBC,OAAsB,EACtBC,IAAU;IAEV,MAAMC,MAAM;QAAE,cAAcD;IAAK;IACjC,MAAME,YAAYC,AAAAA,IAAAA,qCAAAA,uCAAAA,AAAAA,EAChB;QAAE,MAAM;QAAS,aAAaL;IAAM,GACpCG,KACA;QAAE,OAAO;QAAS,OAAO;QAAM,cAAc;IAAE;IAGjD,OAAO;QACL,QAAQF,WAAW;QACnB,kBAAkBK,AAAAA,IAAAA,wBAAAA,iBAAAA,AAAAA,EAAkBF,WAAWJ,OAAOG;IACxD;AACF;AAEO,eAAeI,eACpBC,eAAuB,EACvBC,OAAoB,EACpBC,kBAAsC;IAEtC,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,YAAY,EAAEC,aAAa,EAAE,GAAGL;IAEtE,IAAIM,cAAcP;IAClB,IAAIM,eACFC,cAAc,CAAC,yBAAyB,EAAED,cAAc,8CAA8C,EAAEN,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,MAA4BF;IAEjD,MAAMG,mBAAmBN,QAAQ,UAAU,CAAC,MAAM;IAElDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKO;gBACP;YACF;SACD;IACH;IAEA,MAAMC,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAChB;QACE;YACE,MAAM;YACN,SAASJ;QACX;WACGL,oBAAoB,QAAQ;KAChC,EACDE,cACA;QACE,aAAaJ,QAAQ,WAAW;IAClC;IAGF,IAAIY;IACJ,IAAIC;IAEJ,IAAI;QACFD,gBAAgBE,yBAAyBJ,IAAI,OAAO;QAEpD,MAAM,EAAEK,QAAQ,EAAE,GAAGZ;QACrB,MAAMa,cAAcC,AAAAA,IAAAA,8BAAAA,YAAAA,AAAAA,EAAa;YAC/B,YAAYL;YACZ,QAAQ;gBAAC;gBAAM;aAAK;YACpB,eAAe;gBACb,OAAOG,SAAS,KAAK;gBACrB,QAAQA,SAAS,MAAM;YACzB;YACA,UAAUd;QACZ;QACAY,SAASG,YAAY,MAAM;IAC7B,EAAE,OAAOE,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,yBAAAA,oBAAoBA,CAC5B,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA,MAAM,EAAEK,QAAQ,EAAE,GAAGZ;IAErBhB,MACE,oBACAc,oBACA,YACAsB,KAAK,SAAS,CAACV;IAGjB,MAAMY,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBd,OAAO,OAAO,CAAC,CAACe;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS;YAC1D,MAAMM,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO;YACtDH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAMnC,mBAAmB2C,YAAYL,OAAO,OAAO,EAAEb;oBACrD,IAAIzB,mBAAmB4C,UAAUN,OAAO,OAAO,EAAEb;gBACnD;gBACA,SAASa,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,aAAa,CAAC,OAAO,IAAIA,OAAO,OAAO,IAAI;YAC7D;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAEtB;YACL,MAAMO,OAAOC,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAXEvC,QAAQ;aAYL,IAAIwC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAzC,MAAM,0BAA0B0C,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIxB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBwB,aAAa,IAAI,CAAC;YAGlB,IACE3B,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtB2B,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAMnB,eAAe;YACnB;eACGkB;SACJ,CAAC,IAAI,CAAC;QAGP,MAAM,IAAIf,yBAAAA,oBAAoBA,CAC5BH,cACAI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEAvB,MAAM,oBAAoBoC,KAAK,SAAS,CAACE,kBAAkB,MAAM;IACjE,MAAMe,MAAMC,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAW/B,IAAI,OAAO;IAElCR,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASsC;IACX;IAEA,OAAO;QACL,SAASf;QACTe;QACA,OAAO9B,IAAI,KAAK;QAChB,aAAaa,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW;QACpD,wBAAwBG;IAC1B;AACF;AAOA,SAASb,yBAAyB4B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIC,KAAK,KAAK,CAAEN,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMI,IAAID,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEE,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAchB,KACjB,OAAO,CAAC,YAAY,IACpB,OAAO,CAAC,2BAA2B;IACtC,OAAOgB,YAAY,OAAO,CAACf,SAASC,cAAc,IAAI;AACxD;AAEA,SAASb,SAAS4B,QAAgB;IAChC,MAAM,CAACJ,GAAGE,EAAE,GAAGlC,KAAK,KAAK,CAACoC;IAC1B7B,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAa,YAAb,OAAOyB,KACLJ,OAAO,QAAQ,CAACI,MAChB,AAAa,YAAb,OAAOE,KACPN,OAAO,QAAQ,CAACM,IAClB,CAAC,yCAAyC,EAAEE,UAAU;IAExD,OAAO;QAACJ;QAAGE;KAAE;AACf"}
1
+ {"version":3,"file":"ai-model/models/ui-tars/planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/ui-tars/planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { type TUserPrompt, userPromptToString } from '@/common';\nimport type {\n PlanningAIResponse,\n PlanningAction,\n PlanningLocateParamWithLocatedPixelBbox,\n Size,\n} from '@/types';\nimport type { UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport {\n getSummary,\n getUiTarsPlanningPrompt,\n} from '../../prompt/ui-tars-planning';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\nimport type { PlanOptions } from '../../workflows/planning/types';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst warnLog = getDebug('ui-tars-planning', { console: true });\n\nfunction pointToLocateParam(\n point: [number, number],\n thought: string | null,\n size: Size,\n): PlanningLocateParamWithLocatedPixelBbox {\n const ctx = { preparedSize: size };\n const pixelBbox = mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: point },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: 1 },\n );\n\n return {\n prompt: thought || '',\n locatedPixelBbox: finalizePixelBbox(pixelBbox, point, ctx),\n };\n}\n\nexport async function uiTarsPlanning(\n userInstruction: TUserPrompt,\n options: PlanOptions,\n uiTarsModelVersion: UITarsModelVersion,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelRuntime, actionContext } = options;\n\n const userInstructionText = userPromptToString(userInstruction);\n let instruction = userInstructionText;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstructionText}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n const referenceImageMessages = options.referenceImageMessages ?? [];\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...referenceImageMessages,\n ...conversationHistory.snapshot(),\n ],\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n let convertedText: string;\n let parsed: ReturnType<typeof actionParser>['parsed'];\n\n try {\n convertedText = convertBboxToCoordinates(res.content);\n\n const { shotSize } = context;\n const parseResult = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: shotSize.width,\n height: shotSize.height,\n },\n modelVer: uiTarsModelVersion,\n });\n parsed = parseResult.parsed;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n const { shotSize } = context;\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box);\n const endPoint = getPoint(action.action_inputs.end_box);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: pointToLocateParam(startPoint, action.thought, shotSize),\n to: pointToLocateParam(endPoint, action.thought, shotSize),\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.action_inputs.content || action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n warnLog('No key found in action: hotkey. Will not perform action.');\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n ].join('\\n');\n\n // Throw AIResponseParseError with usage and rawResponse preserved\n throw new AIResponseParseError(\n errorMessage,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove common model wrappers before handing the response to UI-TARS parser.\n const cleanedText = text\n .replace(/\\[EOS\\]/g, '')\n .replace(/```(?:[a-zA-Z0-9_-]+)?/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string): [number, number] {\n const [x, y] = JSON.parse(startBox);\n assert(\n typeof x === 'number' &&\n Number.isFinite(x) &&\n typeof y === 'number' &&\n Number.isFinite(y),\n `invalid point data for ui-tars planning: ${startBox}`,\n );\n return [x, y];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: {\n content?: string;\n };\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","pointToLocateParam","point","thought","size","ctx","pixelBbox","mapLocateResultToPixelBboxByCoordinates","finalizePixelBbox","uiTarsPlanning","userInstruction","options","uiTarsModelVersion","conversationHistory","context","modelRuntime","actionContext","userInstructionText","userPromptToString","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","referenceImageMessages","res","callAIWithStringResponse","convertedText","parsed","convertBboxToCoordinates","shotSize","parseResult","actionParser","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","errorDetails","types","a","log","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","Math","y","cleanedText","startBox"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;AC6BA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,oBAAoB;IAAE,SAAS;AAAK;AAE7D,SAASE,mBACPC,KAAuB,EACvBC,OAAsB,EACtBC,IAAU;IAEV,MAAMC,MAAM;QAAE,cAAcD;IAAK;IACjC,MAAME,YAAYC,AAAAA,IAAAA,qCAAAA,uCAAAA,AAAAA,EAChB;QAAE,MAAM;QAAS,aAAaL;IAAM,GACpCG,KACA;QAAE,OAAO;QAAS,OAAO;QAAM,cAAc;IAAE;IAGjD,OAAO;QACL,QAAQF,WAAW;QACnB,kBAAkBK,AAAAA,IAAAA,wBAAAA,iBAAAA,AAAAA,EAAkBF,WAAWJ,OAAOG;IACxD;AACF;AAEO,eAAeI,eACpBC,eAA4B,EAC5BC,OAAoB,EACpBC,kBAAsC;IAEtC,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,YAAY,EAAEC,aAAa,EAAE,GAAGL;IAEtE,MAAMM,sBAAsBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBR;IAC/C,IAAIS,cAAcF;IAClB,IAAID,eACFG,cAAc,CAAC,yBAAyB,EAAEH,cAAc,8CAA8C,EAAEC,oBAAoB,mBAAmB,CAAC;IAGlJ,MAAMG,eAAeC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,MAA4BF;IAEjD,MAAMG,mBAAmBR,QAAQ,UAAU,CAAC,MAAM;IAClD,MAAMS,yBAAyBZ,QAAQ,sBAAsB,IAAI,EAAE;IAEnEE,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKS;gBACP;YACF;SACD;IACH;IAEA,MAAME,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAChB;QACE;YACE,MAAM;YACN,SAASL;QACX;WACGG;WACAV,oBAAoB,QAAQ;KAChC,EACDE,cACA;QACE,aAAaJ,QAAQ,WAAW;IAClC;IAGF,IAAIe;IACJ,IAAIC;IAEJ,IAAI;QACFD,gBAAgBE,yBAAyBJ,IAAI,OAAO;QAEpD,MAAM,EAAEK,QAAQ,EAAE,GAAGf;QACrB,MAAMgB,cAAcC,AAAAA,IAAAA,8BAAAA,YAAAA,AAAAA,EAAa;YAC/B,YAAYL;YACZ,QAAQ;gBAAC;gBAAM;aAAK;YACpB,eAAe;gBACb,OAAOG,SAAS,KAAK;gBACrB,QAAQA,SAAS,MAAM;YACzB;YACA,UAAUjB;QACZ;QACAe,SAASG,YAAY,MAAM;IAC7B,EAAE,OAAOE,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,yBAAAA,oBAAoBA,CAC5B,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA,MAAM,EAAEK,QAAQ,EAAE,GAAGf;IAErBhB,MACE,oBACAc,oBACA,YACAyB,KAAK,SAAS,CAACV;IAGjB,MAAMY,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBd,OAAO,OAAO,CAAC,CAACe;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMxC,QAAQ2C,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS7C,mBAAmBC,OAAOwC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMxC,QAAQ2C,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS7C,mBAAmBC,OAAOwC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMxC,QAAQ2C,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS7C,mBAAmBC,OAAOwC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS;YAC1D,MAAMM,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO;YACtDH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAMtC,mBAAmB8C,YAAYL,OAAO,OAAO,EAAEb;oBACrD,IAAI5B,mBAAmB+C,UAAUN,OAAO,OAAO,EAAEb;gBACnD;gBACA,SAASa,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,aAAa,CAAC,OAAO,IAAIA,OAAO,OAAO,IAAI;YAC7D;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAEtB;YACL,MAAMO,OAAOC,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAXE1C,QAAQ;aAYL,IAAI2C,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACA5C,MAAM,0BAA0B6C,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIxB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBwB,aAAa,IAAI,CAAC;YAGlB,IACE3B,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtB2B,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAMnB,eAAe;YACnB;eACGkB;SACJ,CAAC,IAAI,CAAC;QAGP,MAAM,IAAIf,yBAAAA,oBAAoBA,CAC5BH,cACAI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA1B,MAAM,oBAAoBuC,KAAK,SAAS,CAACE,kBAAkB,MAAM;IACjE,MAAMe,MAAMC,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAW/B,IAAI,OAAO;IAElCX,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASyC;IACX;IAEA,OAAO;QACL,SAASf;QACTe;QACA,OAAO9B,IAAI,KAAK;QAChB,aAAaa,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW;QACpD,wBAAwBG;IAC1B;AACF;AAOA,SAASb,yBAAyB4B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIC,KAAK,KAAK,CAAEN,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMI,IAAID,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEE,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAchB,KACjB,OAAO,CAAC,YAAY,IACpB,OAAO,CAAC,2BAA2B;IACtC,OAAOgB,YAAY,OAAO,CAACf,SAASC,cAAc,IAAI;AACxD;AAEA,SAASb,SAAS4B,QAAgB;IAChC,MAAM,CAACJ,GAAGE,EAAE,GAAGlC,KAAK,KAAK,CAACoC;IAC1B7B,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAa,YAAb,OAAOyB,KACLJ,OAAO,QAAQ,CAACI,MAChB,AAAa,YAAb,OAAOE,KACPN,OAAO,QAAQ,CAACM,IAClB,CAAC,yCAAyC,EAAEE,UAAU;IAExD,OAAO;QAACJ;QAAGE;KAAE;AACf"}
@@ -31,20 +31,23 @@ __webpack_require__.d(__webpack_exports__, {
31
31
  parseActionParam: ()=>parseActionParam,
32
32
  getReadableTimeString: ()=>getReadableTimeString,
33
33
  expandSearchArea: ()=>expandSearchArea,
34
+ userPromptToString: ()=>userPromptToString,
34
35
  SizeSchema: ()=>SizeSchema,
35
36
  findAllMidsceneLocatorField: ()=>findAllMidsceneLocatorField,
37
+ multimodalPromptToChatMessages: ()=>multimodalPromptToChatMessages,
36
38
  getMidsceneLocationSchema: ()=>getMidsceneLocationSchema,
37
39
  finalizeActionName: ()=>finalizeActionName,
38
40
  TMultimodalPromptSchema: ()=>TMultimodalPromptSchema,
39
41
  ifMidsceneLocatorField: ()=>ifMidsceneLocatorField,
40
42
  PointSchema: ()=>PointSchema,
41
43
  RectSchema: ()=>RectSchema,
42
- dumpActionParam: ()=>dumpActionParam
44
+ dumpActionParam: ()=>dumpActionParam,
45
+ userPromptToMultimodalPrompt: ()=>userPromptToMultimodalPrompt
43
46
  });
44
- const utils_namespaceObject = require("@midscene/shared/utils");
45
47
  const constants_namespaceObject = require("@midscene/shared/constants");
46
48
  const extractor_namespaceObject = require("@midscene/shared/extractor");
47
49
  const img_namespaceObject = require("@midscene/shared/img");
50
+ const utils_namespaceObject = require("@midscene/shared/utils");
48
51
  const external_zod_namespaceObject = require("zod");
49
52
  function expandSearchArea(rect, screenSize) {
50
53
  const minArea = 160000;
@@ -136,6 +139,53 @@ const TUserPromptSchema = external_zod_namespaceObject.z.union([
136
139
  prompt: external_zod_namespaceObject.z.string()
137
140
  }).and(TMultimodalPromptSchema.partial())
138
141
  ]);
142
+ const userPromptToString = (prompt)=>'string' == typeof prompt ? prompt : prompt.prompt;
143
+ const userPromptToMultimodalPrompt = (prompt)=>{
144
+ if ('string' == typeof prompt || !prompt.images) return;
145
+ return {
146
+ images: prompt.images,
147
+ convertHttpImage2Base64: !!prompt.convertHttpImage2Base64
148
+ };
149
+ };
150
+ const multimodalPromptToChatMessages = async (multimodalPrompt)=>{
151
+ const msgs = [];
152
+ if (multimodalPrompt?.images?.length) {
153
+ msgs.push({
154
+ role: 'user',
155
+ content: [
156
+ {
157
+ type: 'text',
158
+ text: 'Next, I will provide all the reference images. These reference images are supporting context only, not the current screenshot being evaluated, unless the task explicitly asks for comparison or matching.'
159
+ }
160
+ ]
161
+ });
162
+ for (const item of multimodalPrompt.images){
163
+ const imagePayload = await (0, img_namespaceObject.preProcessImageUrl)(item.url, !!multimodalPrompt.convertHttpImage2Base64);
164
+ msgs.push({
165
+ role: 'user',
166
+ content: [
167
+ {
168
+ type: 'text',
169
+ text: `this is the reference image named '${item.name}'. It is a reference image, not the current screenshot:`
170
+ }
171
+ ]
172
+ });
173
+ msgs.push({
174
+ role: 'user',
175
+ content: [
176
+ {
177
+ type: 'image_url',
178
+ image_url: {
179
+ url: imagePayload,
180
+ detail: 'high'
181
+ }
182
+ }
183
+ ]
184
+ });
185
+ }
186
+ }
187
+ return msgs;
188
+ };
139
189
  const locateFieldFlagName = 'midscene_location_field_flag';
140
190
  const MidsceneLocationInput = external_zod_namespaceObject.z.object({
141
191
  prompt: TUserPromptSchema,
@@ -270,7 +320,10 @@ exports.getMidsceneLocationSchema = __webpack_exports__.getMidsceneLocationSchem
270
320
  exports.getReadableTimeString = __webpack_exports__.getReadableTimeString;
271
321
  exports.ifMidsceneLocatorField = __webpack_exports__.ifMidsceneLocatorField;
272
322
  exports.markupImageForLLM = __webpack_exports__.markupImageForLLM;
323
+ exports.multimodalPromptToChatMessages = __webpack_exports__.multimodalPromptToChatMessages;
273
324
  exports.parseActionParam = __webpack_exports__.parseActionParam;
325
+ exports.userPromptToMultimodalPrompt = __webpack_exports__.userPromptToMultimodalPrompt;
326
+ exports.userPromptToString = __webpack_exports__.userPromptToString;
274
327
  for(var __rspack_i in __webpack_exports__)if (-1 === [
275
328
  "PointSchema",
276
329
  "RectSchema",
@@ -287,7 +340,10 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
287
340
  "getReadableTimeString",
288
341
  "ifMidsceneLocatorField",
289
342
  "markupImageForLLM",
290
- "parseActionParam"
343
+ "multimodalPromptToChatMessages",
344
+ "parseActionParam",
345
+ "userPromptToMultimodalPrompt",
346
+ "userPromptToString"
291
347
  ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
292
348
  Object.defineProperty(exports, '__esModule', {
293
349
  value: true