@midscene/core 1.9.4-beta-20260610101007.0 → 1.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/es/agent/task-builder.mjs +3 -1
  2. package/dist/es/agent/task-builder.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +8 -4
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/inspect.mjs +11 -2
  7. package/dist/es/ai-model/inspect.mjs.map +1 -1
  8. package/dist/es/ai-model/llm-planning.mjs +4 -2
  9. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  10. package/dist/es/ai-model/models/auto-glm/locate.mjs +2 -1
  11. package/dist/es/ai-model/models/auto-glm/locate.mjs.map +1 -1
  12. package/dist/es/ai-model/models/auto-glm/planning.mjs +4 -3
  13. package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -1
  14. package/dist/es/ai-model/models/gpt.mjs +12 -6
  15. package/dist/es/ai-model/models/gpt.mjs.map +1 -1
  16. package/dist/es/ai-model/models/kimi.mjs +42 -0
  17. package/dist/es/ai-model/models/kimi.mjs.map +1 -0
  18. package/dist/es/ai-model/models/registry.mjs +3 -1
  19. package/dist/es/ai-model/models/registry.mjs.map +1 -1
  20. package/dist/es/ai-model/models/ui-tars/planning.mjs +3 -2
  21. package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -1
  22. package/dist/es/ai-model/service-caller/index.mjs +13 -7
  23. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  24. package/dist/es/service/index.mjs +9 -1
  25. package/dist/es/service/index.mjs.map +1 -1
  26. package/dist/es/types.mjs.map +1 -1
  27. package/dist/es/utils.mjs +2 -2
  28. package/dist/lib/agent/task-builder.js +3 -1
  29. package/dist/lib/agent/task-builder.js.map +1 -1
  30. package/dist/lib/agent/tasks.js +8 -4
  31. package/dist/lib/agent/tasks.js.map +1 -1
  32. package/dist/lib/agent/utils.js +1 -1
  33. package/dist/lib/ai-model/inspect.js +11 -2
  34. package/dist/lib/ai-model/inspect.js.map +1 -1
  35. package/dist/lib/ai-model/llm-planning.js +4 -2
  36. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  37. package/dist/lib/ai-model/models/auto-glm/locate.js +2 -1
  38. package/dist/lib/ai-model/models/auto-glm/locate.js.map +1 -1
  39. package/dist/lib/ai-model/models/auto-glm/planning.js +4 -3
  40. package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -1
  41. package/dist/lib/ai-model/models/gpt.js +12 -6
  42. package/dist/lib/ai-model/models/gpt.js.map +1 -1
  43. package/dist/lib/ai-model/models/kimi.js +76 -0
  44. package/dist/lib/ai-model/models/kimi.js.map +1 -0
  45. package/dist/lib/ai-model/models/registry.js +3 -1
  46. package/dist/lib/ai-model/models/registry.js.map +1 -1
  47. package/dist/lib/ai-model/models/ui-tars/planning.js +3 -2
  48. package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -1
  49. package/dist/lib/ai-model/service-caller/index.js +13 -7
  50. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  51. package/dist/lib/service/index.js +9 -1
  52. package/dist/lib/service/index.js.map +1 -1
  53. package/dist/lib/types.js.map +1 -1
  54. package/dist/lib/utils.js +2 -2
  55. package/dist/types/ai-model/inspect.d.ts +2 -0
  56. package/dist/types/ai-model/models/gpt.d.ts +2 -2
  57. package/dist/types/ai-model/models/kimi.d.ts +18 -0
  58. package/dist/types/ai-model/models/registry.d.ts +17 -2
  59. package/dist/types/ai-model/service-caller/index.d.ts +9 -1
  60. package/dist/types/ai-model/workflows/inspect/types.d.ts +1 -0
  61. package/dist/types/types.d.ts +15 -0
  62. package/package.json +2 -2
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocateResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { cropByRect, scaleImage } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport {\n expandSearchArea,\n multimodalPromptToChatMessages,\n userPromptToMultimodalPrompt,\n userPromptToString,\n} from '../common';\nimport type { ModelRuntime } from './models';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n} from './service-caller/index';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport {\n mergePixelBboxesToRect,\n pixelBboxToRect,\n} from './workflows/inspect/locate-result-rect';\nimport { mapSearchAreaPixelBboxToOriginalPixelBbox } from './workflows/inspect/search-area-mapping';\nimport type {\n LocateOptions,\n LocateResult,\n SearchAreaConfig,\n} from './workflows/inspect/types';\n\nexport type InspectAIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nexport {\n userPromptToString as extraTextFromUserPrompt,\n multimodalPromptToChatMessages as promptsToChatParam,\n} from '../common';\n\nfunction hasLocateResult(input: unknown, resultKey: string) {\n if (!input || typeof input !== 'object') {\n return false;\n }\n\n const record = input as Record<string, unknown>;\n const locateResult = record[resultKey];\n return Array.isArray(locateResult)\n ? locateResult.length > 0\n : locateResult !== undefined;\n}\n\nexport async function buildSearchAreaConfig(options: {\n context: UIContext;\n baseRect: Rect;\n}): Promise<SearchAreaConfig> {\n const { context, baseRect } = options;\n const scaleRatio = 2;\n const sectionRect = expandSearchArea(baseRect, context.shotSize);\n\n const croppedResult = await cropByRect(\n context.screenshot.base64,\n sectionRect,\n );\n\n const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);\n return {\n sourceRect: sectionRect,\n image: {\n imageBase64: scaledResult.imageBase64,\n width: scaledResult.width,\n height: scaledResult.height,\n },\n mapping: {\n offset: {\n x: sectionRect.left,\n y: sectionRect.top,\n },\n scale: scaleRatio,\n },\n };\n}\n\nexport async function AiLocateElement(\n options: LocateOptions & { targetElementDescription: TUserPrompt },\n): Promise<LocateResult> {\n const { targetElementDescription, ...locateOptions } = options;\n const locateAdapter = options.modelRuntime.adapter.locate;\n if (locateAdapter.kind === 'custom') {\n return locateAdapter.locateFn(targetElementDescription, locateOptions);\n }\n return genericLocate(targetElementDescription, locateOptions);\n}\n\nexport async function genericLocate(\n elementDescription: TUserPrompt,\n options: LocateOptions,\n): Promise<LocateResult> {\n const { context } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'generic locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n\n assert(elementDescription, 'cannot find the target element description');\n const elementDescriptionText = userPromptToString(elementDescription);\n const userInstructionPrompt = findElementPrompt(elementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(\n adapter.locate.resultAdapter.promptSpec,\n );\n\n const modelImage = options.searchConfig?.image ?? {\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n };\n const preparedImage = await prepareModelImage({\n imageBase64: modelImage.imageBase64,\n width: modelImage.width,\n height: modelImage.height,\n policy: adapter.imagePreprocess,\n });\n\n const imagePayload = preparedImage.imageBase64;\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof elementDescription !== 'string') {\n const addOns = await multimodalPromptToChatMessages(\n userPromptToMultimodalPrompt(elementDescription),\n );\n msgs.push(...addOns);\n }\n\n let res: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AIElementLocateResponse>>\n >;\n try {\n res = await callAIWithObjectResponse<AIElementLocateResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'locate',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElement: LocateResultElement | undefined;\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(res.content, resultAdapter.promptSpec.resultKey)) {\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n }\n\n try {\n const mapping = options.searchConfig?.mapping;\n const targetPixelBbox = resultAdapter.adaptElementLocateResultToPixelBbox(\n res.content,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n );\n resRect = pixelBboxToRect(\n mapSearchAreaPixelBboxToOriginalPixelBbox(targetPixelBbox, mapping),\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n elementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElement = element;\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse locate result: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n element: matchedElement,\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelRuntime: ModelRuntime;\n abortSignal?: AbortSignal;\n}): Promise<{\n searchAreaConfig?: SearchAreaConfig;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'section locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n policy: adapter.imagePreprocess,\n });\n\n const systemPrompt = systemPromptToLocateSection(\n adapter.locate.resultAdapter.promptSpec,\n );\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n userPromptToString(sectionDescription),\n );\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: preparedImage.imageBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await multimodalPromptToChatMessages(\n userPromptToMultimodalPrompt(sectionDescription),\n );\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'section-locator',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n searchAreaConfig: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let searchAreaConfig:\n | Awaited<ReturnType<typeof buildSearchAreaConfig>>\n | undefined;\n let sectionError = result.content.error;\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(result.content, resultAdapter.promptSpec.resultKey)) {\n return {\n searchAreaConfig: undefined,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n }\n\n try {\n const adaptedResult =\n resultAdapter.adaptSectionLocateResultToPixelBboxGroup(result.content, {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n });\n const mergedRect = mergePixelBboxesToRect([\n adaptedResult.target,\n ...(adaptedResult.references ?? []),\n ]);\n debugSection('mergedRect %j', mergedRect);\n\n const expandedRect = expandSearchArea(mergedRect, context.shotSize);\n const originalWidth = expandedRect.width;\n const originalHeight = expandedRect.height;\n debugSection('expanded sectionRect %j', expandedRect);\n\n searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: mergedRect,\n });\n\n debugSection(\n 'scaled section image from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n searchAreaConfig.image.width,\n searchAreaConfig.image.height,\n searchAreaConfig.mapping.scale,\n );\n } catch (error) {\n const parseErrorMessage =\n error instanceof Error\n ? `Failed to parse section locate result: ${error.message}`\n : 'unknown error in section locate';\n sectionError = sectionError\n ? `${sectionError} (${parseErrorMessage})`\n : parseErrorMessage;\n }\n\n return {\n searchAreaConfig,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelRuntime: ModelRuntime;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelRuntime } =\n options;\n const systemPrompt = systemPromptToExtract({\n screenshotIncluded: extractOption?.screenshotIncluded !== false,\n referenceImagesIncluded: !!multimodalPrompt?.images?.length,\n });\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'text',\n text: 'This is the current screenshot to evaluate. Unless <DATA_DEMAND> explicitly asks for comparison or matching against reference images, base your answer on this screenshot and its contents when provided.',\n });\n\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await multimodalPromptToChatMessages(multimodalPrompt);\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime);\n\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n modelRuntime: ModelRuntime,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n debugInspect('AiJudgeOrderSensitive: description=%s', description);\n\n const result = await callAIWithObjectResponse<{ isOrderSensitive: boolean }>(\n msgs,\n modelRuntime,\n {\n jsonParserSource: 'generic-object',\n },\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","hasLocateResult","input","resultKey","record","locateResult","Array","undefined","buildSearchAreaConfig","options","context","baseRect","scaleRatio","sectionRect","expandSearchArea","croppedResult","cropByRect","scaledResult","scaleImage","AiLocateElement","targetElementDescription","locateOptions","locateAdapter","genericLocate","elementDescription","modelRuntime","adapter","assert","screenshotBase64","elementDescriptionText","userPromptToString","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","modelImage","preparedImage","prepareModelImage","imagePayload","msgs","addOns","multimodalPromptToChatMessages","userPromptToMultimodalPrompt","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","usage","JSON","resRect","matchedElement","errors","resultAdapter","mapping","targetPixelBbox","pixelBboxToRect","mapSearchAreaPixelBboxToOriginalPixelBbox","element","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","searchAreaConfig","sectionError","adaptedResult","mergedRect","mergePixelBboxesToRect","expandedRect","originalWidth","originalHeight","error","parseErrorMessage","AiExtractElementInfo","dataQuery","extractOption","multimodalPrompt","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;AC2DA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAO9B,SAASE,gBAAgBC,KAAc,EAAEC,SAAiB;IACxD,IAAI,CAACD,SAAS,AAAiB,YAAjB,OAAOA,OACnB,OAAO;IAGT,MAAME,SAASF;IACf,MAAMG,eAAeD,MAAM,CAACD,UAAU;IACtC,OAAOG,MAAM,OAAO,CAACD,gBACjBA,aAAa,MAAM,GAAG,IACtBA,AAAiBE,WAAjBF;AACN;AAEO,eAAeG,sBAAsBC,OAG3C;IACC,MAAM,EAAEC,OAAO,EAAEC,QAAQ,EAAE,GAAGF;IAC9B,MAAMG,aAAa;IACnB,MAAMC,cAAcC,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBH,UAAUD,QAAQ,QAAQ;IAE/D,MAAMK,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BN,QAAQ,UAAU,CAAC,MAAM,EACzBG;IAGF,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAAWH,cAAc,WAAW,EAAEH;IACjE,OAAO;QACL,YAAYC;QACZ,OAAO;YACL,aAAaI,aAAa,WAAW;YACrC,OAAOA,aAAa,KAAK;YACzB,QAAQA,aAAa,MAAM;QAC7B;QACA,SAAS;YACP,QAAQ;gBACN,GAAGJ,YAAY,IAAI;gBACnB,GAAGA,YAAY,GAAG;YACpB;YACA,OAAOD;QACT;IACF;AACF;AAEO,eAAeO,gBACpBV,OAAkE;IAElE,MAAM,EAAEW,wBAAwB,EAAE,GAAGC,eAAe,GAAGZ;IACvD,MAAMa,gBAAgBb,QAAQ,YAAY,CAAC,OAAO,CAAC,MAAM;IACzD,IAAIa,AAAuB,aAAvBA,cAAc,IAAI,EACpB,OAAOA,cAAc,QAAQ,CAACF,0BAA0BC;IAE1D,OAAOE,cAAcH,0BAA0BC;AACjD;AAEO,eAAeE,cACpBC,kBAA+B,EAC/Bf,OAAsB;IAEtB,MAAM,EAAEC,OAAO,EAAE,GAAGD;IACpB,MAAMgB,eAAehB,QAAQ,YAAY;IACzC,MAAM,EAAEiB,OAAO,EAAE,GAAGD;IACpBE,IAAAA,sBAAAA,MAAAA,AAAAA,EACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAElDiB,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOH,oBAAoB;IAC3B,MAAMK,yBAAyBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBN;IAClD,MAAMO,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBH;IAChD,MAAMI,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EACnBR,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAGzC,MAAMS,aAAa1B,QAAQ,YAAY,EAAE,SAAS;QAChD,aAAamB;QACb,OAAOlB,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;IACjC;IACA,MAAM0B,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaF,WAAW,WAAW;QACnC,OAAOA,WAAW,KAAK;QACvB,QAAQA,WAAW,MAAM;QACzB,QAAQT,QAAQ,eAAe;IACjC;IAEA,MAAMY,eAAeF,cAAc,WAAW;IAE9C,MAAMG,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKK;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMP;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOP,oBAAiC;QAC1C,MAAMgB,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EACnBC,AAAAA,IAAAA,mCAAAA,4BAAAA,AAAAA,EAA6BlB;QAE/Be,KAAK,IAAI,IAAIC;IACf;IAEA,IAAIG;IAGJ,IAAI;QACFA,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACVL,MACAd,cACA;YACE,aAAahB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOoC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGtC;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,SAASA;gBACT,QAAQ;oBAAC,CAAC,eAAe,EAAEuC,cAAc;iBAAC;YAC5C;YACAG;YACAE;YACA,mBAAmB5C;QACrB;IACF;IAEA,MAAM0C,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIU;IACJ,IAAIC;IACJ,IAAIC,SACF,YAAYZ,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,MAAMa,gBAAgB9B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACzB,gBAAgB0C,IAAI,OAAO,EAAEa,cAAc,UAAU,CAAC,SAAS,GAClE,OAAO;QACL,MAAMjD;QACN,aAAa;YACX,SAASA;YACT,QAAQgD;QACV;QACAN;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;IAGF,IAAI;QACF,MAAMc,UAAUhD,QAAQ,YAAY,EAAE;QACtC,MAAMiD,kBAAkBF,cAAc,mCAAmC,CACvEb,IAAI,OAAO,EACX;YACE,cAAcP,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QAEFiB,UAAUM,AAAAA,IAAAA,sCAAAA,eAAAA,AAAAA,EACRC,AAAAA,IAAAA,uCAAAA,yCAAAA,AAAAA,EAA0CF,iBAAiBD;QAG7D3D,aAAa,WAAWuD;QAExB,MAAMQ,UAA+BC,AAAAA,IAAAA,0BAAAA,qBAAAA,AAAAA,EACnCT,SACAxB;QAEF0B,SAAS,EAAE;QAEX,IAAIM,SACFP,iBAAiBO;IAErB,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAahB,QACT,CAAC,+BAA+B,EAAEgB,EAAE,OAAO,EAAE,GAC7C;QACN,IAAI,AAACR,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAES,IAAI,CAAC,CAAC;aAFtBT,SAAS;YAACS;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,SAASC;YACT,QAAQC;QACV;QACAN;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAesB,gBAAgBxD,OAKrC;IAMC,MAAM,EAAEC,OAAO,EAAEwD,kBAAkB,EAAE,GAAGzD;IACxC,MAAMgB,eAAehB,QAAQ,YAAY;IACzC,MAAM,EAAEiB,OAAO,EAAE,GAAGD;IACpBE,IAAAA,sBAAAA,MAAAA,AAAAA,EACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAClD,MAAM0B,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaT;QACb,OAAOlB,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;QAC/B,QAAQgB,QAAQ,eAAe;IACjC;IAEA,MAAMO,eAAekC,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EACnBzC,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAEzC,MAAM0C,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpCvC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBoC;IAErB,MAAM3B,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG,cAAc,WAAW;wBAC9B,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMgC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM1B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EACnBC,AAAAA,IAAAA,mCAAAA,4BAAAA,AAAAA,EAA6BwB;QAE/B3B,KAAK,IAAI,IAAIC;IACf;IAEA,IAAI8B;IAGJ,IAAI;QACFA,SAAS,MAAM1B,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACbL,MACAd,cACA;YACE,aAAahB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOoC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGtC;QAChE,OAAO;YACL,kBAAkBA;YAClB,OAAO,CAAC,eAAe,EAAEuC,cAAc;YACvCG;YACAE;QACF;IACF;IAEA,IAAIoB;IAGJ,IAAIC,eAAeF,OAAO,OAAO,CAAC,KAAK;IACvC,MAAMd,gBAAgB9B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACzB,gBAAgBqE,OAAO,OAAO,EAAEd,cAAc,UAAU,CAAC,SAAS,GACrE,OAAO;QACL,kBAAkBjD;QAClB,OAAOiE;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;IAGF,IAAI;QACF,MAAMG,gBACJjB,cAAc,wCAAwC,CAACc,OAAO,OAAO,EAAE;YACrE,cAAclC,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QACF,MAAMsC,aAAaC,AAAAA,IAAAA,sCAAAA,sBAAAA,AAAAA,EAAuB;YACxCF,cAAc,MAAM;eAChBA,cAAc,UAAU,IAAI,EAAE;SACnC;QACDzE,aAAa,iBAAiB0E;QAE9B,MAAME,eAAe9D,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiB4D,YAAYhE,QAAQ,QAAQ;QAClE,MAAMmE,gBAAgBD,aAAa,KAAK;QACxC,MAAME,iBAAiBF,aAAa,MAAM;QAC1C5E,aAAa,2BAA2B4E;QAExCL,mBAAmB,MAAM/D,sBAAsB;YAC7CE;YACA,UAAUgE;QACZ;QAEA1E,aACE,uDACA6E,eACAC,gBACAP,iBAAiB,KAAK,CAAC,KAAK,EAC5BA,iBAAiB,KAAK,CAAC,MAAM,EAC7BA,iBAAiB,OAAO,CAAC,KAAK;IAElC,EAAE,OAAOQ,OAAO;QACd,MAAMC,oBACJD,iBAAiBhC,QACb,CAAC,uCAAuC,EAAEgC,MAAM,OAAO,EAAE,GACzD;QACNP,eAAeA,eACX,GAAGA,aAAa,EAAE,EAAEQ,kBAAkB,CAAC,CAAC,GACxCA;IACN;IAEA,OAAO;QACLT;QACA,OAAOC;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeW,qBAAwBxE,OAO7C;IACC,MAAM,EAAEyE,SAAS,EAAExE,OAAO,EAAEyE,aAAa,EAAEC,gBAAgB,EAAE3D,YAAY,EAAE,GACzEhB;IACF,MAAMwB,eAAeoD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA,EAAsB;QACzC,oBAAoBF,eAAe,uBAAuB;QAC1D,yBAAyB,CAAC,CAACC,kBAAkB,QAAQ;IACvD;IACA,MAAMxD,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM4E,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5B9E,QAAQ,eAAe,IAAI,IAC3ByE;IAGF,MAAMM,cAAyD,EAAE;IAEjE,IAAIL,eAAe,uBAAuB,OAAO;QAC/CK,YAAY,IAAI,CAAC;YACf,MAAM;YACN,MAAM;QACR;QAEAA,YAAY,IAAI,CAAC;YACf,MAAM;YACN,WAAW;gBACT,KAAK5D;gBACL,QAAQ;YACV;QACF;IACF;IAEA4D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM/C,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAASuD;QACX;KACD;IAED,IAAIJ,kBAAkB;QACpB,MAAM5C,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EAA+B2C;QACpD7C,KAAK,IAAI,IAAIC;IACf;IAEA,MAAM,EACJ,SAASS,WAAW,EACpBE,KAAK,EACLsC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOnD,MAAMd;IAEvB,IAAIkE;IACJ,IAAI;QACFA,cAAcC,AAAAA,IAAAA,8BAAAA,0BAAAA,AAAAA,EAA8B3C;IAC9C,EAAE,OAAO4C,YAAY;QACnB,MAAM/C,eACJ+C,sBAAsB9C,QAAQ8C,WAAW,OAAO,GAAG7C,OAAO6C;QAC5D,MAAM,IAAI3C,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAE;IAEJ;IAEA,OAAO;QACLwC;QACA1C;QACAE;QACAsC;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBtE,YAA0B;IAK1B,MAAMQ,eAAe+D,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BH;IAE7C,MAAMxD,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAASgE;QACX;KACD;IAEDnG,aAAa,yCAAyCiG;IAEtD,MAAMzB,SAAS,MAAM1B,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnBL,MACAd,cACA;QACE,kBAAkB;IACpB;IAGF,OAAO;QACL,kBAAkB6C,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocateResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { cropByRect, scaleImage } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport {\n expandSearchArea,\n multimodalPromptToChatMessages,\n userPromptToMultimodalPrompt,\n userPromptToString,\n} from '../common';\nimport type { ModelRuntime } from './models';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n} from './service-caller/index';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport {\n mergePixelBboxesToRect,\n pixelBboxToRect,\n} from './workflows/inspect/locate-result-rect';\nimport { mapSearchAreaPixelBboxToOriginalPixelBbox } from './workflows/inspect/search-area-mapping';\nimport type {\n LocateOptions,\n LocateResult,\n SearchAreaConfig,\n} from './workflows/inspect/types';\n\nexport type InspectAIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nexport {\n userPromptToString as extraTextFromUserPrompt,\n multimodalPromptToChatMessages as promptsToChatParam,\n} from '../common';\n\nfunction hasLocateResult(input: unknown, resultKey: string) {\n if (!input || typeof input !== 'object') {\n return false;\n }\n\n const record = input as Record<string, unknown>;\n const locateResult = record[resultKey];\n return Array.isArray(locateResult)\n ? locateResult.length > 0\n : locateResult !== undefined;\n}\n\nexport async function buildSearchAreaConfig(options: {\n context: UIContext;\n baseRect: Rect;\n}): Promise<SearchAreaConfig> {\n const { context, baseRect } = options;\n const scaleRatio = 2;\n const sectionRect = expandSearchArea(baseRect, context.shotSize);\n\n const croppedResult = await cropByRect(\n context.screenshot.base64,\n sectionRect,\n );\n\n const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);\n return {\n sourceRect: sectionRect,\n image: {\n imageBase64: scaledResult.imageBase64,\n width: scaledResult.width,\n height: scaledResult.height,\n },\n mapping: {\n offset: {\n x: sectionRect.left,\n y: sectionRect.top,\n },\n scale: scaleRatio,\n },\n };\n}\n\nexport async function AiLocateElement(\n options: LocateOptions & { targetElementDescription: TUserPrompt },\n): Promise<LocateResult> {\n const { targetElementDescription, ...locateOptions } = options;\n const locateAdapter = options.modelRuntime.adapter.locate;\n if (locateAdapter.kind === 'custom') {\n return locateAdapter.locateFn(targetElementDescription, locateOptions);\n }\n return genericLocate(targetElementDescription, locateOptions);\n}\n\nexport async function genericLocate(\n elementDescription: TUserPrompt,\n options: LocateOptions,\n): Promise<LocateResult> {\n const { context } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'generic locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n\n assert(elementDescription, 'cannot find the target element description');\n const elementDescriptionText = userPromptToString(elementDescription);\n const userInstructionPrompt = findElementPrompt(elementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(\n adapter.locate.resultAdapter.promptSpec,\n );\n\n const modelImage = options.searchConfig?.image ?? {\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n };\n const preparedImage = await prepareModelImage({\n imageBase64: modelImage.imageBase64,\n width: modelImage.width,\n height: modelImage.height,\n policy: adapter.imagePreprocess,\n });\n\n const imagePayload = preparedImage.imageBase64;\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof elementDescription !== 'string') {\n const addOns = await multimodalPromptToChatMessages(\n userPromptToMultimodalPrompt(elementDescription),\n );\n msgs.push(...addOns);\n }\n\n let res: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AIElementLocateResponse>>\n >;\n try {\n res = await callAIWithObjectResponse<AIElementLocateResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'locate',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n const rawChoiceMessage =\n callError instanceof AIResponseParseError\n ? callError.rawChoiceMessage\n : undefined;\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n rawChoiceMessage,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElement: LocateResultElement | undefined;\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(res.content, resultAdapter.promptSpec.resultKey)) {\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: errors as string[],\n },\n rawResponse,\n rawChoiceMessage: res.rawChoiceMessage,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n }\n\n try {\n const mapping = options.searchConfig?.mapping;\n const targetPixelBbox = resultAdapter.adaptElementLocateResultToPixelBbox(\n res.content,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n );\n resRect = pixelBboxToRect(\n mapSearchAreaPixelBboxToOriginalPixelBbox(targetPixelBbox, mapping),\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n elementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElement = element;\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse locate result: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n element: matchedElement,\n errors: errors as string[],\n },\n rawResponse,\n rawChoiceMessage: res.rawChoiceMessage,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelRuntime: ModelRuntime;\n abortSignal?: AbortSignal;\n}): Promise<{\n searchAreaConfig?: SearchAreaConfig;\n error?: string;\n rawResponse: string;\n rawChoiceMessage?: unknown;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'section locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n policy: adapter.imagePreprocess,\n });\n\n const systemPrompt = systemPromptToLocateSection(\n adapter.locate.resultAdapter.promptSpec,\n );\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n userPromptToString(sectionDescription),\n );\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: preparedImage.imageBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await multimodalPromptToChatMessages(\n userPromptToMultimodalPrompt(sectionDescription),\n );\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'section-locator',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n const rawChoiceMessage =\n callError instanceof AIResponseParseError\n ? callError.rawChoiceMessage\n : undefined;\n return {\n searchAreaConfig: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n rawChoiceMessage,\n usage,\n };\n }\n\n let searchAreaConfig:\n | Awaited<ReturnType<typeof buildSearchAreaConfig>>\n | undefined;\n let sectionError = result.content.error;\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(result.content, resultAdapter.promptSpec.resultKey)) {\n return {\n searchAreaConfig: undefined,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n rawChoiceMessage: result.rawChoiceMessage,\n usage: result.usage,\n };\n }\n\n try {\n const adaptedResult =\n resultAdapter.adaptSectionLocateResultToPixelBboxGroup(result.content, {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n });\n const mergedRect = mergePixelBboxesToRect([\n adaptedResult.target,\n ...(adaptedResult.references ?? []),\n ]);\n debugSection('mergedRect %j', mergedRect);\n\n const expandedRect = expandSearchArea(mergedRect, context.shotSize);\n const originalWidth = expandedRect.width;\n const originalHeight = expandedRect.height;\n debugSection('expanded sectionRect %j', expandedRect);\n\n searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: mergedRect,\n });\n\n debugSection(\n 'scaled section image from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n searchAreaConfig.image.width,\n searchAreaConfig.image.height,\n searchAreaConfig.mapping.scale,\n );\n } catch (error) {\n const parseErrorMessage =\n error instanceof Error\n ? `Failed to parse section locate result: ${error.message}`\n : 'unknown error in section locate';\n sectionError = sectionError\n ? `${sectionError} (${parseErrorMessage})`\n : parseErrorMessage;\n }\n\n return {\n searchAreaConfig,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n rawChoiceMessage: result.rawChoiceMessage,\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelRuntime: ModelRuntime;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelRuntime } =\n options;\n const systemPrompt = systemPromptToExtract({\n screenshotIncluded: extractOption?.screenshotIncluded !== false,\n referenceImagesIncluded: !!multimodalPrompt?.images?.length,\n });\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'text',\n text: 'This is the current screenshot to evaluate. Unless <DATA_DEMAND> explicitly asks for comparison or matching against reference images, base your answer on this screenshot and its contents when provided.',\n });\n\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await multimodalPromptToChatMessages(multimodalPrompt);\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n rawChoiceMessage,\n } = await callAI(msgs, modelRuntime);\n\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n rawChoiceMessage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n rawChoiceMessage,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n modelRuntime: ModelRuntime,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n debugInspect('AiJudgeOrderSensitive: description=%s', description);\n\n const result = await callAIWithObjectResponse<{ isOrderSensitive: boolean }>(\n msgs,\n modelRuntime,\n {\n jsonParserSource: 'generic-object',\n },\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","hasLocateResult","input","resultKey","record","locateResult","Array","undefined","buildSearchAreaConfig","options","context","baseRect","scaleRatio","sectionRect","expandSearchArea","croppedResult","cropByRect","scaledResult","scaleImage","AiLocateElement","targetElementDescription","locateOptions","locateAdapter","genericLocate","elementDescription","modelRuntime","adapter","assert","screenshotBase64","elementDescriptionText","userPromptToString","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","modelImage","preparedImage","prepareModelImage","imagePayload","msgs","addOns","multimodalPromptToChatMessages","userPromptToMultimodalPrompt","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","usage","rawChoiceMessage","JSON","resRect","matchedElement","errors","resultAdapter","mapping","targetPixelBbox","pixelBboxToRect","mapSearchAreaPixelBboxToOriginalPixelBbox","element","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","searchAreaConfig","sectionError","adaptedResult","mergedRect","mergePixelBboxesToRect","expandedRect","originalWidth","originalHeight","error","parseErrorMessage","AiExtractElementInfo","dataQuery","extractOption","multimodalPrompt","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;AC2DA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAO9B,SAASE,gBAAgBC,KAAc,EAAEC,SAAiB;IACxD,IAAI,CAACD,SAAS,AAAiB,YAAjB,OAAOA,OACnB,OAAO;IAGT,MAAME,SAASF;IACf,MAAMG,eAAeD,MAAM,CAACD,UAAU;IACtC,OAAOG,MAAM,OAAO,CAACD,gBACjBA,aAAa,MAAM,GAAG,IACtBA,AAAiBE,WAAjBF;AACN;AAEO,eAAeG,sBAAsBC,OAG3C;IACC,MAAM,EAAEC,OAAO,EAAEC,QAAQ,EAAE,GAAGF;IAC9B,MAAMG,aAAa;IACnB,MAAMC,cAAcC,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBH,UAAUD,QAAQ,QAAQ;IAE/D,MAAMK,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BN,QAAQ,UAAU,CAAC,MAAM,EACzBG;IAGF,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAAWH,cAAc,WAAW,EAAEH;IACjE,OAAO;QACL,YAAYC;QACZ,OAAO;YACL,aAAaI,aAAa,WAAW;YACrC,OAAOA,aAAa,KAAK;YACzB,QAAQA,aAAa,MAAM;QAC7B;QACA,SAAS;YACP,QAAQ;gBACN,GAAGJ,YAAY,IAAI;gBACnB,GAAGA,YAAY,GAAG;YACpB;YACA,OAAOD;QACT;IACF;AACF;AAEO,eAAeO,gBACpBV,OAAkE;IAElE,MAAM,EAAEW,wBAAwB,EAAE,GAAGC,eAAe,GAAGZ;IACvD,MAAMa,gBAAgBb,QAAQ,YAAY,CAAC,OAAO,CAAC,MAAM;IACzD,IAAIa,AAAuB,aAAvBA,cAAc,IAAI,EACpB,OAAOA,cAAc,QAAQ,CAACF,0BAA0BC;IAE1D,OAAOE,cAAcH,0BAA0BC;AACjD;AAEO,eAAeE,cACpBC,kBAA+B,EAC/Bf,OAAsB;IAEtB,MAAM,EAAEC,OAAO,EAAE,GAAGD;IACpB,MAAMgB,eAAehB,QAAQ,YAAY;IACzC,MAAM,EAAEiB,OAAO,EAAE,GAAGD;IACpBE,IAAAA,sBAAAA,MAAAA,AAAAA,EACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAElDiB,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOH,oBAAoB;IAC3B,MAAMK,yBAAyBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBN;IAClD,MAAMO,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBH;IAChD,MAAMI,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EACnBR,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAGzC,MAAMS,aAAa1B,QAAQ,YAAY,EAAE,SAAS;QAChD,aAAamB;QACb,OAAOlB,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;IACjC;IACA,MAAM0B,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaF,WAAW,WAAW;QACnC,OAAOA,WAAW,KAAK;QACvB,QAAQA,WAAW,MAAM;QACzB,QAAQT,QAAQ,eAAe;IACjC;IAEA,MAAMY,eAAeF,cAAc,WAAW;IAE9C,MAAMG,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKK;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMP;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOP,oBAAiC;QAC1C,MAAMgB,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EACnBC,AAAAA,IAAAA,mCAAAA,4BAAAA,AAAAA,EAA6BlB;QAE/Be,KAAK,IAAI,IAAIC;IACf;IAEA,IAAIG;IAGJ,IAAI;QACFA,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACVL,MACAd,cACA;YACE,aAAahB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOoC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGtC;QAChE,MAAM6C,mBACJP,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,gBAAgB,GAC1BtC;QACN,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,SAASA;gBACT,QAAQ;oBAAC,CAAC,eAAe,EAAEuC,cAAc;iBAAC;YAC5C;YACAG;YACAG;YACAD;YACA,mBAAmB5C;QACrB;IACF;IAEA,MAAM0C,cAAcI,KAAK,SAAS,CAACV,IAAI,OAAO;IAE9C,IAAIW;IACJ,IAAIC;IACJ,IAAIC,SACF,YAAYb,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,MAAMc,gBAAgB/B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACzB,gBAAgB0C,IAAI,OAAO,EAAEc,cAAc,UAAU,CAAC,SAAS,GAClE,OAAO;QACL,MAAMlD;QACN,aAAa;YACX,SAASA;YACT,QAAQiD;QACV;QACAP;QACA,kBAAkBN,IAAI,gBAAgB;QACtC,OAAOA,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;IAGF,IAAI;QACF,MAAMe,UAAUjD,QAAQ,YAAY,EAAE;QACtC,MAAMkD,kBAAkBF,cAAc,mCAAmC,CACvEd,IAAI,OAAO,EACX;YACE,cAAcP,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QAEFkB,UAAUM,AAAAA,IAAAA,sCAAAA,eAAAA,AAAAA,EACRC,AAAAA,IAAAA,uCAAAA,yCAAAA,AAAAA,EAA0CF,iBAAiBD;QAG7D5D,aAAa,WAAWwD;QAExB,MAAMQ,UAA+BC,AAAAA,IAAAA,0BAAAA,qBAAAA,AAAAA,EACnCT,SACAzB;QAEF2B,SAAS,EAAE;QAEX,IAAIM,SACFP,iBAAiBO;IAErB,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAajB,QACT,CAAC,+BAA+B,EAAEiB,EAAE,OAAO,EAAE,GAC7C;QACN,IAAI,AAACR,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAES,IAAI,CAAC,CAAC;aAFtBT,SAAS;YAACS;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,SAASC;YACT,QAAQC;QACV;QACAP;QACA,kBAAkBN,IAAI,gBAAgB;QACtC,OAAOA,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAeuB,gBAAgBzD,OAKrC;IAOC,MAAM,EAAEC,OAAO,EAAEyD,kBAAkB,EAAE,GAAG1D;IACxC,MAAMgB,eAAehB,QAAQ,YAAY;IACzC,MAAM,EAAEiB,OAAO,EAAE,GAAGD;IACpBE,IAAAA,sBAAAA,MAAAA,AAAAA,EACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAClD,MAAM0B,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaT;QACb,OAAOlB,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;QAC/B,QAAQgB,QAAQ,eAAe;IACjC;IAEA,MAAMO,eAAemC,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EACnB1C,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAEzC,MAAM2C,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpCxC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBqC;IAErB,MAAM5B,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG,cAAc,WAAW;wBAC9B,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMiC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM3B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EACnBC,AAAAA,IAAAA,mCAAAA,4BAAAA,AAAAA,EAA6ByB;QAE/B5B,KAAK,IAAI,IAAIC;IACf;IAEA,IAAI+B;IAGJ,IAAI;QACFA,SAAS,MAAM3B,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACbL,MACAd,cACA;YACE,aAAahB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOoC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,yBAAAA,oBAAoBA,GAAGL,UAAU,KAAK,GAAGtC;QAChE,MAAM6C,mBACJP,qBAAqBK,yBAAAA,oBAAoBA,GACrCL,UAAU,gBAAgB,GAC1BtC;QACN,OAAO;YACL,kBAAkBA;YAClB,OAAO,CAAC,eAAe,EAAEuC,cAAc;YACvCG;YACAG;YACAD;QACF;IACF;IAEA,IAAIqB;IAGJ,IAAIC,eAAeF,OAAO,OAAO,CAAC,KAAK;IACvC,MAAMd,gBAAgB/B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACzB,gBAAgBsE,OAAO,OAAO,EAAEd,cAAc,UAAU,CAAC,SAAS,GACrE,OAAO;QACL,kBAAkBlD;QAClB,OAAOkE;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,kBAAkBA,OAAO,gBAAgB;QACzC,OAAOA,OAAO,KAAK;IACrB;IAGF,IAAI;QACF,MAAMG,gBACJjB,cAAc,wCAAwC,CAACc,OAAO,OAAO,EAAE;YACrE,cAAcnC,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QACF,MAAMuC,aAAaC,AAAAA,IAAAA,sCAAAA,sBAAAA,AAAAA,EAAuB;YACxCF,cAAc,MAAM;eAChBA,cAAc,UAAU,IAAI,EAAE;SACnC;QACD1E,aAAa,iBAAiB2E;QAE9B,MAAME,eAAe/D,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiB6D,YAAYjE,QAAQ,QAAQ;QAClE,MAAMoE,gBAAgBD,aAAa,KAAK;QACxC,MAAME,iBAAiBF,aAAa,MAAM;QAC1C7E,aAAa,2BAA2B6E;QAExCL,mBAAmB,MAAMhE,sBAAsB;YAC7CE;YACA,UAAUiE;QACZ;QAEA3E,aACE,uDACA8E,eACAC,gBACAP,iBAAiB,KAAK,CAAC,KAAK,EAC5BA,iBAAiB,KAAK,CAAC,MAAM,EAC7BA,iBAAiB,OAAO,CAAC,KAAK;IAElC,EAAE,OAAOQ,OAAO;QACd,MAAMC,oBACJD,iBAAiBjC,QACb,CAAC,uCAAuC,EAAEiC,MAAM,OAAO,EAAE,GACzD;QACNP,eAAeA,eACX,GAAGA,aAAa,EAAE,EAAEQ,kBAAkB,CAAC,CAAC,GACxCA;IACN;IAEA,OAAO;QACLT;QACA,OAAOC;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,kBAAkBA,OAAO,gBAAgB;QACzC,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeW,qBAAwBzE,OAO7C;IACC,MAAM,EAAE0E,SAAS,EAAEzE,OAAO,EAAE0E,aAAa,EAAEC,gBAAgB,EAAE5D,YAAY,EAAE,GACzEhB;IACF,MAAMwB,eAAeqD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA,EAAsB;QACzC,oBAAoBF,eAAe,uBAAuB;QAC1D,yBAAyB,CAAC,CAACC,kBAAkB,QAAQ;IACvD;IACA,MAAMzD,mBAAmBlB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM6E,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5B/E,QAAQ,eAAe,IAAI,IAC3B0E;IAGF,MAAMM,cAAyD,EAAE;IAEjE,IAAIL,eAAe,uBAAuB,OAAO;QAC/CK,YAAY,IAAI,CAAC;YACf,MAAM;YACN,MAAM;QACR;QAEAA,YAAY,IAAI,CAAC;YACf,MAAM;YACN,WAAW;gBACT,KAAK7D;gBACL,QAAQ;YACV;QACF;IACF;IAEA6D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMhD,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAASwD;QACX;KACD;IAED,IAAIJ,kBAAkB;QACpB,MAAM7C,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,8BAAAA,AAAAA,EAA+B4C;QACpD9C,KAAK,IAAI,IAAIC;IACf;IAEA,MAAM,EACJ,SAASS,WAAW,EACpBE,KAAK,EACLuC,iBAAiB,EACjBtC,gBAAgB,EACjB,GAAG,MAAMuC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOpD,MAAMd;IAEvB,IAAImE;IACJ,IAAI;QACFA,cAAcC,AAAAA,IAAAA,8BAAAA,0BAAAA,AAAAA,EAA8B5C;IAC9C,EAAE,OAAO6C,YAAY;QACnB,MAAMhD,eACJgD,sBAAsB/C,QAAQ+C,WAAW,OAAO,GAAG9C,OAAO8C;QAC5D,MAAM,IAAI5C,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAE,OACAC;IAEJ;IAEA,OAAO;QACLwC;QACA3C;QACAG;QACAD;QACAuC;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBvE,YAA0B;IAK1B,MAAMQ,eAAegE,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BH;IAE7C,MAAMzD,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASN;QAAa;QACxC;YACE,MAAM;YACN,SAASiE;QACX;KACD;IAEDpG,aAAa,yCAAyCkG;IAEtD,MAAMzB,SAAS,MAAM3B,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnBL,MACAd,cACA;QACE,kBAAkB;IACpB;IAGF,OAAO;QACL,kBAAkB8C,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
@@ -194,7 +194,7 @@ async function plan(userInstruction, opts) {
194
194
  ...instruction,
195
195
  ...historyLog
196
196
  ];
197
- let { content: rawResponse, usage, reasoning_content } = await (0, index_js_namespaceObject.callAI)(msgs, modelRuntime, {
197
+ let { content: rawResponse, usage, reasoning_content, rawChoiceMessage } = await (0, index_js_namespaceObject.callAI)(msgs, modelRuntime, {
198
198
  abortSignal: opts.abortSignal,
199
199
  requiresOriginalImageDetail: opts.includeLocateInPlanning
200
200
  });
@@ -210,6 +210,7 @@ async function plan(userInstruction, opts) {
210
210
  rawResponse = retry.content;
211
211
  usage = retry.usage;
212
212
  reasoning_content = retry.reasoning_content;
213
+ rawChoiceMessage = retry.rawChoiceMessage;
213
214
  planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);
214
215
  }
215
216
  if (planFromAI.action && void 0 !== planFromAI.finalizeSuccess) {
@@ -230,6 +231,7 @@ async function plan(userInstruction, opts) {
230
231
  ...planFromAI,
231
232
  actions,
232
233
  rawResponse,
234
+ rawChoiceMessage,
233
235
  usage,
234
236
  reasoning_content,
235
237
  yamlFlow: (0, external_common_js_namespaceObject.buildYamlFlowFromPlans)(actions, opts.actionSpace),
@@ -280,7 +282,7 @@ async function plan(userInstruction, opts) {
280
282
  return returnValue;
281
283
  } catch (parseError) {
282
284
  const errorMessage = parseError instanceof Error ? parseError.message : String(parseError);
283
- throw new index_js_namespaceObject.AIResponseParseError(`XML parse error: ${errorMessage}`, rawResponse, usage);
285
+ throw new index_js_namespaceObject.AIResponseParseError(`XML parse error: ${errorMessage}`, rawResponse, usage, rawChoiceMessage);
284
286
  }
285
287
  }
286
288
  exports.parseXMLPlanningResponse = __webpack_exports__.parseXMLPlanningResponse;
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { type TUserPrompt, userPromptToString } from '@/common';\nimport type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: TUserPrompt,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const userInstructionText = userPromptToString(userInstruction);\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const referenceImageMessages = opts.referenceImageMessages ?? [];\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstructionText}</user_instruction>`,\n },\n ],\n },\n ...referenceImageMessages,\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","userInstructionText","userPromptToString","actionContext","referenceImageMessages","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;ACeA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,AAAAA,IAAAA,wBAAAA,oBAAAA,AAAAA,EAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAA4B,EAC5BC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,AAAAA,IAAAA,mCAAAA,2CAAAA,AAAAA,EAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,sBAAsBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBjB;IAC/C,MAAMkB,gBAAgBjB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMkB,yBAAyBlB,KAAK,sBAAsB,IAAI,EAAE;IAChE,MAAMmB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGF,cAAc,kBAAkB,EAAEF,oBAAoB,mBAAmB,CAAC;gBACrF;aACD;QACH;WACGG;KACJ;IAED,IAAIE;IAKJ,MAAMC,wBAAwBZ,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMoB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BnB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMoD,eAAerB,oBAAoB,cAAc;IACvD,MAAMsB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIrB,oBAAoB,sBAAsB,EAAE;QAC9CkB,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGlB,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEsB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEkB,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKR;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACkB;IAG3BlB,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMuB,aAAavB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAM0B,OAAqC;QACzC;YAAE,MAAM;YAAU,SAAShB;QAAa;WACrCS;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMvB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAI+B;IACJ,IAAI;QACF,IAAI;YACFA,aAAa3D,yBAAyBuD,aAAavB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAM4B,QAAQ,MAAMF,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMvB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACA2B,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAa3D,yBAAyBuD,aAAavB,QAAQ,UAAU;QACvE;QAEA,IAAI2B,WAAW,MAAM,IAAIA,AAA+B7C,WAA/B6C,WAAW,eAAe,EAAgB;YACjE7D,QACE;YAEF6D,WAAW,eAAe,GAAG7C;YAC7B6C,WAAW,eAAe,GAAG7C;QAC/B;QAEA,MAAM+C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B7C,WAA/B6C,WAAW,eAAe,EAAgB;YAC5C/D,MAAM;YACNkE,yBAAyB;YAEzB,IAAIzB,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAMiC,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAASjC,KAAK,WAAW;YAC1DkC;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACxC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM6C,sBAAsBtC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BsE;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENtE,MAAM,gBAAgBuE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAejD,OAAO,KAAK,CAACgD,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAAC1C,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAO0C,cAETjD,OAAO,KAAK,CAACgD,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,IAAAA,sBAAAA,MAAAA,AAAAA,EACE7B,qBACA;oBAEFf,OAAO,KAAK,CAACgD,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkBlC,oBAAoB,6BAA6B,CACjEkC,cACA;4BACE,cAAc9B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAIsB,WAAW,cAAc,EAAE,QAC7B7B,oBAAoB,aAAa,CAAC6B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD7B,oBAAoB,mBAAmB,CAACyC;YAI5C,IAAIZ,WAAW,GAAG,EAChB7B,oBAAoB,gBAAgB,CAAC6B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB7B,oBAAoB,mBAAmB,CAAC6B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB7B,oBAAoB,YAAY,CAAC6B,WAAW,MAAM;QAGpD7B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMyB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsB/C,QAAQ+C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClClB,aACAC;IAEJ;AACF"}
1
+ {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { type TUserPrompt, userPromptToString } from '@/common';\nimport type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: TUserPrompt,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const userInstructionText = userPromptToString(userInstruction);\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const referenceImageMessages = opts.referenceImageMessages ?? [];\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstructionText}</user_instruction>`,\n },\n ],\n },\n ...referenceImageMessages,\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n rawChoiceMessage,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n rawChoiceMessage = retry.rawChoiceMessage;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n rawChoiceMessage,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n rawChoiceMessage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","userInstructionText","userPromptToString","actionContext","referenceImageMessages","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","rawChoiceMessage","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;ACeA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,AAAAA,IAAAA,wBAAAA,oBAAAA,AAAAA,EAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAA4B,EAC5BC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,AAAAA,IAAAA,mCAAAA,2CAAAA,AAAAA,EAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,sBAAsBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBjB;IAC/C,MAAMkB,gBAAgBjB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMkB,yBAAyBlB,KAAK,sBAAsB,IAAI,EAAE;IAChE,MAAMmB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGF,cAAc,kBAAkB,EAAEF,oBAAoB,mBAAmB,CAAC;gBACrF;aACD;QACH;WACGG;KACJ;IAED,IAAIE;IAKJ,MAAMC,wBAAwBZ,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMoB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BnB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMoD,eAAerB,oBAAoB,cAAc;IACvD,MAAMsB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIrB,oBAAoB,sBAAsB,EAAE;QAC9CkB,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGlB,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEsB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEkB,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKR;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACkB;IAG3BlB,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMuB,aAAavB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAM0B,OAAqC;QACzC;YAAE,MAAM;YAAU,SAAShB;QAAa;WACrCS;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EACjBC,gBAAgB,EACjB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOL,MAAMvB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAIgC;IACJ,IAAI;QACF,IAAI;YACFA,aAAa5D,yBAAyBuD,aAAavB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAM6B,QAAQ,MAAMF,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOL,MAAMvB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACA2B,cAAcM,MAAM,OAAO;YAC3BL,QAAQK,MAAM,KAAK;YACnBJ,oBAAoBI,MAAM,iBAAiB;YAC3CH,mBAAmBG,MAAM,gBAAgB;YACzCD,aAAa5D,yBAAyBuD,aAAavB,QAAQ,UAAU;QACvE;QAEA,IAAI4B,WAAW,MAAM,IAAIA,AAA+B9C,WAA/B8C,WAAW,eAAe,EAAgB;YACjE9D,QACE;YAEF8D,WAAW,eAAe,GAAG9C;YAC7B8C,WAAW,eAAe,GAAG9C;QAC/B;QAEA,MAAMgD,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B9C,WAA/B8C,WAAW,eAAe,EAAgB;YAC5ChE,MAAM;YACNmE,yBAAyB;YAEzB,IAAI1B,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAMkC,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAP;YACAG;YACAF;YACAC;YACA,UAAUQ,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAASlC,KAAK,WAAW;YAC1DmC;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACzC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM8C,sBAAsBvC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BuE;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENvE,MAAM,gBAAgBwE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAelD,OAAO,KAAK,CAACiD,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAAC3C,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAO2C,cAETlD,OAAO,KAAK,CAACiD,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,IAAAA,sBAAAA,MAAAA,AAAAA,EACE9B,qBACA;oBAEFf,OAAO,KAAK,CAACiD,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkBnC,oBAAoB,6BAA6B,CACjEmC,cACA;4BACE,cAAc/B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAIuB,WAAW,cAAc,EAAE,QAC7B9B,oBAAoB,aAAa,CAAC8B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD9B,oBAAoB,mBAAmB,CAAC0C;YAI5C,IAAIZ,WAAW,GAAG,EAChB9B,oBAAoB,gBAAgB,CAAC8B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB9B,oBAAoB,mBAAmB,CAAC8B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB9B,oBAAoB,YAAY,CAAC8B,WAAW,MAAM;QAGpD9B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMyB;gBACR;aACD;QACH;QAEA,OAAOS;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsBhD,QAAQgD,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClCnB,aACAC,OACAE;IAEJ;AACF"}
@@ -81,7 +81,7 @@ async function autoGlmLocate(elementDescription, options, getSystemPrompt) {
81
81
  });
82
82
  msgs.push(...addOns);
83
83
  }
84
- const { content: rawResponseContent, usage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, modelRuntime, {
84
+ const { content: rawResponseContent, usage, rawChoiceMessage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, modelRuntime, {
85
85
  abortSignal: options.abortSignal
86
86
  });
87
87
  debugInspect('auto-glm rawResponse:', rawResponseContent);
@@ -131,6 +131,7 @@ async function autoGlmLocate(elementDescription, options, getSystemPrompt) {
131
131
  errors
132
132
  },
133
133
  rawResponse: rawResponseContent,
134
+ rawChoiceMessage,
134
135
  usage,
135
136
  reasoning_content: parsed.think
136
137
  };
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/models/auto-glm/locate.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/locate.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { Rect } from '@/types';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type { TUserPrompt } from '../../../common';\nimport {\n type InspectAIArgs,\n extraTextFromUserPrompt,\n promptsToChatParam,\n} from '../../inspect';\nimport { findElementPrompt } from '../../prompt/llm-locator';\nimport { callAIWithStringResponse } from '../../service-caller/index';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\nimport { pixelBboxToRect } from '../../workflows/inspect/locate-result-rect';\nimport { mapSearchAreaPixelBboxToOriginalPixelBbox } from '../../workflows/inspect/search-area-mapping';\nimport type {\n LocateOptions,\n LocateResult,\n} from '../../workflows/inspect/types';\nimport { parseAutoGLMLocateResponse } from './parser';\n\nconst debugInspect = getDebug('ai:inspect');\n\nexport async function autoGlmLocate(\n elementDescription: TUserPrompt,\n options: LocateOptions,\n getSystemPrompt: () => string,\n): Promise<LocateResult> {\n const { context, modelRuntime } = options;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(elementDescription, 'cannot find the target element description');\n const elementDescriptionText = extraTextFromUserPrompt(elementDescription);\n const userInstructionPrompt = findElementPrompt(elementDescriptionText);\n\n const locateImage = options.searchConfig?.image ?? {\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n };\n const imagePayload = locateImage.imageBase64;\n const imageWidth = locateImage.width;\n const imageHeight = locateImage.height;\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: getSystemPrompt() },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: `Tap: ${userInstructionPrompt}`,\n },\n ],\n },\n ];\n\n if (typeof elementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: elementDescription.images,\n convertHttpImage2Base64: elementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const { content: rawResponseContent, usage } = await callAIWithStringResponse(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElement: LocateResultElement | undefined;\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n const ctx = { preparedSize: { width: imageWidth, height: imageHeight } };\n const targetPixelBbox = finalizePixelBbox(\n mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: [x, y] },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: 1000 },\n ),\n parsed.coordinates,\n ctx,\n );\n resRect = pixelBboxToRect(\n mapSearchAreaPixelBboxToOriginalPixelBbox(\n targetPixelBbox,\n options.searchConfig?.mapping,\n ),\n );\n\n debugInspect('auto-glm resRect:', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n elementDescriptionText as string,\n );\n\n if (element) {\n matchedElement = element;\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n element: matchedElement,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","autoGlmLocate","elementDescription","options","getSystemPrompt","context","modelRuntime","screenshotBase64","assert","elementDescriptionText","extraTextFromUserPrompt","userInstructionPrompt","findElementPrompt","locateImage","imagePayload","imageWidth","imageHeight","msgs","addOns","promptsToChatParam","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElement","errors","x","y","ctx","targetPixelBbox","finalizePixelBbox","mapLocateResultToPixelBboxByCoordinates","pixelBboxToRect","mapSearchAreaPixelBboxToOriginalPixelBbox","element","generateElementByRect"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;ACiBA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEvB,eAAeC,cACpBC,kBAA+B,EAC/BC,OAAsB,EACtBC,eAA6B;IAE7B,MAAM,EAAEC,OAAO,EAAEC,YAAY,EAAE,GAAGH;IAClC,MAAMI,mBAAmBF,QAAQ,UAAU,CAAC,MAAM;IAElDG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,oBAAoB;IAC3B,MAAMO,yBAAyBC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,EAAwBR;IACvD,MAAMS,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBH;IAEhD,MAAMI,cAAcV,QAAQ,YAAY,EAAE,SAAS;QACjD,aAAaI;QACb,OAAOF,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;IACjC;IACA,MAAMS,eAAeD,YAAY,WAAW;IAC5C,MAAME,aAAaF,YAAY,KAAK;IACpC,MAAMG,cAAcH,YAAY,MAAM;IAEtC,MAAMI,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASb;QAAkB;QAC7C;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKU;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM,CAAC,KAAK,EAAEH,uBAAuB;gBACvC;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOT,oBAAiC;QAC1C,MAAMgB,SAAS,MAAMC,AAAAA,IAAAA,oCAAAA,kBAAAA,AAAAA,EAAmB;YACtC,QAAQjB,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAe,KAAK,IAAI,IAAIC;IACf;IAEA,MAAM,EAAE,SAASE,kBAAkB,EAAEC,KAAK,EAAE,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnDL,MACAX,cACA;QACE,aAAaH,QAAQ,WAAW;IAClC;IAGFJ,aAAa,yBAAyBqB;IAEtC,MAAMG,SAASC,AAAAA,IAAAA,mCAAAA,0BAAAA,AAAAA,EAA2BJ;IAE1CrB,aAAa,sBAAsBwB,OAAO,KAAK;IAC/CxB,aAAa,yBAAyBwB,OAAO,WAAW;IAExD,IAAIE;IACJ,IAAIC;IACJ,IAAIC,SAAmB,EAAE;IAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;QACvCI,SAAS;YAACJ,OAAO,KAAK,IAAI;SAAoC;QAC9DxB,aAAa,yBAAyB4B,MAAM,CAAC,EAAE;IACjD,OAAO;QACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;QAEnCxB,aAAa,iCAAiC;YAAE6B;YAAGC;QAAE;QAErD,MAAMC,MAAM;YAAE,cAAc;gBAAE,OAAOf;gBAAY,QAAQC;YAAY;QAAE;QACvE,MAAMe,kBAAkBC,AAAAA,IAAAA,wBAAAA,iBAAAA,AAAAA,EACtBC,AAAAA,IAAAA,qCAAAA,uCAAAA,AAAAA,EACE;YAAE,MAAM;YAAS,aAAa;gBAACL;gBAAGC;aAAE;QAAC,GACrCC,KACA;YAAE,OAAO;YAAS,OAAO;YAAM,cAAc;QAAK,IAEpDP,OAAO,WAAW,EAClBO;QAEFL,UAAUS,AAAAA,IAAAA,sCAAAA,eAAAA,AAAAA,EACRC,AAAAA,IAAAA,uCAAAA,yCAAAA,AAAAA,EACEJ,iBACA5B,QAAQ,YAAY,EAAE;QAI1BJ,aAAa,qBAAqB0B;QAElC,MAAMW,UAA+BC,AAAAA,IAAAA,0BAAAA,qBAAAA,AAAAA,EACnCZ,SACAhB;QAGF,IAAI2B,SACFV,iBAAiBU;IAErB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,SAASC;YACTC;QACF;QACA,aAAaP;QACbC;QACA,mBAAmBE,OAAO,KAAK;IACjC;AACF"}
1
+ {"version":3,"file":"ai-model/models/auto-glm/locate.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/locate.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { Rect } from '@/types';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type { TUserPrompt } from '../../../common';\nimport {\n type InspectAIArgs,\n extraTextFromUserPrompt,\n promptsToChatParam,\n} from '../../inspect';\nimport { findElementPrompt } from '../../prompt/llm-locator';\nimport { callAIWithStringResponse } from '../../service-caller/index';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\nimport { pixelBboxToRect } from '../../workflows/inspect/locate-result-rect';\nimport { mapSearchAreaPixelBboxToOriginalPixelBbox } from '../../workflows/inspect/search-area-mapping';\nimport type {\n LocateOptions,\n LocateResult,\n} from '../../workflows/inspect/types';\nimport { parseAutoGLMLocateResponse } from './parser';\n\nconst debugInspect = getDebug('ai:inspect');\n\nexport async function autoGlmLocate(\n elementDescription: TUserPrompt,\n options: LocateOptions,\n getSystemPrompt: () => string,\n): Promise<LocateResult> {\n const { context, modelRuntime } = options;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(elementDescription, 'cannot find the target element description');\n const elementDescriptionText = extraTextFromUserPrompt(elementDescription);\n const userInstructionPrompt = findElementPrompt(elementDescriptionText);\n\n const locateImage = options.searchConfig?.image ?? {\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n };\n const imagePayload = locateImage.imageBase64;\n const imageWidth = locateImage.width;\n const imageHeight = locateImage.height;\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: getSystemPrompt() },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: `Tap: ${userInstructionPrompt}`,\n },\n ],\n },\n ];\n\n if (typeof elementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: elementDescription.images,\n convertHttpImage2Base64: elementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponseContent,\n usage,\n rawChoiceMessage,\n } = await callAIWithStringResponse(msgs, modelRuntime, {\n abortSignal: options.abortSignal,\n });\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElement: LocateResultElement | undefined;\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n const ctx = { preparedSize: { width: imageWidth, height: imageHeight } };\n const targetPixelBbox = finalizePixelBbox(\n mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: [x, y] },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: 1000 },\n ),\n parsed.coordinates,\n ctx,\n );\n resRect = pixelBboxToRect(\n mapSearchAreaPixelBboxToOriginalPixelBbox(\n targetPixelBbox,\n options.searchConfig?.mapping,\n ),\n );\n\n debugInspect('auto-glm resRect:', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n elementDescriptionText as string,\n );\n\n if (element) {\n matchedElement = element;\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n element: matchedElement,\n errors,\n },\n rawResponse: rawResponseContent,\n rawChoiceMessage,\n usage,\n reasoning_content: parsed.think,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","autoGlmLocate","elementDescription","options","getSystemPrompt","context","modelRuntime","screenshotBase64","assert","elementDescriptionText","extraTextFromUserPrompt","userInstructionPrompt","findElementPrompt","locateImage","imagePayload","imageWidth","imageHeight","msgs","addOns","promptsToChatParam","rawResponseContent","usage","rawChoiceMessage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElement","errors","x","y","ctx","targetPixelBbox","finalizePixelBbox","mapLocateResultToPixelBboxByCoordinates","pixelBboxToRect","mapSearchAreaPixelBboxToOriginalPixelBbox","element","generateElementByRect"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;ACiBA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEvB,eAAeC,cACpBC,kBAA+B,EAC/BC,OAAsB,EACtBC,eAA6B;IAE7B,MAAM,EAAEC,OAAO,EAAEC,YAAY,EAAE,GAAGH;IAClC,MAAMI,mBAAmBF,QAAQ,UAAU,CAAC,MAAM;IAElDG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,oBAAoB;IAC3B,MAAMO,yBAAyBC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,EAAwBR;IACvD,MAAMS,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBH;IAEhD,MAAMI,cAAcV,QAAQ,YAAY,EAAE,SAAS;QACjD,aAAaI;QACb,OAAOF,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;IACjC;IACA,MAAMS,eAAeD,YAAY,WAAW;IAC5C,MAAME,aAAaF,YAAY,KAAK;IACpC,MAAMG,cAAcH,YAAY,MAAM;IAEtC,MAAMI,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASb;QAAkB;QAC7C;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKU;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM,CAAC,KAAK,EAAEH,uBAAuB;gBACvC;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOT,oBAAiC;QAC1C,MAAMgB,SAAS,MAAMC,AAAAA,IAAAA,oCAAAA,kBAAAA,AAAAA,EAAmB;YACtC,QAAQjB,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAe,KAAK,IAAI,IAAIC;IACf;IAEA,MAAM,EACJ,SAASE,kBAAkB,EAC3BC,KAAK,EACLC,gBAAgB,EACjB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAAyBN,MAAMX,cAAc;QACrD,aAAaH,QAAQ,WAAW;IAClC;IAEAJ,aAAa,yBAAyBqB;IAEtC,MAAMI,SAASC,AAAAA,IAAAA,mCAAAA,0BAAAA,AAAAA,EAA2BL;IAE1CrB,aAAa,sBAAsByB,OAAO,KAAK;IAC/CzB,aAAa,yBAAyByB,OAAO,WAAW;IAExD,IAAIE;IACJ,IAAIC;IACJ,IAAIC,SAAmB,EAAE;IAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;QACvCI,SAAS;YAACJ,OAAO,KAAK,IAAI;SAAoC;QAC9DzB,aAAa,yBAAyB6B,MAAM,CAAC,EAAE;IACjD,OAAO;QACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;QAEnCzB,aAAa,iCAAiC;YAAE8B;YAAGC;QAAE;QAErD,MAAMC,MAAM;YAAE,cAAc;gBAAE,OAAOhB;gBAAY,QAAQC;YAAY;QAAE;QACvE,MAAMgB,kBAAkBC,AAAAA,IAAAA,wBAAAA,iBAAAA,AAAAA,EACtBC,AAAAA,IAAAA,qCAAAA,uCAAAA,AAAAA,EACE;YAAE,MAAM;YAAS,aAAa;gBAACL;gBAAGC;aAAE;QAAC,GACrCC,KACA;YAAE,OAAO;YAAS,OAAO;YAAM,cAAc;QAAK,IAEpDP,OAAO,WAAW,EAClBO;QAEFL,UAAUS,AAAAA,IAAAA,sCAAAA,eAAAA,AAAAA,EACRC,AAAAA,IAAAA,uCAAAA,yCAAAA,AAAAA,EACEJ,iBACA7B,QAAQ,YAAY,EAAE;QAI1BJ,aAAa,qBAAqB2B;QAElC,MAAMW,UAA+BC,AAAAA,IAAAA,0BAAAA,qBAAAA,AAAAA,EACnCZ,SACAjB;QAGF,IAAI4B,SACFV,iBAAiBU;IAErB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,SAASC;YACTC;QACF;QACA,aAAaR;QACbE;QACAD;QACA,mBAAmBG,OAAO,KAAK;IACjC;AACF"}
@@ -67,7 +67,7 @@ async function autoGlmPlanning(userInstruction, options, getSystemPrompt) {
67
67
  ...referenceImageMessages,
68
68
  ...conversationHistory.snapshot(1)
69
69
  ];
70
- const { content: rawResponse, usage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, options.modelRuntime, {
70
+ const { content: rawResponse, usage, rawChoiceMessage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, options.modelRuntime, {
71
71
  abortSignal: options.abortSignal
72
72
  });
73
73
  debug('autoGLMPlanning rawResponse:', rawResponse);
@@ -83,7 +83,7 @@ async function autoGlmPlanning(userInstruction, options, getSystemPrompt) {
83
83
  debug('Transformed actions:', transformedActions);
84
84
  } catch (parseError) {
85
85
  const errorMessage = parseError instanceof Error ? parseError.message : String(parseError);
86
- throw new index_js_namespaceObject.AIResponseParseError(`Parse error: ${errorMessage}`, JSON.stringify(rawResponse, void 0, 2), usage);
86
+ throw new index_js_namespaceObject.AIResponseParseError(`Parse error: ${errorMessage}`, JSON.stringify(rawResponse, void 0, 2), usage, rawChoiceMessage);
87
87
  }
88
88
  conversationHistory.append({
89
89
  role: 'assistant',
@@ -95,7 +95,8 @@ async function autoGlmPlanning(userInstruction, options, getSystemPrompt) {
95
95
  log: rawResponse,
96
96
  usage,
97
97
  shouldContinuePlanning,
98
- rawResponse: JSON.stringify(rawResponse, void 0, 2)
98
+ rawResponse: JSON.stringify(rawResponse, void 0, 2),
99
+ rawChoiceMessage
99
100
  };
100
101
  }
101
102
  exports.autoGlmPlanning = __webpack_exports__.autoGlmPlanning;
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/models/auto-glm/planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { type TUserPrompt, userPromptToString } from '@/common';\nimport type { PlanningAIResponse } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport type { PlanOptions } from '../../workflows/planning/types';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGlmPlanning(\n userInstruction: TUserPrompt,\n options: PlanOptions,\n getSystemPrompt: () => string,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, actionContext } = options;\n\n const systemPrompt =\n getSystemPrompt() +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n const userInstructionText = userPromptToString(userInstruction);\n const referenceImageMessages = options.referenceImageMessages ?? [];\n\n const userInstructionMessage: ChatCompletionMessageParam = {\n role: 'user',\n content: [{ type: 'text', text: userInstructionText }],\n };\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n userInstructionMessage,\n ...referenceImageMessages,\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n options.modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(\n parsedAction,\n context.shotSize,\n options.actionSpace,\n );\n debug('Transformed actions:', transformedActions);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(rawResponse, undefined, 2),\n usage,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: `<think>${parsedResponse.think}</think><answer>${parsedResponse.content}</answer>`,\n });\n\n const shouldContinuePlanning = !parsedResponse.content.startsWith('finish(');\n\n return {\n actions: transformedActions,\n log: rawResponse,\n usage,\n shouldContinuePlanning,\n rawResponse: JSON.stringify(rawResponse, undefined, 2),\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","autoGlmPlanning","userInstruction","options","getSystemPrompt","conversationHistory","context","actionContext","systemPrompt","imagePayloadBase64","userInstructionText","userPromptToString","referenceImageMessages","userInstructionMessage","msgs","rawResponse","usage","callAIWithStringResponse","parsedResponse","transformedActions","parseAutoGLMResponse","parsedAction","parseAction","transformAutoGLMAction","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","shouldContinuePlanning"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;ACMA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,gBACpBC,eAA4B,EAC5BC,OAAoB,EACpBC,eAA6B;IAE7B,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,aAAa,EAAE,GAAGJ;IAExD,MAAMK,eACJJ,oBACCG,CAAAA,gBACG,CAAC,yBAAyB,EAAEA,cAAc,0BAA0B,CAAC,GACrE,EAAC;IAEP,MAAME,qBAAqBH,QAAQ,UAAU,CAAC,MAAM;IACpD,MAAMI,sBAAsBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBT;IAC/C,MAAMU,yBAAyBT,QAAQ,sBAAsB,IAAI,EAAE;IAEnE,MAAMU,yBAAqD;QACzD,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAQ,MAAMH;YAAoB;SAAE;IACxD;IACAL,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAa,WAAW;oBAAE,KAAKI;gBAAmB;YAAE;SAAE;IAC1E;IAEA,MAAMK,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASN;QAAa;QACxCK;WACGD;WACAP,oBAAoB,QAAQ,CAAC;KACjC;IAED,MAAM,EAAE,SAASU,WAAW,EAAEC,KAAK,EAAE,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAC5CH,MACAX,QAAQ,YAAY,EACpB;QACE,aAAaA,QAAQ,WAAW;IAClC;IAGFJ,MAAM,gCAAgCgB;IAEtC,IAAIG;IACJ,IAAIC;IAEJ,IAAI;QACFD,iBAAiBE,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBL;QACtChB,MAAM,yBAAyBmB,eAAe,KAAK;QACnDnB,MAAM,uBAAuBmB,eAAe,OAAO;QAEnD,MAAMG,eAAeC,AAAAA,IAAAA,mCAAAA,WAAAA,AAAAA,EAAYJ;QACjCnB,MAAM,yBAAyBsB;QAC/BF,qBAAqBI,AAAAA,IAAAA,oCAAAA,sBAAAA,AAAAA,EACnBF,cACAf,QAAQ,QAAQ,EAChBH,QAAQ,WAAW;QAErBJ,MAAM,wBAAwBoB;IAChC,EAAE,OAAOK,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,yBAAAA,oBAAoBA,CAC5B,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACd,aAAae,QAAW,IACvCd;IAEJ;IAEAX,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS,CAAC,OAAO,EAAEa,eAAe,KAAK,CAAC,gBAAgB,EAAEA,eAAe,OAAO,CAAC,SAAS,CAAC;IAC7F;IAEA,MAAMa,yBAAyB,CAACb,eAAe,OAAO,CAAC,UAAU,CAAC;IAElE,OAAO;QACL,SAASC;QACT,KAAKJ;QACLC;QACAe;QACA,aAAaF,KAAK,SAAS,CAACd,aAAae,QAAW;IACtD;AACF"}
1
+ {"version":3,"file":"ai-model/models/auto-glm/planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { type TUserPrompt, userPromptToString } from '@/common';\nimport type { PlanningAIResponse } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport type { PlanOptions } from '../../workflows/planning/types';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGlmPlanning(\n userInstruction: TUserPrompt,\n options: PlanOptions,\n getSystemPrompt: () => string,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, actionContext } = options;\n\n const systemPrompt =\n getSystemPrompt() +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n const userInstructionText = userPromptToString(userInstruction);\n const referenceImageMessages = options.referenceImageMessages ?? [];\n\n const userInstructionMessage: ChatCompletionMessageParam = {\n role: 'user',\n content: [{ type: 'text', text: userInstructionText }],\n };\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n userInstructionMessage,\n ...referenceImageMessages,\n ...conversationHistory.snapshot(1),\n ];\n\n const {\n content: rawResponse,\n usage,\n rawChoiceMessage,\n } = await callAIWithStringResponse(msgs, options.modelRuntime, {\n abortSignal: options.abortSignal,\n });\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(\n parsedAction,\n context.shotSize,\n options.actionSpace,\n );\n debug('Transformed actions:', transformedActions);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(rawResponse, undefined, 2),\n usage,\n rawChoiceMessage,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: `<think>${parsedResponse.think}</think><answer>${parsedResponse.content}</answer>`,\n });\n\n const shouldContinuePlanning = !parsedResponse.content.startsWith('finish(');\n\n return {\n actions: transformedActions,\n log: rawResponse,\n usage,\n shouldContinuePlanning,\n rawResponse: JSON.stringify(rawResponse, undefined, 2),\n rawChoiceMessage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","autoGlmPlanning","userInstruction","options","getSystemPrompt","conversationHistory","context","actionContext","systemPrompt","imagePayloadBase64","userInstructionText","userPromptToString","referenceImageMessages","userInstructionMessage","msgs","rawResponse","usage","rawChoiceMessage","callAIWithStringResponse","parsedResponse","transformedActions","parseAutoGLMResponse","parsedAction","parseAction","transformAutoGLMAction","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","shouldContinuePlanning"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;ACMA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,gBACpBC,eAA4B,EAC5BC,OAAoB,EACpBC,eAA6B;IAE7B,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,aAAa,EAAE,GAAGJ;IAExD,MAAMK,eACJJ,oBACCG,CAAAA,gBACG,CAAC,yBAAyB,EAAEA,cAAc,0BAA0B,CAAC,GACrE,EAAC;IAEP,MAAME,qBAAqBH,QAAQ,UAAU,CAAC,MAAM;IACpD,MAAMI,sBAAsBC,AAAAA,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBT;IAC/C,MAAMU,yBAAyBT,QAAQ,sBAAsB,IAAI,EAAE;IAEnE,MAAMU,yBAAqD;QACzD,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAQ,MAAMH;YAAoB;SAAE;IACxD;IACAL,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAa,WAAW;oBAAE,KAAKI;gBAAmB;YAAE;SAAE;IAC1E;IAEA,MAAMK,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASN;QAAa;QACxCK;WACGD;WACAP,oBAAoB,QAAQ,CAAC;KACjC;IAED,MAAM,EACJ,SAASU,WAAW,EACpBC,KAAK,EACLC,gBAAgB,EACjB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAAyBJ,MAAMX,QAAQ,YAAY,EAAE;QAC7D,aAAaA,QAAQ,WAAW;IAClC;IAEAJ,MAAM,gCAAgCgB;IAEtC,IAAII;IACJ,IAAIC;IAEJ,IAAI;QACFD,iBAAiBE,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBN;QACtChB,MAAM,yBAAyBoB,eAAe,KAAK;QACnDpB,MAAM,uBAAuBoB,eAAe,OAAO;QAEnD,MAAMG,eAAeC,AAAAA,IAAAA,mCAAAA,WAAAA,AAAAA,EAAYJ;QACjCpB,MAAM,yBAAyBuB;QAC/BF,qBAAqBI,AAAAA,IAAAA,oCAAAA,sBAAAA,AAAAA,EACnBF,cACAhB,QAAQ,QAAQ,EAChBH,QAAQ,WAAW;QAErBJ,MAAM,wBAAwBqB;IAChC,EAAE,OAAOK,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,yBAAAA,oBAAoBA,CAC5B,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACf,aAAagB,QAAW,IACvCf,OACAC;IAEJ;IAEAZ,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS,CAAC,OAAO,EAAEc,eAAe,KAAK,CAAC,gBAAgB,EAAEA,eAAe,OAAO,CAAC,SAAS,CAAC;IAC7F;IAEA,MAAMa,yBAAyB,CAACb,eAAe,OAAO,CAAC,UAAU,CAAC;IAElE,OAAO;QACL,SAASC;QACT,KAAKL;QACLC;QACAgB;QACA,aAAaF,KAAK,SAAS,CAACf,aAAagB,QAAW;QACpDd;IACF;AACF"}
@@ -27,18 +27,24 @@ __webpack_require__.d(__webpack_exports__, {
27
27
  gptAdapters: ()=>gptAdapters
28
28
  });
29
29
  const originalImageDetailForDefaultIntent = (input)=>'default' === input.intent || input.requiresOriginalImageDetail ? 'original' : void 0;
30
- const buildGpt5ChatCompletionParams = ()=>({
30
+ const buildGpt5ChatCompletionParams = (input)=>{
31
+ const { midsceneDefaults, userConfig } = input;
32
+ const { reasoningEnabled, reasoningEffort } = userConfig;
33
+ const commonOverrideConfig = {};
34
+ if (void 0 !== userConfig.temperature) commonOverrideConfig.temperature = userConfig.temperature;
35
+ const effectiveReasoningEffort = true === reasoningEnabled ? reasoningEffort ?? 'medium' : 'none';
36
+ return {
31
37
  config: {
32
- temperature: void 0
38
+ ...midsceneDefaults,
39
+ ...commonOverrideConfig,
40
+ reasoning_effort: effectiveReasoningEffort
33
41
  }
34
- });
42
+ };
43
+ };
35
44
  const gptAdapters = {
36
45
  'gpt-5': {
37
46
  chatCompletion: {
38
47
  unsupportedUserConfig: [
39
- 'temperature',
40
- 'reasoningEnabled',
41
- 'reasoningEffort',
42
48
  'reasoningBudget'
43
49
  ],
44
50
  buildChatCompletionParams: buildGpt5ChatCompletionParams,
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/models/gpt.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/models/gpt.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TModelFamily } from '@midscene/shared/env';\nimport type {\n ChatCompletionCallContext,\n ChatCompletionParamsResult,\n ImageDetail,\n ModelAdapterDefinition,\n} from './types';\n\nconst originalImageDetailForDefaultIntent = (\n input: ChatCompletionCallContext,\n): ImageDetail | undefined =>\n input.intent === 'default' || input.requiresOriginalImageDetail\n ? 'original'\n : undefined;\n\nconst buildGpt5ChatCompletionParams = (): ChatCompletionParamsResult => {\n return {\n config: {\n // GPT-5 Chat Completions does not support temperature control.\n temperature: undefined,\n },\n };\n};\n\nexport const gptAdapters = {\n 'gpt-5': {\n chatCompletion: {\n unsupportedUserConfig: [\n 'temperature',\n 'reasoningEnabled',\n 'reasoningEffort',\n 'reasoningBudget',\n ],\n buildChatCompletionParams: buildGpt5ChatCompletionParams,\n resolveImageDetail: originalImageDetailForDefaultIntent,\n },\n locate: {\n resultAdapter: {\n coordinates: { shape: 'bbox', order: 'xy' },\n },\n },\n },\n} satisfies Pick<Record<TModelFamily, ModelAdapterDefinition>, 'gpt-5'>;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","originalImageDetailForDefaultIntent","input","undefined","buildGpt5ChatCompletionParams","gptAdapters"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;ACEA,MAAMI,sCAAsC,CAC1CC,QAEAA,AAAiB,cAAjBA,MAAM,MAAM,IAAkBA,MAAM,2BAA2B,GAC3D,aACAC;AAEN,MAAMC,gCAAgC,IAC7B;QACL,QAAQ;YAEN,aAAaD;QACf;IACF;AAGK,MAAME,cAAc;IACzB,SAAS;QACP,gBAAgB;YACd,uBAAuB;gBACrB;gBACA;gBACA;gBACA;aACD;YACD,2BAA2BD;YAC3B,oBAAoBH;QACtB;QACA,QAAQ;YACN,eAAe;gBACb,aAAa;oBAAE,OAAO;oBAAQ,OAAO;gBAAK;YAC5C;QACF;IACF;AACF"}
1
+ {"version":3,"file":"ai-model/models/gpt.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/models/gpt.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TModelFamily } from '@midscene/shared/env';\nimport type {\n ChatCompletionCallContext,\n ChatCompletionParamsResult,\n ImageDetail,\n ModelAdapterDefinition,\n} from './types';\n\nconst originalImageDetailForDefaultIntent = (\n input: ChatCompletionCallContext,\n): ImageDetail | undefined =>\n input.intent === 'default' || input.requiresOriginalImageDetail\n ? 'original'\n : undefined;\n\nconst buildGpt5ChatCompletionParams = (\n input: ChatCompletionCallContext,\n): ChatCompletionParamsResult => {\n const { midsceneDefaults, userConfig } = input;\n const { reasoningEnabled, reasoningEffort } = userConfig;\n const commonOverrideConfig: Record<string, unknown> = {};\n\n if (userConfig.temperature !== undefined) {\n commonOverrideConfig.temperature = userConfig.temperature;\n }\n\n const effectiveReasoningEffort =\n reasoningEnabled === true ? (reasoningEffort ?? 'medium') : 'none';\n\n return {\n config: {\n ...midsceneDefaults,\n ...commonOverrideConfig,\n reasoning_effort: effectiveReasoningEffort,\n },\n };\n};\n\nexport const gptAdapters = {\n 'gpt-5': {\n chatCompletion: {\n unsupportedUserConfig: ['reasoningBudget'],\n buildChatCompletionParams: buildGpt5ChatCompletionParams,\n resolveImageDetail: originalImageDetailForDefaultIntent,\n },\n locate: {\n resultAdapter: {\n coordinates: { shape: 'bbox', order: 'xy' },\n },\n },\n },\n} satisfies Pick<Record<TModelFamily, ModelAdapterDefinition>, 'gpt-5'>;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","originalImageDetailForDefaultIntent","input","undefined","buildGpt5ChatCompletionParams","midsceneDefaults","userConfig","reasoningEnabled","reasoningEffort","commonOverrideConfig","effectiveReasoningEffort","gptAdapters"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;ACEA,MAAMI,sCAAsC,CAC1CC,QAEAA,AAAiB,cAAjBA,MAAM,MAAM,IAAkBA,MAAM,2BAA2B,GAC3D,aACAC;AAEN,MAAMC,gCAAgC,CACpCF;IAEA,MAAM,EAAEG,gBAAgB,EAAEC,UAAU,EAAE,GAAGJ;IACzC,MAAM,EAAEK,gBAAgB,EAAEC,eAAe,EAAE,GAAGF;IAC9C,MAAMG,uBAAgD,CAAC;IAEvD,IAAIH,AAA2BH,WAA3BG,WAAW,WAAW,EACxBG,qBAAqB,WAAW,GAAGH,WAAW,WAAW;IAG3D,MAAMI,2BACJH,AAAqB,SAArBA,mBAA6BC,mBAAmB,WAAY;IAE9D,OAAO;QACL,QAAQ;YACN,GAAGH,gBAAgB;YACnB,GAAGI,oBAAoB;YACvB,kBAAkBC;QACpB;IACF;AACF;AAEO,MAAMC,cAAc;IACzB,SAAS;QACP,gBAAgB;YACd,uBAAuB;gBAAC;aAAkB;YAC1C,2BAA2BP;YAC3B,oBAAoBH;QACtB;QACA,QAAQ;YACN,eAAe;gBACb,aAAa;oBAAE,OAAO;oBAAQ,OAAO;gBAAK;YAC5C;QACF;IACF;AACF"}
@@ -0,0 +1,76 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ kimiAdapters: ()=>kimiAdapters
28
+ });
29
+ const buildKimiChatCompletionParams = (input)=>{
30
+ const { midsceneDefaults, userConfig } = input;
31
+ const { reasoningEnabled } = userConfig;
32
+ const effectiveReasoningEnabled = reasoningEnabled ?? false;
33
+ const commonOverrideConfig = {};
34
+ commonOverrideConfig.temperature = void 0;
35
+ const modelSpecificConfig = {
36
+ thinking: {
37
+ type: effectiveReasoningEnabled ? 'enabled' : 'disabled'
38
+ }
39
+ };
40
+ return {
41
+ config: {
42
+ ...midsceneDefaults,
43
+ ...commonOverrideConfig,
44
+ ...modelSpecificConfig
45
+ }
46
+ };
47
+ };
48
+ const kimiAdapters = {
49
+ kimi: {
50
+ chatCompletion: {
51
+ unsupportedUserConfig: [
52
+ 'reasoningEffort',
53
+ 'reasoningBudget'
54
+ ],
55
+ buildChatCompletionParams: buildKimiChatCompletionParams
56
+ },
57
+ locate: {
58
+ resultAdapter: {
59
+ coordinates: {
60
+ shape: 'point',
61
+ order: 'xy',
62
+ normalizedBy: 1
63
+ }
64
+ }
65
+ }
66
+ }
67
+ };
68
+ exports.kimiAdapters = __webpack_exports__.kimiAdapters;
69
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
70
+ "kimiAdapters"
71
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
72
+ Object.defineProperty(exports, '__esModule', {
73
+ value: true
74
+ });
75
+
76
+ //# sourceMappingURL=kimi.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/kimi.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/models/kimi.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TModelFamily } from '@midscene/shared/env';\nimport type {\n ChatCompletionCallContext,\n ChatCompletionParamsResult,\n ModelAdapterDefinition,\n} from './types';\n\nconst buildKimiChatCompletionParams = (\n input: ChatCompletionCallContext,\n): ChatCompletionParamsResult => {\n const { midsceneDefaults, userConfig } = input;\n const { reasoningEnabled } = userConfig;\n const effectiveReasoningEnabled = reasoningEnabled ?? false;\n const commonOverrideConfig: Record<string, unknown> = {};\n\n // kimi disallow custom temperature\n commonOverrideConfig.temperature = undefined;\n\n const modelSpecificConfig: Record<string, unknown> = {\n thinking: {\n type: effectiveReasoningEnabled ? 'enabled' : 'disabled',\n },\n };\n\n return {\n config: {\n ...midsceneDefaults,\n ...commonOverrideConfig,\n ...modelSpecificConfig,\n },\n };\n};\n\nexport const kimiAdapters = {\n kimi: {\n chatCompletion: {\n unsupportedUserConfig: ['reasoningEffort', 'reasoningBudget'],\n buildChatCompletionParams: buildKimiChatCompletionParams,\n },\n locate: {\n resultAdapter: {\n coordinates: { shape: 'point', order: 'xy', normalizedBy: 1 },\n },\n },\n },\n} satisfies Pick<Record<TModelFamily, ModelAdapterDefinition>, 'kimi'>;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","buildKimiChatCompletionParams","input","midsceneDefaults","userConfig","reasoningEnabled","effectiveReasoningEnabled","commonOverrideConfig","undefined","modelSpecificConfig","kimiAdapters"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;ACCA,MAAMI,gCAAgC,CACpCC;IAEA,MAAM,EAAEC,gBAAgB,EAAEC,UAAU,EAAE,GAAGF;IACzC,MAAM,EAAEG,gBAAgB,EAAE,GAAGD;IAC7B,MAAME,4BAA4BD,oBAAoB;IACtD,MAAME,uBAAgD,CAAC;IAGvDA,qBAAqB,WAAW,GAAGC;IAEnC,MAAMC,sBAA+C;QACnD,UAAU;YACR,MAAMH,4BAA4B,YAAY;QAChD;IACF;IAEA,OAAO;QACL,QAAQ;YACN,GAAGH,gBAAgB;YACnB,GAAGI,oBAAoB;YACvB,GAAGE,mBAAmB;QACxB;IACF;AACF;AAEO,MAAMC,eAAe;IAC1B,MAAM;QACJ,gBAAgB;YACd,uBAAuB;gBAAC;gBAAmB;aAAkB;YAC7D,2BAA2BT;QAC7B;QACA,QAAQ;YACN,eAAe;gBACb,aAAa;oBAAE,OAAO;oBAAS,OAAO;oBAAM,cAAc;gBAAE;YAC9D;QACF;IACF;AACF"}
@@ -35,6 +35,7 @@ const external_doubao_js_namespaceObject = require("./doubao.js");
35
35
  const external_gemini_js_namespaceObject = require("./gemini.js");
36
36
  const external_glm_js_namespaceObject = require("./glm.js");
37
37
  const external_gpt_js_namespaceObject = require("./gpt.js");
38
+ const external_kimi_js_namespaceObject = require("./kimi.js");
38
39
  const external_qwen_js_namespaceObject = require("./qwen.js");
39
40
  const external_resolved_js_namespaceObject = require("./resolved.js");
40
41
  const external_ui_tars_adapter_js_namespaceObject = require("./ui-tars/adapter.js");
@@ -45,7 +46,8 @@ const MODEL_ADAPTER_CONFIGS = {
45
46
  ...external_ui_tars_adapter_js_namespaceObject.uiTarsAdapters,
46
47
  ...external_glm_js_namespaceObject.glmAdapters,
47
48
  ...adapter_js_namespaceObject.autoGlmAdapters,
48
- ...external_gpt_js_namespaceObject.gptAdapters
49
+ ...external_gpt_js_namespaceObject.gptAdapters,
50
+ ...external_kimi_js_namespaceObject.kimiAdapters
49
51
  };
50
52
  const modelAdapterCache = new Map();
51
53
  const debugModelAdapter = (0, logger_namespaceObject.getDebug)('ai:model-adapter');